# Author: Ewen Gallic
# Date: November 17th, 2016

# ----------------------------------- #
# KERNEL DENSITY ESTIMATION FUNCTIONS #
# ----------------------------------- #

# @n:       number of points to define the circle
# @centre:  center of the circle
# @radius:  radius of the circle
sCircle <- function(n = 100, centre = c(0, 0), radius){
  theta <- seq(0, 2*pi, length = n)
  m <- cbind(cos(theta), sin(theta)) * radius
  m[, 1] <- m[, 1] + centre[1]
  m[, 2] <- m[, 2] + centre[2]
  colnames(m) <- c("x", "y")
  m
}# End of sCircle()


# @x:       center of the circle,
# @h:       bandwidth scalar,
# @polygon: polygon on which data points lie.
sWeights <- function(x, h, polygon) {  
  leCercle <- sCircle(centre = x, radius = 1.759*h)
  POLcercle <- as(leCercle[-nrow(leCercle),], "gpc.poly")
  return(area.poly(gpclib::intersect(polygon, POLcercle)) / area.poly(POLcercle))
}# End of sWeights()


# @U:           data points,
# @polygon:     polygon on which points lie,
# @optimal:     if TRUE, uses Hpi() to select the optimal bandwidth,
# @h:           only if optimal=FALSE, scalar bandwidth,
# @parallel:    if TRUE, computes the weights using SNOW clusters,
# @n_clusters:  only if n_clusters=TRUE, defines the number of clusters.
sKDE <- function(U, polygon, optimal = TRUE, h = .1, parallel = FALSE, n_clusters = 4){
  if(!class(polygon) == "gpc.poly") polygon <- as(polygon, "gpc.poly")
  if(class(U) == "data.frame") U <- as.matrix(U)
  IND <- which(is.na(U[, 1]) == FALSE)
  U <- U[IND,]
  n <- nrow(U)
  if(optimal){
    H <- Hpi(U, binned = FALSE)
    H <- matrix(c(sqrt(H[1, 1] * H[2, 2]), 0, 0, sqrt(H[1, 1] * H[2, 2])), 2, 2)
  }
  if(!optimal){
    H <- matrix(c(h, 0, 0, h), 2, 2)
  }
  
  # Help function to compute weights
  poidsU <- function(i, U, h, POL){
    x <- as.numeric(U[i,])
    sWeights(x, h, POL)
  }
  # Use parallel methods to compute if the number of observation is a bit high
  # Change the number of slaves according to the number of cores your processor has
  # It is recommended to use a maximum of the number of cores minus one.
  if(parallel){
    cl <- makeCluster(n_clusters, type = "SOCK")
    clusterEvalQ(cl, library("gpclib"))
    clusterEvalQ(cl, library("sp"))
    clusterExport(cl, c("sCircle", "sWeights"))
    OMEGA <- parLapply(cl, 1:n, poidsU, U = U, h = sqrt(H[1, 1]), POL = polygon)
    OMEGA <- do.call("c", OMEGA)
    stopCluster(cl)
  }else{
    OMEGA <- NULL
    for(i in 1:n){
      OMEGA <- c(OMEGA, poidsU(i, U, h = sqrt(H[1, 1]), POL = polygon))
    }
  }
  
  
  # Kernel Density Estimator
  fhat <- kde(U, H, w = 1/OMEGA,
              xmin = c(min(get.bbox(polygon)$x), min(get.bbox(polygon)$y)),
              xmax = c(max(get.bbox(polygon)$x), max(get.bbox(polygon)$y)))
  fhat$estimate <- fhat$estimate * sum(1/OMEGA) / n
  
  vx <- unlist(fhat$eval.points[1])
  vy <- unlist(fhat$eval.points[2])
  VX <- cbind(rep(vx, each = length(vy)))
  VY <- cbind(rep(vy, length(vx)))
  VXY <- cbind(VX, VY)
  Ind <- matrix(inside.gpc.poly(x = VX, y = VY, polyregion = polygon), length(vy), length(vx))
  f0 <- fhat
  f0$estimate[t(Ind) == 0] <- NA
  
  list(
    X = fhat$eval.points[[1]],
    Y = fhat$eval.points[[2]],
    Z = fhat$estimate,
    ZNA = f0$estimate,
    H = fhat$H,
    W = fhat$w)
}# End of sKDE()

# @U:           data points,
# @polygon:     polygon on which points lie,
# @optimal:     if TRUE, uses Hpi() to select the optimal bandwidth,
# @h:           only if optimal=FALSE, scalar bandwidth,
sKDE_without_c = function(U, polygon, optimal = TRUE, h = .1){
  polygon <- as(polygon, "gpc.poly")
  IND <- which(is.na(U[,1]) == FALSE)
  U <- U[IND,]
  n <- nrow(U)
  if(optimal){
    H <- Hpi(U,binned=FALSE)
    H <- matrix(c(sqrt(H[1, 1] * H[2, 2]), 0, 0, sqrt(H[1, 1] * H[2, 2])), 2, 2)
  }
  if(!optimal){
    H <- matrix(c(h, 0, 0, h), 2, 2)
  }
  
  # Kernel density estimator
  fhat <- kde(U, H,
              xmin = c(min(get.bbox(polygon)$x), min(get.bbox(polygon)$y)),
              xmax = c(max(get.bbox(polygon)$x), max(get.bbox(polygon)$y)))
  
  vx <- unlist(fhat$eval.points[1])
  vy <- unlist(fhat$eval.points[2])
  VX <- cbind(rep(vx, each = length(vy)))
  VY <- cbind(rep(vy, length(vx)))
  VXY <- cbind(VX,VY)
  Ind <- matrix(inside.gpc.poly(x = VX, y = VY, polyregion = polygon), length(vy), length(vx))
  f0 <- fhat
  f0$estimate[t(Ind) == 0] <- NA
  
  list(
    X = fhat$eval.points[[1]],
    Y = fhat$eval.points[[2]],
    Z = fhat$estimate,
    ZNA = f0$estimate,
    H = fhat$H,
    W = fhat$W)
}# End of sKDE_without_c()

# @smooth       : result from sKDE() or sKDE_without_c();
# @breaks       : breaks for the legend (seq(min(smooth$Z)*.95,max(smooth$Z)*1.05,length=21) by default);
# @polygon      : polygon on which data points lie;
# @coord        : coordinates (X, Y) of data points;
# @alpha_coords : transparency for data points (.8 by default);
# @size_coords  : size for data points (.8 by default);
# @many_points  : if TRUE, @coord must be the result of condense() (package bigvis). It is helpful when there are too many points to display (FALSE by default);
# @colContour   : colour of the contour of the polygon ("white" by default);
# @colPoints    : colour of the data points ("dodger blue" by default);
# @title        : title (if provided) to give to the plot;
# @contour      : if FALSE, contour are not plotted (TRUE by default);
# @round        : round value for the legend (2 by default);
# @text_size    : text size (22 by default).
plot_sKDE <- function(smooth, breaks, polygon, coord, alpha_coords = .8, size_coords = .8,
                      many_points = FALSE,
                      colContour="white",
                      colPoints="dodger blue", title, contour=TRUE,
                      round = 2, text_size = 22){
  
  # Get the right format for ggplot2
  obtenirMelt <- function(smoothed){
    res <- melt(smoothed$ZNA)
    res[,1] <- smoothed$X[res[,1]]
    res[,2] <- smoothed$Y[res[,2]]
    names(res) <- list("X","Y","ZNA")
    return(res)
  }
  
  smCont <- obtenirMelt(smooth)
  if(missing(breaks)) breaks <- seq(min(smooth$Z)*.95,max(smooth$Z)*1.05,length=21)
  smCont$colour <- cut(smCont[,"ZNA"],breaks=breaks,labels=round(breaks[-1],digits=round))
  smCont$colour2 <- as.character(cut(smCont[,"ZNA"],breaks=breaks,labels=rev(heat.colors(length(breaks)-1))))
  
  if(is.null(polygon$group)) polygon$group <- factor(1)
  
  
  P <- ggplot() +
    geom_polygon(data = polygon,  aes(x = x, y = y, group = group),
                 fill = NA, col = "black") +
    geom_tile(aes(x = X, y = Y, fill = ZNA),
              alpha = .9, data = smCont[!is.na(smCont$ZNA),], na.rm=TRUE)
  
  
  lesLabels <- round(breaks,round)
  lesIndicesLabels <- floor(seq(1,length(lesLabels),length.out=5)) # Only keep 5 points for the legend values
  lesIndicesLabels[length(lesIndicesLabels)] <- length(lesLabels) # Making sure we display the last value
  lesLabels <- as.character(lesLabels[lesIndicesLabels])
  lesLabels[lesLabels=="0"] <- "0.00"
  
  if(contour) P <- P + geom_contour(data = smCont[!is.na(smCont$ZNA),],
                                    aes(x = X, y = Y, z = ZNA),
                                    alpha=0.6,  colour = colContour,
                                    breaks = breaks[lesIndicesLabels])
  if(many_points){
    P <- P + geom_point(data = coord, aes(x = X, y = Y, alpha = .count),
                        col = "blue", size = size_coords) +
      scale_alpha_continuous(guide=FALSE)
  }else{
    P <- P + geom_point(data = coord[,c("X", "Y")], aes(x = X, y = Y),
                        alpha = alpha_coords, col = "blue", size = size_coords)
  }
  
  
  if(contour){
    # To add contour levels
    ind_level <- which(unlist(lapply(ggplot_build(P)$data, function(x) "level" %in% colnames(x))))
    tmp <- ggplot_build(P)$data[[ind_level]]
    ind <- unlist(lapply(unique(tmp$piece), function(x){
      corresp <- which(tmp$piece == x)
      corresp[round(length(corresp)/2)]
    }))
    tmp$level_r <- round(tmp$level, round)
    P <- P + geom_text(aes(label = level_r, z = NULL, x = x, y = y), data=tmp[ind,])
  }
  
  P <- P + scale_fill_gradient(name="",low='yellow', high='red',
                               breaks=breaks[lesIndicesLabels],
                               limits=range(breaks),labels=lesLabels)
  
  P <- P + theme(axis.text.x=element_blank(),
                 axis.text.y=element_blank(),
                 axis.ticks.x=element_blank(),
                 axis.ticks.y=element_blank(),
                 axis.title=element_blank(),
                 text = element_text(size = text_size))
  
  P <- P + geom_polygon(data=polygon, mapping=(aes(x=x, y=y)),
                        colour="black", fill=NA)
  # Add a title if one was provided
  if(!missing(title)) P <- P + ggtitle(title)
  P
}


library(rgdal)
library(surveillance)
library(maptools)
library(ggplot2)
library(plyr)
library(ellipse)
library(fields)
library(gpclib)
library(ks)
library(maps)
library(rgeos)
library(snow)
library(sp)
library(ggmap)
library(reshape2)

# --------- #
# LOAD DATA #
# --------- #

# Source: Here’s Waldo: Computing the optimal search strategy for finding Waldo (2015), Randal S. Olson
# URL: http://www.randalolson.com/2015/02/03/heres-waldo-computing-the-optimal-search-strategy-for-finding-waldo/

waldo <- read.csv("wheres-waldo-locations.csv")
head(waldo)

range(waldo$X) # Let's say 0:13
range(waldo$Y) # Let's say 0:8

# Plot of Waldo's locations
p <- ggplot(data = waldo, aes(x = X, y = Y)) + geom_point() +
  scale_x_continuous(breaks = seq(0,13)) +
  scale_y_continuous(breaks = seq(0,8))

# ------------- #
# THE RECTANGLE #
# ------------- #

# The rectangle on which we look for Waldo
bounding_polygon <- data.frame(x = c(0, 0, 13, 13, 0), y = c(0, 8, 8, 0, 0))
# As a gpc.poly object
bounding_polygon_gpc <- as(bounding_polygon, "gpc.poly")

# ---------- #
# ESTIMATION #
# ---------- #

# Estimation with border correction
smoothed_kde <- sKDE(U = waldo[, c("X", "Y")],
                     polygon = bounding_polygon_gpc,
                     optimal=TRUE, parallel = FALSE)

# Estimation without border correction
smoothed_kde_nc <- sKDE_without_c(U = waldo[, c("X", "Y")],
                                  polygon = bounding_polygon_gpc,
                                  optimal=TRUE)


# ------ #
# GRAPHS #
# ------ #

p <- plot_sKDE(smooth = smoothed_kde, 
               coord = waldo[, c("X", "Y")],
               alpha_coords = .8,
               size_coords = 1,
               breaks = seq(min(smoothed_kde$ZNA,
                                smoothed_kde_nc$ZNA,na.rm=T)*.95,
                            max(smoothed_kde$ZNA,
                                smoothed_kde_nc$ZNA,na.rm=T)*1.05,
                            length=21),
               polygon = bounding_polygon,
               round = 3,
               colContour = "black") +
  ggtitle("With correction") +
  coord_equal()

p

p_nc <- plot_sKDE(smooth = smoothed_kde_nc,
                  coord = waldo[, c("X", "Y")],
                  alpha_coords = .8,
                  size_coords = 1,
                  breaks = seq(min(smoothed_kde$ZNA,
                                   smoothed_kde_nc$ZNA,na.rm=T)*.95,
                               max(smoothed_kde$ZNA,
                                   smoothed_kde_nc$ZNA,na.rm=T)*1.05,
                               length=21),
                  polygon = bounding_polygon,
                  round = 3,
                  colContour = "black") +
  ggtitle("Without correction") +
  coord_equal()

p_nc

# -------------------------------- #
# TAKING THE POSTCARD INTO ACCOUNT #
# -------------------------------- #

# Actually, there is a postcard in the top left corner

# Let's create it
postcard <- data.frame(x = c(1, 1, 3, 3, 1), y = c(5, 7.5, 7.5, 5, 5))
# As a gpc.poly object
postcard_gpc <- as(postcard, "gpc.poly")


ggplot(data = waldo, aes(x = X, y = Y)) + geom_point() +
  scale_x_continuous(breaks = seq(0,13)) +
  scale_y_continuous(breaks = seq(0,8)) +
  geom_polygon(data = postcard, aes(x = x, y = y), fill = NA, col = "blue")


# Remove the postcard from the bounding polygon
bound_pol <- gpclib::setdiff(bounding_polygon_gpc, postcard_gpc)


# ------------------------------------------- #
# ESTIMATION TAKING THE POSTCARD INTO ACCOUNT #
# ------------------------------------------- #

# Estimation with correction of the border bias
smoothed_kde_post <- 
  sKDE(U = waldo[, c("X", "Y")],
       polygon = bound_pol,
       optimal=TRUE, parallel = FALSE)

# Estimation without the correction
smoothed_kde_post_nc <- 
  sKDE_without_c(U = waldo[, c("X", "Y")],
                 polygon = bound_pol,
                 optimal=TRUE)


# ------ #
# GRAPHS #
# ------ #

p_post <- 
  plot_sKDE(smooth = smoothed_kde_post, 
            coord = waldo[, c("X", "Y")],
            alpha_coords = .8,
            size_coords = 1,
            breaks = seq(min(smoothed_kde_post$ZNA,
                             smoothed_kde_post_nc$ZNA,na.rm=T)*.95,
                         max(smoothed_kde_post$ZNA,
                             smoothed_kde_post_nc$ZNA,na.rm=T)*1.05,
                         length=21),
            polygon = bounding_polygon,
            round = 3,
            colContour = "black") +
  geom_polygon(data = postcard, aes(x = x, y = y), fill = NA, colour = "black") +
  ggtitle("With correction") +
  coord_equal()

p_post

p_post_nc <- 
  plot_sKDE(smooth = smoothed_kde_post_nc,
            coord = waldo[, c("X", "Y")],
            alpha_coords = .8,
            size_coords = 1,
            breaks = seq(min(smoothed_kde_post$ZNA,
                             smoothed_kde_post_nc$ZNA,na.rm=T)*.95,
                         max(smoothed_kde_post$ZNA,
                             smoothed_kde_post_nc$ZNA,na.rm=T)*1.05,
                         length=21),
            polygon = bounding_polygon,
            round = 3,
            colContour = "black") +
  geom_polygon(data = postcard, aes(x = x, y = y), fill = NA, colour = "black") +
  ggtitle("Without correction") +
  coord_equal()

p_post_nc