This page proposes some R codes to compute the kernel density estimates of two-dimensional data points, using an extension of Ripley’s circumference method to correct for border bias. First, the functions computing the estimates are given. Then, we provide a function to plot the result on a map. And we finish with three examples:
Edit February 2022: the dependency to {rgeos} was removed. The code now relies on {sf} as {rgeos} is to retire soon.
The function sCircle()
returns n
points on a circle centered in centre
with a radius of radius
.
#' Creates "n" points on a circle centered in "centre" with a radius of "radius"
#'
#' @param n number of points to define the circle
#' @param centre center of the circle
#' @param radius radius of the circle
#' @return Returns a matrix with two columns corresponding to the (x,y) coordinates of the circle of centre `centre`
#' and radius `radius`. The number of lines depends on the number of points asked using the parameter `n`.
#' @examples sCircle(n=100, centre = c(0,0), radius = 1)
#' @export
sCircle <- function(n = 100, centre = c(0, 0), radius){
theta <- seq(0, 2*pi, length = n)
m <- cbind(cos(theta), sin(theta)) * radius
m[, 1] <- m[, 1] + centre[1]
m[, 2] <- m[, 2] + centre[2]
colnames(m) <- c("x", "y")
m
}# End of sCircle()
The function sWeights()
returns the proportion of the area of a circle of center x
and radius 1.759*h
on the area of a polygon named polygon
. Please note that the polygon needs to be created with {sf}.
#' Proportion of the area of a circle on a polygon's area
#'
#' @param x center of the circle
#' @param h bandwidth scalar
#' @param polygon polygon on which data points lie
#' @seealso \code{\link{sCircle}} which this function wraps
#' @return Returns the proportion of the area of a circle of center x and radius 1.759*h on the polygon's area
#' @examples
#' pol_coordinates <- matrix(c(0, 0, 1, 1, 0, 0, 1, 1, 0, 0), ncol = 2)
#' pol <-
#' cbind(
#' c(pol_coordinates[,1], pol_coordinates[1,1]),
#' c(pol_coordinates[, 2], pol_coordinates[1,1])
#' ) %>%
#' list() %>%
#' sf::st_polygon()
#' sWeights(x = c(0, 0), h = 1/1.759, polygon = pol)
#' @export
sWeights <- function(x, h, polygon) {
circle_matrix <- sCircle(centre = x, radius = 1.759*h)
circle_polygon <-
cbind(
c(circle_matrix[,"x"], circle_matrix[[1,"x"]]),
c(circle_matrix[,"y"], circle_matrix[[1,"y"]])
) %>%
list() %>%
sf::st_polygon()
area_circle <- sf::st_area(circle_polygon)
area_intersection <- sf::st_area(sf::st_intersection(polygon, circle_polygon))
area_intersection/area_circle
}# End of sWeights()
The function sKDE()
computes the kernel density estimates, correcting for a possible frontier bias. It returns a list, whose elements are : - X
: x coordinates at wich estimate is evaluated; - Y
: y coordinates at wich estimate is evaluated; - Z
: density estimates; - ZNA
: density estimates with NA values for points outside the polygon; - H
: bandwidth matrix; - W
: vector of weights.
#' Computes an estimation of the density using Kernel Density estimator,
#' correcting for fontier effects.
#'
#' @return Returns a list whose elements are:
#' > X: x coordinates at wich estimate is evaluated,
#' > Y: y coordinates at wich estimate is evaluated,
#' > Z: density estimates,
#' > ZNA: density estimates with NA values for points outside the polygon,
#' > H: bandwidth matrix,
#' > W: vector of weights.
#' @seealso \code{\link{sWeights}} which this function wraps
#' @param U data points.
#' @param polygon polygon (simple figure geometry created with {sf}) on which points lie.
#' @param optimal if TRUE, uses Hpi() to select the optimal bandwidth.
#' @param h only if optimal=FALSE, scalar bandwidth.
#' @param parallel if TRUE, computes the weights using clusters.
#' @param n_clusters only if n_clusters=TRUE, defines the number of clusters. (Set to NULL for automatic detection (on Unix)).
#' @references Charpentier, A. & Gallic, E. (2015). Kernel density estimation based on Ripley’s correction. GeoInformatica, 1-22.
#' (\href{https://link.springer.com/article/10.1007/s10707-015-0232-z}{Springer})
#' @examples
#' data(acci)
#' # Estimation with correction
#' polygon_finistere <-
#' cbind(
#' c(acci$finistere$polygon$long, acci$finistere$polygon$long[1]),
#' c(acci$finistere$polygon$lat, acci$finistere$polygon$lat[1])
#' ) %>%
#' list() %>%
#' sf::st_polygon()
#' smoothed_fin <- sKDE(U = acci$finistere$points, polygon = polygon_finistere,
#' optimal=TRUE, parallel = FALSE)
#' @export
sKDE <- function(U, polygon, optimal = TRUE, h = .1, parallel = FALSE, n_clusters = 4){
if(!"POLYGON" %in% class(polygon)) stop("Please provide a polygon created with sf::sf_polygon")
if("data.frame" %in% class(U)) U <- as.matrix(U)
IND <- which(is.na(U[, 1]) == FALSE)
U <- U[IND,]
n <- nrow(U)
if(optimal){
H <- Hpi(U, binned = FALSE)
H <- matrix(c(sqrt(H[1, 1] * H[2, 2]), 0, 0, sqrt(H[1, 1] * H[2, 2])), 2, 2)
}else{
H <- matrix(c(h, 0, 0, h), 2, 2)
}
# Help function to compute weights
poidsU <- function(i, U, h, POL){
x <- as.numeric(U[i,])
sWeights(x, h, POL)
}
# Use parallel methods to compute if the number of observation is a bit high
# Change the number of slaves according to the number of cores your processor has
# It is recommended to use a maximum of the number of cores minus one.
if(parallel){
if(is.null(n_clusters)) n_clusters <- parallel::detectCores()-1
cl <- makeCluster(n_clusters)
clusterEvalQ(cl, library(dplyr))
clusterEvalQ(cl, library(sf))
clusterExport(cl, c("sCircle", "sWeights"))
OMEGA <- pblapply(1:n, poidsU, U = U, h = sqrt(H[1, 1]), POL = polygon, cl = cl)
OMEGA <- do.call("c", OMEGA)
stopCluster(cl)
}else{
OMEGA <- NULL
for(i in 1:n){
OMEGA <- c(OMEGA, poidsU(i, U, h = sqrt(H[1, 1]), POL = polygon))
}
}
# Kernel Density Estimator
fhat <- kde(U, H, w = 1/OMEGA,
xmin = c(sf::st_bbox(polygon)["xmin"], sf::st_bbox(polygon)["ymin"]),
xmax = c(sf::st_bbox(polygon)["xmax"], sf::st_bbox(polygon)["ymax"]))
fhat$estimate <- fhat$estimate * sum(1/OMEGA) / n
vx <- unlist(fhat$eval.points[1])
vy <- unlist(fhat$eval.points[2])
VX <- cbind(rep(vx, each = length(vy)))
VY <- cbind(rep(vy, length(vx)))
VXY <- cbind(VX, VY)
ind_points_in_poly <-
sf::st_as_sf(data.frame(x = VX, y = VY), coords = c("x", "y")) %>%
sf::st_intersects(polygon, sparse = FALSE) %>%
matrix(length(vy), length(vx))
f0 <- fhat
f0$estimate[t(ind_points_in_poly) == 0] <- NA
list(
X = fhat$eval.points[[1]],
Y = fhat$eval.points[[2]],
Z = fhat$estimate,
ZNA = f0$estimate,
H = fhat$H,
W = fhat$w)
}# End of sKDE()
The function sKDE_without_c()
computes the kernel density estimates, without correcting for a possible frontier bias. It returns a list, whose elements are : - X
: x coordinates at wich estimate is evaluated; - Y
: y coordinates at wich estimate is evaluated; - Z
: density estimates; - ZNA
: density estimates with NA values for points outside the polygon; - H
: bandwidth matrix; - W
: vector of weights.
#' Computes an estimation of the density using Kernel Density estimator,
#' without correcting for fontier effects.
#'
#' @param U data points.
#' @param polygon polygon (simple figure geometry created with {sf}) on which points lie.
#' @param optimal if TRUE, uses Hpi() to select the optimal bandwidth.
#' @param h only if optimal=FALSE, scalar bandwidth.
#' @return Returns a list whose elements are:
#' > X: x coordinates at wich estimate is evaluated,
#' > Y: y coordinates at wich estimate is evaluated,
#' > Z: density estimates,
#' > ZNA: density estimates with NA values for points outside the polygon,
#' > H: bandwidth matrix,
#' > W: vector of weights.
#' @examples
#' data(acci)
#' polygon_finistere <-
#' cbind(
#' c(acci$finistere$polygon$long, acci$finistere$polygon$long[1]),
#' c(acci$finistere$polygon$lat, acci$finistere$polygon$lat[1])
#' ) %>%
#' list() %>%
#' sf::st_polygon()
#' smoothed_fin_nc <- sKDE_without_c(U = acci$finistere$points,
#' polygon = polygon_finistere, optimal=TRUE)
#' @export
sKDE_without_c = function(U, polygon, optimal = TRUE, h = .1){
if(!"POLYGON" %in% class(polygon)) stop("Please provide a polygon created with sf::sf_polygon")
IND <- which(is.na(U[,1]) == FALSE)
U <- U[IND,]
n <- nrow(U)
if(optimal){
H <- Hpi(U,binned=FALSE)
H <- matrix(c(sqrt(H[1, 1] * H[2, 2]), 0, 0, sqrt(H[1, 1] * H[2, 2])), 2, 2)
}
if(!optimal){
H <- matrix(c(h, 0, 0, h), 2, 2)
}
# Kernel density estimator
fhat <- kde(U, H,
xmin = c(sf::st_bbox(polygon)["xmin"], sf::st_bbox(polygon)["ymin"]),
xmax = c(sf::st_bbox(polygon)["xmax"], sf::st_bbox(polygon)["ymax"]))
vx <- unlist(fhat$eval.points[1])
vy <- unlist(fhat$eval.points[2])
VX <- cbind(rep(vx, each = length(vy)))
VY <- cbind(rep(vy, length(vx)))
VXY <- cbind(VX,VY)
ind_points_in_poly <-
sf::st_as_sf(data.frame(x = VX, y = VY), coords = c("x", "y")) %>%
sf::st_intersects(polygon, sparse = FALSE) %>%
matrix(length(vy), length(vx))
f0 <- fhat
f0$estimate[t(ind_points_in_poly) == 0] <- NA
list(
X = fhat$eval.points[[1]],
Y = fhat$eval.points[[2]],
Z = fhat$estimate,
ZNA = f0$estimate,
H = fhat$H,
W = fhat$W)
}# End of sKDE_without_c()
Using the result obtained by the evaluation of the functions sKDE()
or sKDE_without_c()
, the function plot_sKDE()
creates a visualization of the kernel density estimates.
#' Using the result obtained by the evaluation of the functions sKDE() or sKDE_without_c(),
#' the function plot_sKDE() creates a visualization of the kernel density estimates.
#' @param smooth result from sKDE() or sKDE_without_c();
#' @param breaks breaks for the legend (seq(min(smooth$Z)*.95,max(smooth$Z)*1.05,length=21) by default);
#' @param polygon polygon on which data points lie;
#' @param coord coordinates (long, lat) of data points;
#' @param alpha_coords transparency for data points (.8 by default);
#' @param size_coords size for data points (.8 by default);
#' @param many_points if TRUE, @coord must be the result of condense() (package bigvis). It is helpful when there are too many points to display (FALSE by default);
#' @param colContour colour of the contour of the polygon ("white" by default);
#' @param colPoints colour of the data points ("dodger blue" by default);
#' @param title title (if provided) to give to the plot;
#' @param contour if FALSE, contour are not plotted (TRUE by default);
#' @param round round value for the legend (2 by default);
#' @param text_size text size (22 by default).
#' @return a ggplot2 plot.
#' @examples
#' library(dplyr)
#' data(acci)
#' # Estimation with correction
#' polygon_finistere <-
#' cbind(
#' c(acci$finistere$polygon$long, acci$finistere$polygon$long[1]),
#' c(acci$finistere$polygon$lat, acci$finistere$polygon$lat[1])
#' ) %>%
#' list() %>%
#' sf::st_polygon()
#' smoothed_fin <- sKDE(U = acci$finistere$points, polygon = polygon_finistere,
#' optimal=TRUE, parallel = FALSE)
#' # Estimation without correction
#' smoothed_fin_nc <- sKDE_without_c(U = acci$finistere$points, polygon = polygon_finistere,
#' optimal=TRUE)
#' p_acci_fin <- plot_sKDE(smooth = smoothed_fin,
#' coord = acci$finistere$points,
#' alpha_coords = .8,
#' size_coords = 1,
#' breaks = seq(min(smoothed_fin$ZNA, smoothed_fin_nc$ZNA,na.rm=TRUE)*.95,
#' max(smoothed_fin$ZNA, smoothed_fin_nc$ZNA,na.rm=TRUE)*1.05, length=21),
#' polygon = acci$finistere$polygon, round = 3, colContour = "black") +
#' ggtitle("With correction") +
#' coord_equal()
#' print(p_acci_fin)
#' @export
plot_sKDE <- function(smooth, breaks, polygon, coord, alpha_coords = .8, size_coords = .8,
many_points = FALSE,
colContour="white",
colPoints="dodger blue", title, contour=TRUE,
round = 2, text_size = 22){
# Get the right format for ggplot2
obtenirMelt <- function(smoothed){
res <- reshape2::melt(smoothed$ZNA)
res[,1] <- smoothed$X[res[,1]]
res[,2] <- smoothed$Y[res[,2]]
names(res) <- list("X","Y","ZNA")
res
}
smCont <- obtenirMelt(smooth)
if(missing(breaks)) breaks <- seq(min(smooth$Z)*.95,max(smooth$Z)*1.05,length=21)
smCont$colour <- cut(smCont[,"ZNA"],breaks=breaks,labels=round(breaks[-1],digits=round))
smCont$colour2 <- as.character(cut(smCont[,"ZNA"],breaks=breaks,labels=rev(heat.colors(length(breaks)-1))))
if(is.null(polygon$group)) polygon$group <- factor(1)
P <- ggplot() +
geom_polygon(data = polygon,
mapping = aes(x = long, y = lat, group = group),
fill = NA, col = "black") +
geom_tile(aes(x = X, y = Y, fill = ZNA),
alpha = .9, data = smCont[!is.na(smCont$ZNA),], na.rm=TRUE)
lesLabels <- round(breaks,round)
lesIndicesLabels <- floor(seq(1,length(lesLabels),length.out=5)) # Only keep 5 points for the legend values
lesIndicesLabels[length(lesIndicesLabels)] <- length(lesLabels) # Making sure we display the last value
lesLabels <- as.character(lesLabels[lesIndicesLabels])
lesLabels[lesLabels=="0"] <- "0.00"
if(contour) P <- P + geom_contour(data = smCont[!is.na(smCont$ZNA),],
aes(x = X, y = Y, z = ZNA),
alpha=0.6, colour = colContour,
breaks = breaks[lesIndicesLabels])
if(many_points){
P <- P + geom_count(data = coord, aes(x = long, y = lat, alpha = ..prop..),
col = "blue", size = size_coords) +
scale_alpha_continuous(guide=FALSE)
}else{
P <- P + geom_point(data = coord[,c("long", "lat")], aes(x = long, y = lat),
alpha = alpha_coords, col = "blue", size = size_coords)
}
if(contour){
# To add contour levels
ind_level <- which(unlist(lapply(ggplot_build(P)$data, function(x) "level" %in% colnames(x))))
tmp <- ggplot_build(P)$data[[ind_level]]
ind <- unlist(lapply(unique(tmp$piece), function(x){
corresp <- which(tmp$piece == x)
corresp[round(length(corresp)/2)]
}))
tmp$level_r <- round(tmp$level, round)
P <- P + geom_text(aes(label = level_r, x = x, y = y), data=tmp[ind,])
}
P <- P + scale_fill_gradient(name="",low='yellow', high='red',
breaks=breaks[lesIndicesLabels],
limits=range(breaks),labels=lesLabels)
P <- P + theme(axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.x=element_blank(),
axis.ticks.y=element_blank(),
axis.title=element_blank(),
text = element_text(size = text_size))
P <- P + geom_polygon(data=polygon, mapping=(aes(x=long, y=lat)),
colour="black", fill=NA)
# Add a title if one was provided
if(!missing(title)) P <- P + ggtitle(title)
P
}
This page provides three example on how to estimate the density of car accidents that happened in Finistère and Morbihan, two French “départements”, on bike thefts in San Francisco and on camping locations in France. Once the estimates are computed, they are plotted on a map. Two estimations are provided: one using a border correction, and the other ignoring this possible issue.
But before going further, some packages need to be loaded.
library(tidyverse)
library(ks)
library(reshape2)
library(sf)
Data can be downloaded from http://freakonometrics.hypotheses.org/smooth-maps/data/car_accidents/acci.RData.
load(url("https://egallic.fr/R/sKDE/smooth-maps/data/car_accidents/acci.RData"))
They are contained in the object names acci
, which is a list of two elements:
finistere
: concerns only the “département” Finistère. It is also a list, whose elements are:
morbihan
: concerns only the “département” Morbihan. It is also a list, whose elements are:
# Finistere accidents locations
ggplot() +
geom_polygon(data = acci$finistere$polygon,
mapping = aes(x = long, y = lat),
fill = "grey75") +
geom_point(data = acci$finistere$points, aes(x = long, y = lat),
col = "dodger blue", alpha = .5) + coord_equal() +
ggtitle("Accidents in Finistere")
# Morbihan accidents location
ggplot() +
geom_polygon(data = acci$morbihan$polygon,
mapping = aes(x = long, y = lat), fill = "grey75") +
geom_point(data = acci$morbihan$points, aes(x = long, y = lat),
col = "dodger blue", alpha = .5) + coord_equal() +
ggtitle("Accidents in Morbihan")
Now, let’s see how to use the functions sKDE()
and sKDE_without_c()
to compute the kernel density estimates, taking care of possible border bias, and without considering them respectively.
Let’s do this for Finistère first.
We need to create a sfg object to be able to use the functions from {sf}, to compute the intersection of the area.
polygon_finistere <-
cbind(
c(acci$finistere$polygon$long, acci$finistere$polygon$long[1]),
c(acci$finistere$polygon$lat, acci$finistere$polygon$lat[1])
) %>%
list() %>%
sf::st_polygon()
# Estimation with correction
smoothed_fin <- sKDE(U = acci$finistere$points,
polygon = polygon_finistere,
optimal=TRUE, parallel = FALSE)
## Warning in ks.defaults(x = x, w = w, binned = binned, bgridsize = bgridsize, : Weights don't sum to sample size - they have been scaled accordingly
# Estimation without correction
smoothed_fin_nc <- sKDE_without_c(U = acci$finistere$points,
polygon = polygon_finistere,
optimal=TRUE)
To visualize the estimates, it is possible to use the plot_sKDE()
function.
First, taking care of the border effects:
p_acci_fin <- plot_sKDE(smooth = smoothed_fin,
coord = acci$finistere$points,
alpha_coords = .8,
size_coords = 1,
breaks = seq(min(smoothed_fin$ZNA,
smoothed_fin_nc$ZNA,na.rm=T)*.95,
max(smoothed_fin$ZNA,
smoothed_fin_nc$ZNA,na.rm=T)*1.05,
length=21),
polygon = acci$finistere$polygon,
round = 3,
colContour = "black") +
ggtitle("With correction") +
coord_equal()
print(p_acci_fin)
And secondly, without considering the possible border effect:
p_acci_fin_nc <- plot_sKDE(smooth = smoothed_fin_nc,
coord = acci$finistere$points,
alpha_coords = .8,
size_coords = 1,
breaks = seq(min(smoothed_fin$ZNA,
smoothed_fin_nc$ZNA,na.rm=T)*.95,
max(smoothed_fin$ZNA,
smoothed_fin_nc$ZNA,na.rm=T)*1.05,
length=21),
polygon = acci$finistere$polygon,
round = 3,
colContour = "black") +
ggtitle("Without correction") +
coord_equal()
print(p_acci_fin_nc)
The estimation for Morbihan follows the same scheme.
polygon_morbihan <-
cbind(
c(acci$morbihan$polygon$long, acci$morbihan$polygon$long[1]),
c(acci$morbihan$polygon$lat, acci$morbihan$polygon$lat[1])
) %>%
list() %>%
sf::st_polygon()
# Estimation with correction
smoothed_morb <- sKDE(U = acci$morbihan$points,
polygon = polygon_morbihan,
optimal=TRUE, parallel = FALSE)
## Warning in ks.defaults(x = x, w = w, binned = binned, bgridsize = bgridsize, : Weights don't sum to sample size - they have been scaled accordingly
# Estimation without correction
smoothed_morb_nc <- sKDE_without_c(U = acci$morbihan$points,
polygon = polygon_morbihan,
optimal=TRUE)
Plotting the result using the plot_sKDE()
function, first using the estimates taking the border bias into account, and secondly using the estimates computed without considering a border bias.
p_acci_morb <- plot_sKDE(smooth = smoothed_morb,
coord = acci$morbihan$points,
alpha_coords = .8,
size_coords = 1,
breaks = seq(min(smoothed_morb$ZNA,
smoothed_morb_nc$ZNA,na.rm=T)*.95,
max(smoothed_morb$ZNA,
smoothed_morb_nc$ZNA,na.rm=T)*1.05,
length=21),
polygon = acci$morbihan$polygon,
round = 3,
colContour = "black") +
ggtitle("With correction") +
coord_equal()
print(p_acci_morb)
p_acci_morb_nc <- plot_sKDE(smooth = smoothed_morb_nc,
coord = acci$morbihan$points,
alpha_coords = .8,
size_coords = 1,
breaks = seq(min(smoothed_morb$ZNA,
smoothed_morb_nc$ZNA,na.rm=T)*.95,
max(smoothed_morb$ZNA,
smoothed_morb_nc$ZNA,na.rm=T)*1.05,
length=21),
polygon = acci$morbihan$polygon,
round = 3,
colContour = "black") +
ggtitle("Without correction") +
coord_equal()
print(p_acci_morb_nc)
A map of San Francisco can be obtained from the Bay Area Census (2000) (http://www.mtc.ca.gov/maps_and_data/GIS/data.htm). The information contained in the shapefile that can be downloaded on that website, is also available, in a more R friendly format, from https://egallic.fr/R/sKDE/smooth-maps/data/bike_thefts/sf_df.rda.
load(url("https://egallic.fr/R/sKDE/smooth-maps/data/bike_thefts/sf_df.rda"))
The loaded object is called sf_df
. It is a data.frame
that can be used by ggplot()
.
Data about crimes reported in San Francisco are available on <a href=“https://data.sfgov.org/”“>https://data.sfgov.org/. A focus on bike thefts reported in 2013 can be downloaded from https://egallic.fr/R/sKDE/smooth-maps/data/bike_thefts/incidents_bikes.RData.
load(url("https://egallic.fr/R/sKDE/smooth-maps/data/bike_thefts/incidents_bikes.RData"))
These data are contained in an object called incidents_bikes
, which is a data.frame
, whose variables are:
IncidntNum
: incident number;Category
: category;Descript
: decription;DayOfWeek
: name of the day;Date
: date;Time
: time;PdDistrict
: Police Department District;Resolution
: resolution;Location
: address;long
: longitude;lat
: latitude;date
: date.# San Francisco bike thefts' locations (2013)
ggplot() +
geom_polygon(data = sf_df, aes(x = long, y = lat, group = group), fill = "grey75") +
geom_point(data = incidents_bikes, aes(x = long, y = lat),
alpha = .5, col = "dodger blue") +
ggtitle("Bike thefts in San Francisco (2013)") +
coord_equal()
Now, let’s see how to use the functions sKDE()
and sKDE_without_c()
to compute the kernel density estimates, taking care of possible border bias, and without considering them respectively.
sf_df_coords <- sf_df[, c("long", "lat")]
polygon_san_francisco <-
cbind(
c(sf_df_coords$long, sf_df_coords$long[1]),
c(sf_df_coords$lat, sf_df_coords$lat[1])
) %>%
list() %>%
sf::st_polygon()
The estimates are calculated as follows:
# Estimation with correction
smoothed_sf_bikes <- sKDE(U = incidents_bikes[, c("long", "lat")],
polygon = polygon_san_francisco,
optimal=TRUE, parallel=FALSE)
## Warning in ks.defaults(x = x, w = w, binned = binned, bgridsize = bgridsize, : Weights don't sum to sample size - they have been scaled accordingly
# Estimation without correction
smoothed_sf_bikes_nc <- sKDE_without_c(U = incidents_bikes[, c("long", "lat")],
polygon = polygon_san_francisco,
optimal=TRUE)
To visualize the estimates, it is possible to use the plot_sKDE()
function.
First, taking care of the border effects:
p_bikes_sf <- plot_sKDE(smooth = smoothed_sf_bikes,
coord = incidents_bikes[, c("long", "lat")],
alpha_coords = .8,
size_coords = 1,
breaks = seq(min(smoothed_sf_bikes$ZNA,
smoothed_sf_bikes_nc$ZNA,na.rm=T)*.95,
max(smoothed_sf_bikes$ZNA,
smoothed_sf_bikes_nc$ZNA,na.rm=T)*1.05,
length=21),
polygon = sf_df[, c("long", "lat")],
round = 2,
colContour = "black") +
ggtitle("With correction") +
coord_equal()
print(p_bikes_sf)
And secondly, without considering the possible border effect:
p_bikes_sf_nc <- plot_sKDE(smooth = smoothed_sf_bikes_nc,
coord = incidents_bikes[, c("long", "lat")],
alpha_coords = .8,
size_coords = 1,
breaks = seq(min(smoothed_sf_bikes$ZNA,
smoothed_sf_bikes_nc$ZNA,na.rm=T)*.95,
max(smoothed_sf_bikes$ZNA,
smoothed_sf_bikes_nc$ZNA,na.rm=T)*1.05,
length=21),
polygon = sf_df[, c("long", "lat")],
round = 2,
colContour = "black") +
ggtitle("Without correction") +
coord_equal()
print(p_bikes_sf_nc)
We need a French map, with a bit more details than the one that can be found in the maps
package. We extracted the French border on the EuroGeographics european map (http://epp.eurostat.ec.europa.eu/cache/GISCO/geodatafiles/CNTR_2014_03M_SH.zip, © EuroGeographics for the administrative boundaries). The French frontier, which is extracted from this shapefile, can be downloaded on https://egallic.fr/R/sKDE/smooth-maps/data/french_campings/france.RData.
load(url("https://egallic.fr/R/sKDE/smooth-maps/data/french_campings/france.RData"))
Data can be downloaded from https://egallic.fr/R/sKDE/smooth-maps/data/french_campings/tourisme_camping.RData.
load(url("https://egallic.fr/R/sKDE/smooth-maps/data/french_campings/tourisme_camping.RData"))
These data were downloaded from https://www.classement.atout-france.fr/web/guest;jsessionid=8436e9995ac3be992904b79690c0, and geocoded using the function geocode()
from the ggmaps
package, available on CRAN. These data are contained in an object called tourisme_camping
, which is a data.frame
, whose variables are:
ranking
: number of stars attributed;commercial_name
: name;address
: address;zip
: French area code;town
: town;no_pitches
: number of pitches;long
: longitude;lat
: latitude.# French campings locations
ggplot() + geom_polygon(data = france,
mapping = aes(x = long, y = lat, group = group),
fill = "grey75") +
geom_point(data = tourisme_camping, aes(x = long, y = lat),
col = "dodger blue", alpha = .5, size = 1.5) +
coord_equal()
Now, let’s see how to use the functions sKDE()
and sKDE_without_c()
to compute the kernel density estimates, taking care of possible border bias, and without considering them respectively.
Although it is possible to estimate the density with all 5494 points, it takes a lot of time. Hence, in this example, we’ll focus on a sample of 1000 points.
set.seed(1)
ind <- sample(x = seq_len(nrow(tourisme_camping)), size = 1000)
tourisme_camping_samp <- tourisme_camping[ind,]
ggplot() + geom_polygon(data = france, aes(x = long, y = lat, group = group), fill = "grey75") +
geom_point(data = tourisme_camping_samp, aes(x = long, y = lat),
col = "dodger blue", alpha = .5, size = 1.5) +
coord_equal()
Let us create a polygon from the coordinates of France:
france_coords <- france[,c("long", "lat")]
polygon_france <-
cbind(
c(france_coords$long, france_coords$long[1]),
c(france_coords$lat, france_coords$lat[1])
) %>%
list() %>%
sf::st_polygon()
The estimates are calculated as follows:
library(parallel)
library(pbapply)
# Estimation with correction (using parallel computation on three clusters)
smoothed_camping <- sKDE(U = tourisme_camping_samp[,c("long", "lat")],
polygon = polygon_france, optimal=TRUE,
parallel=TRUE, n_clusters = 3)
## Warning in ks.defaults(x = x, w = w, binned = binned, bgridsize = bgridsize, : Weights don't sum to sample size - they have been scaled accordingly
# Estimation without correction
smoothed_camping_nc <- sKDE_without_c(U = tourisme_camping_samp[,c("long", "lat")],
polygon = polygon_france, optimal=TRUE)
To visualize the estimates, it is possible to use the plot_sKDE()
function.
First, taking care of the border effects:
p_camping <- plot_sKDE(smooth = smoothed_camping,
coord = tourisme_camping_samp[, c("long", "lat")],
alpha_coords = .8,
size_coords = 1,
breaks = seq(min(smoothed_camping$ZNA,
smoothed_camping_nc$ZNA,na.rm=T)*.95,
max(smoothed_camping$ZNA,
smoothed_camping_nc$ZNA,na.rm=T)*1.05,
length=21),
polygon = france[, c("long", "lat")],
round = 3,
colContour = "black") +
ggtitle("With correction") +
coord_equal()
print(p_camping)
And secondly, without considering the possible border effect:
p_camping_nc <- plot_sKDE(smooth = smoothed_camping_nc,
coord = tourisme_camping_samp[, c("long", "lat")],
alpha_coords = .8,
size_coords = 1,
breaks = seq(min(smoothed_camping$ZNA,
smoothed_camping_nc$ZNA,na.rm=T)*.95,
max(smoothed_camping$ZNA,
smoothed_camping_nc$ZNA,na.rm=T)*1.05,
length=21),
polygon = france[, c("long", "lat")],
round = 3,
colContour = "black") +
ggtitle("Without correction") +
coord_equal()
print(p_camping_nc)