Ewen Gallic
http://egallic.fr
R is a free software environment for statistical computing and graphics. It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS. (https://www.r-project.org/)
Language inspired by S, a programming language developed in the 1970s by John Chambers, Douglas Bates, Rick Becker, Bill Cleveland, Trevor Hastie, Daryl Pregibon and Allan Wilks from the AT&T Bell Laboratories
R was created in the middle of the 1990s, by Ross Ihaka and Robert Gentleman from the University of Auckland
Distributed under the GNU General Public License
Developed and distributed by the R Development Core Team
Useful to manipulate data, realise statistical analysis, create graphics, ...
The console acts like a calculator: one submits some code, it is evaluated by R and an answer is returned
2+1
## [1] 3
CTRL + r
, CTRL + ENTER
or CMD ENTER
<-
or ->
(the latter is not often used)=
(not my favourite practice)variable_name <- value
a <- 2+1
# Or : a = 2+1 (note that the # sign enables to comment the rest of the line)
a
## [1] 3
a+1
## [1] 4
(a <- 2^2)
## [1] 4
b <- a ; b <- 20
a ; b
## [1] 4
## [1] 20
The rm()
function removes an object from a specific environment:
a
## [1] 4
rm(a)
a
## Error in eval(expr, envir, enclos): object 'a' not found
base
contains elementary functions (e.g. sum()
, mean()
, c()
, etc.)install.packages("package_name")
library(package_name)
help("function_name")
redirects to the help page of function_name
:help("log")
?log
help.search()
function:help.search("logarithm")
??logarithm
numeric
, character
, logical
numeric
:
integers
double
or real
a <- 2.0
typeof(a)
## [1] "double"
is.integer(a)
## [1] FALSE
b <- 2
typeof(b)
## [1] "double"
c <- as.integer(b)
typeof(c)
## [1] "integer"
is.numeric(c)
## [1] TRUE
a <- "Hello world!"
a
## [1] "Hello world!"
typeof(a)
## [1] "character"
TRUE
equals 1
and FALSE
equals 0
TRUE + TRUE + FALSE + TRUE*TRUE
## [1] 3
length()
function returns the number of elements contained in an objecta <- 1
length(a)
## [1] 1
a
is a vector that contains a single element[1]
in the outputNA
value (Not Available)NA
s are logical
x <- NA
typeof(x)
## [1] "logical"
is.na(x)
## [1] TRUE
NULL
NULL
0
x <- NULL
length(x)
## [1] 0
is.null(x)
## [1] TRUE
c()
function can be used to create a vector:c(1,2,3)
## [1] 1 2 3
a <- c(last_name = "Piketty", first_name = "Thomas", birth = "1971")
a
## last_name first_name birth
## "Piketty" "Thomas" "1971"
b <- c("Piketty", "Thomas", "1971")
b
## [1] "Piketty" "Thomas" "1971"
names(b) <- c("last_name", "first_name", "birth")
b
## last_name first_name birth
## "Piketty" "Thomas" "1971"
c("two", 1, TRUE)
## [1] "two" "1" "TRUE"
factor()
:countries <- factor(c("France", "France", "China", "Spain", "China"))
countries
## [1] France France China Spain China
## Levels: China France Spain
class(countries)
## [1] "factor"
levels()
:levels(countries)
## [1] "China" "France" "Spain"
relevel()
function enables to change the reference:countries <- relevel(countries, ref = "Spain")
countries
## [1] France France China Spain China
## Levels: Spain China France
ordered()
:income <- ordered(c("<1500", ">2000", ">2000", "1500-2000",
">2000", "<1500"),
levels = c("<1500", "1500-2000", ">2000"))
income
## [1] <1500 >2000 >2000 1500-2000 >2000 <1500
## Levels: <1500 < 1500-2000 < >2000
data.frame
objects are lists of vectorsdata.frame()
function is used to create a data.frame
women <- data.frame(height = c(58, 59, 60, 61, 62, 63, 64, 65,
66, 67, 68, 69, 70, 71, 72),
weight = c(115, 117, 120, 123, 126, 129, 132,
135, 139, 142, 146, 150, 154, 159, 164))
head(women)
## height weight
## 1 58 115
## 2 59 117
## 3 60 120
## 4 61 123
## 5 62 126
## 6 63 129
class(women)
## [1] "data.frame"
dim(women)
## [1] 15 2
nrow(women)
## [1] 15
ncol(women)
## [1] 2
read.table()
and scan()
scan()
function hereread.table()
read.table()
function is designed for data already organized as a tabledata.frame
Argument | Description |
---|---|
file |
File name, or complete path to file (can be an URL) |
header |
Whether the first line of the file contains the names of the variables (FALSE by default) |
sep |
Field separator character (white space by default) |
dec |
Character used for decimal points ("." by default) |
na.strings |
Character vector of strings to be interpreted as NA ("NA" by default) |
read.xls()
from the gdata
packageread_excel()
from the readxl
packageiris.xls
file contained in the folder of the gdata
packagelibrary(gdata)
xlsfile <- file.path(path.package("gdata"), "xls", "iris.xls")
iris <- read.xls(xlsfile) # Creates a temporary csv file
sheet
argument enables to import another sheet, either by giving the number or the name of the sheetread_excel()
function is faster, has almost the same names for the arguments, but is not as robust at the moment as the read.xls()
function. In addition, it returns a tbl_df
object, not a data.frame
write.table()
can be used to export a data.frame
object (or a matrix) to an ASCII file:write.table(my_data_frame, file = "file_name.txt", sep = ";")
save()
; to import the object(s) back: load()
:save(obj_1, obj_2, file = "my_file.rda")
load("my_file.rda")
save.image()
; to load the session: load()
save.image("my_session.rda")
load("my_session.rda")
"["()
function"["()
function:x <- c(4, 7, 3, 5, 0)
"["(x, 2)
## [1] 7
x[2] # The second element of x
## [1] 7
x[-2] # All the elements of x minus the second one
## [1] 4 3 5 0
x[3:5] # Elements of x from 3rd to 5th position
## [1] 3 5 0
i <- 3:5 ; x[i] # Elements of x from 3rd to 5th position
## [1] 3 5 0
# Logical indexing: only the position flagged TRUE is kept, i.e. the second
# element of x. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned, whereas TRUE and FALSE are reserved words.
x[c(FALSE, TRUE, FALSE, FALSE, FALSE)]
## [1] 7
x[x<1] # Elements of x that are lower than 1
## [1] 0
x<1 # Returns a logical vector
## [1] FALSE FALSE FALSE FALSE TRUE
TRUE
values from a logical vector: which()
which.min()
(which.max()
)x <- c(2, 4, 5, 1, 7, 6)
which(x < 7 & x > 2)
## [1] 2 3 6
which.min(x)
## [1] 4
which.max(x)
## [1] 5
x[which.max(x)]
## [1] 7
<-
symbolx <- seq_len(5)
x[2] <- 3
x
## [1] 1 3 3 4 5
x[2] <- x[3] <- 0
x
## [1] 1 0 0 4 5
"["()
worksi
) and columns (j
) indices: x[i,j]
(x <- matrix(1:9, ncol = 3, nrow = 3))
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
x[1, 2]
## [1] 4
i
and j
can be vectors of length greater than one:i <- c(1,3) ; j <- 3
x[i,j] # Elements of first and third row for the third column
## [1] 7 9
i
returns all lines for the j
columnsj
returns all columns for the i
rowsx[, 2] # Elements of the second column
## [1] 4 5 6
x[, -c(1,3)] # x without first and third columns
## [1] 4 5 6
data.frame
, columns are named and can thus be accessed using these nameswomen <-data.frame(height =c(58, 59, 60, 61, 62, 63, 64,
65, 66, 67, 68,69, 70, 71, 72),
weight =c(115, 117, 120, 123, 126, 129, 132, 135,
139,142, 146, 150, 154, 159, 164))
colnames(women) # Names of the columns
## [1] "height" "weight"
rownames(women) # Names of the rows
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14"
## [15] "15"
dimnames(women) # Names of both rows and columns
## [[1]]
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14"
## [15] "15"
##
## [[2]]
## [1] "height" "weight"
$
:women$height
## [1] 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
dplyr
offers many functions that are really easy to use to manipulate data%>%
) operator (from the package magrittr
), which transmits a value as the first argument of the following functionlibrary(magrittr)
mean(x) %>% log()
x
and then apply the logarithm function to the result of mean(x)
. It can also be written in the following (but harder to read) way:log(mean(x))
## [1] 1.609438
data.frame
: select()
library(dplyr)
women %>%
select(height)
## height
## 1 58
## 2 59
## 3 60
## 4 61
## 5 62
## 6 63
## 7 64
## 8 65
## 9 66
## 10 67
## 11 68
## 12 69
## 13 70
## 14 71
## 15 72
data.frame
: select()
and a negative signlibrary(dplyr)
women %>%
select(-height) %>%
head()
## weight
## 1 115
## 2 117
## 3 120
## 4 123
## 5 126
## 6 129
slice()
women %>% slice(4:5)
## height weight
## 1 61 123
## 2 62 126
filter()
women %>%
filter(height == 60)
## height weight
## 1 60 120
women %>%
filter(weight > 120, height <= 62)
## height weight
## 1 61 123
## 2 62 126
rename(data, new_name_1 = old_name_1, new_name_2 = old_name_2)
women <-
women %>%
rename(masse = weight)
head(women)
## height masse
## 1 58 115
## 2 59 117
## 3 60 120
## 4 61 123
## 5 62 126
## 6 63 129
data.frame
:unemp <- data.frame(year = 2012:2008,
unemployed = c(2.811, 2.604, 2.635, 2.573, 2.064),
active_pop = c(28.328, 28.147, 28.157, 28.074, 27.813))
mutate()
unemp <-
unemp %>%
mutate(unemp_rate = unemployed/active_pop*100,
log_unemployed = log(unemployed),
year = year / 1000)
head(unemp)
## year unemployed active_pop unemp_rate log_unemployed
## 1 2.012 2.811 28.328 9.923044 1.0335403
## 2 2.011 2.604 28.147 9.251430 0.9570487
## 3 2.010 2.635 28.157 9.358241 0.9688832
## 4 2.009 2.573 28.074 9.165064 0.9450725
## 5 2.008 2.064 27.813 7.420990 0.7246458
data.frame
:df <- data.frame(last_name = c("Durand", "Martin",
"Martin", "Martin", "Durand"),
first_name = c("Sonia", "Serge", "Julien-Yacine",
"Victor", "Emma"),
grade = c(23, 18, 17, 17, 19))
order()
:df %>% arrange(first_name, last_name)
## last_name first_name grade
## 1 Durand Emma 19
## 2 Martin Julien-Yacine 17
## 3 Martin Serge 18
## 4 Durand Sonia 23
## 5 Martin Victor 17
desc()
(negative sign can be used for numeric columns)df %>% arrange(first_name, desc(last_name))
## last_name first_name grade
## 1 Durand Emma 19
## 2 Martin Julien-Yacine 17
## 3 Martin Serge 18
## 4 Durand Sonia 23
## 5 Martin Victor 17
data.frame
data.frames
from dplyr
have an easy syntax:xxx_join(x, y, by = NULL, copy = FALSE, ...)
x
and y
are the two tables to joinby
is a character vector containing variables used to join the tables (if omitted, a natural join using all variables with common names across the two tables will be done)data.frame
data.frame
to illustrate the different join functions:exportations <- data.frame(year = 2011:2013,
exportations = c(572.6, 587.3, 597.8))
importations <- data.frame(annee = 2010:2012,
importations = c(558.1, 625.3,628.5))
data.frame
inner_join()
: return all rows from x
where there are matching values in y
, and all columns from x
and y
. If there are multiple matches between x
and y
, all combinations of the matches are returnedexportations %>%
inner_join(importations, by = c(year = "annee"))
## year exportations importations
## 1 2011 572.6 625.3
## 2 2012 587.3 628.5
data.frame
left_join()
: return all rows from x
, and all columns from x
and y
. Rows in x
with no match in y
will have NA
values in the new columns. If there are multiple matches between x
and y
, all combinations of the matches are returnedexportations %>%
left_join(importations, by = c(year = "annee"))
## year exportations importations
## 1 2011 572.6 625.3
## 2 2012 587.3 628.5
## 3 2013 597.8 NA
data.frame
right_join()
: return all rows from y
, and all columns from x
and y
. Rows in y
with no match in x
will have NA
values in the new columns. If there are multiple matches between x
and y
, all combinations of the matches are returnedexportations %>%
right_join(importations, by = c(year = "annee"))
## year exportations importations
## 1 2010 NA 558.1
## 2 2011 572.6 625.3
## 3 2012 587.3 628.5
data.frame
semi_join()
: return all rows from x
where there are matching values in y
, keeping just columns from x
exportations %>%
semi_join(importations, by = c(year = "annee"))
## year exportations
## 1 2011 572.6
## 2 2012 587.3
data.frame
anti_join()
: return all rows from x
where there are not matching values in y
, keeping just columns from x
.exportations %>%
anti_join(importations, by = c(year = "annee"))
## year exportations
## 1 2013 597.8
data.frame
full_join()
: return all rows and all columns from both x
and y
. Where there are not matching values, returns NA
for the one missingexportations %>%
full_join(importations, by = c(year = "annee"))
## year exportations importations
## 1 2011 572.6 625.3
## 2 2012 587.3 628.5
## 3 2013 597.8 NA
## 4 2010 NA 558.1
dplyr
offers an easy way: summarise()
data.frame
and one or multiple operations to do on the data.frame
# Unemployment counts by departement and year (the original French comment
# read "number of unemployed engineers and executives").
# NOTE(review): the table also carries an `ouvriers` column next to
# `ingenieurs`, so that original heading described only part of the data.
# Layout: 6 departements x 2 years = 12 rows.
# region: four "Bretagne" then two "Corse", and that pattern twice
chomage <- data.frame(region = rep(c(rep("Bretagne", 4),
rep("Corse", 2)), 2),
# departement: the same six names, in the same order, repeated twice
departement = rep(c("Cotes-d'Armor", "Finistere",
"Ille-et-Vilaine", "Morbihan",
"Corse-du-Sud", "Haute-Corse"), 2),
# annee: 2011 for the first six rows, 2010 for the last six
annee = rep(c(2011, 2010), each = 6),
ouvriers = c(8738, 12701, 11390, 10228, 975, 1297,
8113, 12258, 10897, 9617, 936, 1220),
ingenieurs = c(1420, 2530, 3986, 2025, 259, 254,
1334, 2401, 3776, 1979, 253, 241))
ouvriers
and ingenieurs
:chomage %>%
summarise(moy_ouvriers = mean(ouvriers),
sd_ouvriers = sd(ouvriers),
moy_ingenieurs = mean(ingenieurs),
sd_ingenieurs = sd(ingenieurs))
## moy_ouvriers sd_ouvriers moy_ingenieurs sd_ingenieurs
## 1 7364.167 4801.029 1704.833 1331.482
group_by()
functionchomage %>%
group_by(annee) %>%
summarise(ouvriers = sum(ouvriers),
ingenieurs = sum(ingenieurs))
## Source: local data frame [2 x 3]
##
## annee ouvriers ingenieurs
## (dbl) (dbl) (dbl)
## 1 2010 43041 9984
## 2 2011 45329 10474
chomage %>%
group_by(annee, region) %>%
summarise(ouvriers = sum(ouvriers),
ingenieurs = sum(ingenieurs))
## Source: local data frame [4 x 4]
## Groups: annee [?]
##
## annee region ouvriers ingenieurs
## (dbl) (fctr) (dbl) (dbl)
## 1 2010 Bretagne 40885 9490
## 2 2010 Corse 2156 494
## 3 2011 Bretagne 43057 9961
## 4 2011 Corse 2272 513
tidyr
contains interesting functions to manipulate datadplyr
packagegather()
and spread()
pop <- data.frame(city = c("Paris", "Paris", "Lyon", "Lyon"),
arrondissement = c(1, 2, 1, 2),
pop_municipale = c(17443, 22927, 28932, 30575),
pop_all = c(17620, 23102, 29874, 31131))
gather()
function takes a data.frame
as its first argumentkey
) is the name we want to give to the column that will contain the names of the columns we want to gather, as a factorvalue
) is the name we want to give to the column that will contain the corresponding valuesselect()
function)library(tidyr)
pop_long <-
pop %>%
gather(key = type_pop,
value = population,
pop_municipale,pop_all)
pop_long
## city arrondissement type_pop population
## 1 Paris 1 pop_municipale 17443
## 2 Paris 2 pop_municipale 22927
## 3 Lyon 1 pop_municipale 28932
## 4 Lyon 2 pop_municipale 30575
## 5 Paris 1 pop_all 17620
## 6 Paris 2 pop_all 23102
## 7 Lyon 1 pop_all 29874
## 8 Lyon 2 pop_all 31131
spread()
data.frame
pop_long %>%
spread(type_pop, population)
## city arrondissement pop_municipale pop_all
## 1 Lyon 1 28932 29874
## 2 Lyon 2 30575 31131
## 3 Paris 1 17443 17620
## 4 Paris 2 22927 23102
ggplot2 is a plotting system for R, based on the grammar of graphics, which tries to take the good parts of base and lattice graphics and none of the bad parts. It takes care of many of the fiddly details that make plotting a hassle (like drawing legends) as well as providing a powerful model of graphics that makes it easy to produce complex multi-layered graphics. (http://ggplot2.org/)
ggplot2
are layered baseddata
)mapping
)geom
)stat
)scale
)coord
)facet
)ggplot()
function+
symbolggplot(data, aes(x, y, ...)) + layers
data.frame
load(url("http://egallic.fr/R/films.rda"))
name
: name of the filminitial_release_date
: release dateruntime
: runtimeyear
: year of filmingestimated_budget
: estimated budgetgross_revenue
: gross revenuecountry
: first country given in the list of locationscountry_abr
: country codedata.frame
that focuses only on some countries:country_list <- c("United States of America", "New Zealand",
"United Kingdom", "Spain")
films_s <- films %>%
filter(country %in% country_list)
library(ggplot2)
ggplot(data = films, aes(x = estimated_budget, y = gross_revenue))
library(ggplot2)
ggplot(data = films, aes(x = estimated_budget, y = gross_revenue)) +
geom_point()
colour
shape
size
alpha
fill
aes()
functionaes()
functionggplot(data = films,
aes(x = estimated_budget, y = gross_revenue)) +
geom_point(colour = "dodger blue",
alpha = .8,
aes(size = runtime))
ggplot2
will merge the scalesnumerical
or factor
)ggplot() +
geom_point(data = films,
aes(x = estimated_budget,
y = gross_revenue, col = runtime))
ggplot() +
geom_point(data = films,
aes(x = estimated_budget,
y = gross_revenue, col = country))
ggplot() +
geom_point(data = films,
aes(x = estimated_budget,
y = gross_revenue, col = country))
geom_point()
(useful for maps)geom_line()
geom_polygon()
(useful for maps)geom_path()
geom_step()
geom_boxplot()
geom_jitter()
geom_smooth()
geom_histogram()
geom_bar()
geom_density()
geom_*
functions have some optional parameters
data
mapping
stat
position
ggplot()
scale_
colour
, fill
, linetype
, ...)manual
, discrete
, gradient
, ...)p <- ggplot(data = films_s,
aes(x = estimated_budget,
y = gross_revenue, colour = runtime)) +
geom_point()
p
p + scale_colour_gradient(name = "Runtime", low = "#FF0000", high ="#FFFF00")
p <- ggplot(data = films_s,
aes(x = estimated_budget,
y = gross_revenue,
colour = country,
size = runtime)) +
geom_point()
p
colour
scale to set it to a grey colour scale:p + scale_colour_grey(name = "Country",
start = .1, end = .8,
na.value = "orange")
data.frame
will change the order in the legendfilms_s$country %>% factor() %>% levels()
## [1] "New Zealand" "Spain"
## [3] "United Kingdom" "United States of America"
# Order in which the countries must appear in the legend.
# Every entry has to match a value of `country` exactly: factor() turns any
# value that is absent from `levels` into NA, so a misspelled name would
# silently remove that country from the data and from the legend.
new_order <- c(
  "New Zealand", "Spain",
  "United Kingdom",
  "United States of America"
)
# Re-encode `country` as a factor whose levels follow new_order
films_s <- films_s %>%
  mutate(country = factor(country, levels = new_order))
(p <- p + scale_colour_manual(name = "Country",
values = c("Spain" = "green", "New Zealand" = "red",
"United States of America" = "orange",
"United Kingdom" = "blue"),
labels = c("Spain" = "ES", "New Zealand" = "NZ",
"United States of America" = "USA",
"United Kingdom" = "UK")))
range(films_s$runtime)
## [1] 66 375
p + scale_size_continuous(name = "Film\nDuration",
breaks = c(0, 60, 90, 120, 150, 300, Inf),
range = c(1,10))
ggplot2
regroups observation in a bunch of casesgroup
argument in the aes()
functionlibrary(reshape2)
df <- data.frame(year = rep(1949:1960, each = 12),
month = rep(1:12, 12),
passengers = c(AirPassengers))
head(df)
## year month passengers
## 1 1949 1 112
## 2 1949 2 118
## 3 1949 3 132
## 4 1949 4 129
## 5 1949 5 121
## 6 1949 6 135
ggplot(data = df, aes(x = month, y = passengers)) + geom_line()
year
:ggplot(data = df,
aes(x = month, y = passengers, group = year)) +
geom_line()
ggtitle()
function, though it might be better practice to leave it blank and leave that to \(\LaTeX\)ggplot(data = films,
aes(x = estimated_budget/1e6, y = gross_revenue/1e6)) +
geom_point() + ggtitle("a wonderful title")
xlab()
and ylab()
functions enable to modify axis labelsp <- ggplot(data = films,
aes(x = estimated_budget/1e6, y = gross_revenue/1e6)) +
geom_point() + ggtitle("Titre") +
xlab("x axis label") + ylab("y axis label")
ggsave()
functionggsave(filename = "my_grapg.pdf", plot = p, width = 15, height = 8)
rworldmap
Packagerworldmap
packagegetMap()
functiondata.frame
so it can be used by ggplot2
: we use fortify()
to go from a SpatialPolygonsDataFrame
to a data.frame
library(ggplot2)
library(rworldmap)
rworldmap
PackageworldMap <- getMap()
world_df <- fortify(worldMap)
## Regions defined for each Polygons
head(world_df)
## long lat order hole piece id group
## 1 61.21082 35.65007 1 FALSE 1 Afghanistan Afghanistan.1
## 2 62.23065 35.27066 2 FALSE 1 Afghanistan Afghanistan.1
## 3 62.98466 35.40404 3 FALSE 1 Afghanistan Afghanistan.1
## 4 63.19354 35.85717 4 FALSE 1 Afghanistan Afghanistan.1
## 5 63.98290 36.00796 5 FALSE 1 Afghanistan Afghanistan.1
## 6 64.54648 36.31207 6 FALSE 1 Afghanistan Afghanistan.1
rworldmap
Packagegroup
argument to define polygons (otherwise, ggplot2
will join all the points together)coord_quickmap()
worldmap <- ggplot() +
geom_polygon(data = world_df, aes(x = long, y = lat, group = group)) +
scale_y_continuous(breaks = (-2:2) * 30) +
scale_x_continuous(breaks = (-4:4) * 45) +
coord_equal()
rworldmap
Packageworldmap
rworldmap
Packagecoord_map()
function, we can modify the coordinate system(worldmap <- ggplot() +
geom_polygon(data = world_df, aes(x = long, y = lat, group = group)) +
scale_y_continuous(breaks = (-2:2) * 30) +
scale_x_continuous(breaks = (-4:4) * 45) +
coord_map("ortho", orientation=c(61, 90, 0)))
rworldmap
Packagerworldmap
data are not very precise. It is useful to do maps at the global scale, but we need to get other data if we want to focus on more specific areasmaps
package contains some other maps with a finer scalemap_data()
function (from ggplot2
) relies on the map()
function from the package of the same namedata.frame
, already arranged to be used by ggplot()
!maps
PackageName | Description |
---|---|
county |
American counties |
france |
France |
italy |
Italy |
nz |
New-Zealand |
state |
United States with all states |
usa |
United States |
world |
World Map |
world2 |
World Map centered on Pacific |
maps
Packageregion
argumentmap_fr <- map_data("france")
# Region names
head(unique(map_fr$region))
## [1] "Nord" "Pas-de-Calais" "Somme" "Ardennes"
## [5] "Seine-Maritime" "Aisne"
head(map_fr, 3)
## long lat group order region subregion
## 1 2.557093 51.09752 1 1 Nord <NA>
## 2 2.579995 51.00298 1 2 Nord <NA>
## 3 2.609101 50.98545 1 3 Nord <NA>
maps
Package(p_map_fr <- ggplot(data = map_fr,
aes(x = long, y = lat, group = group, fill = region)) +
geom_polygon() + coord_equal() + scale_fill_discrete(guide = "none"))
maps
Packagelibrary(stringr)
ind_bzh <-
map_fr$region %>%
unique() %>%
str_detect(regex("armor|finis|vilaine|morb",
ignore_case = TRUE))
(dep_bzh <- unique(map_fr$region)[ind_bzh])
## [1] "Cotes-Darmor" "Finistere" "Ille-et-Vilaine" "Morbihan"
map_fr_bzh <- map_data("france", region = dep_bzh)
maps
Package(p_map_fr_bzh <-
ggplot(data = map_fr_bzh,
aes(x = long, y = lat, group = group, fill = region)) +
geom_polygon() + coord_equal() + scale_fill_discrete(name = "Département"))
rgdal
package provides the readOGR()
function that loads data from a shapefile into the R sessionlibrary(rgdal)
library(maptools)
library(ggplot2)
library(dplyr)
# Import shp data
rennes <- readOGR(dsn="./quartiers_shp_lamb93", layer="quartiers")
## OGR data source with driver: ESRI Shapefile
## Source: "./quartiers_shp_lamb93", layer: "quartiers"
## with 12 features
## It has 2 fields
# Re-express the coordinates as longitude/latitude (GRS80 ellipsoid)
rennes <- spTransform(rennes, CRS("+proj=longlat +ellps=GRS80"))
# Add an id column holding the row names of the attribute table; it is the
# key used by the two steps below
rennes@data$id <- rownames(rennes@data)
# Transform the data so it ends up in a ggplot2-friendly data.frame, with the
# regions identified by that id
rennes_points <- fortify(rennes, region="id")
# Bring the columns of the attribute table back next to the coordinates,
# matching rows on id (original note: "to avoid holes")
rennes_df <- plyr::join(rennes_points, rennes@data, by="id")
(p_map_rennes <-
ggplot(data = rennes_df,
aes(x = long, y = lat, group = group)) +
geom_polygon() +
coord_equal())
ggplot2
, it is quite simple: just add a variable to the data.frame
# Unemployment rate for the four departements kept in map_fr_bzh
# (presumably first quarter of 2014, going by the variable name).
# stringsAsFactors = FALSE keeps `region` as character on R < 4.0 as well, so
# it has the same type as the character `region` column it is joined with.
tx_chomage_2014_T1 <- data.frame(
  region = c("Cotes-Darmor", "Finistere",
             "Ille-et-Vilaine", "Morbihan"),
  tx_chomage_2014_T1 = c(8.8, 8.8, 7.9, 9.1),
  stringsAsFactors = FALSE
)
# Add value for tx_chomage_2014_T1 on each line of the data.frame.
# The key is named explicitly so that the match is made on `region` only and
# does not depend on whichever column names the two tables happen to share.
map_fr_bzh <-
  map_fr_bzh %>%
  left_join(tx_chomage_2014_T1, by = "region")
fill
aesthetics!(p_map_fr_bzh <-
ggplot(data = map_fr_bzh,
aes(x = long, y = lat, group = group,
fill = tx_chomage_2014_T1)) +
geom_polygon() + coord_quickmap() +
scale_fill_gradient(name = "Département", low ="#FFFF00", high = "#FF0000"))
# Find the coordinates of the median point
#' Midpoint of the range of a vector
#'
#' @param x A vector accepted by `range()`; missing values are ignored.
#' @return The median of the smallest and the largest value of `x`.
mid_range <- function(x) {
  bounds <- range(x, na.rm = TRUE)
  median(bounds)
}
# One label position per departement: the mid-range of the longitudes and of
# the latitudes of its polygon points.
center <-
  map_fr_bzh %>%
  group_by(region) %>%
  dplyr::summarise(long = mid_range(long),
                   lat = mid_range(lat))
# Attach the unemployment rate and format it as a percentage label.
# The join key is spelled out so the match is made on `region` only, instead
# of on every column name the two tables have in common.
center <-
  center %>%
  dplyr::left_join(tx_chomage_2014_T1, by = "region") %>%
  dplyr::mutate(label_unemp = paste0(tx_chomage_2014_T1, "%"))
# Write each rate at the position computed for its departement
p_map_fr_bzh + annotate("text", x = center$long,
                        y = center$lat, label = center$label_unemp)