# R language and programming
# Graphics
# Faculty of Economics, University of Rennes 1
# 2014-2015
# Master 1 Statistics & Econometrics
# Ewen Gallic
# ewen.gallic[at]univ-rennes1.fr
# http://editerna.free.fr/wp
#
# Every statement sits on its own line(s) so that the "## ----" chunk
# separators no longer comment out the code that follows them.

## install.packages("ggplot2")
library(ggplot2)

## ------------------------------------------------------------------------
# Load the example data from a remote .rda file (needs network access).
# The rest of the script expects this to define the `films` data frame.
load(url("http://editerna.free.fr/films.rda"))

## ------------------------------------------------------------------------
# Keep only the films from four countries.
pays_liste <- c(
  "United States of America", "New Zealand", "United Kingdom", "Spain"
)
# `%in%` never returns NA, so the logical mask can index the rows directly.
films_reduit <- films[films$country %in% pays_liste, ]

## ------------------------------------------------------------------------
# Scatter plot: gross revenue against estimated budget.
ggplot(data = films, aes(x = estimated_budget, y = gross_revenue)) +
  geom_point()

## ------------------------------------------------------------------------
# Same plot, stored in an object and printed explicitly.
p <- ggplot(data = films, aes(x = estimated_budget, y = gross_revenue)) +
  geom_point()
print(p)

## ------------------------------------------------------------------------
# Fixed colour and transparency; point size mapped to runtime.
ggplot(data = films, aes(x = estimated_budget, y = gross_revenue)) +
  geom_point(colour = "dodger blue", alpha = .8, aes(size = runtime))

## ------------------------------------------------------------------------
# Colour mapped to a numeric variable.
ggplot() +
  geom_point(
    data = films,
    aes(x = estimated_budget, y = gross_revenue, col = runtime)
  )

## ------------------------------------------------------------------------
# Colour mapped to the country variable.
ggplot() +
  geom_point(
    data = films,
    aes(x = estimated_budget, y = gross_revenue, col = country)
  )

## ------------------------------------------------------------------------
# Same mapping, declared in ggplot() instead of in the layer.
ggplot(
  data = films,
  aes(x = estimated_budget, y = gross_revenue, col = country)
) +
  geom_point()

## ------------------------------------------------------------------------
ggplot() +
  geom_point(
    data = films,
    aes(x = estimated_budget, y = gross_revenue, col = country)
  )

## ------------------------------------------------------------------------
# Colour set as a constant (outside aes()).
ggplot(data = films, aes(x = estimated_budget, y = gross_revenue)) +
  geom_point(colour = "red")

## ------------------------------------------------------------------------
# Lines, one colour per country.
ggplot(
  data = films,
  aes(x = estimated_budget, y = gross_revenue, col = country)
) +
  geom_line()

## ------------------------------------------------------------------------
# Lines, grouped by country without a colour mapping.
ggplot(
  data = films,
  aes(x = estimated_budget, y = gross_revenue, group = country)
) +
  geom_line()

## ------------------------------------------------------------------------
# Polygon drawn from four vertices.
df <- data.frame(x = c(0, 0.2, 1, 0.75), y = c(0, 1, 1, 0.5))
ggplot() +
  geom_polygon(data = df, aes(x = x, y = y), fill = "light green")

## ------------------------------------------------------------------------
# Box plots of runtime, one per country.
ggplot(
  data = films_reduit,
  aes(x = country, y = runtime, fill = country)
) +
  geom_boxplot()

## ------------------------------------------------------------------------
# Single box plot: a constant factor is used as the x variable.
ggplot(data = films_reduit, aes(x = factor(1), y = runtime)) +
  geom_boxplot()

## ------------------------------------------------------------------------
# Budget by country, as points ...
ggplot(
  data = films_reduit,
  aes(x = country, y = estimated_budget, col = country)
) +
  geom_point()

## ------------------------------------------------------------------------
# ... and as jittered points.
ggplot(
  data = films_reduit,
  aes(x = country, y = estimated_budget, col = country)
) +
  geom_jitter()

## ------------------------------------------------------------------------
# Scatter plot with a smoother (default method).
ggplot(data = films, aes(x = estimated_budget, y = gross_revenue)) +
  geom_point() +
  geom_smooth()

## ------------------------------------------------------------------------
# Scatter plot with a linear fit and a 90% confidence band.
ggplot(data = films, aes(x = estimated_budget, y = gross_revenue)) +
  geom_point() +
  stat_smooth(method = "lm", level = 0.9)

## ------------------------------------------------------------------------
# Histogram of budget (in millions) per minute of runtime, by country.
ggplot(
  data = films_reduit,
  aes(x = (estimated_budget / 1000000) / runtime, fill = country)
) +
  geom_histogram(binwidth = 0.1, colour = "dark grey")

## ------------------------------------------------------------------------
# Density of the same ratio, by country.
ggplot(
  data = films_reduit,
  aes(x = (estimated_budget / 1000000) / runtime, fill = country)
) +
  geom_density(colour = "black", alpha = .5)

## ------------------------------------------------------------------------
# Histogram of runtime (counts).
ggplot(data = films_reduit, aes(x = runtime)) +
  geom_histogram()

## ------------------------------------------------------------------------
# Histogram of runtime on the density scale.
ggplot(data = films_reduit, aes(x = runtime)) +
  geom_histogram(aes(y = ..density..))

## ------------------------------------------------------------------------
# Count histogram with a density line on top.
p <- ggplot(data = films_reduit, aes(x = runtime))
p +
  geom_histogram() +
  geom_line(stat = "density", col = "red", size = 1.2)

## ------------------------------------------------------------------------
# Histogram and density line both on the density scale.
p <- ggplot(data = films_reduit, aes(x = runtime, y = ..density..))
p +
  geom_histogram(colour = "white") +
  geom_line(stat = "density", col = "red", size = 1.2)

## ------------------------------------------------------------------------
# Density line only.
ggplot(data = films_reduit, aes(x = runtime, y = ..density..)) +
  geom_line(stat = "density", col = "red", size = 1.2)

## ------------------------------------------------------------------------
# Scales: continuous colour.
p <- ggplot(
  data = films_reduit,
  aes(x = estimated_budget, y = gross_revenue, colour = runtime)
) +
  geom_point()
p

## ------------------------------------------------------------------------
p + scale_colour_gradient(name = "Runtime", low = "#FF0000", high = "#FFFF00")

## ------------------------------------------------------------------------
# Scales: discrete colour, continuous size.
p <- ggplot(
  data = films_reduit,
  aes(
    x = estimated_budget, y = gross_revenue,
    colour = country, size = runtime
  )
) +
  geom_point()

## ------------------------------------------------------------------------
p

## ------------------------------------------------------------------------
p + scale_colour_grey(
  name = "Country", start = .1, end = .8, na.value = "orange"
)

## ------------------------------------------------------------------------
# Display the factor levels of country; the unnamed `values` and `labels`
# below are given in the same number (four).
levels(factor(films_reduit$country))

## ------------------------------------------------------------------------
p + scale_colour_manual(
  name = "Country",
  values = c("red", "green", "blue", "orange"),
  labels = c("NZ", "ES", "UK", "USA")
)

## ------------------------------------------------------------------------
# Same scale with named vectors, so the order no longer matters.
(p <- p + scale_colour_manual(
  name = "Country",
  values = c(
    "Spain" = "green",
    "New Zealand" = "red",
    "United States of America" = "orange",
    "United Kingdom" = "blue"
  ),
  labels = c(
    "Spain" = "ES",
    "New Zealand" = "NZ",
    "United States of America" = "USA",
    "United Kingdom" = "UK"
  )
))

## ------------------------------------------------------------------------
range(films_reduit$runtime)

## ------------------------------------------------------------------------
p + scale_size_continuous(
  name = "Film\nDuration",
  breaks = c(0, 60, 90, 120, 150, 300, Inf),
  range = c(1, 10)
)

## ------------------------------------------------------------------------
# Date scale: convert the release date to class Date first.
films_reduit$initial_release_date2 <-
  as.Date(films_reduit$initial_release_date)
(p_2 <- ggplot(
  data = films_reduit,
  aes(x = initial_release_date2, y = runtime)
) +
  geom_point())

## ------------------------------------------------------------------------
library(scales)
p_2 + scale_x_date(
  breaks = date_breaks("10 year"),
  labels = date_format("%Y")
)

## ------------------------------------------------------------------------
# NOTE(review): `country_abr` is not created anywhere in the visible script;
# presumably it is a column of the loaded `films` data -- confirm.
(p <- ggplot(
  data = films_reduit,
  aes(
    x = estimated_budget, y = gross_revenue,
    colour = country_abr, size = country_abr
  )
) +
  geom_point())

## ------------------------------------------------------------------------
p + scale_colour_discrete(name = "Country")

## ------------------------------------------------------------------------
# Giving both scales the same name.
p +
  scale_colour_discrete(name = "Country") +
  scale_size_discrete(name = "Country")

## ------------------------------------------------------------------------
library(reshape2)
# Monthly AirPassengers series as a data frame (one row per year/month).
df <- data.frame(
  year = rep(1949:1960, each = 12),
  month = rep(1:12, 12),
  passengers = c(AirPassengers)
)

## ------------------------------------------------------------------------
head(df)
## ------------------------------------------------------------------------
# Line plot without grouping: all years are joined into a single path.
# `df` is the year/month/passengers data frame built earlier in the script.
ggplot(data = df, aes(x = month, y = passengers)) +
  geom_line()

## ------------------------------------------------------------------------
# One line per year.
ggplot(data = df, aes(x = month, y = passengers, group = year)) +
  geom_line()

## ------------------------------------------------------------------------
# Base scatter plot reused by the annotation examples below.
p <- ggplot(
  data = films_reduit,
  aes(x = estimated_budget, y = gross_revenue)
) +
  geom_point()
p

## ------------------------------------------------------------------------
# Text annotation at a given position.
p + annotate("text", x = 1e8, y = 2e9, label = "Du texte")

## ------------------------------------------------------------------------
# Several text annotations at once.
p + annotate(
  "text",
  x = c(1e8, 2e8), y = 2e9,
  label = c("Du texte", "Un autre texte"),
  colour = c("red", "blue")
)

## ------------------------------------------------------------------------
# parse = TRUE: the label is parsed as an expression.
p + annotate(
  "text",
  x = 1e8, y = 2e9,
  label = "sqrt(1-alpha) + beta[i+1]^n",
  parse = TRUE
)

## ------------------------------------------------------------------------
# Vertical lines.
p + geom_vline(
  xintercept = seq(0, 3e8, by = 1e8),
  size = 1, col = "dodger blue"
)

## ------------------------------------------------------------------------
# Horizontal lines.
p + geom_hline(
  yintercept = seq(0, 2e9, by = 1e9),
  col = "gold", linetype = "longdash"
)

## ------------------------------------------------------------------------
# Line defined by intercept and slope.
p + geom_abline(intercept = 1e9, slope = -5)

## ------------------------------------------------------------------------
# Segment drawn through a geom ...
p + geom_segment(aes(x = 0, xend = 1e8, y = 0, yend = 1e9), col = "blue")

## ------------------------------------------------------------------------
# ... and the same segment drawn through annotate().
p + annotate(
  geom = "segment",
  x = 0, xend = 1e8, y = 0, yend = 1e9,
  col = "blue"
)

## ------------------------------------------------------------------------
# arrow() and unit() are used here, hence library(grid).
library(grid)
p + annotate(
  geom = "segment",
  x = 0, xend = 1e8, y = 0, yend = 1e9,
  col = "blue",
  arrow = arrow(length = unit(0.5, "cm"))
)

## ------------------------------------------------------------------------
# Rectangles added with annotate(); -Inf/Inf are used as the y limits of the
# first one. `p` is the scatter plot defined earlier in the script.
p +
  annotate(
    geom = "rect",
    xmin = 1e8, xmax = 2e8, ymin = -Inf, ymax = Inf,
    alpha = .3, fill = "red"
  ) +
  annotate(
    geom = "rect",
    xmin = 0, xmax = 5e8, ymin = 1e9, ymax = 2e9,
    alpha = .2, fill = "dodger blue"
  )

## ------------------------------------------------------------------------
# Scatter plot on a random sample of 1000 diamonds.
# seq_len() is the safe spelling of 1:nrow().
p <- ggplot() +
  geom_point(
    data = diamonds[sample(seq_len(nrow(diamonds)), 1000), ],
    aes(x = carat, y = price, colour = cut)
  )

## ------------------------------------------------------------------------
# Position adjustments, illustrated with bars.
# NOTE(review): `country_abr` is not created anywhere in the visible script;
# presumably it is a column of the loaded `films` data -- confirm.
p <- ggplot(data = films_reduit, aes(x = runtime, fill = country_abr))

## ------------------------------------------------------------------------
p + geom_bar(position = "dodge")

## ------------------------------------------------------------------------
p + geom_bar(position = "fill")

## ------------------------------------------------------------------------
p + geom_bar(position = "identity")

## ------------------------------------------------------------------------
p + geom_bar(position = "jitter")

## ------------------------------------------------------------------------
p + geom_bar(position = "stack")

## ------------------------------------------------------------------------
# Facets: flag films by whether year <= 2000.
films_reduit$old <- ifelse(films_reduit$year <= 2000, "ancien", "nouveau")

## ------------------------------------------------------------------------
p <- ggplot(
  data = films_reduit,
  aes(
    x = estimated_budget, y = gross_revenue,
    colour = country, size = runtime
  )
) +
  geom_point()

## ------------------------------------------------------------------------
# One column of panels per country.
p + facet_grid(. ~ country)

## ------------------------------------------------------------------------
# Rows by `old`, columns by country.
p + facet_grid(old ~ country)

## ------------------------------------------------------------------------
# With margins for both variables.
p + facet_grid(old ~ country, margins = TRUE)

## ------------------------------------------------------------------------
# With a margin for country only.
p + facet_grid(old ~ country, margins = "country")

## ------------------------------------------------------------------------
p + facet_wrap(facets = ~ country)

## ------------------------------------------------------------------------
p + facet_wrap(facets = ~ country + old)

## ------------------------------------------------------------------------
# Same scatter plot with both axes divided by one million.
p_m <- ggplot(
  data = films_reduit,
  aes(
    estimated_budget / 1000000, gross_revenue / 1000000,
    colour = country, size = runtime
  )
) +
  geom_point()

## ------------------------------------------------------------------------
p_m + facet_wrap(~ country, scales = "fixed")

## ------------------------------------------------------------------------
p_m + facet_wrap(~ country, scales = "free_y")

## ------------------------------------------------------------------------
# Coordinate systems.
(p <- ggplot(
  data = films,
  aes(x = estimated_budget / 1e6, y = gross_revenue / 1e6)
) +
  geom_point())

## ------------------------------------------------------------------------
p + coord_flip()

## ------------------------------------------------------------------------
p + coord_trans(x = "log10", y = "log10")

## ------------------------------------------------------------------------
p + scale_x_log10()

## ------------------------------------------------------------------------
# Title.
ggplot(
  data = films,
  aes(x = estimated_budget / 1e6, y = gross_revenue / 1e6)
) +
  geom_point() +
  ggtitle("Titre")

## ------------------------------------------------------------------------
# Title and axis labels.
ggplot(
  data = films,
  aes(x = estimated_budget / 1e6, y = gross_revenue / 1e6)
) +
  geom_point() +
  ggtitle("Titre") +
  xlab("Étiquette axe des x") +
  ylab("Étiquette axe des y")
## ------------------------------------------------------------------------
# Two squares (group 1 and group 2), used to compare the two ways of
# restricting the x axis below.
df <- data.frame(
  x = c(0, 0, 5, 5, 0, 0, 10, 10),
  y = c(0, 5, 5, 0, 10, 15, 15, 10),
  g = factor(rep(1:2, each = 4))
)

## ------------------------------------------------------------------------
(p_2 <- ggplot(data = df, aes(x = x, y = y, group = g, fill = g)) +
  geom_polygon())

## ------------------------------------------------------------------------
# Restrict x through the scale limits ...
p_2 + xlim(0, 7)

## ------------------------------------------------------------------------
# ... or through the coordinate system.
p_2 + coord_cartesian(xlim = c(0, 7))

## ------------------------------------------------------------------------
# Scatter plot on a random sample of 1000 diamonds.
# seq_len() is the safe spelling of 1:nrow().
p <- ggplot() +
  geom_point(
    data = diamonds[sample(seq_len(nrow(diamonds)), 1000), ],
    aes(x = carat, y = price, colour = cut)
  )

## ------------------------------------------------------------------------
library(grid)
# NOTE(review): `country_abr` is not created anywhere in the visible script;
# presumably it is a column of the loaded `films` data -- confirm.
p <- ggplot(
  data = films_reduit,
  aes(
    x = estimated_budget / 1e6, y = gross_revenue / 1e6,
    colour = country_abr
  )
) +
  # Draw the points
  geom_point() +
  # Add a title
  ggtitle("Titre\nsur deux lignes") +
  # Change the axis labels
  xlab("Étiquette axe des x") +
  ylab("Étiquette axe des y") +
  # Change the legend title
  scale_colour_discrete(name = "Country")

## ------------------------------------------------------------------------
p

## ------------------------------------------------------------------------
# Customise individual theme elements.
p_2 <- p + theme(
  plot.title = element_text(
    family = "Times", face = "bold", colour = "red",
    size = rel(2), hjust = 0, lineheight = 1.5
  ),
  axis.title = element_text(face = "bold", colour = "orange"),
  axis.text.x = element_text(colour = "blue", angle = 45),
  axis.ticks = element_line(colour = "brown", size = rel(2)),
  legend.key = element_rect(fill = "dodger blue", colour = "red"),
  legend.background = element_rect(
    fill = "green", colour = "purple", linetype = "twodash"
  ),
  panel.grid.minor = element_blank(),
  panel.grid.major = element_line(colour = "black")
)
## ------------------------------------------------------------------------
# Display the themed plot (`p_2` and `p` are built earlier in the script).
p_2

## ------------------------------------------------------------------------
# Apply a complete built-in theme.
p + theme_bw()

## ------------------------------------------------------------------------
# Scatter plot on a random sample of 1000 diamonds.
# seq_len() is the safe spelling of 1:nrow().
p <- ggplot() +
  geom_point(
    data = diamonds[sample(seq_len(nrow(diamonds)), 1000), ],
    aes(x = carat, y = price, colour = cut)
  )

## ------------------------------------------------------------------------
# Plot used for the (commented-out) export example below.
p <- ggplot(
  data = films_reduit,
  aes(x = estimated_budget, y = gross_revenue, colour = country)
) +
  geom_point() +
  xlab("Estimated budget") +
  ylab("Gross Revenue") +
  scale_colour_discrete(name = "Country") +
  ggtitle("A small sample of movies")

## ------------------------------------------------------------------------
p

## ------------------------------------------------------------------------
## ggsave(p, file = "estim_bud.pdf", width = 15, height = 8, unit = "cm", scale = 2)

## ------------------------------------------------------------------------
library(ggplot2)
library(rworldmap)

## ------------------------------------------------------------------------
# World map
worldMap <- getMap()
# Convert to a format ggplot() can read
world_df <- fortify(worldMap)
head(world_df)

## ------------------------------------------------------------------------
# Breaks every 30 units on y and every 45 units on x.
worldmap <- ggplot() +
  geom_polygon(data = world_df, aes(x = long, y = lat, group = group)) +
  scale_y_continuous(breaks = (-2:2) * 30) +
  scale_x_continuous(breaks = (-4:4) * 45) +
  coord_equal()

## ------------------------------------------------------------------------
worldmap

## ------------------------------------------------------------------------
# Same map with the "ortho" projection of coord_map().
(worldmap <- ggplot() +
  geom_polygon(data = world_df, aes(x = long, y = lat, group = group)) +
  scale_y_continuous(breaks = (-2:2) * 30) +
  scale_x_continuous(breaks = (-4:4) * 45) +
  coord_map("ortho", orientation = c(61, 90, 0)))

## ------------------------------------------------------------------------
# Polygon data for the "france" map.
map_fr <- map_data("france")
# Region names (`map_fr` is the result of map_data("france"), built earlier
# in the script)
head(unique(map_fr$region))
head(map_fr, 3)

## ------------------------------------------------------------------------
# One fill colour per region, legend hidden.
(p_map_fr <- ggplot(
  data = map_fr,
  aes(x = long, y = lat, group = group, fill = region)
) +
  geom_polygon() +
  coord_equal() +
  scale_fill_discrete(guide = "none"))

## ------------------------------------------------------------------------
# Select the regions whose name matches one of four patterns
# (case-insensitive).
ind_bzh <- grep(
  "armor|finis|vilaine|morb",
  unique(map_fr$region),
  ignore.case = TRUE
)
(dep_bzh <- unique(map_fr$region)[ind_bzh])
map_fr_bzh <- map_data("france", region = dep_bzh)

## ------------------------------------------------------------------------
(p_map_fr_bzh <- ggplot(
  data = map_fr_bzh,
  aes(x = long, y = lat, group = group, fill = region)
) +
  geom_polygon() +
  coord_equal() +
  scale_fill_discrete(name = "Département"))

## ------------------------------------------------------------------------
library(rgdal)
library(maptools)
library(ggplot2)
library(plyr)

## ------------------------------------------------------------------------
# Import the polygons (reads a local shapefile directory)
rennes <- readOGR(dsn = "./quartiers_shp_lamb93", layer = "quartiers")
# Change the map projection
rennes <- spTransform(rennes, CRS("+proj=longlat +ellps=GRS80"))
# Add an id column so the geometric objects can be joined
rennes@data$id <- rownames(rennes@data)
# Convert to a data frame that ggplot() can use
rennes_points <- fortify(rennes, region = "id")
# Helps avoid possible holes (original author's note)
rennes_df <- join(rennes_points, rennes@data, by = "id")

## ------------------------------------------------------------------------
(p_map_rennes <- ggplot(
  data = rennes_df,
  aes(x = long, y = lat, group = group)
) +
  geom_polygon() +
  coord_equal())

## ------------------------------------------------------------------------
# One value of tx_chomage_2014_T1 per region.
tx_chomage_2014_T1 <- data.frame(
  region = c("Cotes-Darmor", "Finistere", "Ille-et-Vilaine", "Morbihan"),
  tx_chomage_2014_T1 = c(8.8, 8.8, 7.9, 9.1)
)
# Add the value for each region to the map data
ind_match <- match(map_fr_bzh$region, tx_chomage_2014_T1$region)
map_fr_bzh$tx_chomage_2014_T1 <-
  tx_chomage_2014_T1[ind_match, "tx_chomage_2014_T1"]

## ------------------------------------------------------------------------
# Fill mapped to the numeric value, on a yellow-to-red gradient.
(p_map_fr_bzh <- ggplot(
  data = map_fr_bzh,
  aes(x = long, y = lat, group = group, fill = tx_chomage_2014_T1)
) +
  geom_polygon() +
  coord_equal() +
  scale_fill_gradient(name = "Département", low = "#FFFF00", high = "#FF0000"))

## ------------------------------------------------------------------------
# Midpoint of the range of x, ignoring NAs; used as the central point of
# each polygon.
mid_range <- function(x) mean(range(x, na.rm = TRUE))
# Apply mid_range to lat and long within each region.
centres <- ddply(map_fr_bzh, .(region), colwise(mid_range, .(lat, long)))
# Add the unemployment rates
ind_match <- match(centres$region, tx_chomage_2014_T1$region)
centres$tx_chomage_2014_T1 <- tx_chomage_2014_T1$tx_chomage_2014_T1[ind_match]
label_chomage <- paste0(centres$tx_chomage_2014_T1, "%")

## ------------------------------------------------------------------------
# Print each label at the centre computed for its region.
p_map_fr_bzh + annotate(
  "text",
  x = centres$long, y = centres$lat,
  label = label_chomage
)

## ------------------------------------------------------------------------
library(MASS)
set.seed(1)
# Bivariate normal sample
Sigma <- matrix(c(10, 3, 3, 2), 2, 2)
biv_n <- mvrnorm(n = 1000, rep(0, 2), Sigma)
# Kernel density estimate on a 50 x 50 grid
biv_n_kde <- kde2d(biv_n[, 1], biv_n[, 2], n = 50)

## ------------------------------------------------------------------------
# Perspective plot of the estimated density.
persp(biv_n_kde, theta = 10, phi = 15, xlab = "X")

## ------------------------------------------------------------------------
## library(rgl)
## set.seed(1)
## n <- 10000
## x <- rnorm(n, mean = 38)
## y <- rnorm(n, mean = 42)
##
## biv_kde <- kde2d(x, y, n = 50)
## den_z <- biv_kde$z
##
## surface3d(biv_kde$x,biv_kde$y,den_z*20,color="#FF2222",alpha=0.5)