# A tibble: 115 × 88
country type_Air transport, p…¹ type_Diabetes preval…² type_Domestic genera…³
<chr> <chr> <chr> <chr>
1 Algeria value 2020 last 10 value 2020
2 Bahrain value 2020 last 10 value 2020
3 Egypt value 2020 last 10 value 2020
4 Iran value 2020 last 10 value 2020
5 Iraq value 2020 last 10 value 2020
6 Israel value 2020 last 10 value 2020
7 Jordan value 2020 last 10 value 2020
8 Kuwait value 2020 last 10 value 2020
9 Lebanon value 2020 last 10 value 2020
10 Morocco value 2020 last 10 value 2020
# ℹ 105 more rows
# ℹ abbreviated names: ¹`type_Air transport, passengers carried`,
# ²`type_Diabetes prevalence (% of population ages 20 to 79)`,
# ³`type_Domestic general government health expenditure (% of general government expenditure)`
# ℹ 84 more variables:
# `type_Domestic private health expenditure per capita (current US$)` <chr>,
# `type_Domestic private health expenditure per capita, PPP (current international $)` <chr>, …
Let us check if there are duplicate countries:
# Count repeated country names. Calling `duplicated()` on the whole data frame
# only flags rows that are identical in every column, so a country listed
# twice with different values would not be detected.
sum(duplicated(df_all_covid[["country"]]))
[1] 0
3.2 Principal Component Analysis
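Let us define a function that performs a principal component analysis on a group of variables. For each country, it combines the coordinates on all principal components into a single score, weighting each component by the proportion of variance it explains (its eigenvalue divided by the sum of the eigenvalues). We apply it in turn to each synthetic factor S1, S2, S3, S4, S5, S6.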
#' Performs PCA on a group of variables (S1, S2, S3, S4, S5, or S6)
#'
#' Reads the global data frame `df_all_covid`, keeps `country` plus the
#' columns named `value_<variable>`, runs a scaled PCA keeping as many
#' components as there are variables, and collapses each country's
#' coordinates into one score: the sum over components of the coordinate
#' times the share of variance explained by that component.
#'
#' @param variables_to_keep group of variables concerned (character vector of
#'   variable names, without the `value_` prefix)
#' @param type (string) name of group of variables; used as the suffix of the
#'   score column (`coordinate_<type>`)
#' @return A list with two data frames: `coord` (columns `coordinate_<type>`
#'   and `country`) and `weights` (columns `prop_var_expl` and `dim`).
get_res_acp <- function(variables_to_keep, type) {
  # Keep only the variables considered in the database
  df_acp <- df_all_covid |>
    select(country, all_of(str_c("value_", variables_to_keep))) |>
    rename_with(~ str_remove(., "^value_"))

  # Use the country name (first column) as row names; keep the original
  # variable names untouched (they contain spaces and punctuation)
  df_acp <- data.frame(df_acp, row.names = 1, check.names = FALSE)

  # PCA
  pca_res <- PCA(
    df_acp,
    graph = FALSE,
    scale.unit = TRUE,
    ncp = length(variables_to_keep)
  )

  # Obtain the coordinates of individuals
  ind_data <- get_pca_ind(pca_res)
  ind_coord <- data.frame(ind_data$coord)

  # Obtain the proportion of eigenvalues relating to each factor
  # (second column of the eigenvalue table, divided by 100)
  eigen <- get_eigenvalue(pca_res)
  weights <- data.frame(eigen[, 2] / 100)

  # Coordinates: crossprod(t(X), w) is X %*% w, i.e. one weighted sum of
  # component coordinates per country
  df_coord <- data.frame(crossprod(t(ind_coord), weights[, 1]))
  df_coord <- df_coord |>
    mutate(country = rownames(df_coord))
  rownames(df_coord) <- NULL
  names(df_coord) <- c(paste("coordinate", type, sep = "_"), "country")

  names(weights) <- "prop_var_expl"
  rownames(weights) <- NULL
  weights <- weights |>
    mutate(dim = row_number())

  list(
    coord = df_coord,
    weights = weights
  )
} # End of get_res_acp
3.2.1 S1: healthcare infrastructure
# Indicators that make up S1 (healthcare infrastructure)
variables_to_keep <- c(
  "Physicians (per 1,000 people)",
  "Hospital beds (per 1,000 people)",
  "Nurses and midwives (per 1,000 people)",
  "Domestic general government health expenditure (% of general government expenditure)",
  "Domestic private health expenditure per capita (current US$)",
  "Domestic private health expenditure per capita, PPP (current international $)"
)

# Run the PCA once, then split the result into its two components
pca_s1 <- get_res_acp(variables_to_keep, type = "s1")
df_coordinate_s1 <- pca_s1[["coord"]]
prop_var_expl_s1 <- pca_s1[["weights"]]
3.2.2 S2: vulnerability to comorbidities
# Indicators that make up S2 (vulnerability to comorbidities)
variables_to_keep <- c(
  "Incidence of malaria (per 1,000 population at risk)",
  "Incidence of HIV, all (per 1,000 uninfected population)",
  "Incidence of tuberculosis (per 100,000 people)",
  "Diabetes prevalence (% of population ages 20 to 79)",
  "Mortality from CVD, cancer, diabetes or CRD between exact ages 30 and 70 (%)"
)

# Run the PCA once, then split the result into its two components
pca_s2 <- get_res_acp(variables_to_keep, type = "s2")
df_coordinate_s2 <- pca_s2[["coord"]]
prop_var_expl_s2 <- pca_s2[["weights"]]
3.2.3 S3: Vulnerability to natural environment
# Indicators that make up S3 (vulnerability to natural environment)
variables_to_keep <- c(
  "Mortality rate attributed to household and ambient air pollution, age-standardized (per 100,000 population)",
  "PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)",
  "PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-1 value (% of total)",
  "PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-2 value (% of total)",
  "PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-3 value (% of total)",
  "Air transport, passengers carried",
  "International tourism, number of arrivals",
  "Ecological Footprint"
)

# Run the PCA once, then split the result into its two components
pca_s3 <- get_res_acp(variables_to_keep, type = "s3")
df_coordinate_s3 <- pca_s3[["coord"]]
prop_var_expl_s3 <- pca_s3[["weights"]]
3.2.4 S4: Living conditions
# Indicators that make up S4 (living conditions)
variables_to_keep <- c(
  "People using at least basic drinking water services (% of population)",
  "People using at least basic sanitation services (% of population)",
  "Prevalence of undernourishment (% of population)",
  "Prevalence of anemia among women of reproductive age (% of women ages 15-49)",
  "Poverty headcount ratio at $3.65 a day (2017 PPP) (% of population)",
  "Poverty headcount ratio at $6.85 a day (2017 PPP) (% of population)",
  "Poverty headcount ratio at national poverty lines (% of population)",
  "GDP per capita (current US$)",
  "GDP per capita, PPP (current international $)",
  "Urban density"
)

# Run the PCA once, then split the result into its two components
pca_s4 <- get_res_acp(variables_to_keep, type = "s4")
df_coordinate_s4 <- pca_s4[["coord"]]
prop_var_expl_s4 <- pca_s4[["weights"]]
3.2.5 S5: Economic and societal characteristics
# Indicators that make up S5 (economic and societal characteristics)
variables_to_keep <- c(
  "Population, total",
  "GDP per capita growth (annual %)",
  "International migrant stock (% of population)",
  "Population ages 65 and above (% of total population)",
  "Individuals using the Internet (% of population)",
  "Mobile cellular subscriptions (per 100 people)",
  "Shadow size Economy",
  "Gini index (CIA estimate)"
)

# Run the PCA once, then split the result into its two components
pca_s5 <- get_res_acp(variables_to_keep, type = "s5")
df_coordinate_s5 <- pca_s5[["coord"]]
prop_var_expl_s5 <- pca_s5[["weights"]]
#' Theme for maps with ggplot2
#'
#' Builds a theme with transparent backgrounds and no axes, grid lines or
#' panel border, then layers any user-supplied settings on top of it.
#'
#' @param ... arguments passed to the theme function. They are applied after
#'   the defaults below, so they override a default with the same name.
#' @return A ggplot2 theme object.
#' @export
#' @importFrom ggplot2 element_rect element_text element_blank element_line unit
#'   rel
theme_map_paper <- function(...) {
  theme(
    text = element_text(family = "Times"),
    plot.background = element_rect(fill = "transparent", color = NA),
    panel.background = element_rect(fill = "transparent", color = NA),
    panel.border = element_blank(),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank(),
    plot.title.position = "plot",
    legend.text = element_text(size = rel(1.2)),
    legend.title = element_text(size = rel(1.2)),
    legend.background = element_rect(fill = "transparent", color = NULL),
    legend.key = element_blank(),
    legend.key.height = unit(2, "line"),
    legend.key.width = unit(1.5, "line"),
    strip.background = element_rect(fill = NA),
    panel.spacing = unit(1, "lines"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = unit(c(1, 1, 1, 1), "lines"),
    strip.text = element_text(size = rel(1.2))
  ) +
    # Forward `...` in a second theme() call instead of inside the first one,
    # so that overriding one of the defaults above does not fail with
    # "formal argument matched by multiple actual arguments".
    theme(...)
}
A Shapefile that allows us to display the level 0 world administrative boundaries, freely available on the online open data platform “opendatasoft”, can be loaded:
library(sf)
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
There are some mismatching country names between the map file and the epidemic data. The names from the map file can be manually changed:
# Align the country names of the map file with those of the epidemic data.
# The lookup is written as old = new and spliced into recode(); names that
# are not listed are left unchanged.
world_map <- world_map |>
  mutate(
    name = recode(
      name,
      !!!c(
        "Brunei Darussalam" = "Brunei",
        "Côte d'Ivoire" = "Cote d'Ivoire",
        "Democratic Republic of the Congo" = "Democratic Republic of Congo",
        "Swaziland" = "Eswatini",
        "Iran (Islamic Republic of)" = "Iran",
        "Lao People's Democratic Republic" = "Laos",
        "Russian Federation" = "Russia",
        "Republic of Korea" = "South Korea",
        "Syrian Arab Republic" = "Syria",
        "United Republic of Tanzania" = "Tanzania",
        "U.K. of Great Britain and Northern Ireland" = "United Kingdom",
        "United States of America" = "United States"
      )
    )
  )
# Long format: one row per (country, synthetic factor) with its normalised
# coordinate
df_coordinates_2 <- df_coordinates |>
  select(country, coord_norm_s1:coord_norm_s6) |>
  pivot_longer(
    cols = -country,
    names_to = "synthetic_factor",
    values_to = "coord_norm"
  )

# Display label of each synthetic factor, keyed by its column name. The names
# are used both to drive the joins below and as explicit factor levels, so
# the code-to-label mapping no longer depends on alphabetical ordering.
# NOTE(review): "comorbidites" looks like a typo for "comorbidities"; the
# string is left unchanged because later code may match on this label.
synthetic_factor_labels <- c(
  "coord_norm_s1" = "S1: Healthcare infrastructure",
  "coord_norm_s2" = "S2: Vulnerability to comorbidites",
  "coord_norm_s3" = "S3: Vulnerability to natural environment",
  "coord_norm_s4" = "S4: Living conditions",
  "coord_norm_s5" = "S5: Economic and societal characteristics",
  "coord_norm_s6" = "S6: Policy variables"
)

# One copy of the map per synthetic factor, stacked in a single bind_rows()
# call rather than growing the result inside a loop
world_map_coords <- lapply(
  names(synthetic_factor_labels),
  function(v) {
    map_current <- world_map |>
      left_join(
        df_coordinates_2 |>
          filter(synthetic_factor == v),
        by = c("name" = "country")
      )
    # Countries without a match get NA from the join; tag every row with the
    # current factor so they still appear in that factor's panel
    map_current$synthetic_factor <- v
    map_current
  }
) |>
  bind_rows()

world_map_coords <- world_map_coords |>
  mutate(
    synthetic_factor = factor(
      synthetic_factor,
      levels = names(synthetic_factor_labels),
      labels = unname(synthetic_factor_labels)
    )
  )