\[ \usepackage{dsfont} \usepackage{xcolor} \require{mathtools} \definecolor{bayesred}{RGB}{147, 30, 24} \definecolor{bayesblue}{RGB}{32, 35, 91} \definecolor{bayesorange}{RGB}{218, 120, 1} \definecolor{grey}{RGB}{128, 128, 128} \definecolor{couleur1}{RGB}{0,163,137} \definecolor{couleur2}{RGB}{255,124,0} \definecolor{couleur3}{RGB}{0, 110, 158} \definecolor{coul1}{RGB}{255,37,0} \definecolor{coul2}{RGB}{242,173,0} \definecolor{col_neg}{RGB}{155, 191, 221} \definecolor{col_pos}{RGB}{255, 128, 106} {\color{bayesorange} P (\text{H} \mid \text{E})} = \frac {{\color{bayesred} P(\text{H})} \times {\color{bayesblue}P(\text{E} \mid \text{H})}} {\color{grey} {P(\text{E})}} \]
This online appendix provides R codes to apply the methods presented in the companion paper (Charpentier, Flachaire, and Gallic (2023)).
1 Data
To illustrate the methods, we rely on the Linked Birth/Infant Death Cohort Data (2013 cohort). The CSV files for the 2013 cohort were downloaded from the NBER collection of Birth Cohort Linked Birth and Infant Death Data of the National Vital Statistics System of the National Center for Health Statistics, on the NBER website. We formated the data (see ./births_stats.html).
Let us load the birth data.
library(tidyverse)
load("../data/births.RData")
Then, we can only keep a subsample of variables to illustrate the method.
<-
base %>%
births mutate(
black_mother = mracerec == "Black",
nonnatural_delivery = rdmeth_rec != "Vaginal"
%>%
) select(
sex, dbwt, cig_rec, wtgain,
black_mother, nonnatural_delivery,
mracerec, rdmeth_rec%>%
) mutate(
cig_rec = replace_na(cig_rec, "Unknown or not stated"),
black_mother = ifelse(black_mother, yes = "Yes", no = "No"),
is_girl = ifelse(sex == "Female", yes = "Yes", no = "No")
%>%
) ::set_variable_labels(
labelledblack_mother = "Is the mother Black?",
is_girl = "Is the newborn a girl?",
nonnatural_delivery = "Is the delivery method non-natural?"
%>%
) rename(birth_weight = dbwt)
base
# A tibble: 3,940,764 × 9
sex birth_weight cig_rec wtgain black_mo…¹ nonna…² mrace…³ rdmet…⁴ is_girl
<fct> <dbl> <fct> <dbl> <chr> <lgl> <fct> <fct> <chr>
1 Male 3017 No 1 No FALSE "White" Vaginal No
2 Female 3367 No 18 No FALSE "White" Vaginal Yes
3 Male 4409 No 36 No FALSE "White" Vaginal No
4 Male 4047 No 25 No FALSE "White" Vaginal No
5 Male 3119 Yes 0 No FALSE "Ameri… Vaginal No
6 Female 4295 Yes 34 No FALSE "White" Vaginal Yes
7 Male 3160 No 47 No FALSE "Ameri… Vaginal No
8 Male 3711 No 39 No FALSE "White" Vaginal No
9 Male 3430 No NA No FALSE "White" Vaginal No
10 Female 3772 No 36 No FALSE "White" Vaginal Yes
# … with 3,940,754 more rows, and abbreviated variable names ¹black_mother,
# ²nonnatural_delivery, ³mracerec, ⁴rdmeth_rec
The variables that were kept are the following:
sex
: sex of the newbornbirth_weight
: Birth Weight (in Grams)cig_rec
: mother smokes cigaretteswtgain
: mother weight gainmracerec
: mother’s raceblack_mother
: is mother black?rdmeth_rec
: delivery methodnonnatural_delivery
: is the delivery method non-natural?
Then, let us discard individuals with missing values.
<-
base %>%
base filter(
!is.na(birth_weight),
!is.na(wtgain),
!is.na(nonnatural_delivery),
!is.na(black_mother)
)
Let us define some colours for later use:
library(wesanderson)
<- wes_palette("Darjeeling1")
colr1 <- wes_palette("Darjeeling2")
colr2 <- colr1[2]
couleur1 <- colr1[4]
couleur2 <- colr2[2]
couleur3
<- "#882255"
coul1 <- "#DDCC77" coul2
We need to load some packages.
library(car)
library(transport)
library(splines)
library(mgcv)
library(expm)
library(tidyverse)
Let us visualize a sample of the data using a scatter plot of \(\boldsymbol{x} = (\text{wtgain}, \text{birth weight})\), conditional on the treatment \(T\). For each value of the treatment, we will add to the scatter plot the iso-density curve such that 95% of the points lie in the ellipse formed by the curve, assuming a Gaussian distribution for the mediator variables.
set.seed(123)
# A sample with only a few points
<- base[sample(1:nrow(base), size = 8000), ] base_s
The treatment variable \(T\), (cig_rec
) indicates whether the mother smokes \(\color{couleur2}{t=1}\) or not \(\color{couleur1}{t=0}\).
Show the codes used to iso-density curves
# Ellipse for smoker mothers
<- dataEllipse(base_s$birth_weight[base_s$cig_rec=="Yes"],
E_C_Y $wtgain[base_s$cig_rec=="Yes"],
base_slevels=0.9,draw = FALSE)
# Ellipse for non-smoker mothers
<- dataEllipse(base_s$birth_weight[base_s$cig_rec=="No"],
E_C_N $wtgain[base_s$cig_rec=="No"],
base_slevels=0.9,draw = FALSE)
Show the codes used to create the plot
plot(base_s$birth_weight[base_s$cig_rec=="Yes"],
$wtgain[base_s$cig_rec=="Yes"],
base_scol = yarrr::transparent(couleur1, trans.val = .9), pch = 19,
main = "", xlab = "Weight of the baby (g)",
xlim = c(1800,4600), ylim = c(0,90),
ylab = "Weight gain of the mother (pounds)", axes = FALSE,cex=.3)
axis(1)
axis(2)
points(base_s$birth_weight[base_s$cig_rec=="No"],
$wtgain[base_s$cig_rec=="No"],
base_scol = yarrr::transparent(couleur2, trans.val = .9), pch = 19, cex = .3)
lines(E_C_N,col=couleur1,lwd=2)
lines(E_C_Y,col=couleur2,lwd=2)
legend("topleft",c("Smoker","Non-smoker"),lty=1,col=c(couleur2,couleur1),bty="n")