3 Macroeconomic Data

In this chapter, we present the sources and data formatting of the macroeconomic variables.

We use data from the Central Reserve Bank of Peru. The Consumer Price Index (CPI) data are obtained using the token PN01270PM, the Food Consumer Price Index (CPIA) data with token PN01336PM, the Real Exchange Rate (RER) data with token PN01259PM, Exports data with token PN01461BM, the GDP data with token PN01773AM, the agricultural GDP data with token PN01755AM, the interbank interest rate data with token PN07819NM, and PN02079AM for the manufacturing production.

3.1 Load Data

We have downloaded these series from the the Banco Central De Reserva Del Perú website.

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(labelled)
peru_macro <- 
  readxl::read_excel(
  path = "../data/raw/Macro/data_macro_peru.xlsx",
  skip = 1,
  na = "n.d."
) |> 
  mutate(
    month = case_when(
      str_detect(date, "^Ene") ~ 1,
      str_detect(date, "^Feb") ~ 2,
      str_detect(date, "^Mar") ~ 3,
      str_detect(date, "^Abr") ~ 4,
      str_detect(date, "^May") ~ 5,
      str_detect(date, "^Jun") ~ 6,
      str_detect(date, "^Jul") ~ 7,
      str_detect(date, "^Ago") ~ 8,
      str_detect(date, "^Sep") ~ 9,
      str_detect(date, "^Oct") ~ 10,
      str_detect(date, "^Nov") ~ 11,
      str_detect(date, "^Dic") ~ 12
    ),
    year = as.numeric(str_sub(date, -2)) + 2000
  ) |> 
  mutate(date = lubridate::ym(str_c(year, month, sep = "-")))

Let us rename the columns :

peru_macro <- 
  peru_macro |> 
  select(
    date,
    yy_raw  = PN01773AM, # GDP
    rer_raw = PN01259PM, # Real Exchange Rate
    x_raw   = PN01461BM, # Exports
    r_raw   = PN07819NM, # Interest rate
    pi_raw  = PN01270PM, # CPI
    pia_raw = PN01336PM, # CPI: food
    ya_raw  = PN01755AM, # Agricultural GDP
    ind_prod_raw = PN02079AM # Manufacturing production
  )

3.2 Helper Functions

In order to address any underlying trends in the GDP data, we express this variable in percentage deviations from the Hodrick-Prescott trend. To that end, we define a helper that returns the estimated trend:

#' Applies HP filter for monthly data and returns the trend component
#' 
#' @param x vector of monthly observations
#' @param freq smoothing parameter (\lambda)
hp_filter_trend <- function(x, freq = 14400) {
  res_hp_filter <- mFilter::hpfilter(
    x,
    freq = 14400, 
    type = "lambda", 
    drift = FALSE
  )
  as.vector(res_hp_filter$trend)
}

Furthermore, to eliminate any seasonal components present in the data, we employ the X13 method developed by the Census Bureau. This method enables us to remove seasonal variations and isolate the underlying patterns and dynamics of the variables

#' Removes the seasonality of a vector of monthly values
#'
#' @param x vector of numerical values
#' @param start_date start date of the values
#' @param remove_trend should the estimated trend be removed? Default to ``FALSE
adj_season_X13 <- function(x,
                           start_date,
                           remove_trend = FALSE) {
  x_ts <- ts(x, start = c(year(start_date), month(start_date)), freq = 12)
  # Seasonal Adjustment with X-13ARIMA-SEATS
  x_ts_season <- seasonal::seas(
    x_ts, estimate.maxiter=10000,
    arima.model = "(0 1 1)(0 1 1)",
    x11 = ""
  )
  x_ts_season_df <- timetk::tk_tbl(x_ts_season$data) |>
    mutate(date = lubridate::my(index))

  df_resul <- tibble(
    date = seq(start_date, last(x_ts_season_df$date), by = "month")
  ) |>
    left_join(x_ts_season_df, by = "date") |>
    mutate(val = seasonaladj)

  if (remove_trend) {
    df_resul <- df_resul |>
      mutate(val = seasonaladj - trend)
  }

  df_resul |> pull(val)
}

Note

The hp_filter_trend() and the adj_season_X13() functions are defined in the ../weatherperu/R/detrending.R script.

3.3 Data Pre-Processing

For GDP:

peru_gdp <- 
  peru_macro |> 
  select(date, yy_raw) |> 
  filter(date >= "2003-01-01") |> 
  mutate(
    # Remove seasonality
    yy_sa  = adj_season_X13(yy_raw, ymd("2003-01-01")),
    # Extract trend
    yy_trend = hp_filter_trend(yy_sa, freq = 14400),
    # Percentage dev. from trend
    y = 100 * log(yy_raw / yy_trend)
  )

For agricultural GDP:

peru_ya <- 
  peru_macro |> 
  select(date, ya_raw) |> 
  filter(date >= "2003-01-01") |> 
  mutate(
    # Remove seasonality
    ya_sa  = adj_season_X13(ya_raw, ymd("2003-01-01")),
    # Extract trend
    ya_trend = hp_filter_trend(ya_sa, freq = 14400),
    # Percentage dev. from trend
    ya = 100 * log(ya_raw / ya_trend)
  )

For Real Exchange Rate:

peru_rer <- 
  peru_macro |> 
  select(date, rer_raw) |> 
  filter(date >= "2001-01-01") |> 
  mutate(
    # Remove seasonality
    rer_sa = adj_season_X13(rer_raw, ymd("2001-01-01")),
    rer = rer_sa / 100
  ) |> 
  mutate(
    rer_hp_trend = hp_filter_trend(rer_raw, freq = 14400),
    rer_hp = rer_raw - rer_hp_trend,
    rer_trend = hp_filter_trend(rer_sa, freq = 14400),
    rer_dt_sa = log(rer_sa / rer_trend)
  ) |> 
  select(-rer_hp_trend, -rer_trend, -rer_sa)

For Exports:

peru_x <- 
  peru_macro |> 
  select(date, x_raw) |> 
  filter(date >= "2001-01-01") |> 
  mutate(
    # Remove seasonality
    x = adj_season_X13(100 + x_raw, ymd("2000-12-01")),
    x = x / 100
  )

For the Interest rate:

peru_r <- 
  peru_macro |> 
  select(date, r_raw) |> 
  filter(date >= "2001-01-01") |> 
  rename(r = r_raw) |> 
  mutate(
    r_hp_trend = hp_filter_trend(r, freq = 14400),
    r_hp = r - r_hp_trend
  ) |> 
  select(-r_hp_trend)

For the Consumer Price Index:

peru_cpi <- 
  peru_macro |> 
  select(date, pi_raw) |> 
  mutate(
    # Remove seasonality
    pi_sa = adj_season_X13(pi_raw, ymd("2000-12-01")),
    # Log difference
    pi = c(NA, 100 * diff(log(pi_sa)))
  )

For the Food Consumer Price Index:

peru_cpia <- 
  peru_macro |> 
  select(date, pia_raw) |> 
  mutate(
    # Remove seasonality
    pia_sa = adj_season_X13(pia_raw, ymd("2000-12-01")),
    # Log difference
    pia = c(NA, 100 * diff(log(pia_sa)))
  )

Manufacturing production

peru_ind_prod <- 
  peru_macro |> 
  select(date, ind_prod_raw) |> 
  filter(date >= "2001-01-01") |> 
  mutate(
    # Remove seasonality
    ind_prod_sa  = adj_season_X13(ind_prod_raw, ymd("2001-01-01")),
    # Extract trend
    ind_prod_trend = hp_filter_trend(ind_prod_sa, freq = 14400),
    # Percentage dev. from trend
    ind_prod = 100 * log(ind_prod_raw / ind_prod_trend)
  )

3.4 International Commodity prices

For the international commodity prices, we will not include those in the final macro data, as they are given depending on the crop. We will produce a separate file. We have downloaded these series from the IMF Primary Commodity Price System website.

library(readxl)
int_prices <- read_excel(
  path = "../data/raw/Macro/IMF_DATA.xls",
  sheet = "SELECTED",
  col_types = "text") |> 
  mutate(date = lubridate::ymd(str_c(YEAR,  "-", MONTH, "-01"))) |> 
  rename(
    "FPI"           = "FPI_PFOOD", 
    "FERTILIZER"    = "FERTILIZER_PFERT",   
    "IndexOIL"      = "IndexOIL_POILAPSP",
    "PriceOIL"      = "PriceOIL_POILAPSP", 
    "CORN"          = "CORN_PMAIZMT", 
    "ARROZ CÁSCARA" = "RICE_PRICENPQ", 
    "TRIGO"         = "WHEAT_PWHEAMT"
  ) |> 
  mutate(CORN2 = CORN, 
         FPI2 = FPI) |> 
  select(-c(MONTH, YEAR, CPI_Peru_IMF, CPI_US_IMF, IndexOIL, PriceOIL)) |> 
  pivot_longer(cols = !date, names_to = "product", values_to = "price_int") |> 
  mutate(
    product = case_when(
      product == "FPI"  ~ "PAPA", 
      product == "FPI2" ~ "YUCA", 
      product == "CORN" ~ "MAÍZ AMILÁCEO", 
      product == "CORN2"~ "MAÍZ AMARILLO DURO", 
      TRUE ~ product
    ), 
    product_eng = case_when(
      product == "PAPA" ~ "Potato",
      product == "YUCA" ~ "Cassava",
      product == "ARROZ CÁSCARA" ~ "Rice",
      product == "MAÍZ AMARILLO DURO" ~ "Dent corn"
    ),
    price_int = as.numeric(price_int)
  ) |> 
  arrange(product, date) |> 
  group_by(product) |> 
  mutate(price_int_inf = price_int / lag(price_int) - 1) |> 
  ungroup() |> 
  filter(! year(date) == 2000)

Let us add labels to the columns:

int_prices <- 
  int_prices |> 
  labelled::set_variable_labels(
    date = "Date",
    price_int = "International commodity price",
    price_int_inf = "Int. commodity inflation rate"
  )

And save the resulting table:

save(int_prices, file = "../data/output/macro/df_int_prices.rda")

3.5 Merging the Data

The sample period for our analysis covers the time span from January 2003 (2003M1) to December 2015 (2015M12).

df_macro <- 
  peru_gdp |> 
  select(date, y) |> 
  full_join(
    peru_ya |> select(date, ya),
    by = "date"
  ) |> 
  full_join(
    peru_rer |> select(date, rer, rer_hp, rer_dt_sa),
    by = "date"
  ) |> 
  full_join(
    peru_r |> select(date, r, r_hp),
    by = "date"
  ) |> 
  full_join(
    peru_x |> select(date, x),
    by = "date"
  ) |> 
  full_join(
    peru_cpi |> select(date, pi),
    by = "date"
  ) |>
  full_join(
    peru_cpia |> select(date, pia),
    by = "date"
  ) |>
  full_join(
    peru_ind_prod |> select(date, ind_prod),
    by = "date"
  ) |> 
  arrange(date) |> 
  filter(date >= ymd("2001-01-01"))

Let us add labels to the columns:

df_macro <- 
  df_macro |> 
  labelled::set_variable_labels(
    date = "Date",
    rer_dt_sa = "Real exchange rate",
    rer_hp = "Real exchange rate",
    x  = "Exports",
    pi = "Inflation rate (pp)",
    pia = "Food inflation rate (pp)",
    y = "GDP (pp)",
    ya = "Agricultural GDP (pp)",
    r = "Interest rate (pp)",
    r_hp = "Interest rate (pp)",
    ind_prod = "Manufacturing Prod. (pp)"
  )

And lastly, we need to save the data for further use (especially in Section 10).

save(df_macro, file = "../data/output/macro/df_macro.rda")

3.6 Content of the dataset

Table 3.1: Variables in the `df_macro.rda` file
Variable name	Type	Description
`date`	date	Date of the observation (YYYY-MM-DD)
`y`	numeric	GDP in percentage point, percentage deviation from trend, detrended and seasonally adjusted
`ya`	numeric	Agricultural GDP in percentage point, percentage deviation from trend, detrended and seasonally adjusted
`rer_hp`	numeric	Real exchange rate, detrended using HP filter
`rer_dt_sa`	numeric	Real exchange rate, detrended and seasonally adjusted
`x`	numeric	Exports, seasonally adjusted
`r`	numeric	Interest rate, in percentage point, detrended
`pi`	numeric	Inflation rate, in percentage point
`pia`	numeric	Food inflation rate, in percentage point, seasonally adjusted
`ind_prod`	numeric	Manufacturing Production, in percentage point, percentage deviation from trend, detrended and seasonally adjusted

Table 3.2: Variables in the `int_prices.rda` file
Variable name	Type	Description
`date`	date	Date of the observation (YYYY-MM-DD)
`product`	character	Crop name in Spanish
`price_int`	numeric	International commodity prices
`price_int_inf`	numeric	Growth rate of international commodity prices