Section 5 Modelling historical climate over the study area

Here we use the model formulation chosen in the previous section to model climate variables as a function of geographical predictors over the study area. We use one model formula (henceforth, model) for each climate variable in each season. We model historical climate for 30 year periods, as this is about the temporal resolution of interest.

5.1 Load libraries

# for data
library(tidyverse)
library(glue)

# for gam
library(mgcv)

# for rasters
library(terra)

5.2 Read in data

# Read in geographical covariates of weather data locations
# and rasters of spatial data
data <- read_csv("results/climate/data_for_gam.csv")
stack <- terra::rast("results/climate/raster_gam_stack.tif")

5.3 Model climate as a function of geographic variables

Here we prepare the data and model climate variables as a function of geographic variables. As in the section on modelling the contemporary climate, we add some small error to each geographic variable to facilitate model fitting.

# Prepare geographic variables
data <- mutate(
  data,

  # divide distance by 1000 for km
  coast = coast / 1000,

  # add some error to all physical variables
  # hopefully prevents model issues
  coast = coast + rnorm(length(coast), 0.1, sd = 0.01),
  elev = elev + rnorm(length(coast), 1, sd = 0.01),
  lat = lat + rnorm(length(lat), 0, sd = 0.01)
)

# set distance to coast raster in km
values(stack[["coast"]]) <- values(stack[["coast"]]) / 1000

# Pivot the data to long format and remove the SD of temperature
data <- pivot_longer(
  data,
  cols = c("ppt", "t_mean", "t_sd"),
  names_to = "climvar"
)
data <- dplyr::select(data, !month)

# remove t_sd
data <- filter(
  data, climvar != "t_sd"
)

# Nest the data by climate variable, season, and 30-year bin
data <- nest(
  data,
  data = !c(climvar, season, year_bin)
)

We specify the GAM formulas for temperature and precipitation, chosen in the previous section after examining the mean absolute error (MAE) of predictions for contemporary data (i.e., compared against remotely sensed data).

Temperature is modelled as a smooth function of elevation with three knots. Precipitation is modelled as a smooth function of elevation with three knots, with linear terms for distance from the coast and latitude.

# model formula for temp
form_temp <- "value ~ s(elev, k = 3)"

# model formula for ppt
form_ppt <- "value ~ s(elev, k = 3) + coast + lat"

# Assign formula as a character column in the nested data.frame
data <- mutate(
  data,
  form = if_else(
    climvar == "t_mean", form_temp, form_ppt
  )
)

# Fit models for each climate variable, season, and year bin
# Note that the formulae are converted to the `<formula>` class
# using `as.formula()`
data <- mutate(
  data,
  mod = map2(data, form, function(df, form) {
    gam(
      formula = as.formula(form),
      data = df
    )
  })
)

# NOTE: if mgcv::gam() is excessively slow, consider mgcv::bam() which is
# said to be more suitable for large datasets

We save the model as an R data object.

save(data, file = "results/climate/model_reconstruction_climate.Rds")

5.4 Obtaining model predictions over the study area

We use the predict() method to get climate predictions over the study area, using the spatial raster data as inputs.

# ensure that NAs are removed from the stack object before running the model
# projections

# this tells us that two raster layers within the stack object have NA values
terra::global(stack, fun = "isNA")

# Replace NAs with 0s for distance to coast
stack$coast[is.na(stack$coast)] <- 0

# Replace elevations < 0 with NA and then 0
stack$elev[stack$elev < 0] <- NA
stack$elev[is.na(stack$elev)] <- 0
stack$elev <- as.numeric(stack$elev)

# Get model predictions
model_pred <- map(
  data$mod, function(g) {
    predict(stack, g, type = "response") # order matters here
  }
)

# Assign predicted climate rasters as list column object
data <- mutate(
  data,
  pred = model_pred
)

# Ungroup the data and select relevant columns
data_pred <- ungroup(data) %>%
  select(season, year_bin, climvar, pred)

5.5 Scaling predicted historical climate

Here we aim to scale the predicted historical climate using the scaling layers developed in the previous section. The idea is that the predicted historical climate differs from the true historical climate over the study area in the same way (i.e., with the same ratio) that the contemporary climate prediction differs from the canonical (remotely sensed) climate variables.

We read in the scaling layers and apply them to the data (taking the cell-wise product of the two).

# Read in scaling layers
temp_correction <- terra::rast(
  "results/climate/raster_correction_layers_temp.tif"
)

ppt_correction <- terra::rast(
  "results/climate/raster_correction_layers_ppt.tif"
)

# Match scaling layers to the appropriate prediction layer
data_pred <- mutate(
  data_pred,
  correction = map2(
    season, climvar, function(season, climvar) {
      if (season == "dry" & climvar == "ppt") {
        ppt_correction[["correction_layer_ppt_dry"]]
      } else if (season == "dry" & climvar == "temp") {
        temp_correction[["correction_layer_temp_dry"]]
      } else if (season == "wet" & climvar == "ppt") {
        ppt_correction[["correction_layer_ppt_wet"]]
      } else {
        temp_correction[["correction_layer_temp_wet"]]
      }
    }
  )
)

# Scale predicted historical climate by cell-wise product of
# prediction and scaling layer
data_pred <- mutate(
  data_pred,
  corrected = map2(
    correction, pred, function(ch, pr) {
      pr <- terra::resample(pr, ch) # resampling required
      ch * pr
    }
  )
)

# Assign names to corrected rasters
data_pred <- mutate(
  data_pred,
  names = glue("{climvar}_{year_bin}_{season}")
)

names(data_pred$corrected) <- data_pred$names

# Collect the predictions for each season and climate variable
# into one raster stack each
# Each stack has multiple layers, each representing one 30-year bin
data_reconstructed <- data_pred %>%
  group_by(
    season, climvar
  ) %>%
  summarise(
    corrected = list(Reduce(corrected, f = c))
  )

# Save each raster stack as a TIF file
pwalk(
  data_reconstructed,
  function(season, climvar, corrected) {
    names(corrected) <- glue("{climvar}_{season}_{seq(1870, 2018, 10)}")

    terra::writeRaster(
      corrected,
      glue("results/climate/raster_reconstructed_{climvar}_{season}.tif")
    )
  }
)