Section 7 Land cover classification and change detection

In this script, we will process land cover polygons and rasters for multiple time periods (1848 and 2018) across the Nilgiri hills of the Western Ghats biodiversity hotspot. For the year 1848, a survey map (created by Captain John Ouchterlony) was obtained from the British Library and the Tamil Nadu State Archive and was manually digitized to arrive at multiple land cover classes (for more information on the process of digitization, please read the accompanying manuscript and use the READMe section in the GitHub repository). For the year 2018, we relied on satellite imagery from Sentinel-2 (the imagery was classified using Google Earth Engine and more information on the classification can be obtained below).

7.1 Load necessary libraries

library(sf)
library(raster)
library(terra)
library(tmap)
library(stars)
library(dplyr)
library(tidyverse)
library(mapview)
library(landscapemetrics)
library(scico)
library(extrafont)
library(exactextractr)

7.2 Processing digitized shapefiles from the 1848 map

We will be loading shapefiles that were digitized by Amrutha Rajan for the 1848 historical map.

# list all shapefiles in the directory
nil1848 <- list.files("data/landcover/1848-nilgiris/", full.names = T, recursive = T, pattern=".shp$")

# create vector files
ag1848 <- st_read(nil1848[1]) # type: multipolygon; 6 empty geometries
noData1848 <- st_read(nil1848[2]) # type: multipolygon
plantations1848 <- st_read(nil1848[3]) # type: polygon & multipolygon; 1 empty geometry
roads1848 <- st_read(nil1848[4]) # type: linestring
settlements1848 <- st_read(nil1848[5]) # type: polygon & multipolygon; 3 geometries empty
sholaForest1848 <- st_read(nil1848[6]) # type: polygon & multippolygon; 6 geometries empty  
sholaGrassland1848 <- st_read(nil1848[7]) # type: multipolygon
swamps1848 <- st_read(nil1848[8]) # type: polygon & multipolygon; eight geometries are empty
waterBodies1848 <- st_read(nil1848[9]) # type: multipolygon

# explore and fix any issues with the above vector files
# we need to ensure consistency across files for the sake of merging them into a single geometry collection

# we notice a range of small issues with the shapefiles above
# the geometry type is variable and needs to be consistent
# empty geometries need to be removed
# attribute names need to be consistent across shapefiles

# first, we will remove empty geometries
ag1848 <- ag1848[!st_is_empty(ag1848), ]
noData1848 <- noData1848[!st_is_empty(noData1848), ]
plantations1848 <- plantations1848[!st_is_empty(plantations1848),]
roads1848 <- roads1848[!st_is_empty(roads1848),]
settlements1848 <- settlements1848[!st_is_empty(settlements1848),]
sholaForest1848 <- sholaForest1848[!st_is_empty(sholaForest1848),]
sholaGrassland1848 <- sholaGrassland1848[!st_is_empty(sholaGrassland1848),]
swamps1848 <- swamps1848[!st_is_empty(swamps1848),]
waterBodies1848 <- waterBodies1848[!st_is_empty(waterBodies1848),]

# fixing attribute tables to ensure they are consistent across shapefiles
names(ag1848) <- c("id", "name","geometry")
ag1848$name <- "agriculture"
names(noData1848) <- c("id", "name","geometry")
noData1848$name <- "no_data"
names(plantations1848) <- c("id", "name","geometry")
plantations1848$name <- "plantations"
names(roads1848) <- c("id", "name","geometry")
roads1848$name <- "roads"
names(settlements1848) <- c("id", "name","geometry")
settlements1848$name <- "settlements"
names(sholaForest1848) <- c("id", "name","geometry")
sholaForest1848$name <- "shola_forest"
names(sholaGrassland1848) <- c("id", "name","geometry")
sholaGrassland1848$name <- "shola_grassland"
names(swamps1848) <- c("id", "name","geometry")
swamps1848$name <- "swamps"
names(waterBodies1848) <- c("id", "name","geometry")
waterBodies1848$name <- "water_bodies"

# Note: the roads shapefile is a linestring while the other geometry types are polygon/multipolygon

# transform to UTM 43N
ag1848 <- st_transform(ag1848, 32643)
noData1848 <- st_transform(noData1848, 32643)
plantations1848 <- st_transform(plantations1848, 32643)
roads1848 <- st_transform(roads1848, 32643)
settlements1848 <- st_transform(settlements1848, 32643)
sholaForest1848 <- st_transform(sholaForest1848, 32643)
sholaGrassland1848 <- st_transform(sholaGrassland1848, 32643)
swamps1848 <- st_transform(swamps1848, 32643)
waterBodies1848 <- st_transform(waterBodies1848, 32643)

# creating a single simple feature collection
nil1848 <- rbind(ag1848, plantations1848, settlements1848,
  sholaForest1848, sholaGrassland1848, swamps1848,
  waterBodies1848)

# subsuming swamps under grasslands
nil1848 <- nil1848 %>%
  mutate(name = case_when(
    name == "swamps" ~ "shola_grassland",
    .default = as.character(name)))

# crop the 1848 shapefiles to within the 1400m contour only
nil1848 <- st_buffer(nil1848, dist = 0)
nil1848 <- nil1848[,-c(3:5)]

# Create a common boundary for clipping files from another time period (2018)(see below)
all1848 <- st_buffer(st_union(nil1848), dist = 0)

7.3 Rasterization of the 1848 shapefiles

The 1848 digitized shapefiles are rasterized for comparison with the 2018 sentinel satellite imagery.

# nil11848 raster
# scale: 1000ft to 1 inch (1:12000; 6 metres resolution)
# This link was used for reference to convert from map scale to raster pixel resolution: https://www.esri.com/arcgis-blog/products/product/imagery/on-map-scale-and-raster-resolution/
vect1848 <- terra::vect(nil1848)
emptyRast <- terra::rast(res = 6, xmin = 660555.3, xmax = 719135.5, ymin = 1240050, ymax = 1277597, crs = "+proj=utm +zone=43 +datum=WGS84 +units=m +no_defs")
rast1848 <- terra::rasterize(vect1848, emptyRast, "name")

7.4 Load the 2018 satellite image

The 2018 satellite image was obtained from Sentinel-2. Cloud-free days (1% cloud cover) were chosen from March 2018 and a composite was created. We then utilized groundtruthing points from [@arasumani2019] and used a Random Forests classifier (n = 1000 trees; Kappa statistic = 93.9%, overall accuracy of 94.8% was obtained on test data) to obtain a classified image with seven land cover classes (similar to the 1848 map). For more details on the classification, please visit this link: https://code.earthengine.google.com/49e2de4f9cd84c8587fc0596d3d5aec3

## load the raster
rast2018 <- terra::rast("data/landcover/2018-nilgiris/2018.tif")
values(rast2018)[values(rast2018) <= 0] <- NA

## convert the raster to a categorical raster
rast2018 <- as.factor(rast2018)

## set land cover class names to the 2018 raster
## note: this was done carefully by examining the values associated
## with each number from classification process (done in GEE)

## create a dataframe with names of classes and their corresponding values
landcover_class <- data.frame(ID = 1:7,
                              name = c( "agriculture",
                                           "shola_forest",
                                           "shola_grassland",
                                           "timber_plantations",
                                           "settlements",
                                           "tea_plantations",
                                           "water_bodies"))
levels(rast2018) <- landcover_class

## resample the 1848 raster to the 2018 raster to match spatial res
## The 1848 raster is at 6 m resolution
## The 2018 raster is at 10 m resolution
rast1848 <- resample(rast1848, rast2018, method = "near")

# visualization of the two rasters
colors2018 <- c(
  '#be4fc4', # agriculture, violetish
  '#025a05',# shola forests, dark green
  '#cbb315', # shola grasslands, yellowish
  '#c17111', # timber plantations, brownish
  '#b0a69d', # settlements, grayish
   '#04a310',# tea plantations, light green
  '#2035df'  # waterbodies, royal blue
)

## side-by-side visualization
colors1848 <- c(
  '#be4fc4', # agriculture, violetish
  '#c17111', # plantations, brownish
  '#b0a69d', # settlements, grayish
  '#025a05',# shola forests, dark green
  '#cbb315', # shola grasslands, yellowish
  '#2035df'  # waterbodies, royal blue
)

## saving a high resolution visualization
png(filename = "figs/fig_landCover_1848_vs_2018.png", 
    width = 12, height = 7, units = "in", res = 300)

par(mfrow = c(1,2))
plot(rast1848, col = colors1848, main = "1848",
     legend = FALSE)
plot(rast2018, col=colors2018, main = "2018", 
     legend=FALSE)

# add custom legend
par(mar = c(0, 0, 0, 0), 
     new = TRUE)
plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n')
legend("bottom",
       legend=landcover_class$name, fill=colors2018, 
       cex = 0.8, 
       title="land cover classes",
       ncol = 2)
dev.off()

7.5 Subsume plantations into one category for 2018

Since 1848 had very few to no tea plantations and only timber plantations were present at the time, we reclassify timber plantations into the plantations category for the 2018 raster to ease comparisons.

# read reclassification matrix
reclassification_matrix <- read.csv("data/landcover/2018-nilgiris/reclassification-matrix.csv")
reclassification_matrix <- as.matrix(reclassification_matrix[, c("V1", "To")])

# reclassification
rast2018_reclassified <- terra::classify(
  x = rast2018,
  rcl = reclassification_matrix
)

## create a dataframe with names of classes and their corresponding values
landcover_reclass <- data.frame(ID = 1:6,
                              name = c("agriculture",
                                           "shola_forest",
                                           "shola_grassland",
                                           "plantations",
                                           "settlements",
                                           "water_bodies"))

levels(rast2018_reclassified) <- landcover_reclass

## side-by-side visualization
colors2018reclass <- c(
  '#be4fc4', # agriculture, violetish
  '#025a05',# shola forests, dark green
  '#cbb315', # shola grasslands, yellowish
  '#c17111', # plantations, brownish
  '#b0a69d', # settlements, grayish
  '#2035df'  # waterbodies, royal blue
)

png(filename = "figs/fig_landCover_1848_vs_2018reclassified.png", 
    width = 12, height = 7, units = "in", res = 300)

par(mfrow = c(1,2))
plot(rast1848, col = colors1848, main = "1848",
     legend = FALSE)
plot(rast2018_reclassified, col=colors2018reclass, main = "2018", 
     legend=FALSE)

# add custom legend
par(mar = c(0, 0, 0, 0), 
     new = TRUE)
plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n')
legend("bottom",
       legend=landcover_reclass$name, fill=colors2018reclass, 
       cex = 0.8, 
       title="land cover classes",
       ncol = 2)
dev.off()

Landcover change between 1848 and 2018

7.6 Write the shapefiles and rasters to file

Please note that the processed rasters above have been resampled to a resolution of 10 metres.

# vectors
st_write(nil1848, "results/landcover/1848.shp", 
         driver = "ESRI Shapefile")

# rasters
terra::writeRaster(rast1848, "results/landcover/1848.tif")
terra::writeRaster(rast2018, "results/landcover/2018.tif")
terra::writeRaster(rast2018_reclassified, "results/landcover/2018reclassified.tif")

7.7 Area-wise comparisons of land cover change

# area-wise calculations for the 2018 raster (the below objects will be used to compare overall areas, alongside the 1848 map)
sz2018 <- cellSize(rast2018, unit = "m")
area2018 <- zonal(sz2018, rast2018, sum)
area2018SqKm <- (area2018$area / 1000000)
area2018 <- cbind(area2018, area2018SqKm)

# area-wise calculations for the 2018 reclassified raster (the below objects will be used to compare overall areas, alongside the 1848 map)
sz2018_reclass <- cellSize(rast2018_reclassified, unit = "m")
area2018_reclass <- zonal(sz2018_reclass, rast2018_reclassified, sum)
area2018SqKm_reclass <- (area2018_reclass$area / 1000000)
area2018_reclass <- cbind(area2018_reclass, area2018SqKm_reclass)

# area-wise calculations for the 1848 raster (the below objects will be used to compare overall areas, alongside the 2018 sentinel satellite image)
sz1848 <- cellSize(rast1848, unit = "m")
area1848 <- zonal(sz1848, rast1848, sum)
area1848SqKm <- (area1848$area / 1000000)
area1848 <- cbind(area1848, area1848SqKm)

7.8 Barplots of land cover change

# Joining all dataframes to create a single one for plotting and visualization

names(area1848) <- c("class", "areaInMeters","sumArea")
names(area2018) <- c("class", "areaInMeters","sumArea")

areaCalc <- purrr::reduce(list(
  area1848, 
  area2018
), dplyr::full_join, by = "class")

names(areaCalc) <- c("class", "areaMt1848", "1848", "areaMt2018", "2018")

areaCalc <- areaCalc %>%
  dplyr::select("class", "1848", "2018") %>%
  pivot_longer(!class, names_to = "Year", values_to = "Area")

# since the 1848 map had timber and tea plantations subsumed and the 2018 had it separated, we will manually edit the same
areaCalc <- areaCalc %>% drop_na()

write.csv(areaCalc, "results/totalArea-by-landCover-timePeriod.csv", row.names = F)

# make plot
fig_area <- ggplot(areaCalc, aes(x = class, 
                                 y = Area, fill = class)) + 
  geom_bar(stat = "identity", 
           position = position_dodge()) + 
  scale_fill_manual(values = c('#be4fc4', '#d73027',  '#b0a69d',
                              '#025a05','#cbb315', '#04a310',
                                '#c17111', '#2035df' ))+
    geom_text(aes(label = round(Area), hjust = "middle", 
                vjust = -0.5), family = "Century Gothic",
    position = position_dodge(), angle = 0, 
    size = 5) +
  facet_wrap(~Year, scales ="free_x") +
  theme_bw() +
  labs(
    x = "\nLand cover type",
    y = "Area in sq.km. \n"
  ) +
  theme(text = element_text(size=14,  family="Century Gothic"),
    axis.title = element_text(
      family = "Century Gothic",
      size = 14, face = "bold"),
    axis.text = element_text(family = "Century Gothic", 
                             size = 14),
    axis.text.x = element_text(angle = 90, vjust = 0.5, 
                               hjust = 1),
    legend.position = "none")

ggsave(fig_area,
  filename = "figs/fig_totalArea_landCover.png", width = 15, height = 13, device = png(), units = "in", dpi = 600)
dev.off()

# get percentArea occupied by each land cover class
percentArea <- areaCalc %>%
  group_by(Year) %>%
  mutate(
    totalArea = sum(Area, na.rm = T),
    percentArea = (Area / totalArea) * 100
  )

# plot figure
fig_percent_area <- ggplot(percentArea, aes(x = class, y = percentArea, fill = class)) +
  geom_bar(stat = "identity", position = position_dodge()) +
   scale_fill_manual(values = c('#be4fc4', '#d73027',  '#b0a69d',
                              '#025a05','#cbb315', '#04a310',
                                '#c17111', '#2035df' ))+
  geom_text(aes(label = round(percentArea, digits = 1), hjust = "middle", vjust = -0.5), position = position_dodge(), family = "Century Gothic", angle = 0, size = 5) +
  facet_wrap(~Year, scales = "free_x") +
  theme_bw() +
  labs(
    x = "\nLand cover type",
    y = "Percent Area \n"
  ) +
  theme(text = element_text(size=14,  family="Century Gothic"),
    axis.title = element_text(
      family = "Century Gothic",
      size = 14, face = "bold"
    ),axis.text = element_text(family = "Century Gothic", 
                               size = 14),
    axis.text.x = element_text(angle = 90, 
                               vjust = 0.5, hjust = 1),
    legend.position = "none")

ggsave(fig_percent_area, filename = "figs/fig_percentArea_landCover.png", width = 14, height = 13, device = png(), units = "in", dpi = 500)
dev.off()

Barplots of change in area of each land cover class between 1848 and 2018

7.9 Validating the 2018 Sentinel-2 classification

In the above classification, we see an increase in forest cover of ~ 132 sq.km between 1848 and 2018. However, is the 2018 classification accurate? Can we compare these estimates to an existing land cover classification from 2017?

nil2017 <- st_read("data/landcover/2017-nilgiris/2017.shp")
nil2017 <- st_intersection(nil2017, all1848)

# Let's add class name to nil2017 shapefile
nil2017 <- nil2017 %>%
  mutate(name = case_when(
    gridcode == 1 ~ "shola_grassland",
    gridcode == 2 ~ "shola_forest",
    gridcode == 3 ~ "timber_plantations",
    gridcode == 4 ~ "tea_plantations",
    gridcode == 6 ~ "settlements",
    gridcode == 7 ~ "agriculture",
    gridcode == 8 ~ "water_bodies"
  ))

# remove older column
nil2017 <- nil2017[,-3]

# renaming for ease of visualization
names(nil2017) <- c("id","area","geometry","name")

## note that the spatial resolution of Landsat is at 30 m spatial resolution
vect2017 <- terra::vect(nil2017)
emptyRast <- terra::rast(res = 30, xmin = 660750, xmax = 718329, ymin = 1240554, ymax = 1275510, crs = "+proj=utm +zone=43 +datum=WGS84 +units=m +no_defs")
rast2017 <- terra::rasterize(vect2017, emptyRast, "name")

# resample the 2018 raster to 30m
rast2018 <- resample(rast2018, rast2017, method = "near")

# mask the 2018 raster with the 2017 one since the 2017 raster was only for area above 1400m
rast2018 <- mask(rast2018, rast2017)

# visualizing the two classifications
colors2017 <- c(
  '#be4fc4', # agriculture, violetish
  '#b0a69d', # settlements, grayish
   '#025a05', # shola forests, dark green
  '#cbb315', # shola grasslands, yellowish
   '#04a310', # tea plantations, light green
  '#c17111', # timber plantations, brownish
  '#2035df'  # waterbodies, royal blue
)

## saving a high resolution visualization
png(filename = "figs/fig_landCover_2018_vs_2017.png", 
    width = 12, height = 7, units = "in", res = 300)

par(mfrow = c(1,2))
plot(rast2018, col = colors2018, main = "2018 Sentinel classification",
     legend = FALSE)
plot(rast2017, col=colors2017, main = "2017 Landsat classification", legend=FALSE)

# add custom legend
par(mar = c(0, 0, 0, 0), 
     new = TRUE)
plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n')
legend("bottom",
       legend=landcover_class$name, fill=colors2018, 
       cex = 0.8, 
       title="land cover classes",
       ncol = 2)
dev.off()

## area-wise calculations
sz2018 <- cellSize(rast2018, unit = "m")
area2018 <- zonal(sz2018, rast2018, sum)
area2018SqKm <- (area2018$area / 1000000)
area2018 <- cbind(area2018, area2018SqKm)

sz2017 <- cellSize(rast2017, unit = "m")
area2017 <- zonal(sz2017, rast2017, sum)
area2017SqKm <- (area2017$area / 1000000)
area2017 <- cbind(area2017, area2017SqKm)

# Joining all dataframes to create a single one for plotting and visualization
names(area2017) <- c("class", "areaInMeters","sumArea")
names(area2018) <- c("class", "areaInMeters","sumArea")

areaCalc <- purrr::reduce(list(
  area2017, 
  area2018
), dplyr::full_join, by = "class")

names(areaCalc) <- c("class", "areaMt2017", "2017", "areaMt2018", "2018")

areaCalc <- areaCalc %>%
  dplyr::select("class", "2017", "2018") %>%
  pivot_longer(!class, names_to = "Year", values_to = "Area")

write.csv(areaCalc, "results/totalArea-by-landCover-2018-vs-2017.csv", row.names = F)

Comparing the 2018 Sentinel classification to the 2017 Landsat classification of the same area revealed no major differences