Section 2 Exploring point-count data
In this script, we will explore the point-count data to understand bird species diversity patterns across seasons. For this analysis, we rely on two seasons of point-count data, that was carried out in June 2024 and Dec 2024.
## Install required libraries
library(tidyverse)
library(dplyr)
library(stringr)
library(ggplot2)
library(data.table)
library(extrafont)
library(sf)
library(raster)
# for plotting
library(scales)
library(ggplot2)
library(ggspatial)
library(colorspace)
library(scico)
# Source any custom/other internal functions necessary for analysis
source("code/01_internal-functions.R")
2.1 Loading point-count data
# load two seasons of point-count data
point_counts <- read.csv("data/point-count-data.csv")
# removing all mammal species and unidentified bird species
point_counts <- point_counts %>%
filter(birdMamm == "Bird")
# Using gsub to remove text within parentheses including the parentheses
point_counts$eBirdIndiaName2023 <- gsub("\\s*\\([^\\)]+\\)", "", point_counts$eBirdIndiaName2023)
# add species scientific name to the point-count data
sci_name <- read.csv("data/species-taxonomy.csv")
sci_name$eBirdIndiaName2023 <- gsub("\\s*\\([^\\)]+\\)", "", sci_name$eBirdIndiaName2023)
# add scientific_name to the point_count data
point_counts <- left_join(point_counts[,-7], sci_name[,-c(1,2,4)])
# rename columns
names(point_counts)[4] <- "start_time"
names(point_counts)[7] <- "common_name"
names(point_counts)[12] <- "scientific_name"
# write to file
write.csv(point_counts, "results/cleaned-point-count-data.csv", row.names = F)
2.2 Load site level information and create gridded data
# we write a function below to create a grid around the centroid of each location of size 1ha
# the grid is primarily created for visualizing species richness & other associated metrics (this step is optional)
# function to create square grid around a point
create_square_grid <- function(lon, lat, size_ha = 1) {
# convert hectares to degrees (approximate conversion)
# 1 ha = 100m x 100m
# at the equator, 1 degree is approximately 111,320 meters
# so we need to convert 100m to degrees
size_degrees <- sqrt(size_ha) * 100 / 111320
# Create a square polygon
coords <- matrix(c(
lon - size_degrees/2, lat - size_degrees/2, # bottom left
lon + size_degrees/2, lat - size_degrees/2, # bottom right
lon + size_degrees/2, lat + size_degrees/2, # top right
lon - size_degrees/2, lat + size_degrees/2, # top left
lon - size_degrees/2, lat - size_degrees/2 # close the polygon
), ncol = 2, byrow = TRUE)
# Create polygon
pol <- st_polygon(list(coords))
return(pol)
}
# function to obtain the latitude and longitude from a .csv file and create an associated shapefile
create_grid_shapefile <- function(csv_path, output_shp, lon_col = "longitude", lat_col = "latitude") {
# read CSV file
data <- read.csv(csv_path)
# create list to store polygons
polygons <- list()
# create square grid for each point
for(i in 1:nrow(data)) {
polygons[[i]] <- create_square_grid(
data[[lon_col]][i],
data[[lat_col]][i]
)
}
# convert to sf object
grid_sf <- st_sf(
# Include original data
data,
# Convert polygon list to geometry
geometry = st_sfc(polygons, crs = 4326)
)
# write to shapefile
st_write(grid_sf, output_shp, driver = "ESRI Shapefile", append = FALSE)
return(grid_sf)
}
# site-level information
sites <- "data/sites.csv"
grid_shp <- "data/candura_grids.shp"
# Create the grid shapefile
grid_sf <- create_grid_shapefile(
csv_path = sites,
output_shp = grid_shp,
lon_col = "decimalLongitude", # replace with your longitude column name
lat_col = "decimalLatitude" # replace with your latitude column name
)
2.3 Extract species-level information
# estimate abundance across all species for each grid and season
abundance <- point_counts %>%
group_by(gridID, common_name, seasonYear) %>%
summarise(abundance = sum(number)) %>%
ungroup()
# unique species observed across seasons
# in summer 82 unique species were detected
summer_species <- abundance %>%
filter(seasonYear == "2024 Summer") %>%
distinct(common_name)
# 89 unique species were detected in the winter
winter_species <- abundance %>%
filter(seasonYear == "2024 Winter") %>%
distinct(common_name)
# overall list of unique species detected in point counts
# 107 unique species were detected across both seasons
pc_species <- abundance %>%
group_by(common_name) %>%
summarise(cumulative_abundance = sum(abundance)) %>%
left_join(., sci_name[,-c(1,2,4)],
by = c("common_name" = "eBirdIndiaName2023")) %>%
rename(., scientific_name = eBirdScientificName2023) %>%
arrange(desc(cumulative_abundance))
# write to file
write.csv(pc_species, "results/species-in-point-counts.csv",
row.names = F)
# total abundance by species for each season across grids
totAbundance_by_season <- abundance %>%
group_by(common_name, seasonYear) %>%
summarise(totAbundance = sum(abundance))
# species most abundant across seasons include the Yellow-browed Bulbul, Southern Hill Myna, Crimson-backed Sunbird, Greater Racket-tailed Drongo
# estimate richness for point count data (calculated for each site)
richness <- abundance %>%
mutate(forRichness = case_when(abundance > 0 ~ 1)) %>%
group_by(gridID, seasonYear) %>%
summarise(richness = sum(forRichness)) %>%
ungroup()
2.4 Visualizing richness data across seasons
# First, join your species data with the spatial data
grid_with_richness <- grid_sf %>%
left_join(richness, by = "gridID")
# Create the plot
fig_richness_by_season <- ggplot() +
geom_sf(data = grid_with_richness, aes(fill = richness),
alpha=0.9) +
geom_sf_text(data = grid_with_richness,
aes(label = paste(gridID, "\n", richness)),
size = 2) +
facet_wrap(~seasonYear) +
scale_fill_scico(palette = "lajolla")+
theme_minimal() +
labs(fill = "Species\nRichness",
title = "Species Richness by Season") +
theme(text = element_text(family = "Century Gothic", size = 14, face = "bold"),plot.title = element_text(family = "Century Gothic",
size = 14, face = "bold"),
plot.subtitle = element_text(family = "Century Gothic",
size = 14, face = "bold",color="#1b2838"),
axis.title = element_blank())
ggsave("figs/fig_richness_by_season.png", plot = fig_richness_by_season, width = 14, height = 7, units = "in",
dpi = 300, bg = "white")
dev.off()

Species richness by season
2.5 Visualizing abundance by species across seasons
# join abundance data with the spatial data
grid_with_abundance <- grid_sf %>%
left_join(abundance, by = "gridID")
# first, let's create a base grid that has all possible combinations
base_grid <- grid_with_abundance %>%
dplyr::select(gridID, geometry) %>%
distinct()
# get unique seasons
seasons <- unique(grid_with_abundance$seasonYear)
# create a complete grid with all combinations
complete_grid <- base_grid %>%
crossing(seasonYear = seasons) %>%
st_sf()
# get unique species list
species_list <- unique(grid_with_abundance$common_name)
# create PDF
pdf("figs/species_by_season_abundance.pdf", width = 14, height = 7)
# loop through each species
for(sp in species_list) {
# get data for this species
current_species_data <- grid_with_abundance %>%
filter(common_name == sp) %>%
dplyr::select(gridID, seasonYear, abundance) %>%
st_drop_geometry()
# join with complete grid
plot_data <- complete_grid %>%
left_join(current_species_data, by = c("gridID", "seasonYear"))
# Create plot
p <- ggplot() +
geom_sf(data = plot_data, aes(fill = abundance),
alpha = 0.9) +
geom_sf_text(data = plot_data,
aes(label = paste(gridID, "\n",
ifelse(is.na(abundance),
"0", abundance))),
size = 2) +
facet_wrap(~seasonYear) +
scale_fill_scico(palette = "lajolla",
na.value = "grey80")+
theme_minimal() +
theme(text = element_text(family = "Century Gothic", size = 14, face = "bold"),plot.title = element_text(family = "Century Gothic",
size = 14, face = "bold"),
plot.subtitle = element_text(family = "Century Gothic",
size = 14, face = "bold",color="#1b2838"),
axis.title = element_blank()) +
labs(fill = "Abundance",
title = paste("Abundance of", sp, "by Season"))
print(p)
}
dev.off()