Section 6 Calling rates

In this script, we compute calling rates and test if the relationship between predicted counts of individuals and acoustic detections is governed by calling activity.

6.1 Load necessary libraries

library(tidyverse)
library(dplyr)
library(stringr)
library(vegan)
library(scico)
library(data.table)
library(extrafont)
library(sf)
library(raster)
library(scales)
library(ggplot2)
library(ggspatial)
library(colorspace)
library(lubridate)
library(naniar)
library(patchwork)
library(ggrepel)

6.2 Load previous model object and filtered data

load("results/glmm_model_results.RData")
summary_stats <- read.csv("results/glmm_model_abundance_detections.csv")
data <- read.csv("results/datSubset.csv")

# we will also load the unfiltered acoustic data and point count data for calling rate calculation
unfiltered_data <- read.csv("results/pooled_pointCount_acoustic_data.csv")

6.3 Calculating calling rate

Here, we estimate species-specific calling rate as the detections per minute per species and the corresponding visit.

# we work with the unfiltered data first to extract a detections per minute column for each species per visit
detections_per_minute <- unfiltered_data %>%
  filter(data_type == "acoustic_data") %>%
  mutate(minute = floor_date(as.POSIXct(begin_clock_time, format = "%H:%M:%S"), "minute")) %>%
  group_by(site_id, site_name, year, visit_number,
    scientific_name, common_name, 
    minute) %>%
  summarise(detections_per_minute = n(), .groups = 'drop')

# we work with the unfiltered data first to extract a detections per 30s column for each species per visit
# Alternative method using a custom 30-second flooring function
detections_per_30s <- unfiltered_data %>%
  filter(data_type == "acoustic_data") %>%
  mutate(
    # Convert to POSIXct
    time_posix = as.POSIXct(begin_clock_time, format = "%H:%M:%S"),
    # Floor to the nearest 30-second interval
    bin_30s = floor_date(time_posix, "30 seconds")
  ) %>%
  group_by(site_id, site_name, year, visit_number,
    scientific_name, common_name, 
    bin_30s) %>%
  summarise(detections_per_30s = n(), .groups = 'drop')

# let's add the detections per minute or rate of detections for a species per visit back to the filtered data
data <- left_join(data, detections_per_30s) %>%
    replace_na(list(abundance = 0, detections_per_30s = 0))

# visualize calling rate across species
fig_callingRate <- data %>%
  filter(common_name != "Scarlet Tanager") %>% # since it was excluded from the GLMM
  ggplot(., aes(x = common_name, 
                              y = detections_per_30s)) +
  geom_boxplot(fill = "#D29C44FF") +  
  #facet_wrap(~site_name, scales ="free") +          
  theme_bw(base_family = "Century Gothic") +    
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(
      size = 12,
      angle = 45,             
      hjust = 1,              
      face = "italic"         
    ),
    axis.text.y = element_text(size = 14),
    strip.text = element_text(size = 12, face = "bold")
  ) +
  labs(
    x = "Common Name",         
    y = "Calling rate (total detections per minute per visit)",        
    title = "Calling Rate by Species" 
  )

# Hermit thrush has the highest mean rate of calling per minute across the list of 21 species
ggsave(fig_callingRate, filename = "figs/callingRates-by-species.png", width = 12, height = 9, device = png(), units = "in", dpi = 300)
dev.off()

# Load required libraries
library(dplyr)
library(ggplot2)
library(tidyr)
library(lubridate)
library(patchwork)
library(viridis)

# Function to calculate detection rates for different temporal bins
calculate_detection_rates <- function(data, bin_durations_sec) {
  
  results <- data.frame()
  
  for(bin_sec in bin_durations_sec) {
    
    # Convert bin duration to appropriate label
    if(bin_sec < 60) {
      bin_label <- paste0(bin_sec, "s")
    } else if(bin_sec < 3600) {
      bin_label <- paste0(bin_sec/60, "min")
    } else {
      bin_label <- paste0(bin_sec/3600, "hr")
    }
    
    # Create time bins based on the current bin duration
    data_binned <- data %>%
      filter(data_type == "acoustic_data") %>%
      mutate(
        # Convert time to seconds from start of day for easier binning
        time_seconds = as.numeric(as.POSIXct(begin_clock_time, 
                                             format = "%H:%M:%S") - 
                                 as.POSIXct("00:00:00", format = "%H:%M:%S")),
        # Create time bins
        time_bin = floor(time_seconds / bin_sec),
        bin_duration_sec = bin_sec,
        bin_label = bin_label
      ) %>%
      group_by(site_id, site_name, year, visit_number,
               scientific_name, common_name, 
               time_bin, bin_duration_sec, bin_label) %>%
      summarise(
        detections_per_bin = n(),
        bin_start_time = min(time_seconds),
        .groups = 'drop'
      ) %>%
      mutate(
        # Convert to detections per minute for comparison
        detections_per_minute = detections_per_bin / (bin_duration_sec / 60)
      )
    
    results <- rbind(results, data_binned)
  }
  
  return(results)
}

# Function to calculate summary statistics for sensitivity analysis
calculate_sensitivity_stats <- function(binned_data) {
  
  # Calculate statistics for each species and bin duration
  summary_stats <- binned_data %>%
    group_by(scientific_name, common_name, bin_duration_sec, bin_label) %>%
    summarise(
      mean_rate = mean(detections_per_minute, na.rm = TRUE),
      median_rate = median(detections_per_minute, na.rm = TRUE),
      sd_rate = sd(detections_per_minute, na.rm = TRUE),
      cv_rate = sd(detections_per_minute, na.rm = TRUE) / mean(detections_per_minute, na.rm = TRUE),
      q25_rate = quantile(detections_per_minute, 0.25, na.rm = TRUE),
      q75_rate = quantile(detections_per_minute, 0.75, na.rm = TRUE),
      iqr_rate = IQR(detections_per_minute, na.rm = TRUE),
      n_bins = n(),
      n_zero_bins = sum(detections_per_bin == 0),
      prop_zero_bins = n_zero_bins / n_bins,
      .groups = 'drop'
    ) %>%
    # Remove infinite CV values
    mutate(cv_rate = ifelse(is.infinite(cv_rate) | is.nan(cv_rate), NA, cv_rate))
  
  return(summary_stats)
}

# Define temporal bin durations to test (in seconds)
bin_durations <- c(30, 60, 120, 240, 300, 600)  # 30s to 20min
bin_labels <- c("30s", "1min", "2min", "4min", "5min", "10min")

# Calculate detection rates for all bin durations
cat("Calculating detection rates for different temporal bins...\n")
unfiltered_data <- unfiltered_data %>%
  semi_join(data, by = "common_name")

binned_detection_data <- calculate_detection_rates(unfiltered_data, bin_durations)

# Calculate summary statistics
summary_stats <- calculate_sensitivity_stats(binned_detection_data)

# Filter out Scarlet Tanager to match your analysis
summary_stats_filtered <- summary_stats %>%
  filter(common_name != "Scarlet Tanager")

# 1. Heatmap showing coefficient of variation (precision) across species and bin durations
p1_cv <- summary_stats_filtered %>%
  ggplot(aes(x = factor(bin_duration_sec, levels = bin_durations, labels = bin_labels), 
             y = reorder(common_name, mean_rate))) +
  geom_tile(aes(fill = cv_rate), color = "white", size = 0.1) +
  scale_fill_viridis_c(name = "Coefficient of\nVariation", 
                       option = "plasma",
                       na.value = "grey90",
                       trans = "log10") +
  labs(
    title = "Precision Analysis: Coefficient of Variation by Temporal Bin Duration",
    subtitle = "Lower values (darker) indicate more consistent detection rate estimates",
    x = "Temporal Bin Duration",
    y = "Species (ordered by mean calling rate)"
  ) +
  theme_minimal(base_family = "Century Gothic") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text.y = element_text(size = 8, face = "italic"),
    panel.grid = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12)
  )

# 2. Heatmap showing proportion of zero detection bins
p2_zeros <- summary_stats_filtered %>%
  ggplot(aes(x = factor(bin_duration_sec, levels = bin_durations, labels = bin_labels), 
             y = reorder(common_name, mean_rate))) +
  geom_tile(aes(fill = prop_zero_bins), color = "white", size = 0.1) +
  scale_fill_viridis_c(name = "Proportion\nZero Bins", 
                       option = "viridis",
                       direction = -1) +
  labs(
    title = "Data Sparsity Analysis: Proportion of Zero-Detection Bins",
    subtitle = "Darker colors indicate fewer zero-detection bins (better data density)",
    x = "Temporal Bin Duration",
    y = "Species (ordered by mean calling rate)"
  ) +
  theme_minimal(base_family = "Century Gothic") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text.y = element_text(size = 8, face = "italic"),
    panel.grid = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12)
  )

# 3. Line plots showing how detection rates change with bin duration for each species
p3_rates <- summary_stats_filtered %>%
  ggplot(aes(x = bin_duration_sec, y = mean_rate, group = common_name)) +
  geom_line(alpha = 0.7, color = "#D29C44FF") +
  geom_point(alpha = 0.8, color = "#D29C44FF") +
  facet_wrap(~common_name, scales = "free_y", ncol = 4) +
  scale_x_continuous(
    breaks = bin_durations[c(1,2,3,4,5,6)],
    labels = bin_labels[c(1,2,3,4,5,6)]
  ) +
  labs(
    title = "Mean Detection Rate by Temporal Bin Duration",
    subtitle = "Individual species responses to different temporal resolutions",
    x = "Temporal Bin Duration",
    y = "Mean Detection Rate\n(detections per minute)"
  ) +
  theme_minimal(base_family = "Century Gothic") +
  theme(
    strip.text = element_text(size = 8, face = "italic"),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
    axis.text.y = element_text(size = 7),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12)
  )

# 4. Summary metrics across all species
overall_summary <- summary_stats_filtered %>%
  group_by(bin_duration_sec, bin_label) %>%
  summarise(
    mean_cv = mean(cv_rate, na.rm = TRUE),
    median_cv = median(cv_rate, na.rm = TRUE),
    mean_prop_zeros = mean(prop_zero_bins, na.rm = TRUE),
    mean_n_bins = mean(n_bins, na.rm = TRUE),
    n_species = n(),
    .groups = 'drop'
  )

p4_summary <- overall_summary %>%
  pivot_longer(cols = c(mean_cv, mean_prop_zeros), 
               names_to = "metric", values_to = "value") %>%
  mutate(
    metric = case_when(
      metric == "mean_cv" ~ "Mean Coefficient of Variation",
      metric == "mean_prop_zeros" ~ "Mean Proportion Zero Bins"
    )
  ) %>%
  ggplot(aes(x = factor(bin_duration_sec, 
                        levels = bin_durations, labels = bin_labels), 
             y = value, group = metric, color = metric)) +
  geom_line(size = 1.2) +
  geom_point(size = 2) +
  facet_wrap(~metric, scales = "free_y") +
  scale_color_manual(values = c("#D29C44FF", "#2E8B57")) +
  labs(
    title = "Overall Performance Metrics by Temporal Bin Duration",
    subtitle = "Lower values generally indicate better performance",
    x = "Temporal Bin Duration",
    y = "Metric Value"
  ) +
  theme_minimal(base_family = "Century Gothic") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12)
  )

# Display all plots
print(p1_cv)
print(p2_zeros)
print(p3_rates)
print(p4_summary)

# Print recommendations
cat("\n=== SENSITIVITY ANALYSIS RESULTS ===\n")
cat("Temporal bin durations tested:", paste(bin_labels, collapse = ", "), "\n\n")

# Find optimal durations based on different criteria
optimal_precision <- overall_summary %>% 
  filter(!is.na(mean_cv)) %>%
  slice_min(mean_cv, n = 1)

optimal_sparsity <- overall_summary %>% 
  slice_min(mean_prop_zeros, n = 1)

cat("Best precision (lowest CV):", optimal_precision$bin_label, "\n")
cat("Best data density (fewest zero bins):", optimal_sparsity$bin_label, "\n")

# Balanced recommendation
balanced_score <- overall_summary %>%
  mutate(
    norm_cv = scale(mean_cv)[,1],
    norm_zeros = scale(mean_prop_zeros)[,1],
    combined_score = norm_cv + norm_zeros
  ) %>%
  slice_min(combined_score, n = 1)

cat("Recommended duration (balanced performance):", balanced_score$bin_label, "\n")
cat("\nYour current choice of 1 minute ranks #", 
    which(overall_summary$bin_duration_sec == 60), 
    "out of", nrow(overall_summary), "for balanced performance.\n")

# Show the summary table
print(overall_summary)

6.4 Does calling rate vary as a function of number of individuals

Here, we expect that species-specific calling rate would increase with higher counts of individuals at a site. Alternately, there is no increase in the number of individuals and the same individual might produce no variation in calling rates per minute and we may see no relationship between the two.

# visualize detections per minute vs abundance by species
fig_det_rate_abundance <- data %>%
  filter(common_name != "Scarlet Tanager") %>% # since it was excluded from the GLMM
  ggplot(., aes(x = abundance, 
                y = detections_per_minute)) +
  geom_point(color = "#D29C44FF", alpha = 0.7, size = 2) +  
  geom_smooth(method = "lm", se = TRUE, color = "black", linewidth = 1) +
  facet_wrap(~common_name, scales = "free") +          
  theme_bw(base_family = "Century Gothic") +    
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    strip.text = element_text(size = 12, face = "bold")
  ) +
  labs(
    x = "Abundance",         
    y = "Calling rate",        
    title = "Calling rate vs Abundance by Species" 
  )

# several species show an increase in rate of detections per minute with increasing number of individuals
ggsave(fig_det_rate_abundance, filename = "figs/calling-rate-vs-abundance-by-species.png", width = 14, height = 10, device = png(), units = "in", dpi = 300)
dev.off()

6.5 Is calling rate linked to the fit between predicted abundance and acoustic detections?

Here, we ask if calling rate governs the strength of the relationship between predicted abundance and acoustic detections as estimated from the generalized linear mixed effect model (previous script)?

# extract the mean, median, and max calling rate per species
calling_rate_summary <- data %>%
  filter(common_name != "Scarlet Tanager") %>%
  group_by(common_name) %>%
  summarise(
    mean_detections_per_minute = mean(detections_per_minute, na.rm = TRUE),
    median_detections_per_minute = median(detections_per_minute, na.rm = TRUE),
    max_detections_per_minute = max(detections_per_minute, na.rm = TRUE),
    n_observations = n(),
    .groups = 'drop'
  ) 

# join the psuedo r2 values and significance of fit to a single dataframe
for_plot <- left_join(summary_stats[,c(1,11,12)],calling_rate_summary,
                    by = c("Species" = "common_name"))

# note: we remove species that have a ns or no significant fit/association between predicted abundance and acoustic detections

# plot mean detections
fig_pseudoR2_mean_detections <- for_plot %>%
  filter(Significance != "ns") %>%
  ggplot(., aes(x = mean_detections_per_minute, y = Pseudo_R2)) +
  geom_point(color = "#D29C44FF", size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = TRUE, color = "black", linewidth = 1) +
  geom_text_repel(aes(label = Species), 
                  size = 3, 
                  fontface = "italic",
                  max.overlaps = 15) +  
  theme_bw(base_family = "Century Gothic") +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text = element_text(size = 12)
  ) +
  labs(
    x = "Mean Calling Rate (mean detections per minute)",
    y = "Pseudo R²",
    title = "Pseudo R² vs Mean Calling Rate Across Species"
  )

# negative association between pseudo R2 and mean detections per minute
ggsave(fig_pseudoR2_mean_detections, filename = "figs/pseudoR2-vs-mean-calling-rate.png", width = 12, height = 7, device = png(), units = "in", dpi = 300)
dev.off()

# max detections per minute 
fig_pseudoR2_max_detections <- for_plot %>%
  filter(Significance != "ns") %>%
  ggplot(., aes(x = max_detections_per_minute, y = Pseudo_R2)) +
  geom_point(color = "#D29C44FF", size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = TRUE, color = "black", linewidth = 1) +
  geom_text_repel(aes(label = Species), 
                  size = 3, 
                  fontface = "italic",
                  max.overlaps = 15) +  
  theme_bw(base_family = "Century Gothic") +
  theme(
    plot.title = element_text(size = 18, face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text = element_text(size = 12)
  ) +
  labs(
    x = "Max Calling Rate (max detections per minute)",
    y = "Pseudo R²",
    title = "Pseudo R² vs Max Calling Rate Across Species"
  )

# negative association between pseudo R2 and max detections per minute
ggsave(fig_pseudoR2_max_detections, filename = "figs/pseudoR2-vs-max-calling-rate.png", width = 12, height = 7, device = png(), units = "in", dpi = 300)
dev.off()

-do it per hour as well? -then plot it against the number of individuals -