Section 13 r sq vs. species traits and calling rates

In this script, we will plot the adjusted r squared values (derived from the abundance vs. detections script) against species-specific traits.

13.1 Load necessary libraries

library(tidyverse)
library(dplyr)
library(stringr)
library(vegan)
library(ggplot2)
library(scico)
library(psych)
library(ecodist)
library(RColorBrewer)
library(ggforce)
library(ggpubr)
library(ggalt)
library(patchwork)
library(sjPlot)
library(ggside)
library(ggstatsplot)
library(extrafont)
library(ggrepel)

# Source any custom/other internal functions necessary for analysis
source("code/01_internal-functions.R")

13.2 Load species trait data and frequency data

trait <- read.csv("data/species-trait-dat.csv")
freq <- read.csv("data/frequency-data.csv")

13.3 Load dataframe containing point count and acoustic data

datSubset <- read.csv("results/datSubset.csv")

13.4 Load adjusted r squared values from previous script

r_sq <- read.csv("results/abundance-detections-regressions.csv")

13.5 Body mass and R squared values

Are species of a certain body mass showing stronger/poorer R squared values (between abundance and acoustic detections)?

r_sq_trait <- left_join(r_sq, trait, by = "scientific_name")

# log transform body mass
r_sq_trait$log_mass <- log10(r_sq_trait$mass)

# visualization
fig_bodyMass_rSqValue <- ggplot(r_sq_trait, aes(x=log_mass,y=r_sq)) +  geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+ 
  geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +  
  theme_bw() +
  stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
                        size = 8) +
  stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
                        size = 8) +
  labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)", 
       x="Log mass\n") +
  theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",
      size = 18, face = "bold"),
      plot.subtitle = element_text(family = "Century Gothic", 
      size = 15, face = "bold",color="#1b2838"),
      axis.title = element_text(family = "Century Gothic",
      size = 18, face = "bold"))

ggsave(fig_bodyMass_rSqValue, filename = "figs/fig_bodyMass_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off() 
Weak fit - in other words, larger-bodied birds do not necessarily have a stronger fit between acoustic detections and abundance (from point count data)
Weak fit - in other words, larger-bodied birds do not necessarily have a stronger fit between acoustic detections and abundance (from point count data)

13.6 Median peak frequency and R squared values

# Only a total of 87 species are left after filtering species with very few templates
nTemplates_10 <- freq %>%
  group_by(eBird_codes) %>%
  count() %>%
  filter(n >= 10)

# left-join to remove species with less than 10 templates in the frequency dataset
freq_10 <- left_join(nTemplates_10[,1], freq)

# calculate median peak frequency
median_pf <- freq_10 %>%
  group_by(eBird_codes) %>%
  summarise(median_peak_freq =  median(peak_freq_in_Hz))

# add scientific_name to median_pf
median_pf <- left_join(median_pf, trait[,c(1,4)], by = "eBird_codes")

# join r sq and median pf dataframes
r_sq_freq <- left_join(r_sq, median_pf, by = "scientific_name")

# visualization
fig_medianPeakFreq_rSqValue <- ggplot(r_sq_freq, aes(x=median_peak_freq,y=r_sq)) +  geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+ 
  geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +  
  theme_bw() +
  stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
                        size = 8) +
  stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
                        size = 8) +
  labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)", 
       x="Median Peak Frequency\n") +
  theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",
      size = 18, face = "bold"),
      plot.subtitle = element_text(family = "Century Gothic", 
      size = 15, face = "bold",color="#1b2838"),
      axis.title = element_text(family = "Century Gothic",
      size = 18, face = "bold"))

ggsave(fig_medianPeakFreq_rSqValue, filename = "figs/fig_medianPeakFrequency_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off() 
No association between adjusted RSq values and median peak frequency
No association between adjusted RSq values and median peak frequency

13.7 Median peak amplitude and R squared values

# calculate median peak amplitude
# we extract only the median of the 95th percentile of values
median_amp <- freq_10 %>%
  group_by(eBird_codes) %>%
  filter(peak_amp >= quantile(peak_amp, 0.95, na.rm = T)) %>%
  summarise(median_peak_amp = median(peak_amp))

# add scientific_name to median_amp
median_amp <- left_join(median_amp, trait[,c(1,4)], by = "eBird_codes")

# join r sq and median amplitude dataframes
r_sq_amp <- left_join(r_sq, median_amp, by = "scientific_name")

# visualization
fig_medianPeakAmplitude_rSqValue <- ggplot(r_sq_amp, aes(x=median_peak_amp,y=r_sq)) +  geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+ 
  geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +  
  theme_bw() +
  stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
                        size = 8) +
  stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
                        size = 8) +
  labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)", 
       x="Median Peak Amplitude\n") +
  theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",
      size = 18, face = "bold"),
      plot.subtitle = element_text(family = "Century Gothic", 
      size = 15, face = "bold",color="#1b2838"),
      axis.title = element_text(family = "Century Gothic",
      size = 18, face = "bold"))

ggsave(fig_medianPeakAmplitude_rSqValue, filename = "figs/fig_medianPeakAmplitude_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off() 
No association between adjusted RSq values and median peak amplitude
No association between adjusted RSq values and median peak amplitude

13.8 Multiple regressions between adjusted R Squared values and species traits

We run multiple linear regressions to ask if a particular species trait is more predictive of a stronger association between acoustic detections and abundance (as derived from point count data).

# prep dataframe that includes all adjusted r squared values and species traits

reg_data <- left_join(r_sq_trait, r_sq_freq[,c(1,8)], 
                      by = "scientific_name") %>%
  left_join(., r_sq_amp[,c(1,8)], by = "scientific_name")

# scale predictors prior to running regression
reg_data$mass_scale <- scale(reg_data$mass)
reg_data$freq_scale <- scale(reg_data$median_peak_freq)
reg_data$amp_scale <- scale(reg_data$median_peak_amp)

# run multiple linear regression
mult_regress <- lm(r_sq ~ mass_scale + freq_scale + amp_scale,
                   data = reg_data)

Summary of the regression analysis:

Call:
lm(formula = r_sq ~ mass_scale + freq_scale + amp_scale, data = reg_data)

Residuals:
Min 1Q Median 3Q Max
-0.2278 -0.1488 -0.0544 0.1280 0.4731

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.16026 0.02961 5.41 2.8e-06 ***
mass_scale -0.03540 0.03428 -1.03 0.31
freq_scale 0.01898 0.03587 0.53 0.60
amp_scale -0.00583 0.03057 -0.19 0.85

Residual standard error: 0.201 on 42 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.0581, Adjusted R-squared: -0.0092
F-statistic: 0.863 on 3 and 42 DF, p-value: 0.468

13.9 Calling rate and R squared values

How does the coefficient of determination (tightness of fit between acoustic detections and abundance from point count data) compare to the calling rate? Here, calling rate for each species is defined as the total acoustic detections divided by the total number of individuals (from point counts). We aim to ask if species that have higher calling rate per individual have a stronger relationship between acoustics and point count data.

# point-count data
# estimate total abundance across all species for each site
abundance <- datSubset %>%
  filter(data_type == "point_count") %>%
  group_by(site_id, restoration_type, scientific_name,
           common_name, eBird_codes) %>% 
  summarise(abundance_pc = sum(number)) %>%
  ungroup()

# estimate total number of detections across the acoustic data
# note: we cannot call this abundance as it refers to the total number of vocalizations across all sites
detections <- datSubset %>%
  filter(data_type == "acoustic_data") %>%
  group_by(site_id, restoration_type, scientific_name,
           common_name, eBird_codes) %>% 
  summarise(detections_aru = sum(number)) %>%
  ungroup()

# create a single dataframe
data <- full_join(abundance, detections)%>%
  replace_na(list(abundance_pc = 0, detections_aru = 0)) 

# identifying species that need to be kept
# only those species that have a minimum abundance value of 10 and minimum detection value of 10
spp_subset <-  data %>%
  group_by(scientific_name) %>%
  summarise(abundance_pc = sum(abundance_pc), detections_aru = sum(detections_aru)) %>%
  ungroup() %>%
  filter(abundance_pc >=10 & detections_aru >= 10)

# subset data
dat_subset <- data %>%
  filter(scientific_name %in% spp_subset$scientific_name) 

# summarizing data (to join with the dataframe on R sq values)
dat_subset <- dat_subset %>%
  group_by(scientific_name, common_name, eBird_codes) %>%
  summarise(abundance_pc = sum(abundance_pc), detections_aru = sum(detections_aru)) %>%
  ungroup()

# join with r squared dataframe
r_sq_callingRate <- left_join(r_sq, dat_subset, by = "scientific_name")

# extract calling rate
r_sq_callingRate$calling_rate <- r_sq_callingRate$detections_aru/r_sq_callingRate$abundance_pc

## visualization
fig_calling_rate_rSq <- ggplot(r_sq_callingRate, aes(x=calling_rate,y=r_sq)) +  geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+ 
  geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +  
  theme_bw() +
  stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
                        size = 6, family = "Century Gothic") +
  stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
                        size = 6, family = "Century Gothic") +
  labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)", 
       x="Calling rate\n") +
  geom_text_repel(aes(label = scientific_name),family = "Century Gothic", fontface = "italic")+
  theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",size = 18, face = "bold"),
      plot.subtitle = element_text(family = "Century Gothic", 
      size = 15, face = "bold",color="#1b2838"),
      axis.title = element_text(family = "Century Gothic",
      size = 18, face = "bold"))

ggsave(fig_calling_rate_rSq, filename = "figs/fig_callingRate_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off() 
No clear association between adjusted RSq values and calling rate
No clear association between adjusted RSq values and calling rate

13.10 Average abundance and average detections and R sq

Here, we ask if the coefficient of determination between acoustic detections and abundance (from point count data) varies as a function of proportion of visits across which a species was detected in point count data and proportion of visits across which a species was detected in acoustic surveys.

# point-count data
nSitesDays_pc <- datSubset %>%
  filter(data_type == "point_count") %>%
  dplyr::select(site_id, date) %>%
  distinct() %>%
  arrange(site_id) %>%
  count(site_id) %>%
  rename(., "nVisits" = n)

avgAbundance <- left_join(abundance, nSitesDays_pc, by = "site_id") %>%
  mutate(avgAbundance = abundance_pc/nVisits)

# acoustic data
nSitesDays_aru <- datSubset %>%
  filter(data_type == "acoustic_data") %>%
  dplyr::select(site_id, date) %>%
  distinct() %>%
  arrange(site_id) %>%
  count(site_id) %>%
  rename(., "nVisits" = n)

avgDetections <- left_join(detections, nSitesDays_aru, 
                           by = "site_id") %>%
  mutate(avgDetections = detections_aru/nVisits)

# create a single dataframe
data <- full_join(avgAbundance[,-7], avgDetections[,-7])%>%
  replace_na(list(avgAbundance = 0, avgDetections = 0,
                  abundance_pc = 0, detections_aru = 0)) 

# identifying species that need to be kept
# only those species that have a minimum abundance value of 10 and minimum detection value of 10
spp_subset <-  data %>%
  group_by(scientific_name) %>%
  summarise(abundance_pc = sum(abundance_pc), detections_aru = sum(detections_aru)) %>%
  ungroup() %>%
  filter(abundance_pc >=10 & detections_aru >= 10)

# subset data
dat_subset <- data %>%
  filter(scientific_name %in% spp_subset$scientific_name) 

# summarizing data (to join with the dataframe on R sq values)
dat_subset <- dat_subset %>%
  group_by(scientific_name, common_name, eBird_codes) %>%
  summarise(avgAbundance = sum(avgAbundance), avgDetections = sum(avgDetections)) %>%
  ungroup()

# join with r squared dataframe
r_sq_avgData <- left_join(r_sq, dat_subset, by = "scientific_name")

## visualization
fig_avgAbundance_rSq <- ggplot(r_sq_avgData, aes(x=avgAbundance,y=r_sq)) +  geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+ 
  geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +  
  theme_bw() +
  stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
                        size = 6, family = "Century Gothic") +
  stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
                        size = 6, family = "Century Gothic") +
  labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)", 
       x="Average abundance\n") +
  geom_text_repel(aes(label = scientific_name),family = "Century Gothic", fontface = "italic")+
  theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",size = 18, face = "bold"),
      plot.subtitle = element_text(family = "Century Gothic", 
      size = 15, face = "bold",color="#1b2838"),
      axis.title = element_text(family = "Century Gothic",
      size = 18, face = "bold"))

ggsave(fig_avgAbundance_rSq, filename = "figs/fig_abundancePerVisit_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off() 

# visualization
fig_avgDetections_rSq <- ggplot(r_sq_avgData, aes(x=avgDetections,y=r_sq)) +  geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+ 
  geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +  
  theme_bw() +
  stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
                        size = 6, family = "Century Gothic") +
  stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
                        size = 6, family = "Century Gothic") +
  labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)", 
       x="Average detections\n") +
  geom_text_repel(aes(label = scientific_name),family = "Century Gothic", fontface = "italic")+
  theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",size = 18, face = "bold"),
      plot.subtitle = element_text(family = "Century Gothic", 
      size = 15, face = "bold",color="#1b2838"),
      axis.title = element_text(family = "Century Gothic",
      size = 18, face = "bold"))

ggsave(fig_avgDetections_rSq, filename = "figs/fig_detectionsPerVisit_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off() 
No clear association between adjusted RSq values and average abundance
No clear association between adjusted RSq values and average abundance
No clear association between adjusted RSq values and average detections
No clear association between adjusted RSq values and average detections