Section 13 r sq vs. species traits and calling rates
In this script, we will plot the adjusted r squared values (derived from the abundance vs. detections script) against species-specific traits.
13.1 Load necessary libraries
library(tidyverse)
library(dplyr)
library(stringr)
library(vegan)
library(ggplot2)
library(scico)
library(psych)
library(ecodist)
library(RColorBrewer)
library(ggforce)
library(ggpubr)
library(ggalt)
library(patchwork)
library(sjPlot)
library(ggside)
library(ggstatsplot)
library(extrafont)
library(ggrepel)
# Source any custom/other internal functions necessary for analysis
source("code/01_internal-functions.R")
13.5 Body mass and R squared values
Are species of a certain body mass showing stronger/poorer R squared values (between abundance and acoustic detections)?
r_sq_trait <- left_join(r_sq, trait, by = "scientific_name")
# log transform body mass
r_sq_trait$log_mass <- log10(r_sq_trait$mass)
# visualization
fig_bodyMass_rSqValue <- ggplot(r_sq_trait, aes(x=log_mass,y=r_sq)) + geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+
geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +
theme_bw() +
stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
size = 8) +
stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
size = 8) +
labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)",
x="Log mass\n") +
theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",
size = 18, face = "bold"),
plot.subtitle = element_text(family = "Century Gothic",
size = 15, face = "bold",color="#1b2838"),
axis.title = element_text(family = "Century Gothic",
size = 18, face = "bold"))
ggsave(fig_bodyMass_rSqValue, filename = "figs/fig_bodyMass_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off()

13.6 Median peak frequency and R squared values
# Only a total of 87 species are left after filtering species with very few templates
nTemplates_10 <- freq %>%
group_by(eBird_codes) %>%
count() %>%
filter(n >= 10)
# left-join to remove species with less than 10 templates in the frequency dataset
freq_10 <- left_join(nTemplates_10[,1], freq)
# calculate median peak frequency
median_pf <- freq_10 %>%
group_by(eBird_codes) %>%
summarise(median_peak_freq = median(peak_freq_in_Hz))
# add scientific_name to median_pf
median_pf <- left_join(median_pf, trait[,c(1,4)], by = "eBird_codes")
# join r sq and median pf dataframes
r_sq_freq <- left_join(r_sq, median_pf, by = "scientific_name")
# visualization
fig_medianPeakFreq_rSqValue <- ggplot(r_sq_freq, aes(x=median_peak_freq,y=r_sq)) + geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+
geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +
theme_bw() +
stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
size = 8) +
stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
size = 8) +
labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)",
x="Median Peak Frequency\n") +
theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",
size = 18, face = "bold"),
plot.subtitle = element_text(family = "Century Gothic",
size = 15, face = "bold",color="#1b2838"),
axis.title = element_text(family = "Century Gothic",
size = 18, face = "bold"))
ggsave(fig_medianPeakFreq_rSqValue, filename = "figs/fig_medianPeakFrequency_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off()

13.7 Median peak amplitude and R squared values
# calculate median peak amplitude
# we extract only the median of the 95th percentile of values
median_amp <- freq_10 %>%
group_by(eBird_codes) %>%
filter(peak_amp >= quantile(peak_amp, 0.95, na.rm = T)) %>%
summarise(median_peak_amp = median(peak_amp))
# add scientific_name to median_amp
median_amp <- left_join(median_amp, trait[,c(1,4)], by = "eBird_codes")
# join r sq and median amplitude dataframes
r_sq_amp <- left_join(r_sq, median_amp, by = "scientific_name")
# visualization
fig_medianPeakAmplitude_rSqValue <- ggplot(r_sq_amp, aes(x=median_peak_amp,y=r_sq)) + geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+
geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +
theme_bw() +
stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
size = 8) +
stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
size = 8) +
labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)",
x="Median Peak Amplitude\n") +
theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",
size = 18, face = "bold"),
plot.subtitle = element_text(family = "Century Gothic",
size = 15, face = "bold",color="#1b2838"),
axis.title = element_text(family = "Century Gothic",
size = 18, face = "bold"))
ggsave(fig_medianPeakAmplitude_rSqValue, filename = "figs/fig_medianPeakAmplitude_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off()

13.8 Multiple regressions between adjusted R Squared values and species traits
We run multiple linear regressions to ask if a particular species trait is more predictive of a stronger association between acoustic detections and abundance (as derived from point count data).
# prep dataframe that includes all adjusted r squared values and species traits
reg_data <- left_join(r_sq_trait, r_sq_freq[,c(1,8)],
by = "scientific_name") %>%
left_join(., r_sq_amp[,c(1,8)], by = "scientific_name")
# scale predictors prior to running regression
reg_data$mass_scale <- scale(reg_data$mass)
reg_data$freq_scale <- scale(reg_data$median_peak_freq)
reg_data$amp_scale <- scale(reg_data$median_peak_amp)
# run multiple linear regression
mult_regress <- lm(r_sq ~ mass_scale + freq_scale + amp_scale,
data = reg_data)
Summary of the regression analysis:
Call:
lm(formula = r_sq ~ mass_scale + freq_scale + amp_scale, data = reg_data)
Residuals:
Min 1Q Median 3Q Max
-0.2278 -0.1488 -0.0544 0.1280 0.4731
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.16026 0.02961 5.41 2.8e-06 ***
mass_scale -0.03540 0.03428 -1.03 0.31
freq_scale 0.01898 0.03587 0.53 0.60
amp_scale -0.00583 0.03057 -0.19 0.85
Residual standard error: 0.201 on 42 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.0581, Adjusted R-squared: -0.0092
F-statistic: 0.863 on 3 and 42 DF, p-value: 0.468
13.9 Calling rate and R squared values
How does the coefficient of determination (tightness of fit between acoustic detections and abundance from point count data) compare to the calling rate? Here, calling rate for each species is defined as the total acoustic detections divided by the total number of individuals (from point counts). We aim to ask if species that have higher calling rate per individual have a stronger relationship between acoustics and point count data.
# point-count data
# estimate total abundance across all species for each site
abundance <- datSubset %>%
filter(data_type == "point_count") %>%
group_by(site_id, restoration_type, scientific_name,
common_name, eBird_codes) %>%
summarise(abundance_pc = sum(number)) %>%
ungroup()
# estimate total number of detections across the acoustic data
# note: we cannot call this abundance as it refers to the total number of vocalizations across all sites
detections <- datSubset %>%
filter(data_type == "acoustic_data") %>%
group_by(site_id, restoration_type, scientific_name,
common_name, eBird_codes) %>%
summarise(detections_aru = sum(number)) %>%
ungroup()
# create a single dataframe
data <- full_join(abundance, detections)%>%
replace_na(list(abundance_pc = 0, detections_aru = 0))
# identifying species that need to be kept
# only those species that have a minimum abundance value of 10 and minimum detection value of 10
spp_subset <- data %>%
group_by(scientific_name) %>%
summarise(abundance_pc = sum(abundance_pc), detections_aru = sum(detections_aru)) %>%
ungroup() %>%
filter(abundance_pc >=10 & detections_aru >= 10)
# subset data
dat_subset <- data %>%
filter(scientific_name %in% spp_subset$scientific_name)
# summarizing data (to join with the dataframe on R sq values)
dat_subset <- dat_subset %>%
group_by(scientific_name, common_name, eBird_codes) %>%
summarise(abundance_pc = sum(abundance_pc), detections_aru = sum(detections_aru)) %>%
ungroup()
# join with r squared dataframe
r_sq_callingRate <- left_join(r_sq, dat_subset, by = "scientific_name")
# extract calling rate
r_sq_callingRate$calling_rate <- r_sq_callingRate$detections_aru/r_sq_callingRate$abundance_pc
## visualization
fig_calling_rate_rSq <- ggplot(r_sq_callingRate, aes(x=calling_rate,y=r_sq)) + geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+
geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +
theme_bw() +
stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
size = 6, family = "Century Gothic") +
stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
size = 6, family = "Century Gothic") +
labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)",
x="Calling rate\n") +
geom_text_repel(aes(label = scientific_name),family = "Century Gothic", fontface = "italic")+
theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",size = 18, face = "bold"),
plot.subtitle = element_text(family = "Century Gothic",
size = 15, face = "bold",color="#1b2838"),
axis.title = element_text(family = "Century Gothic",
size = 18, face = "bold"))
ggsave(fig_calling_rate_rSq, filename = "figs/fig_callingRate_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off()

13.10 Average abundance and average detections and R sq
Here, we ask if the coefficient of determination between acoustic detections and abundance (from point count data) varies as a function of proportion of visits across which a species was detected in point count data and proportion of visits across which a species was detected in acoustic surveys.
# point-count data
nSitesDays_pc <- datSubset %>%
filter(data_type == "point_count") %>%
dplyr::select(site_id, date) %>%
distinct() %>%
arrange(site_id) %>%
count(site_id) %>%
rename(., "nVisits" = n)
avgAbundance <- left_join(abundance, nSitesDays_pc, by = "site_id") %>%
mutate(avgAbundance = abundance_pc/nVisits)
# acoustic data
nSitesDays_aru <- datSubset %>%
filter(data_type == "acoustic_data") %>%
dplyr::select(site_id, date) %>%
distinct() %>%
arrange(site_id) %>%
count(site_id) %>%
rename(., "nVisits" = n)
avgDetections <- left_join(detections, nSitesDays_aru,
by = "site_id") %>%
mutate(avgDetections = detections_aru/nVisits)
# create a single dataframe
data <- full_join(avgAbundance[,-7], avgDetections[,-7])%>%
replace_na(list(avgAbundance = 0, avgDetections = 0,
abundance_pc = 0, detections_aru = 0))
# identifying species that need to be kept
# only those species that have a minimum abundance value of 10 and minimum detection value of 10
spp_subset <- data %>%
group_by(scientific_name) %>%
summarise(abundance_pc = sum(abundance_pc), detections_aru = sum(detections_aru)) %>%
ungroup() %>%
filter(abundance_pc >=10 & detections_aru >= 10)
# subset data
dat_subset <- data %>%
filter(scientific_name %in% spp_subset$scientific_name)
# summarizing data (to join with the dataframe on R sq values)
dat_subset <- dat_subset %>%
group_by(scientific_name, common_name, eBird_codes) %>%
summarise(avgAbundance = sum(avgAbundance), avgDetections = sum(avgDetections)) %>%
ungroup()
# join with r squared dataframe
r_sq_avgData <- left_join(r_sq, dat_subset, by = "scientific_name")
## visualization
fig_avgAbundance_rSq <- ggplot(r_sq_avgData, aes(x=avgAbundance,y=r_sq)) + geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+
geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +
theme_bw() +
stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
size = 6, family = "Century Gothic") +
stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
size = 6, family = "Century Gothic") +
labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)",
x="Average abundance\n") +
geom_text_repel(aes(label = scientific_name),family = "Century Gothic", fontface = "italic")+
theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",size = 18, face = "bold"),
plot.subtitle = element_text(family = "Century Gothic",
size = 15, face = "bold",color="#1b2838"),
axis.title = element_text(family = "Century Gothic",
size = 18, face = "bold"))
ggsave(fig_avgAbundance_rSq, filename = "figs/fig_abundancePerVisit_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off()
# visualization
fig_avgDetections_rSq <- ggplot(r_sq_avgData, aes(x=avgDetections,y=r_sq)) + geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+
geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid") +
theme_bw() +
stat_regline_equation(label.y = 0.55, aes(label = ..eq.label..),
size = 6, family = "Century Gothic") +
stat_regline_equation(label.y = 0.65, aes(label = ..rr.label..),
size = 6, family = "Century Gothic") +
labs(y="\nAdjusted R-Squared value between acoustic detections and abundance (point-counts)",
x="Average detections\n") +
geom_text_repel(aes(label = scientific_name),family = "Century Gothic", fontface = "italic")+
theme(text = element_text(family = "Century Gothic", size = 18, face = "bold"),plot.title = element_text(family = "Century Gothic",size = 18, face = "bold"),
plot.subtitle = element_text(family = "Century Gothic",
size = 15, face = "bold",color="#1b2838"),
axis.title = element_text(family = "Century Gothic",
size = 18, face = "bold"))
ggsave(fig_avgDetections_rSq, filename = "figs/fig_detectionsPerVisit_adjustedrSq.png", width = 14, height = 16, device = png(), units = "in", dpi = 300)
dev.off()

