Section 5 Resampling analysis

Using the previously generated .Rdata file, we resample checklists for three levels of sampling effort at different elevational bands.

5.1 Load .Rdata file containing elevation data across eBird sampling locations

Code

load("results/eBird_elev.RData")
dat <- as.data.frame(dat)
dat <- dat [,-27] # removing unnecessary columns

## include only full species
dat <- dat %>%
  filter(CATEGORY == "species" | CATEGORY == "issf")

# dividing eastern and western himalayas by the 83E longitude
datWest <- dat %>% filter (LONGITUDE < 83)
datEast <- dat %>% filter (LONGITUDE > 83)

5.2 Create resampled datasets

Code

# Creating resampled Dataset for the centre, lower, and upper limit of a species' elevational distribution for 3 levels of sampling effort (number of checklists)- separately for east and west 

## Eastern Himalayas

## Number of checklists in either season in each elevational band
Checklists <- datEast[!duplicated(datEast$group.id), ]
Checklists$elev_level <- cut(Checklists$elevation, breaks = c(-Inf, 500, 1000, 1500, 2000, 2500, 3000, Inf), labels = 1:7)
Checklists.S <- subset(Checklists, month %in% 5:8) # summer
Checklists.W <- subset(Checklists, month %in% c(1, 2, 12)) # winter

summer <- Checklists.S %>% 
  group_by(elev_level) %>% 
  summarise(summer = n_distinct(group.id))

winter <- Checklists.W %>% 
  group_by(elev_level) %>% 
  summarise(winter = n_distinct(group.id))

## if you want to output a table the number of checklists at each elevation band and season
CL_ES <- left_join(summer, winter, by = "elev_level")
levels(CL_ES$elev_level) <- c("0-500","500-1000","1000-1500","1500-2000","2000-2500","2500-3000",">3000")
write.csv(CL_ES, "results/ChecklistNo_SeasonElevation_East.csv", row.names = F)

## Sampling event IDs in each season and in each elevation band
ID.S <- lapply(1:7, function(x) Checklists.S$group.id[Checklists.S$elev_level == x])
ID.W <- lapply(1:7, function(x) Checklists.W$group.id[Checklists.W$elev_level == x])

## Get unique species list
uniSpe <- datEast %>% filter(CATEGORY == "species" | CATEGORY == "issf")
uniSpe <- unique(uniSpe[, "COMMON.NAME"]) %>% data.frame()

## Get first second and third quartile of effort (number of checklists) across season and elevation
efforts.S <- summary(Checklists.S$elev_level)
efforts.W <- summary(Checklists.W$elev_level)
qEffort <- quantile(c(efforts.S, efforts.W), c(0.25,0.50,0.75))

# resample for the lower limit (5th percentile), center (median), and upper limit (95th percentile) for a species elevational distribution
for (qt in c(0.05, 0.50, 0.95)) {
  #resample for the levels of sampling effort
  for (i in 1:3) {
    
## sample an equal number of checklists (3 levels of effort) from each elevation band 
set.seed(56789)
sampleID.S <- lapply(1:1000, function(y) {unlist(lapply(1:7, function(x) sample(ID.S[[x]], qEffort[i], replace=T)))})
set.seed(56789)
sampleID.W <- lapply(1:1000, function(y) {unlist(lapply(1:7, function(x) sample(ID.W[[x]], qEffort[i], replace=T)))})

# calculate the lower, median and upper elevation distribution for each bird species in the two seasons

# This step takes awhile
# We used parallel processing

  rs<- mclapply(1:1000, function(y) {
  m <- t(sapply(uniSpe[, 1], function(x){
    occs.S <- subset(datEast, COMMON.NAME ==x & group.id %in% sampleID.S[[y]], select="elevation")
    occs.W <- subset(datEast, COMMON.NAME==x & group.id %in% sampleID.W[[y]], select="elevation")
    B.n <- nrow(occs.S)
    W.n <- nrow(occs.W)
    B.elev <- if (B.n > 0) quantile(occs.S$elevation, qt) else NA
    W.elev <- if (W.n > 0) quantile(occs.W$elevation, qt) else NA
    return(c(B.elev = B.elev, B.n = B.n, W.elev = W.elev, W.n = W.n))
  }))
}, mc.cores = 7)
  save(rs,file = paste0("results/eBird_woNepal_resampled_east_", sprintf("%02d", qt*100),".q", i ,".RData"))
  }
}

## Repeat the above process for Western Himalayas

Checklists <- datWest[!duplicated(datWest$group.id), ]
Checklists$elev_level <- cut(Checklists$elevation, breaks = c(-Inf, 500, 1000, 1500, 2000, 2500, 3000, Inf), labels = 1:7)
Checklists.S <- subset(Checklists, month %in% 5:8)
Checklists.W <- subset(Checklists, month %in% c(1, 2, 12))

summer <- Checklists.S %>% group_by(elev_level) %>% summarise(summer = n_distinct(group.id))
winter <- Checklists.W %>% group_by(elev_level) %>% summarise(winter = n_distinct(group.id))

CL_ES <- left_join(summer,winter, by = "elev_level")
levels(CL_ES$elev_level) <-c("0-500","500-1000","1000-1500","1500-2000","2000-2500","2500-3000",">3000")
write.csv(CL_ES, "results/ChecklistNo_SeasonElevation_West.csv", row.names = F )

ID.S <- lapply(1:7, function(x) Checklists.S$group.id[Checklists.S$elev_level == x])
ID.W <- lapply(1:7, function(x) Checklists.W$group.id[Checklists.W$elev_level == x])

uniSpe <- datWest %>% filter(CATEGORY == "species" | CATEGORY == "issf")
uniSpe <- unique(uniSpe[, "COMMON.NAME"]) %>% data.frame()

efforts.S <- summary(Checklists.S$elev_level)
efforts.W <- summary(Checklists.W$elev_level)
qEffort <- quantile(c(efforts.S, efforts.W), c(0.25,0.50,0.75))

for (qt in c(0.05, 0.50, 0.95)) {
  for (i in 1:3) {
    
set.seed(56789)
sampleID.S <- lapply(1:1000, function(y) {unlist(lapply(1:7, function(x) sample(ID.S[[x]], qEffort[i], replace=T)))})
set.seed(56789)
sampleID.W <- lapply(1:1000, function(y) {unlist(lapply(1:7, function(x) sample(ID.W[[x]], qEffort[i], replace=T)))})

  rs<- mclapply(1:1000, function(y) {
    m <- t(sapply(uniSpe[, 1], function(x){
      occs.S <- subset(datWest, COMMON.NAME ==x & group.id %in% sampleID.S[[y]], select="elevation")
      occs.W <- subset(datWest, COMMON.NAME==x & group.id %in% sampleID.W[[y]], select="elevation")
      B.n <- nrow(occs.S)
      W.n <- nrow(occs.W)
      B.elev <- if (B.n > 0) quantile(occs.S$elevation, qt) else NA
      W.elev <- if (W.n > 0) quantile(occs.W$elevation, qt) else NA
      return(c(B.elev = B.elev, B.n = B.n, W.elev = W.elev, W.n = W.n))
    }))
  }, mc.cores = 7 )
  save(rs,file = paste0("results/eBird_woNepal_resampled_west_", sprintf("%02d", qt*100),".q", i,".RData"))
}
}