Section 4 Processing eBird data
In this script, we will process the community science data across the Eastern and Western Himalayas.
4.2 Loading custom functions to process eBird data
Code
# Read one raw eBird text file and return a cleaned table of checklist records.
#
# Args:
#   rawpath: path to a tab-separated eBird download with a header row.
#
# Returns:
#   An UNGROUPED data frame holding the columns listed in `imp` below plus
#   month, year, day (day of year), week and fort (fortnight index).
#   Rows are restricted to:
#     * unreviewed or approved records (REVIEWED == 0 | APPROVED == 1)
#     * complete checklists (ALL.SPECIES.REPORTED == 1)
#     * "Stationary" or "Traveling" protocols
#     * distance <= 2.5 km (or missing), duration <= 120 min, <= 10 observers
#     * start time strictly between 04:00 and 19:00
#     * one row per species per checklist group (group.id)
#     * observations from years after 2010
#   Note that dplyr::filter() drops rows where a condition evaluates to NA, so
#   records with missing duration, observer count or start time are removed.
readcleanrawdata <- function(rawpath) {
  # Fail immediately if a package this function actually uses is missing.
  library(lubridate)
  library(tidyverse)
  # cowplot is not used inside this function; it is still attached here
  # (softly, as before) in case later code relies on it being available.
  require(cowplot)

  # Columns to read from the raw file; every other column is skipped on import.
  preimp <- c("COMMON.NAME", "OBSERVATION.COUNT",
              "LOCALITY.ID", "LOCALITY.TYPE", "STATE", "COUNTRY",
              "LATITUDE", "LONGITUDE", "OBSERVATION.DATE",
              "TIME.OBSERVATIONS.STARTED", "OBSERVER.ID",
              "PROTOCOL.TYPE", "DURATION.MINUTES", "EFFORT.DISTANCE.KM",
              "REVIEWED", "NUMBER.OBSERVERS", "ALL.SPECIES.REPORTED",
              "GROUP.IDENTIFIER", "SAMPLING.EVENT.IDENTIFIER", "APPROVED",
              "CATEGORY")

  # Read only the first row to learn the column names, then build a colClasses
  # vector: "NULL" skips a column, NA lets read.delim() guess its type.
  header_row <- read.delim(rawpath, nrows = 1, sep = "\t", header = TRUE,
                           quote = "", stringsAsFactors = FALSE,
                           na.strings = c("", " ", NA))
  col_classes <- names(header_row)
  col_classes[!(col_classes %in% preimp)] <- "NULL"
  col_classes[col_classes %in% preimp] <- NA

  ebd <- read.delim(rawpath, colClasses = col_classes, sep = "\t",
                    header = TRUE, quote = "", stringsAsFactors = FALSE,
                    na.strings = c("", " ", NA))

  ## choosing important variables (columns kept in the output)
  imp <- c("COMMON.NAME", "OBSERVATION.COUNT",
           "LOCALITY.ID", "LOCALITY.TYPE", "STATE", "COUNTRY",
           "LATITUDE", "LONGITUDE", "OBSERVATION.DATE",
           "TIME.OBSERVATIONS.STARTED", "OBSERVER.ID",
           "PROTOCOL.TYPE", "DURATION.MINUTES", "EFFORT.DISTANCE.KM",
           "SAMPLING.EVENT.IDENTIFIER",
           "NUMBER.OBSERVERS", "ALL.SPECIES.REPORTED", "group.id",
           "CATEGORY", "no.sp")

  # Cumulative days before the start of each month in a non-leap year; used to
  # turn (month, day of month) into a day-of-year that ignores leap days.
  cdays <- c(0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334)

  ## setup eBird data ##
  ebd <- ebd %>%
    # keep unreviewed or approved observations
    filter(REVIEWED == 0 | APPROVED == 1) %>%
    # shared checklists carry a GROUP.IDENTIFIER; solo ones fall back to the
    # sampling event identifier
    mutate(group.id = ifelse(is.na(GROUP.IDENTIFIER),
                             SAMPLING.EVENT.IDENTIFIER,
                             GROUP.IDENTIFIER)) %>%
    filter(ALL.SPECIES.REPORTED == 1) %>%
    filter(PROTOCOL.TYPE == "Stationary" | PROTOCOL.TYPE == "Traveling") %>%
    # filter distance travelled, duration birded and number of observers
    filter(EFFORT.DISTANCE.KM <= 2.5 | is.na(EFFORT.DISTANCE.KM)) %>%
    filter(DURATION.MINUTES <= 120) %>%
    filter(NUMBER.OBSERVERS <= 10) %>%
    # keep checklists started strictly between 04:00 and 19:00
    mutate(Time = hms(TIME.OBSERVATIONS.STARTED)) %>%
    filter(Time > hms("4:00:00") & Time < hms("19:00:00")) %>%
    # remove repeats: one row per species within each checklist group
    group_by(group.id, COMMON.NAME) %>%
    slice(1) %>%
    ungroup() %>%
    # add number of species column (no.sp) per checklist group
    group_by(group.id) %>%
    mutate(no.sp = n_distinct(COMMON.NAME)) %>%
    dplyr::select(all_of(imp)) %>%
    # set date, add month, year and day columns using package LUBRIDATE
    mutate(OBSERVATION.DATE = as.Date(OBSERVATION.DATE),
           month = month(OBSERVATION.DATE),
           year = year(OBSERVATION.DATE),
           day = day(OBSERVATION.DATE) + cdays[month],
           week = week(OBSERVATION.DATE),
           fort = ceiling(day / 14)) %>%
    filter(year > 2010) %>%
    # Drop the group.id grouping so callers get a plain table; a grouped result
    # would make later verbs such as distinct() operate within each checklist.
    ungroup()

  ebd
}
4.3 Use the function written above to extract eBird data
Code
# Please download the latest version of the eBird data from https://ebird.org/data/download and set the file paths accordingly. Because these datasets are extremely large, we have not uploaded them to GitHub.
# In this study, the latest version of the data corresponds to August 31st 2022
# extract data for the following list of countries
# Clean the Bhutan release and each Indian state/territory release with the
# same function, then stack the Indian tables row-wise.
Bhutan <- readcleanrawdata("ebd_BT_relAug-2022.txt")
india_files <- c(
  "ebd_IN-JK_relAug-2022.txt",
  "ebd_IN-LA_relAug-2022.txt",
  "ebd_IN-HP_relAug-2022.txt",
  "ebd_IN-AR_relAug-2022.txt",
  "ebd_IN-WB_relAug-2022.txt",
  "ebd_IN-UL_relAug-2022.txt",
  "ebd_IN-SK_relAug-2022.txt"
)
India <- do.call(rbind, lapply(india_files, readcleanrawdata))
## Removing non himalayan regions
India <- India %>% filter(LATITUDE > 26, LONGITUDE < 100)
# ungroup() guarantees a plain table: distinct() on a grouped table would
# de-duplicate within each group instead of across the whole dataset.
dat <- rbind(India, Bhutan) %>% ungroup()
# Keep only unique locations used
# (months 1, 2, 5, 6, 7, 8 and 12 only -- presumably the study's seasons of
# interest; confirm against the methods)
datll <- dat %>%
  filter(month %in% c(1, 2, 5, 6, 7, 8, 12)) %>%
  distinct(LATITUDE, LONGITUDE, .keep_all = TRUE) %>%
  # qualified so the call still resolves to dplyr if another attached package
  # (e.g. raster) also exports select()
  dplyr::select(LOCALITY.ID, LATITUDE, LONGITUDE)
write.csv(datll, "results/unique_loc.csv", row.names = FALSE)
4.4 Extract elevation at unique locations
Code
# Convert the checklist table to an sf point layer in EPSG:4326.
# remove = FALSE (a logical, not the string "F") keeps the LONGITUDE and
# LATITUDE columns alongside the new geometry column.
# NOTE(review): elevation is extracted for every row of `dat`, not only for the
# unique locations written to results/unique_loc.csv -- confirm this is intended.
dat <- st_as_sf(dat, coords = c("LONGITUDE", "LATITUDE"), crs = 4326, remove = FALSE)
# Loading the elevation data
elev <- raster("data/elevation/alt")
# extract elevation (one value per row of dat)
elevDat <- raster::extract(elev, dat)
# cbind elevation back to dataframe
dat <- cbind(dat, elevDat)
# save Rdata file (uploaded to GitHub)
save(dat, file = "results/eBird_elev.RData")