# Moth declines are most severe in broadleaf woodlands despite a net gain in habitat # availability - Blumgart et al., 2022 # Script_1_Making_diversity_stats_data_frame # This script is to get the diversity stats for each site-year using iNEXT. I will calculate estimated # species richness (Hill number 0) and Shannon diversity (Hill number 1), aka 'effective common species'. # The only data frame needed here is "All_years_all_species_all_sites.csv". This contains the annual # totals of each species in each site in each year. # Libraries #### library(dplyr) # For data manipulation library(tidyr) # For data manipulation library(iNEXT) # For calculating species richness and diversity # Import and prepare data #### # Read in raw species data (yearly totals for each species) All_years <- read.csv("Data/All_years_all_species_all_sites.csv") # The data is in long form. It's needs to be in a S x N form with species as rows and samples as # columns. Rearrange it. # Rename variables All_years <- All_years %>% rename(Year = CalYear, SITE = RIS.TrapCode, RIS_code = Code.RISgeneric, Common_name = Common.Name, Binomial = binomial) # Cut it down to the years I need (1968 - 2016) and cut out the Channel Isles All_years_2 <- All_years %>% dplyr::filter(Year >= 1968 & Year <= 2016) %>% dplyr::filter(Country != "Channel Islands [United Kingdom]") # Now get rid of records that are not identified to species level # Make a small df to look at Temp <- All_years_2 %>% dplyr::select(RIS_code, Common_name, Binomial) %>% dplyr::distinct() # Remove the ones that could not be identified to species All_years_3 <- All_years_2 %>% dplyr::filter(Binomial != "Diachrysia species") %>% dplyr::filter(Binomial != "Epirrhoe species") %>% dplyr::filter(Binomial != "Geometridae species") %>% dplyr::filter(Binomial != "Macaria species") %>% dplyr::filter(Binomial != "Mesapamea species") %>% dplyr::filter(Binomial != "Mesoligia species") %>% dplyr::filter(Binomial != "Noctuidae species") %>% dplyr::filter(Binomial != "Plusia species") %>% dplyr::filter(Binomial != "Chloroclysta species") %>% dplyr::filter(Binomial != "Eilema species") %>% dplyr::filter(Binomial != "Eupithecia species") %>% dplyr::filter(Binomial != "Hoplodrina species") %>% dplyr::filter(Binomial != "Unidentifiable macro spp") %>% dplyr::filter(Binomial != "Idaea species") %>% dplyr::filter(Binomial != "Acleris cristana") # A micro snuck through! # Note that Acronicta species, Amphipoea species, Epirrita species and Oligia species are typically # aggregated in the RIS so they are left in as valid taxa # Make a consistent single taxon for Common/Lesser Common Rustic All_years_3$Binomial[All_years_3$RIS_code == 3392] <- "Mesapamea secalis/didyma" All_years_3$Binomial[All_years_3$RIS_code == 3047] <- "Mesapamea secalis/didyma" All_years_3$Binomial[All_years_3$RIS_code == 3046] <- "Mesapamea secalis/didyma" # Fix error for Pale Brindled Beauty All_years_3$Binomial[All_years_3$RIS_code == 929] <- "Phigalia pilosaria" # And for Plain Pug All_years_3$Binomial[All_years_3$RIS_code == 840] <- "Eupithecia simpliciata" # Check that all the remaining species are either true species or accepted aggregates (Amphipoea, # Epirrita and Oligia species) Temp <- All_years_3 %>% dplyr::select(RIS_code, Common_name, Binomial) %>% dplyr::distinct() # Looks fine # Remove this df rm(Temp) # I now need to get it in the form so that each column is one site-year... # First, cut it down to its essential variables All_years_4 <- All_years_3 %>% dplyr::select(Year, SITE, Binomial, Annual_count) # Get rid of site-years with very few observations otherwise it screws up the iNEXT stuff later on. # Get annual totals Annual_totals <- All_years_4 %>% group_by(SITE, Year) %>% dplyr::summarise(Sum = sum(Annual_count)) # Merge this on to All_years_4 All_years_4_B <- merge(All_years_4, Annual_totals, by = c("SITE", "Year")) # Now cut out anything with a site-year total of less than 50 (an arbitrary minimum arrived at # by trial and error) All_years_4_C <- All_years_4_B %>% dplyr::filter(Sum > 50) # And get rid of the Sum column All_years_4_C$Sum <- NULL # combine Site-year into one variable and remove SITE and Year All_years_4_C$Site_year <- paste0(All_years_4_C$SITE,"_",All_years_4_C$Year) All_years_4_C$Year <- NULL All_years_4_C$SITE <- NULL # Round the counts and convert to integers. All_years_4_C$Annual_count <- round (All_years_4_C$Annual_count, 0) All_years_4_C$Annual_count <- as.integer(All_years_4_C$Annual_count) # Get rid of duplicates created by standardising the binomial names above All_years_4_C <- All_years_4_C %>% dplyr::group_by(Site_year, Binomial) %>% dplyr::summarise(Annual_count = sum(Annual_count)) # This got rid of 7 duplicate rows # Now convert data frame to wide form All_years_5 <- pivot_wider(All_years_4_C, names_from = Site_year, values_from = Annual_count, values_fill = list(Annual_count = 0)) # Name rows as species rownames(All_years_5) <- paste(All_years_5$Binomial) # Cut off the Binomial column to make the final df for rarefying All_years_to_rarefy <- All_years_5 %>% dplyr::select(-Binomial) # Turn it into a dataframe All_years_to_rarefy <- as.data.frame(All_years_to_rarefy) # Data prep complete # # # # Calculating richness with iNEXT #### # Get the richness estimates Richness_df <- ChaoRichness(All_years_to_rarefy) # Now to get back the SITE and Year # Make a colunm of the rownames Richness_df$SITE_Year <- rownames(Richness_df) # Use regular expressions to extract the year and site numbers Richness_df$Year <- gsub("\\d+_", "", Richness_df$SITE_Year) # "\\d" means a digit and # the "+" means any number of... So this translates to: When you find any number of digits # following by an underscore, replace it with nothing # Do the same for SITE Richness_df$SITE <- gsub("_\\d+", "", Richness_df$SITE_Year) # Now rename columns to remember that these are richness values. Richness_df <- Richness_df %>% rename(Actual_sp_richness = Observed, Est_sp_richness = Estimator, Est_sp_richness_se = Est_s.e., Est_sp_richness_low = "95% Lower", Est_sp_richness_upp = "95% Upper") # Get rid of SITE_Year Richness_df$SITE_Year <- NULL # # # # Now do the same for Shannon and Simpson indicies # Calculating Shannon with iNEXT #### Shannon_df <- ChaoShannon(All_years_to_rarefy, transform = TRUE) # transform = TRUE to get it back to the # "effective number of common species scale" # Now to get back the SITE and Year # Make a colunm of the rownames Shannon_df$SITE_Year <- rownames(Shannon_df) # Use regular expressions to extract the year and site numbers Shannon_df$Year <- gsub("\\d+_", "", Shannon_df$SITE_Year) # Do the same for SITE Shannon_df$SITE <- gsub("_\\d+", "", Shannon_df$SITE_Year) # Now rename columns to remember that these are richness values. Shannon_df <- Shannon_df %>% rename(Actual_Shannon = Observed, Est_Shannon = Estimator, Est_Shannon_se = "Est_s.e", Est_Shannon_low = "95% Lower", Est_Shannon_upp = "95% Upper") # Get rid of SITE_Year Shannon_df$SITE_Year <- NULL # # # # Merge them Big_diversity_df <- merge(Richness_df, Shannon_df, by = c("SITE", "Year"), all = TRUE) # Save the df (this file is already provided in the Data folder so does not need to be rewritten) ##write.csv(Big_diversity_df, "Data/Diversity_stats_complete.csv", row.names = FALSE) # # # # End of script. This data frame is used in the next stage of the modeling in Script_2_General_analysis