# Moth declines are most severe in broadleaf woodlands despite a net gain in habitat availability - Blumgart et al., 2022

# Script_4_Species_models_each_habitat_region

# This script runs models for each species in each habitat, region or region within habitat using the 
# 'site indices' (sindex) for each species in each site-year. The sindex values were produced in the 
# R script "Script_3_Turning_raw_data_to_site_indices". The sindex value estimates what the annual
# abundance would have been at a site had that site been running continuously all year. Where the site
# had been running continuously all year, the sindex is equal to the raw annual abundance. 

# This script estimates a percentage change in abundance for the years 1968 - 2016 for each 
# species in each habitat/region or region within habitat. The percentage changes are then used
# for the next stage of modeling in R script "Script_5_species_specific_analysis".  

# The 7 habitat types are:
# Arable
# Broadleaf woodland
# Conifer plantation
# Improved grassland
# 'Other semi-natural'
# Upland
# Urban

# The two regions are North and South (split at the 4500 N gridline in the British National Grid)

# See Supporting Information: Land use data and habitat allocation

# # # 

# Libraries ####

library(dplyr) # For data manipulation
library(ggplot2) # For plotting
library(mgcv) # For GAMMs
library(poptrend) # For modelling abundance trends

# Data import ####

# Note you will need to change the address to import files

# Site info
Site_info <- read.csv("Data/Site_info.csv")

# Read in moth traits and extract only the names and numbers
Traits <- read.csv("Data/Moth_traits.csv")

names <- 
   Traits %>%
   dplyr::select(RIS_code, Binomial, Common_name) 

# # # 

# Preparing species lists #### 

# Locate the sindex files that I'll be working from. These are simply named by
# their RIS (Rothamsted Insect Survey) code 
f_list = list.files("Data/Sindex_files")

# Create species codes using the file names
spp_codes = as.numeric(gsub("([[:digit:]]{1,})[.]rds","\\1",f_list))

# Get rid of the three micros (Rush Veneer 1001, Rusty Dot Pearl 1015 and Diamond-backed Moth 2178)
spp_codes <- spp_codes[spp_codes != 1001 & spp_codes != 1015 & spp_codes != 2178]

# # # 

# ~~~ #####

# # # 

# Trends in each habitat ####

# Make a dataframe to store the results in.

# Use expand.grid to create every combination of each species in each habitat. Also include an 
# 'overall' category in which I will store results for species trends at the national level
Habitat_trends_outputs <- expand.grid(RIS_code = spp_codes, 
                                      Habitat = c("Overall", levels(as.factor(Site_info$Habitat_500m))))

# Add in the species names
Habitat_trends_outputs_2 <- merge(Habitat_trends_outputs, names, by = "RIS_code", all.x = TRUE)

 # Reorder columns
Habitat_trends_outputs_2 <-
   Habitat_trends_outputs_2 %>%
   dplyr::select(RIS_code, Common_name, Binomial, Habitat)

# Make blank columns for storing model outputs
Habitat_trends_outputs_2$Poptrend_perc_change <- NA
Habitat_trends_outputs_2$Poptrend_perc_change_upp <- NA
Habitat_trends_outputs_2$Poptrend_perc_change_low <- NA

# Column for the total number of moths counted in the time series.
Habitat_trends_outputs_2$Sample_size <- NA

# Columns for the total number of sites and site-years that went into the 
# calculation as this might be useful later
Habitat_trends_outputs_2$Total_sites <- NA
Habitat_trends_outputs_2$Total_site_years <- NA

# Add min and max years (i.e., the earliest and latest)
Habitat_trends_outputs_2$Min_year <- NA
Habitat_trends_outputs_2$Max_year <- NA

# Make a list of Habitats
Habitat_types <- c("Overall", levels(as.factor(Site_info$Habitat_500m)))

# # #

# ~~Start of loop ####
for(i_sp in 1:length(spp_codes)) {

# Read in a sindex file 
Sindex_file = readRDS(paste0("Data/Sindex_files/",spp_codes[i_sp],".rds"))

# Rename year
Sindex_file <- 
   Sindex_file %>%
   rename(Year = YEAR)

# Print progress to screen
print(paste0("Starting ", names$Common_name[names$RIS_code == spp_codes[i_sp]], ", No. ", spp_codes[i_sp]))
print(paste0("Total progress: ", round(i_sp/432*100),"%"))

# Paste on Site_info, keeping all rows from Sindex_file
Sindex_plus_sites <- merge(Sindex_file, Site_info, by = c("SITE"), all.x = TRUE)

# Make Site_name a factor
Sindex_plus_sites$Site_name <- as.factor(Sindex_plus_sites$Site_name)

# Cut out any site-year with a site-year completeness (NM) of less than 0.5 
Sindex_plus_sites_B <- 
   Sindex_plus_sites %>% 
   dplyr::filter(NM >= 0.5)

# Make an actual_years variable to merge on 
Actual_years_ran <-
   Sindex_plus_sites_B %>%
   dplyr::group_by(SITE) %>%
   dplyr::summarise(Actual_years = length(unique(Year)))

# Merge it on
Sindex_plus_sites_C <- merge(Sindex_plus_sites_B, Actual_years_ran, by = "SITE")

# And cut out any site with less than three years running
Sindex_plus_sites_2 <- 
   Sindex_plus_sites_C %>%
   dplyr::filter(Actual_years >= 3)
   

# ~~Start habitat loop ####

for(i_hab in 1:length(Habitat_types)){
   
# If Habitat_type = "Overall" then there will be no subsetting of data so the following code will all 
   # run with no subsets

   if(Habitat_types[i_hab] == "Overall"){
   
   # Print progress to screen 
   print(paste0("Doing ", Habitat_types[i_hab], " habitat for ", names$Common_name[names$RIS_code == spp_codes[i_sp]]))

# Run the model
Poptrend_my_raw_sindex <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                                    s(Site_name, bs = "re"),
                                 family = nb, data = Sindex_plus_sites_2)

# Extract the percentage change and round it to 2 decimal places
Poptrend_perc_change <- as.numeric(change(Poptrend_my_raw_sindex, start = 1968, end = 2016, alpha = 0.05)[1])
Poptrend_perc_change <- round(Poptrend_perc_change, digits = 2)

# Extract the upper and lower 95% CIs and round to 2 dp
# Extract both as a list first
Poptrend_perc_change_CIs <- change(Poptrend_my_raw_sindex, start = 1968, end = 2016, alpha = 0.05)[2]

# Extract upper
Poptrend_perc_change_upp <- as.numeric(Poptrend_perc_change_CIs$CI[2])
Poptrend_perc_change_upp <- round(Poptrend_perc_change_upp, digits = 2)

# Extract lower
Poptrend_perc_change_low <- as.numeric(Poptrend_perc_change_CIs$CI[1])
Poptrend_perc_change_low <- round(Poptrend_perc_change_low, digits = 2)


# Now add all these to the Habitat_trends_outputs_2 file

Habitat_trends_outputs_2$Poptrend_perc_change[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                        & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- Poptrend_perc_change
Habitat_trends_outputs_2$Poptrend_perc_change_upp[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                            & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- Poptrend_perc_change_upp
Habitat_trends_outputs_2$Poptrend_perc_change_low[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                            & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- Poptrend_perc_change_low

# Add the total sample size to the Habitat_trends_outputs_2
Habitat_trends_outputs_2$Sample_size[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                     & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- sum(Sindex_plus_sites_2$Raw_total)
# Add number of sites
Habitat_trends_outputs_2$Total_sites[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                     & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- length(unique(Sindex_plus_sites_2$SITE))
# Make a column for site-years
Sindex_plus_sites_2$Site_year <- paste0(Sindex_plus_sites_2$SITE,"_",Sindex_plus_sites_2$Year)

# Add the number of site-years to the Habitat_trends_outputs_2 df
Habitat_trends_outputs_2$Total_site_years[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                          & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- length(unique(Sindex_plus_sites_2$Site_info))

# Add min and max years
# Min
Habitat_trends_outputs_2$Min_year[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                  & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- min(Sindex_plus_sites_2$Year)
# Max
Habitat_trends_outputs_2$Max_year[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                  & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- max(Sindex_plus_sites_2$Year)


# # # 

# That's the Overall trend finished. Now to habitat subsets


   }else{ 
      
      # Make a subset of Sindex_plus_sites_2
      Sindex_plus_sites_2_habitat <-
         Sindex_plus_sites_2 %>%
         dplyr::filter(Habitat_500m == Habitat_types[i_hab])
      
      if((length(levels(droplevels(Sindex_plus_sites_2_habitat$Site_name))) >= 6) & 
         sum(unique(Sindex_plus_sites_2_habitat$Actual_years)) > 100){ # I've set it so that a habitat 
         # needs at least 6 sites to be modeled, else the model will often fail. I've also set a minimum
         # of 100 site-years to be included. 

      # Print progress to screen 
         print(paste0("Doing ", Habitat_types[i_hab], " habitat for ", names$Common_name[names$RIS_code == spp_codes[i_sp]]))

      # Run the model
      Poptrend_my_raw_sindex <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Site_name, bs = "re"), 
                                       family = nb, data = Sindex_plus_sites_2_habitat)
      
      
      # Extract the percentage change and round it to 2 decimal places
      Poptrend_perc_change <- as.numeric(change(Poptrend_my_raw_sindex, start = 1968, end = 2016, alpha = 0.05)[1])
      Poptrend_perc_change <- round(Poptrend_perc_change, digits = 2)
      
      # Extract the upper and lower 95% CIs and round to 2 dp
      # Extract both as a list first
      Poptrend_perc_change_CIs <- change(Poptrend_my_raw_sindex, start = 1968, end = 2016, alpha = 0.05)[2]
      
      # Extract upper
      Poptrend_perc_change_upp <- as.numeric(Poptrend_perc_change_CIs$CI[2])
      Poptrend_perc_change_upp <- round(Poptrend_perc_change_upp, digits = 2)
      
      # Extract lower
      Poptrend_perc_change_low <- as.numeric(Poptrend_perc_change_CIs$CI[1])
      Poptrend_perc_change_low <- round(Poptrend_perc_change_low, digits = 2)
      
      
      # Now add all these to the Habitat_trends_outputs_2 file
      
      Habitat_trends_outputs_2$Poptrend_perc_change[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                               & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- Poptrend_perc_change
      Habitat_trends_outputs_2$Poptrend_perc_change_upp[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                                   & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- Poptrend_perc_change_upp
      Habitat_trends_outputs_2$Poptrend_perc_change_low[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                                   & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- Poptrend_perc_change_low
      
      
      # Add the total sample size within this habitat to the Habitat_trends_outputs_2
      Habitat_trends_outputs_2$Sample_size[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                           & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- sum(Sindex_plus_sites_2_habitat$Raw_total)
      #
      #
      Habitat_trends_outputs_2$Total_sites[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                           & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- length(unique(Sindex_plus_sites_2_habitat$SITE))
      # Make a column for site-years
      Sindex_plus_sites_2_habitat$Site_info <- paste0(Sindex_plus_sites_2_habitat$SITE,"_",Sindex_plus_sites_2_habitat$Year)
      # Add the number of site-yeras to the Habitat_trends_outputs_2 df
      Habitat_trends_outputs_2$Total_site_years[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- length(unique(Sindex_plus_sites_2_habitat$Site_info))
      
      # Add min and max years
      # Min
      Habitat_trends_outputs_2$Min_year[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                        & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- min(Sindex_plus_sites_2_habitat$Year)
      # Max
      
      Habitat_trends_outputs_2$Max_year[Habitat_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                        & Habitat_trends_outputs_2$Habitat == Habitat_types[i_hab]] <- max(Sindex_plus_sites_2_habitat$Year)

      
           }else{
         print(paste0("Not enough data for ", names$Common_name[names$RIS_code == spp_codes[i_sp]], " in ",Habitat_types[i_hab]))
         }
      }
   }


# Save the Habitat_trends_outputs_2 so I don't lose data if the whole thing crashes
# ***delete the ## below. I've kept this line hashed out so the data frame is not overwritten by accident
##write.csv(Habitat_trends_outputs_2, "Data/Hab_trends.csv", row.names = FALSE)

# Print progress to screen
print(paste0("Just finished ", names$Common_name[names$RIS_code == spp_codes[i_sp]], ", No. ", spp_codes[i_sp]))
print(paste0("Total progress = ", round(i_sp/length(spp_codes)*100),"%"))

}


# # # 

# End of habitat loop

# # # 

# ~~~ ####

# # #

# Trends in each region ####

# Make a dataframe to store the results in.
# Use expand.grid to create every combination of each species in each region (North or South)
Region_trends_outputs <- expand.grid(RIS_code = spp_codes, N_S = c(levels(as.factor(Site_info$N_S))))

# Add in the species names
Region_trends_outputs_2 <- merge(Region_trends_outputs, names, by = "RIS_code", all.x = TRUE)

# Reorder columns
Region_trends_outputs_2 <-
   Region_trends_outputs_2 %>%
   dplyr::select(RIS_code, Common_name, Binomial, N_S)

# Make blank columns for storing model outputs
Region_trends_outputs_2$Poptrend_perc_change <- NA
Region_trends_outputs_2$Poptrend_perc_change_upp <- NA
Region_trends_outputs_2$Poptrend_perc_change_low <- NA

# Add the total number of moths counted in the time series. Use the raw totals from the site-years that
# have an NM of 0.5 or more. That way I can give the actual number of moths that went into 
# the poptrend estimates.
Region_trends_outputs_2$Sample_size <- NA

# Add the total numebr of sites and site-years that went into the calculation as this might be useful later
Region_trends_outputs_2$Total_sites <- NA
Region_trends_outputs_2$Total_site_years <- NA

# Add min and max years
Region_trends_outputs_2$Min_year <- NA
Region_trends_outputs_2$Max_year <- NA

# # # 

# Within each species, each region will be looped through

# Make a list regions
N_S_regions <- c(levels(as.factor(Site_info$N_S)))


# ~~Start of loop ####

for(i_sp in 1:length(spp_codes)) {
   
   # Read in a sindex file 
   Sindex_file = readRDS(paste0("Data/Sindex_files/",spp_codes[i_sp],".rds"))
   
   # Rename year
   Sindex_file <- 
      Sindex_file %>%
      rename(Year = YEAR)
   
   # Print progress to screen
   print(paste0("Starting ", names$Common_name[names$RIS_code == spp_codes[i_sp]], ", No. ", spp_codes[i_sp]))
   print(paste0("Total progress = ", round(i_sp/432*100),"%"))
   
   # Paste on Site_info, keeping all rows for Sindex_file
   Sindex_plus_sites <- merge(Sindex_file, Site_info, by = c("SITE"), all.x = TRUE)
   
   # Cut out any site-year with a site-year completeness (NM) of less than 0.5 
   Sindex_plus_sites_B <- 
      Sindex_plus_sites %>% 
      dplyr::filter(NM >= 0.5)
   
   # Make an actual_years variable to merge on 
   Actual_years_ran <-
      Sindex_plus_sites_B %>%
      dplyr::group_by(SITE) %>%
      dplyr::summarise(Actual_years = length(unique(Year)))
   
   # Merge it on
   Sindex_plus_sites_C <- merge(Sindex_plus_sites_B, Actual_years_ran, by = "SITE")
   
   # And cut out any site with less than three years running
   Sindex_plus_sites_2 <- 
      Sindex_plus_sites_C %>%
      dplyr::filter(Actual_years >= 3)

   # Make Site_name a factor
   Sindex_plus_sites_2$Site_name <- as.factor(Sindex_plus_sites_2$Site_name)
   
   # # # 
   
   # ~~Start of region loop ####
   
   for(i_N_S in 1:length(N_S_regions)){
      
      # Subsets for North and South to be made
      
      # Make a subset of Sindex_plus_sites_2
      Sindex_plus_sites_2_region <-
         Sindex_plus_sites_2 %>%
         dplyr::filter(N_S == N_S_regions[i_N_S])
      
      if((length(levels(droplevels(Sindex_plus_sites_2_region$Site_name))) >= 6) & 
         sum(unique(Sindex_plus_sites_2_region$Actual_years)) > 100){ # I've set it so that a region 
         # needs at least 6 sites to be modeled, else the model will often fail. I've also set a minimum
         # of 100 site-years to be included. 
         

         # Print progress to screen 
         print(paste0("Doing ", N_S_regions[i_N_S], " for ", names$Common_name[names$RIS_code == spp_codes[i_sp]]))

         # Run the model
         Poptrend_my_raw_sindex <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Site_name, bs = "re"), 
                                          family = nb, data = Sindex_plus_sites_2_region)
         
         
         # Extract the percentage change and round it to 2 decimal places
         Poptrend_perc_change <- as.numeric(change(Poptrend_my_raw_sindex, start = 1968, end = 2016, alpha = 0.05)[1])
         Poptrend_perc_change <- round(Poptrend_perc_change, digits = 2)
         
         # Extract the upper and lower 95% CIs and round to 2 dp
         # Extract both as a list first
         Poptrend_perc_change_CIs <- change(Poptrend_my_raw_sindex, start = 1968, end = 2016, alpha = 0.05)[2]
         
         # Extract upper
         Poptrend_perc_change_upp <- as.numeric(Poptrend_perc_change_CIs$CI[2])
         Poptrend_perc_change_upp <- round(Poptrend_perc_change_upp, digits = 2)
         
         # Extract lower
         Poptrend_perc_change_low <- as.numeric(Poptrend_perc_change_CIs$CI[1])
         Poptrend_perc_change_low <- round(Poptrend_perc_change_low, digits = 2)
         
         
         # Now add all these to the Region_trends_outputs_2 file
         
         Region_trends_outputs_2$Poptrend_perc_change[Region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                                 & Region_trends_outputs_2$N_S == N_S_regions[i_N_S]] <- Poptrend_perc_change
         Region_trends_outputs_2$Poptrend_perc_change_upp[Region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                                     & Region_trends_outputs_2$N_S == N_S_regions[i_N_S]] <- Poptrend_perc_change_upp
         Region_trends_outputs_2$Poptrend_perc_change_low[Region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                                     & Region_trends_outputs_2$N_S == N_S_regions[i_N_S]] <- Poptrend_perc_change_low
         
         
         # Add the total sample size within this habitat to the Region_trends_outputs_2
         Region_trends_outputs_2$Sample_size[Region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                             & Region_trends_outputs_2$N_S == N_S_regions[i_N_S]] <- sum(Sindex_plus_sites_2_region$Raw_total)
         #
         #
         Region_trends_outputs_2$Total_sites[Region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                             & Region_trends_outputs_2$N_S == N_S_regions[i_N_S]] <- length(unique(Sindex_plus_sites_2_region$SITE))
         # Make a column for site-years
         Sindex_plus_sites_2_region$Site_year <- paste0(Sindex_plus_sites_2_region$SITE,"_",Sindex_plus_sites_2_region$Year)
         
         # Add the number of site-years to the Region_trends_outputs_2 df
         Region_trends_outputs_2$Total_site_years[Region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                  & Region_trends_outputs_2$N_S == N_S_regions[i_N_S]] <- length(unique(Sindex_plus_sites_2_region$Site_year))
         
         # Add min and max years
         # Min
         Region_trends_outputs_2$Min_year[Region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                          & Region_trends_outputs_2$N_S == N_S_regions[i_N_S]] <- min(Sindex_plus_sites_2_region$Year)
         # Max
         
         Region_trends_outputs_2$Max_year[Region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                          & Region_trends_outputs_2$N_S == N_S_regions[i_N_S]] <- max(Sindex_plus_sites_2_region$Year)

         
         # # # 
         
      }else{
         print(paste0("Not enough data for ", names$Common_name[names$RIS_code == spp_codes[i_sp]], " in ",N_S_regions[i_N_S]))
      }
      
   }
   
      # Save the Region_trends_outputs_2 so I don't lose data if the whole thing crashes
   # ***delete the ## below. I've kept this line hashed out so the data frame is not overwritten by accident
   ##write.csv(Region_trends_outputs_2, "Data/Region_trends.csv", row.names = FALSE)

   
   # Print progress to screen
   print(paste0("Just finished ", names$Common_name[names$RIS_code == spp_codes[i_sp]], ", No. ", spp_codes[i_sp]))
   print(paste0("Total progress = ", round(i_sp/length(spp_codes)*100),"%"))
   
}


# # # 

# End loop for region

# # # 

# ~~~ ####

# # # 

# Trends in each region within habitats (broadleaf woodland only) ####

# Make a dataframe to store the results in.

# First make a new Hab_region variable
Site_info$Hab_region <- paste0(Site_info$N_S,"_",Site_info$Habitat_500m)

# Use expand.grid to create every combination of each species in each region (North or South) 
# but for broadleaf woodland sites only
Hab_region_trends_outputs <- 
   expand.grid(RIS_code = spp_codes, Hab_region = 
                  c(levels(as.factor(Site_info$Hab_region[Site_info$Habitat_500m == "Deciduous_woodland"]))))

# Add in the species names
Hab_region_trends_outputs_2 <- merge(Hab_region_trends_outputs, names, by = "RIS_code", all.x = TRUE)

# Reorder columns
Hab_region_trends_outputs_2 <-
   Hab_region_trends_outputs_2 %>%
   dplyr::select(RIS_code, Common_name, Binomial, Hab_region)

# Make blank columns for storing model outputs
Hab_region_trends_outputs_2$Poptrend_perc_change <- NA
Hab_region_trends_outputs_2$Poptrend_perc_change_upp <- NA
Hab_region_trends_outputs_2$Poptrend_perc_change_low <- NA

# Add the total number of moths counted in the time series.
Hab_region_trends_outputs_2$Sample_size <- NA

# Add the total numebr of sites and site-years that went into the calculation as this might be useful later
Hab_region_trends_outputs_2$Total_sites <- NA
Hab_region_trends_outputs_2$Total_site_years <- NA

# Add min and max years
Hab_region_trends_outputs_2$Min_year <- NA
Hab_region_trends_outputs_2$Max_year <- NA

# Make a list regions
Hab_regions <- c(levels(as.factor(Hab_region_trends_outputs_2$Hab_region)))

# # # 

# ~~~Start of loop ####

for(i_sp in 1:length(spp_codes)) {
   
   # Read in a sindex file 
   Sindex_file = readRDS(paste0("Data/Sindex_files/",spp_codes[i_sp],".rds"))
   
   # Rename year
   Sindex_file <- 
      Sindex_file %>%
      rename(Year = YEAR)
   
   # Print progress to screen
   print(paste0("Starting ", names$Common_name[names$RIS_code == spp_codes[i_sp]], ", No. ", spp_codes[i_sp]))
   print(paste0("Total progress = ", round(i_sp/432*100),"%"))
   
   # Paste on Site_info, keeping all rows for Sindex_file
   Sindex_plus_sites <- merge(Sindex_file, Site_info, by = c("SITE"), all.x = TRUE)
   
   # # # # 
   
   
   # Cut the data down to only Broadleaf woodland sites
   Sindex_plus_sites_cut <-
      Sindex_plus_sites %>%
      dplyr::filter(Habitat_500m == "Deciduous_woodland")
   
   # Cut out any site-year with a site-year completeness (NM) of less than 0.5 
   Sindex_plus_sites_B <- 
      Sindex_plus_sites_cut %>% 
      dplyr::filter(NM >= 0.5)
   
   # Make an actual_years variable to merge on 
   Actual_years_ran <-
      Sindex_plus_sites_B %>%
      dplyr::group_by(SITE) %>%
      dplyr::summarise(Actual_years = length(unique(Year)))
   
   # Merge it on
   Sindex_plus_sites_C <- merge(Sindex_plus_sites_B, Actual_years_ran, by = "SITE")
   
   # And cut out any site with less than three years running
   Sindex_plus_sites_2 <- 
      Sindex_plus_sites_C %>%
      dplyr::filter(Actual_years >= 3)
   
   # Make Site_name a factor
   Sindex_plus_sites_2$Site_name <- as.factor(Sindex_plus_sites_2$Site_name)

  
   # # # 
   
   # ~~~ Start of habitat_region loop ####
   
   for(i_Hab_region in 1:length(Hab_regions)){
      
      # Subsets for North and South to be made
      
      # Make a subset of Sindex_plus_sites_2
      Sindex_plus_sites_2_region <-
         Sindex_plus_sites_2 %>%
         dplyr::filter(Hab_region == Hab_regions[i_Hab_region])
      
      if((length(levels(droplevels(Sindex_plus_sites_2_region$Site_name))) >= 6) & 
         sum(unique(Sindex_plus_sites_2_region$Actual_years)) > 100){ # I've set it so that a hab_region 
         # needs at least 6 sites to be modeled, else the model will often fail. I've also set a minimum
         # of 100 site-years to be included.  

         # Print progress to screen 
         print(paste0("Doing ", Hab_regions[i_Hab_region], " for ", names$Common_name[names$RIS_code == spp_codes[i_sp]]))

         # Cut out any site-years with NM less than 0.5
         Sindex_plus_sites_region_decent <-
            Sindex_plus_sites_2_region %>%
            dplyr::filter(NM >= 0.5)

         
         # Run the model
         Poptrend_my_raw_sindex <- ptrend(My_raw_sindex ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Site_name, bs = "re"), 
                                          family = nb, data = Sindex_plus_sites_region_decent)
         
         
         # Extract the percentage change and round it to 2 decimal places
         Poptrend_perc_change <- as.numeric(change(Poptrend_my_raw_sindex, start = 1968, end = 2016, alpha = 0.05)[1])
         Poptrend_perc_change <- round(Poptrend_perc_change, digits = 2)
         
         # Extract the upper and lower 95% CIs and round to 2 dp
         # Extract both as a list first
         Poptrend_perc_change_CIs <- change(Poptrend_my_raw_sindex, start = 1968, end = 2016, alpha = 0.05)[2]
         
         # Extract upper
         Poptrend_perc_change_upp <- as.numeric(Poptrend_perc_change_CIs$CI[2])
         Poptrend_perc_change_upp <- round(Poptrend_perc_change_upp, digits = 2)
         
         # Extract lower
         Poptrend_perc_change_low <- as.numeric(Poptrend_perc_change_CIs$CI[1])
         Poptrend_perc_change_low <- round(Poptrend_perc_change_low, digits = 2)
         
         
         # Now add all these to the Hab_region_trends_outputs_2 file
         
         Hab_region_trends_outputs_2$Poptrend_perc_change[Hab_region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                                     & Hab_region_trends_outputs_2$Hab_region == Hab_regions[i_Hab_region]] <- Poptrend_perc_change
         Hab_region_trends_outputs_2$Poptrend_perc_change_upp[Hab_region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                                         & Hab_region_trends_outputs_2$Hab_region == Hab_regions[i_Hab_region]] <- Poptrend_perc_change_upp
         Hab_region_trends_outputs_2$Poptrend_perc_change_low[Hab_region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                                         & Hab_region_trends_outputs_2$Hab_region == Hab_regions[i_Hab_region]] <- Poptrend_perc_change_low
         
         
         # Add the total sample size within this habitat to the Hab_region_trends_outputs_2
         Hab_region_trends_outputs_2$Sample_size[Hab_region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                 & Hab_region_trends_outputs_2$Hab_region == Hab_regions[i_Hab_region]] <- sum(Sindex_plus_sites_region_decent$Raw_total)
         #
         #
         Hab_region_trends_outputs_2$Total_sites[Hab_region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                 & Hab_region_trends_outputs_2$Hab_region == Hab_regions[i_Hab_region]] <- length(unique(Sindex_plus_sites_region_decent$SITE))
         # Make a column for site-years
         Sindex_plus_sites_region_decent$Site_info <- paste0(Sindex_plus_sites_region_decent$SITE,"_",Sindex_plus_sites_region_decent$Year)
         
         # Add the number of site-years to the Hab_region_trends_outputs_2 df
         Hab_region_trends_outputs_2$Total_site_years[Hab_region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                                      & Hab_region_trends_outputs_2$Hab_region == Hab_regions[i_Hab_region]] <- length(unique(Sindex_plus_sites_region_decent$Site_info))
         
         # Add min and max years
         # Min
         Hab_region_trends_outputs_2$Min_year[Hab_region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                              & Hab_region_trends_outputs_2$Hab_region == Hab_regions[i_Hab_region]] <- min(Sindex_plus_sites_region_decent$Year)
         # Max
         
         Hab_region_trends_outputs_2$Max_year[Hab_region_trends_outputs_2$RIS_code == spp_codes[i_sp] 
                                              & Hab_region_trends_outputs_2$Hab_region == Hab_regions[i_Hab_region]] <- max(Sindex_plus_sites_region_decent$Year)

         
         # # # 
         
      }else{
         print(paste0("Not enough data for ", names$Common_name[names$RIS_code == spp_codes[i_sp]], " in ",Hab_regions[i_Hab_region]))
      }
      
   }
   

   # Save the Hab_region_trends_outputs_2 so I don't lose data if the whole thing crashes
   # ***delete the ## below. I've kept this line hashed out so the data frame is not overwritten by accident
   ##write.csv(Hab_region_trends_outputs_2, "Data/Hab_region_trends.csv", row.names = FALSE)

   # Print progress to screen
   print(paste0("Just finished ", names$Common_name[names$RIS_code == spp_codes[i_sp]], ", No. ", spp_codes[i_sp]))
   print(paste0("Total progress = ", round(i_sp/length(spp_codes)*100),"%"))
   
}

# End of script