# Moth declines are most severe in broadleaf woodlands despite a net gain in habitat availability - # Blumgart et al., 2022 # Script_2_General_analysis # This script looks at how four response variables have changed over time from 1968 - 2016. # These response variables are: abundance, biomass, species richness and diversity. It also looks # at how these changes vary between habitats, regions and regions within habitat. Originally, # the interaction between region and two habitat types was going to be analysed but a jackknife # analysis showed that trends in Improved grasslands in the north were highly sensitive to the # exclusion of single sites, so improved grassland was not included in the habitat-region analysis, # leaving only broadleaf woodland. The jackknife analysis is also presented here. # This script also assesses the effect of estmated deer damage at woodland sites on the trend in # the four response variables. # The following things will be analysed: # 1. Species richness and diversity for Hill numbers 0 and 1 (i.e. richness and Shannon diversity) # change across: # a) All sites # b) The north and south # c) The 7 habitat categories # d) in broadleaf woodland in the north/south # 2. Total abundance change across: # a) All sites # b) The north and south # c) The 7 habitat categories # d) in broadleaf woodland in the north/south # 3. Biomass change across: # a) All sites # b) The north and south # c) The 7 habitat categories # d) in broadleaf woodland in the north/south # Libraries and functions #### library(dplyr) # For data manipulation library(tidyr) # For data manipulation library(ggplot2) # For plotting library(mgcv) # For GAMMs library(lme4) # For GLMERs library(poptrend) # For detecting significant short-term trends library(emmeans) # For post hoc tests and marginal means library(MASS) # for glm.nb library(itsadug) # for visualising GAMMs library(mgcViz) # for visualising GAMMs library(MuMIn) # For AICc library(gridExtra) # For multiple panel ggplots # # # # Data import #### # For species richness I need the diversity stats (prepared in Script_1_Making_diversity_stats_data_frame) Diversity_stats <- read.csv("Data/Diversity_stats_complete.csv") # For abundance and biomass I need the annual totals for each species Annual_species_totals <- read.csv("Data/All_years_all_species_all_sites.csv") # For abundance I only need one total abundance value per site-year so this can be summarised down Annual_aggregate_totals <- Annual_species_totals %>% dplyr::group_by(CalYear, RIS.TrapCode) %>% dplyr::summarise(Annual_count = sum(Annual_count)) # Import the biomass for each species (from Kinsella et al., 2020) Biomass_df <- read.csv("Data/Biomass_df.csv") # Site_info Site_info <- read.csv("Data/Site_info.csv") # Site_year_completeness info Site_year_completeness <- read.csv("Data/Site_year_info.csv") # All I need from this is Paul's complete codes (these are determined manually by the Rothamsted Insect # Survey) Site_year_completeness <- Site_year_completeness %>% dplyr::select(SITE, Year, Complete_type_Paul) # # # # # # # 1 Species richness and diversity #### # 1. Species richness and diversity for Hill numbers 0 - 1 (ie richness and Shannon diversity) # change across: # a) All sites # b) The north and south # c) The 7 habitat categories # d) Habitat*region interaction (broadleaf woodland only) # 1. Data prep #### # Get variable names names(Diversity_stats) names(Site_info) names(Site_year_completeness) # Prepare to merge these together Site_info_2 <- Site_info Site_info_2$Site_name <- NULL # Merge Diversity_stats_2 <- merge(Diversity_stats, Site_info_2, by = c("SITE")) Diversity_stats_3 <- merge(Diversity_stats_2, Site_year_completeness, by = c("SITE", "Year")) # Check names(Diversity_stats_3) # Cut down data to the sites and years needed Diversity_stats_4 <- Diversity_stats_3 %>% dplyr::filter(Year >= 1968 & Year <= 2016) %>% dplyr::filter(Region != "Channel Islands") %>% # Get rid of Channel Isles and Ireland sites dplyr::filter(Region != "Ireland") %>% dplyr::filter(Complete_type_Paul == "Complete") # Use 'Complete' site-years only # Make a new variable for Actual_years ran (between 1968 and 2016) Actual_years_ran <- Diversity_stats_4 %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on Diversity_stats_5 <- merge(Diversity_stats_4, Actual_years_ran, by = "SITE") # And cut it down to sites with at least three Actual_years running Diversity_stats_6 <- Diversity_stats_5 %>% dplyr::filter(Actual_years >= 3) # Make a site number factor Diversity_stats_6$fSITE <- as.factor(paste0("S_",Diversity_stats_6$SITE)) # Make a year factor for a random effect Diversity_stats_6$fYear <- as.factor(paste0("Y_", Diversity_stats_6$Year)) # Make sure Habitat_500m is a factor rather than a character Diversity_stats_6$Habitat_500m <- as.factor(Diversity_stats_6$Habitat_500m) # Also get rid of site-years that have NA for any of the diversity indices as these will be # poorly sampled Diversity_stats_7 <- Diversity_stats_6 %>% dplyr::filter(!is.na(Est_sp_richness)) %>% dplyr::filter(!is.na(Est_Shannon)) %>% dplyr::filter(Est_Shannon != Inf)# # Data prep complete # # # # # # # 1a) Modelling - Species richness and diversity - All sites #### # ~~~ Wiggly GAMM richness #### # Run a GAMM with a non-linear year effect, a random intercept for site and a random # slope for year within site Overall_rich_GAMM_wiggly <- gam(Est_sp_richness ~ s(Year) + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Have a look Rich_viz = getViz(Overall_rich_GAMM_wiggly, nsim = 0) print(plot(Rich_viz), pages = 1) # Random effects good. Richness peaks in 1990s. check(Rich_viz) # Residuals good # Is the year effect significant? anova(Overall_rich_GAMM_wiggly) # edf Ref.df F p-value #s(Year) 5.300 5.577 3.966 0.000635 # Yes, year is significant # Get the AICc AICc(Overall_rich_GAMM_wiggly) # 28869.86 # Plot predictions for Supporting Info (Fig. S2c) plot_smooth(Overall_rich_GAMM_wiggly, view = "Year", main = "Species richness", ylab = "Species richness", ylim = c(0, 220)) # Saved as 5 x 5 inch pdf # What is the percentage change? newX <- expand.grid(Year = seq(from = min(Diversity_stats_7$Year), to = max(Diversity_stats_7$Year), length = 49), fSITE = "S_1", fYear = "Y_2016") # (Selecting # random effects at random. I have tested that changing random effects has no effect on predictions) newY <- predict(Overall_rich_GAMM_wiggly, newdata = newX, se.fit = TRUE, exclude = c("s(fSITE)", "s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) (addXY$fit[addXY$Year == 2016] - addXY$fit[addXY$Year == 1968])/ addXY$fit[addXY$Year == 1968]*100 # -6.04% (this is from the non-linear model and is just for interest - will not be reported) # # # # ~~~ Linear GAMM richness #### # Run a linear GAMM Overall_rich_GAMM_linear <- gam(Est_sp_richness ~ Year + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Do the checks and plot Rich_linear_viz = getViz(Overall_rich_GAMM_linear, nsim = 0) print(plot(Rich_linear_viz), pages = 1) # Random effects good. check(Rich_linear_viz) # Residuals good # Is year effect significant? anova(Overall_rich_GAMM_linear) #Parametric Terms: # df F p-value # Year 1 0.18 0.672 # Year has no effect # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Overall_rich_GAMM_linear, pairwise ~ NULL, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Overall_rich_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Perc_change) Just_percs # -1.20% # Merge on to Emtrends_df_2 Richness_overall_df <- merge(Emtrends_df_2, Just_percs) Richness_overall_df$Measure <- "Richness" # Plot ggplot(data = Richness_overall_df, aes(x = Measure, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Richness") # This plot will be added to other plots and saved at the end of this script # How do the linear vs non-linear models compare? AICc(Overall_rich_GAMM_wiggly) # 28869.86 AICc(Overall_rich_GAMM_linear) # 28873.81 AICc(Overall_rich_GAMM_wiggly) - AICc(Overall_rich_GAMM_linear) # = -3.95 # Non-linear is better # # # # ~~~~~~ Effect of deer damage in woodland on richness #### # Run a linear GAMM for woodland only BL_woodland_rich_GAMM_linear <- gam(Est_sp_richness ~ Year*Deer_damage_estimate + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = subset(Diversity_stats_7, Habitat_500m == "Deciduous_woodland")) anova(BL_woodland_rich_GAMM_linear) # Parametric Terms: # df F p-value # Year 1 1.140 0.286 # Deer_damage_estimate 1 0.128 0.720 # Year:Deer_damage_estimate 1 0.091 0.764 # No significant interaction # # # # ~~~ Wiggly GAMM Shannon diversity #### # Run a GAMM with a non-linear year effect Overall_Shan_GAMM_wiggly <- gam(Est_Shannon ~ s(Year) + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Model chekcs Shan_viz = getViz(Overall_Shan_GAMM_wiggly, nsim = 0) print(plot(Shan_viz), pages = 1) # Random effects good enough. Diversity peaks around 1990s then plateaus. check(Shan_viz) # Resisuals good enough # Plot model predictions (Fig. S2d) plot_smooth(Overall_Shan_GAMM_wiggly, view = "Year", main = "Shannon diversity", ylab = "Diversity (effect common species)", ylim = c(0, 62)) # Saved as 5 x 5 inch pdf # Is year effect sig? anova(Overall_Shan_GAMM_wiggly) # edf Ref.df F p-value # s(Year) 3.828 4.031 4.052 0.00284 # Get AICc AICc(Overall_Shan_GAMM_wiggly) # 23448.33 newX <- expand.grid(Year = seq(from = min(Diversity_stats_7$Year), to = max(Diversity_stats_7$Year), length = 49), fSITE = "S_1", fYear = "Y_2016") # (Selecting # random effects at random. I have tested that changing random effects has no effect on predictions) newY <- predict(Overall_Shan_GAMM_wiggly, newdata = newX, se.fit = TRUE, exclude = c("s(fSITE)", "s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) (addXY$fit[addXY$Year == 2016] - addXY$fit[addXY$Year == 1968])/ addXY$fit[addXY$Year == 1968]*100 # 6.6% non-linear change (will not be reported) # # # # ~~~ Linear GAMM Shannon diversity #### # Run a linear GAMM Overall_Shan_GAMM_linear <- gam(Est_Shannon ~ Year + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Do the checks and plot Shan_linear_viz = getViz(Overall_Shan_GAMM_linear, nsim = 0) print(plot(Shan_linear_viz), pages = 1) # Random effects ok. One outlying year check(Shan_linear_viz) # Residuals good enough # Is year effect significant? anova(Overall_Shan_GAMM_linear) #Parametric Terms: # df F p-value # Year 1 7.455 0.00637 # Get AICc AICc(Overall_Shan_GAMM_linear) # 23449.32 # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Overall_Shan_GAMM_linear, pairwise ~ NULL, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc hange using predict for teh first and last years newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Overall_Shan_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Perc_change) Just_percs # 10.4% # Merge on to Emtrends_df_2 Shannon_overall_df <- merge(Emtrends_df_2, Just_percs) Shannon_overall_df$Measure <- "Shannon" ggplot(data = Shannon_overall_df, aes(x = Measure, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Shannon diversity") # This plot will be added to other plots and saved at the end of this script # How do linear and non-linear AICcs compare? AICc(Overall_Shan_GAMM_wiggly) # 23448.33 AICc(Overall_Shan_GAMM_linear) # 23449.32 AICc(Overall_Shan_GAMM_wiggly) - AICc(Overall_Shan_GAMM_linear) # = -0.9889182 # Models are equivalent so I will go with linear # # # # ~~~~~~ Effect of deer damage in woodland on diversity #### # Run a linear GAMM BL_woodland_Shan_GAMM_linear <- gam(Est_Shannon ~ Year*Deer_damage_estimate + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = subset(Diversity_stats_7, Habitat_500m == "Deciduous_woodland")) anova(BL_woodland_Shan_GAMM_linear) # Parametric Terms: # df F p-value # Year 1 0.436 0.509 # Deer_damage_estimate 1 1.518 0.219 # Year:Deer_damage_estimate 1 1.597 0.207 # No significant interaction # # # # 1b) Modelling - Species richness and diversity - The north and south #### # ~~~ Wiggly GAMM richness #### # Make N_S a factor Diversity_stats_7$N_S <- as.factor(Diversity_stats_7$N_S) # Run a GAMM with a non-linear year effect and region interaction Region_rich_GAMM_wiggly <- gam(Est_sp_richness ~ s(Year, by = N_S) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Takes a while to run... # How does it look? Rich_region_viz = getViz(Region_rich_GAMM_wiggly, nsim = 0) print(plot(Rich_region_viz), pages = 1) # Random effects very good. Trend in north and south contrasting check(Rich_region_viz) # Residuals good # Plot model predictions (Fig. S3c) plot_smooth(Region_rich_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Species richness", ylab = "Species richness", ylim = c(0, 250)) # Saved as 5 x 5 inch pdf # Is the effect of year within each region significant? anova(Region_rich_GAMM_wiggly) # edf Ref.df F p-value # s(Year):N_SNorth 1.798 2.198 3.503 0.0326 # s(Year):N_SSouth 1.366 1.519 1.391 0.1469 # Trend in north is sig # Get AICc AICc(Region_rich_GAMM_wiggly) # 28851.79 # # # # ~~~ Linear GAMM richness #### # Run a linear GAMM Region_rich_GAMM_linear <- gam(Est_sp_richness ~ Year*N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Do the checks and plot Rich_region_linear_viz = getViz(Region_rich_GAMM_linear, nsim = 0) print(plot(Rich_region_linear_viz), pages = 1) # Random effects good check(Rich_region_linear_viz) # Residuals good # Is the N_S*Year interaction sig? anova(gam) is analogous to drop1(glm) for parametric terms anova(Region_rich_GAMM_linear) # Parametric Terms: # df F p-value # Year 1 4.027 0.044884 # N_S 1 14.656 0.000132 # Year:N_S 1 13.388 0.000258 # There is a sig Year*N_S interaction # Get AICc AICc(Region_rich_GAMM_linear) # 28852.79 # Is linear or wiggly better? AICc(Region_rich_GAMM_wiggly) - AICc(Region_rich_GAMM_linear) # -0.9988898 # They are equivalent so I will go with linear # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Region_rich_GAMM_linear, pairwise ~ N_S, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for teh first and last years newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(as.factor(Diversity_stats_7$N_S))), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Region_rich_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(N_S, Perc_change) Just_percs # N_S Perc_change # North 9.08 # South -4.28 # Merge on to Emtrends_df_2 Richness_region_df <- merge(Emtrends_df_2, Just_percs) Richness_region_df$Measure <- "Richness" # Reorder north and south Richness_region_df$N_S <- factor(Richness_region_df$N_S, levels = c("South", "North")) # Plot ggplot(data = Richness_region_df, aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Richness") # This plot will be added to other plots and saved at the end of this script # # # # ~~~ Wiggly GAMM Shannon diversity #### # Make N_S a factor Diversity_stats_7$N_S <- as.factor(Diversity_stats_7$N_S) # Run a GAMM with a non-linear year effect and region interaction Region_Shan_GAMM_wiggly <- gam(Est_Shannon ~ s(Year, by = N_S) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Takes a while to run... # How does it look? Shan_region_viz = getViz(Region_Shan_GAMM_wiggly, nsim = 0) print(plot(Shan_region_viz), pages = 1) # Random effects good check(Shan_region_viz) # Residuals good but with one outlier. # The outlier is SITE 259, Santon Downham in 1985 (Conifer_plantation). Seems like a normal site-year # with high species richness so I've kept it in. # Plot model predictions (Fig. S3d) plot_smooth(Region_Shan_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Shannon diversity", ylab = "Diversity (effective common species)", ylim = c(0, 70)) # Saved as 5 x 5 inch pdf # Is year significant within each region? anova(Region_Shan_GAMM_wiggly) #Approximate significance of smooth terms: # edf Ref.df F p-value # s(Year):N_SNorth 4.078 4.580 4.908 0.000421 # s(Year):N_SSouth 7.765 8.330 3.163 0.001414 # Yes, sig in both north and south # Get the AICc AICc(Region_Shan_GAMM_wiggly) # 23430.11 # Find the significance of the interaction effect by running a model with no interaction and comparing Reduced_Region_Shan_GAMM_wiggly <- gam(Est_Shannon ~ s(Year) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) anova(Region_Shan_GAMM_wiggly, Reduced_Region_Shan_GAMM_wiggly, test = "Chisq") # Resid. Df Resid. Dev Df Deviance Pr(>Chi) # 1 2729.9 314785 # 2 2737.5 318101 -7.5766 -3316 0.0002288 *** # # # # ~~~ Linear GAMM Shannon diversity #### # Run a linear GAMM Region_Shan_GAMM_linear <- gam(Est_Shannon ~ Year*N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Do the checks and plot Shan_region_linear_viz = getViz(Region_Shan_GAMM_linear, nsim = 0) print(plot(Shan_region_linear_viz), pages = 1) # Random effects ok check(Shan_region_linear_viz) # Residuals good enough # Is the N_S*Year interaction sig? anova(gam) is analogous to drop1(glm) anova(Region_Shan_GAMM_linear) # Parametric Terms: # df F p-value # Year 1 11.639 0.000655 # N_S 1 4.536 0.033268 # Year:N_S 1 4.135 0.042105 # There is a sig Year*N_S interaction (only just!) # Get AICc AICc(Region_Shan_GAMM_linear) # 23444.05 # Plotting # How does non-linear and linear compare? AICc(Region_Shan_GAMM_wiggly) - AICc(Region_Shan_GAMM_linear) # -13.93758 # The non-linear is better # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Region_Shan_GAMM_linear, pairwise ~ N_S, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for teh first and last years newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Diversity_stats_7$N_S)), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Region_Shan_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(N_S, Perc_change) Just_percs # N_S Perc_change # 1 North 20.7 # 2 South 6.97 # Merge on to Emtrends_df_2 Shannon_region_df <- merge(Emtrends_df_2, Just_percs) Shannon_region_df$Measure <- "Shannon" # Reorder north and south Shannon_region_df$N_S <- factor(Shannon_region_df$N_S, levels = c("South", "North")) ggplot(data = Shannon_region_df, aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Shannon diversity") # This plot will be added to other plots and saved at the end of this script # # # # 1c) Modelling - Species richness and diversity - 7 habitats #### # ~~~ Wiggly GAMM richness #### # Make Habitat_500m a factor Diversity_stats_7$Habitat_500m <- as.factor(Diversity_stats_7$Habitat_500m) # Run a GAMM with a non-linear year effect and habitat interaction Habitat_rich_GAMM_wiggly <- gam(Est_sp_richness ~ s(Year, by = Habitat_500m) + Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Takes a while to run... # How does it look? Rich_habitat_viz = getViz(Habitat_rich_GAMM_wiggly, nsim = 0) print(plot(Rich_habitat_viz), pages = 1) # Random effects good. Clear decline in woodland check(Rich_habitat_viz) # Residuals good anova(Habitat_rich_GAMM_wiggly) # edf Ref.df F p-value # s(Year):Habitat_500mArable 2.488 3.086 2.140 0.0942 # s(Year):Habitat_500mConifer_plantation 1.000 1.000 0.004 0.9510 # s(Year):Habitat_500mDeciduous_woodland 2.170 2.696 9.734 1.18e-05 # s(Year):Habitat_500mImproved_grassland 2.369 2.893 2.215 0.0808 # s(Year):Habitat_500mLowland_semi_natural 3.749 4.656 1.436 0.2243 # s(Year):Habitat_500mUpland 1.000 1.000 1.546 0.2138 # s(Year):Habitat_500mUrban 3.385 4.195 2.156 0.0684 # Woodland is the only sig smoother # Get the AICc AICc(Habitat_rich_GAMM_wiggly) # 28846.92 # Find the significance of the interaction effect by running a model with no interaction and comparing Reduced_Habitat_rich_GAMM_wiggly <- gam(Est_sp_richness ~ s(Year) + Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) anova(Habitat_rich_GAMM_wiggly, Reduced_Habitat_rich_GAMM_wiggly, test = "Chisq") # Resid. Df Resid. Dev Df Deviance Pr(>Chi) #1 2720.0 1849719 #2 2737.2 1880036 -17.211 -30317 0.0002738 *** # # # # ~~~~~~Plotting wiggly species richness (Habitat) for paper #### # Make a new data frame to predict from. newX <- expand.grid(Year = seq(from = min(Diversity_stats_7$Year), to = max(Diversity_stats_7$Year)), Habitat_500m = levels(as.factor(Diversity_stats_7$Habitat_500m)), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random) # Get predicted values newY <- predict(Habitat_rich_GAMM_wiggly, newdata = newX, se.fit = TRUE, exclude = c("s(fYear)","s(Year,fSITE)")) # I have tested changing randoms makes no diff # Add them together addXY <- data.frame(newX, newY) # Rename them and make the CIs addXY_2 <- addXY %>% mutate(Richness = (fit), lwr = (fit - 1.96*se.fit), upr = (fit + 1.96*se.fit)) # Rename and reorder Habitat levels in prep # Make it a character so it can be changed addXY_2$Habitat_500m <- as.character(addXY_2$Habitat_500m) # Correct names addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Improved_grassland"] <- "Improved grassland" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'" # Get currentlevels levels(as.factor(addXY_2$Habitat_500m)) # Order them addXY_2$Habitat_500m <- factor(addXY_2$Habitat_500m, levels = c("Arable","Broadleaf woodland","Conifer plantation", "Improved grassland","'Other semi-natural'", "Upland","Urban")) # Final plot (Fig. S4c) ggplot(data = addXY_2, aes(x = Year, y = Richness)) + geom_hline(yintercept = mean(newY$fit), linetype = "dashed") + geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity", colour = "black") + theme_bw() + theme(panel.border = element_blank()) + #theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Species richness") + #theme(strip.background = element_rect(color="black", fill="white", size=0.5, linetype="blank")) + facet_wrap(~Habitat_500m, nrow = 2) + ylim(0, 300) # Saved as 5 x 9 inch PDF # # # # ~~~ Linear GAMM richness #### # Run a linear GAMM Habitat_rich_GAMM_linear <- gam(Est_sp_richness ~ Year*Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Do the checks and plot Rich_habitat_linear_viz = getViz(Habitat_rich_GAMM_linear, nsim = 0) print(plot(Rich_habitat_linear_viz), pages = 1) # Random effects good check(Rich_habitat_linear_viz) # Residuals good anova(Habitat_rich_GAMM_linear) # Analogous to drop1 # Parametric Terms: # df F p-value # Year 1 0.020 0.888 # Habitat_500m 6 5.351 1.67e-05 # Year:Habitat_500m 6 5.057 3.61e-05 # There is a sig Year*Habitat interaction # Get the AICc AICc(Habitat_rich_GAMM_linear) # 28853.66 # How does linear compare to non-linear? AICc(Habitat_rich_GAMM_wiggly) - AICc(Habitat_rich_GAMM_linear) # -6.73521 # Non-linear is better # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Habitat_rich_GAMM_linear, pairwise ~ Habitat_500m, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for teh first and last years newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(Diversity_stats_7$Habitat_500m)), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Habitat_rich_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Habitat_500m, Perc_change) Just_percs # Habitat_500m Perc_change # Arable 0.571 # Conifer_plantation -0.649 # Deciduous_woodland -14.3 # Improved_grassland 2.05 # Lowland_semi_natural 7.78 # Upland 15.8 #7Urban 2.97 # Merge on to Emtrends_df_2 Richness_habitat_df <- merge(Emtrends_df_2, Just_percs) Richness_habitat_df$Measure <- "Richness" # Reorder and rename habitats # Turn them to a character so they can be renamed Richness_habitat_df$Habitat_500m <- as.character(Richness_habitat_df$Habitat_500m) # Correct names Richness_habitat_df$Habitat_500m[Richness_habitat_df$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation" Richness_habitat_df$Habitat_500m[Richness_habitat_df$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland" Richness_habitat_df$Habitat_500m[Richness_habitat_df$Habitat_500m == "Improved_grassland"] <- "Improved grassland" Richness_habitat_df$Habitat_500m[Richness_habitat_df$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'" # Get currentlevels levels(as.factor(Richness_habitat_df$Habitat_500m)) # Order them Richness_habitat_df$Habitat_500m <- factor(Richness_habitat_df$Habitat_500m, levels = c("Urban", "Upland", "'Other semi-natural'", "Improved grassland", "Conifer plantation", "Broadleaf woodland", "Arable")) # plot ggplot(data = Richness_habitat_df, aes(x = Habitat_500m, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + xlab("Habitat") + ggtitle("Richness") # This plot will be combined with other plots and saved at the end of this script # # # # ~~~ Wiggly GAMM Shannon diversity #### # Make Habitat_500m a factor Diversity_stats_7$Habitat_500m <- as.factor(Diversity_stats_7$Habitat_500m) # Run a GAMM with a non-linear year effect and region interaction Habitat_Shan_GAMM_wiggly <- gam(Est_Shannon ~ s(Year, by = Habitat_500m) + Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Takes a while to run... # How does it look? Shan_habitat_viz = getViz(Habitat_Shan_GAMM_wiggly, nsim = 0) print(plot(Shan_habitat_viz), pages = 1) # Random effects just about ok. Clear decline in woodland check(Rich_habitat_viz) # fine # Get significance of year smooth within each habitat anova(Habitat_Shan_GAMM_wiggly) #Approximate significance of smooth terms: # edf Ref.df F p-value #s(Year):Habitat_500mArable 1.561 1.891 8.273 0.001083 #s(Year):Habitat_500mConifer_plantation 2.544 3.225 0.635 0.647169 #s(Year):Habitat_500mDeciduous_woodland 1.562 1.904 7.855 0.001136 #s(Year):Habitat_500mImproved_grassland 2.465 3.040 5.294 0.001169 #s(Year):Habitat_500mLowland_semi_natural 1.596 1.973 1.718 0.214665 #s(Year):Habitat_500mUpland 2.892 3.589 1.568 0.185199 #s(Year):Habitat_500mUrban 1.000 1.000 12.177 0.000491 # Get AICc AICc(Habitat_Shan_GAMM_wiggly) # 23397.53 # ~~~~~~Plotting wiggly diveristy (Habitat) for paper #### # Make a new data frame to predict from. newX <- expand.grid(Year = seq(from = min(Diversity_stats_7$Year), to = max(Diversity_stats_7$Year)), Habitat_500m = levels(as.factor(Diversity_stats_7$Habitat_500m)), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random) # Get predicted values newY <- predict(Habitat_Shan_GAMM_wiggly, newdata = newX, se.fit = TRUE, exclude = c("s(fYear)","s(Year,fSITE)")) # I have tested changing randoms makes no diff # Add them together addXY <- data.frame(newX, newY) # Rename them and make the CIs addXY_2 <- addXY %>% mutate(Diversity = (fit), lwr = (fit - 1.96*se.fit), upr = (fit + 1.96*se.fit)) # Rename and reorder Habitat levels in prep # Make it a character so it can be changed addXY_2$Habitat_500m <- as.character(addXY_2$Habitat_500m) # Correct names addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Improved_grassland"] <- "Improved grassland" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'" # Get currentlevels levels(as.factor(addXY_2$Habitat_500m)) # Order them addXY_2$Habitat_500m <- factor(addXY_2$Habitat_500m, levels = c("Arable","Broadleaf woodland","Conifer plantation", "Improved grassland","'Other semi-natural'", "Upland","Urban")) # Final plot (Fig. S4d) ggplot(data = addXY_2, aes(x = Year, y = Diversity)) + geom_hline(yintercept = mean(newY$fit), linetype = "dashed") + geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity", colour = "black") + theme_bw() + theme(panel.border = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Diversity (effective common species)") + facet_wrap(~Habitat_500m, nrow = 2) + ylim(0, 90) # Saved as 5 x 9 inch PDF # # # # ~~~ Linear GAMM Shannon diversity #### # Run a linear GAMM Habitat_Shan_GAMM_linear <- gam(Est_Shannon ~ Year*Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_7) # Do the checks and plot Shan_habitat_linear_viz = getViz(Habitat_Shan_GAMM_linear, nsim = 0) print(plot(Shan_habitat_linear_viz), pages = 1) # Some random effects a little off check(Shan_habitat_linear_viz) # Residuals good enough # Is there a sig year*habitat interaction? anova(Habitat_Shan_GAMM_linear) # Analagous to drop1 #Parametric Terms: # df F p-value #Year 1 13.319 0.000268 #Habitat_500m 6 8.971 9.99e-10 #Year:Habitat_500m 6 8.606 2.70e-09 # There is a sig Year*Habitat interaction # Get the AICc AICc(Habitat_Shan_GAMM_linear) # 23401.16 # How does non-linear compare to linear AICc(Habitat_Shan_GAMM_wiggly) - AICc(Habitat_Shan_GAMM_linear) # -3.624516 #Non-linear is better # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Habitat_Shan_GAMM_linear, pairwise ~ Habitat_500m, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for teh first and last years newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(Diversity_stats_7$Habitat_500m)), fSITE = "S_1", fYear = "Y_2016") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Habitat_Shan_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Habitat_500m, Perc_change) Just_percs # Habitat_500m Perc_change #1 Arable 24.8 #2 Conifer_plantation 3.80 #3 Deciduous_woodland -15.1 #4 Improved_grassland 16.4 #5 Lowland_semi_natural 19.1 #6 Upland 28.1 #7 Urban 23.6 # Merge on to Emtrends_df_2 Shannon_habitat_df <- merge(Emtrends_df_2, Just_percs) Shannon_habitat_df$Measure <- "Shannon" # Reorder and rename habitats # Turn them to a character so they can be renamed Shannon_habitat_df$Habitat_500m <- as.character(Shannon_habitat_df$Habitat_500m) # Correct names Shannon_habitat_df$Habitat_500m[Shannon_habitat_df$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation" Shannon_habitat_df$Habitat_500m[Shannon_habitat_df$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland" Shannon_habitat_df$Habitat_500m[Shannon_habitat_df$Habitat_500m == "Improved_grassland"] <- "Improved grassland" Shannon_habitat_df$Habitat_500m[Shannon_habitat_df$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'" # Get currentlevels levels(as.factor(Shannon_habitat_df$Habitat_500m)) # Order them Shannon_habitat_df$Habitat_500m <- factor(Shannon_habitat_df$Habitat_500m, levels = c("Urban", "Upland", "'Other semi-natural'", "Improved grassland", "Conifer plantation", "Broadleaf woodland", "Arable")) # Plot ggplot(data = Shannon_habitat_df, aes(x = Habitat_500m, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + xlab("Habitat") + ggtitle("Shannon") # This plot will be added to other plots and saved at the end of this script # # # # 1d) Modelling - Species richness and diversity - Habitat*region interaction #### # Data prep # Get only the woodland sites and make a new Hab_region var Diversity_stats_hab_region <- Diversity_stats_7 %>% dplyr::filter(Habitat_500m == "Deciduous_woodland") %>% dplyr::mutate(Hab_region = paste0(Habitat_500m,"_",N_S)) # ~~~ Wiggly GAMM richness (broadleaf woodland only) #### # Make N_S a factor Diversity_stats_hab_region$N_S <- as.factor(Diversity_stats_hab_region$N_S) # Run a GAMM with a non-linear year effect and region interaction Hab_region_rich_GAMM_wiggly <- gam(Est_sp_richness ~ s(Year, by = N_S) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_hab_region) # How does it look? Rich_hab_region_viz = getViz(Hab_region_rich_GAMM_wiggly, nsim = 0) print(plot(Rich_hab_region_viz), pages = 1) # Random effects ok check(Rich_hab_region_viz) # Residuals fine # Plot model predictions (Fig. S6c) plot_smooth(Hab_region_rich_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Species richness", ylim = c(0,270), ylab = "Species richness") # Saved as 5 x 5 inch pdf summary(Hab_region_rich_GAMM_wiggly) # Both smoothers sig #Approximate significance of smooth terms: # edf Ref.df F p-value # s(Year):N_SNorth 5.705 6.705 3.202 0.00270 ** # s(Year):N_SSouth 2.380 2.916 8.777 2.38e-05 *** # Get the AICc AICc(Hab_region_rich_GAMM_wiggly) # 5349.431 # Get significance of interaction by comparing full to reduced model Reduced_Hab_region_rich_GAMM_wiggly <- gam(Est_sp_richness ~ s(Year) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_hab_region) anova(Hab_region_rich_GAMM_wiggly, Reduced_Hab_region_rich_GAMM_wiggly, test = "Chisq") # Resid. Df Resid. Dev Df Deviance Pr(>Chi) # 1 491.82 329107 # 2 497.26 335338 -5.4415 -6230.8 0.1116 # No sig interaction between region and year for broadleaf woodlands # # # # ~~~ Linear GAMM richness (broadleaf woodland only) #### # Run a linear GAMM Hab_region_rich_GAMM_linear <- gam(Est_sp_richness ~ Year*N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_hab_region) # Do the checks and plot Rich_hab_region_linear_viz = getViz(Hab_region_rich_GAMM_linear, nsim = 0) print(plot(Rich_hab_region_linear_viz), pages = 1) # Random effects a little off check(Rich_hab_region_linear_viz) # Residuals good enough anova(Hab_region_rich_GAMM_linear) # Analagous to drop1 #Parametric Terms: # df F p-value #Year 1 5.503 0.0194 #N_S 1 1.084 0.2983 #Year:N_S 1 0.865 0.3528 # There is no sig Year*Region interaction for woodlands # Get AICc AICc(Hab_region_rich_GAMM_linear) # 5358.454 # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Hab_region_rich_GAMM_linear, pairwise ~ N_S, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Diversity_stats_hab_region$N_S)), fSITE = "S_46", fYear = "Y_1993") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Hab_region_rich_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(N_S, Perc_change) Just_percs # N_S Perc_change # 1 North -13.1 # 2 South -14.4 # Merge on to Emtrends_df_2 Richness_hab_region_df <- merge(Emtrends_df_2, Just_percs) Richness_hab_region_df$Measure <- "Richness" # Plot ggplot(data = Richness_hab_region_df, aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + xlab("Region") + ggtitle("Richness") # This plot will be added to other plots and saved at the end of this script # # # # ~~~ Wiggly GAMM Shannon diversity (broadleaf woodland only) #### # Make N_S a factor Diversity_stats_hab_region$N_S <- as.factor(Diversity_stats_hab_region$N_S) # Run a GAMM with a non-linear year effect and region interaction Hab_region_Shan_GAMM_wiggly <- gam(Est_Shannon ~ s(Year, by = N_S) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_hab_region) # Quick to run now for some reason # How does it look? Shan_hab_region_viz = getViz(Hab_region_Shan_GAMM_wiggly, nsim = 0) print(plot(Shan_hab_region_viz), pages = 1) # Random effects ok. Smoother shapes wigglier than expected check(Shan_hab_region_viz) # Plot (Fig. S6d) plot_smooth(Hab_region_Shan_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Shannon diversity", ylim = c(0,100), ylab = "Diversity (effective common species)") # Saved as 5 x 5 inch pdf summary(Hab_region_Shan_GAMM_wiggly) #Approximate significance of smooth terms: # edf Ref.df F p-value #s(Year):N_SNorth 1.832 2.232 0.766 0.5294 #s(Year):N_SSouth 8.387 8.803 5.311 6.45e-07 *** # Get AICc AICc(Hab_region_Shan_GAMM_wiggly) # 4465.368 # Find significance of interaction effect by comparing full and reuduced model Reduced_Hab_region_Shan_GAMM_wiggly <- gam(Est_Shannon ~ s(Year) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_hab_region) anova(Hab_region_Shan_GAMM_wiggly, Reduced_Hab_region_Shan_GAMM_wiggly, test = "Chisq") # Resid. Df Resid. Dev Df Deviance Pr(>Chi) #1 489.06 68440 #2 494.49 73040 -5.4302 -4600.2 4.137e-06 *** # # # # ~~~ Linear GAMM shannon diversity (broadleaf woodland only) #### # Run a linear GAMM Hab_region_Shan_GAMM_linear <- gam(Est_Shannon ~ Year*N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Diversity_stats_hab_region) # Do the checks and plot Shan_hab_region_linear_viz = getViz(Hab_region_Shan_GAMM_linear, nsim = 0) print(plot(Shan_hab_region_linear_viz), pages = 1) # Random effects just about ok check(Shan_hab_region_linear_viz) # Residuals good enough anova(Hab_region_Shan_GAMM_linear) # Analagous to drop1 #Parametric Terms: # df F p-value #Year 1 0.200 0.6547 #N_S 1 5.714 0.0172 #Year:N_S 1 5.362 0.0210 # There is a sig Year*Region interaction in woodland # Get AICc AICc(Hab_region_Shan_GAMM_linear) # 4482.204 # How does linear compare to non-linear? AICc(Hab_region_Shan_GAMM_wiggly) - AICc(Hab_region_Shan_GAMM_linear) # -16.83565 # Non-linear is better # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Hab_region_Shan_GAMM_linear, pairwise ~ N_S, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Diversity_stats_hab_region$N_S)), fSITE = "S_46", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Hab_region_Shan_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(N_S, Perc_change) Just_percs # N_S Perc_change # 1 North -3.73 # 2 South -19.0 # Merge on to Emtrends_df_2 Shannon_hab_region_df <- merge(Emtrends_df_2, Just_percs) Shannon_hab_region_df$Measure <- "Shannon" # Plot ggplot(data = Shannon_hab_region_df, aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + xlab("Region") + ggtitle("Shannon diversity") # This plot will be added to other plots and saved at the end of this script # # # # ~~~~~ #### # 2. Aggregate abundance change #### # 2. Aggregate abundance change across: # a) All sites # b) The north and south # c) The 7 habitat categories # d) in broadleaf woodland/improved grassland in the north/south # In addition to the poptends and GAMMs, I will also model with linear model above for direct # comparison # ~ Data prep #### # The dfs I need are names(Annual_aggregate_totals) names(Site_info) names(Site_year_completeness) # Prepare to merge these together Site_info_2 <- Site_info Site_info_2$Site_name <- NULL Site_year_completeness_2 <- Site_year_completeness Site_year_completeness_2$Site_name_db <- NULL Site_year_completeness_2$Site_name_db_nogaps <- NULL # Some renaming Annual_aggregate_totals <- Annual_aggregate_totals %>% dplyr::rename(Year = CalYear, SITE = RIS.TrapCode) # Merge Annual_aggregate_totals_2 <- merge(Annual_aggregate_totals, Site_info_2, by = c("SITE")) Annual_aggregate_totals_3 <- merge(Annual_aggregate_totals_2, Site_year_completeness_2, by = c("SITE", "Year")) # Check names(Annual_aggregate_totals_3) # Now cut down data to desirable sites and site-years # Get rid of anything outside the 1968 - 2016 range and also the Ireland and Channel Isles sites # and keep only "Complete" site-years Annual_aggregate_totals_4 <- Annual_aggregate_totals_3 %>% dplyr::filter(Year >= 1968 & Year <= 2016) %>% dplyr::filter(Region != "Channel Islands") %>% dplyr::filter(Region != "Ireland") %>% dplyr::filter(Complete_type_Paul == "Complete") # Make a new variables for Actual_years Actual_years_ran <- Annual_aggregate_totals_4 %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on Annual_aggregate_totals_5 <- merge(Annual_aggregate_totals_4, Actual_years_ran, by = "SITE") # And cut it down to sites with at least three Actual_years running Annual_aggregate_totals_6 <- Annual_aggregate_totals_5 %>% dplyr::filter(Actual_years >= 3) # Make Annual_count an interger too Annual_aggregate_totals_6$Annual_count <- as.integer(round(Annual_aggregate_totals_6$Annual_count)) # Make an fSITE number so the spaces in the site names doesn't mess things up Annual_aggregate_totals_6$fSITE <- as.factor(paste0("S_",Annual_aggregate_totals_6$SITE)) # Make a year factor for a random effect Annual_aggregate_totals_6$fYear <- as.factor(paste0("Y_", Annual_aggregate_totals_6$Year)) # Get some summary statistics. # How many moths in all? sum(Annual_aggregate_totals_6$Annual_count) # 8,829,484 moths # # # # 2a) Modelling - aggregate abundance - All sites #### # How many sites and site-years? Annual_aggregate_totals_6 %>% # group_by(Habitat_500m) %>% dplyr::summarise(Number = length(unique(fSITE))) # 266 sites # How many sites and site years? Annual_aggregate_totals_6 %>% # group_by(Habitat_500m) %>% dplyr::mutate(Site_year = paste0(fSITE,"_",fYear)) %>% dplyr::summarise(Number = length(unique(Site_year))) # 3055 site-years 3055/266 # mean of 11.5 years per site # ~~~ Wiggly GAMM abundance #### # Run a GAMM with a non-linear year effect Overall_ab_GAMM_wiggly <- gam(Annual_count ~ s(Year) + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_6) # Takes a while because of the nb # Model checks Ab_viz = getViz(Overall_ab_GAMM_wiggly, nsim = 0) print(plot(Ab_viz), pages = 1) # One of the site random effects a little off. Clear decline check(Ab_viz) # QQ plot not great # Plot model predictions (Fig. S2a) plot_smooth(Overall_ab_GAMM_wiggly, view = "Year", main = "Total abundance", transform = "exp", ylim = c(0,3000), ylab = "Total abundance") # Quite linear # Saved as 5 x 5 inch pdf newX <- expand.grid(Year = seq(from = min(Annual_aggregate_totals_6$Year), to = max(Annual_aggregate_totals_6$Year), length = 49), fSITE = "S_22", fYear = "Y_1968") # (Selecting # random effects at random. I have tested that changing random effects has no effect on predictions) newY <- predict(Overall_ab_GAMM_wiggly, newdata = newX, se.fit = TRUE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) addXY_2 <- addXY %>% mutate(fit = exp(fit), lwr = exp(fit - 1.96*se.fit), upr = exp(fit + 1.96*se.fit)) Abundance_perc_change <- (addXY_2$fit[addXY_2$Year == 2016] - addXY_2$fit[addXY_2$Year == 1968])/ addXY_2$fit[addXY_2$Year == 1968]*100 Abundance_perc_change # -33.91189% anova(Overall_ab_GAMM_wiggly) # edf Ref.df Chi.sq p-value #s(Year) 1.010 1.011 19.67 1.01e-05 AICc(Overall_ab_GAMM_wiggly) # 48543.94 # # # # # # # # ~~~ Linear GAMM abundance #### # Run a linear GAMM Overall_ab_GAMM_linear <- gam(Annual_count ~ Year + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_6) # Do the checks and plot Ab_linear_viz = getViz(Overall_ab_GAMM_linear, nsim = 0) print(plot(Ab_linear_viz), pages = 1) # A few outlying sites check(Ab_linear_viz) # QQ plot dodgy summary(Overall_ab_GAMM_linear) #Parametric coefficients: # Estimate Std. Error z value Pr(>|z|) # (Intercept) 24.783324 3.879914 6.388 1.69e-10 *** # Year -0.008629 0.001948 -4.430 9.43e-06 *** # Year has sig pos effect anova(Overall_ab_GAMM_linear) # Parametric Terms: # df Chi.sq p-value # Year 1 19.62 9.43e-06 # AICcs AICc(Overall_ab_GAMM_linear) # 48541.55 # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Overall_ab_GAMM_linear, pairwise ~ NULL, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Overall_ab_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Perc_change) Just_percs # -33.9% # Merge on to Emtrends_df_2 Ab_overall_df <- merge(Emtrends_df_2, Just_percs) Ab_overall_df$Measure <- "Abundance" ggplot(data = Ab_overall_df, aes(x = Measure, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Total abundance") # This plot will be combined with other plots and saved at the end of this script # # # # ~~~~~~ Effect of deer damage in woodland on abundance #### # Run a linear GAMM BL_woodland_ab_GAMM_linear <- gam(Annual_count ~ Year*Deer_damage_estimate + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Deciduous_woodland")) anova(BL_woodland_ab_GAMM_linear) # Parametric Terms: # df Chi.sq p-value # Year 1 10.455 0.00122 # Deer_damage_estimate 1 3.531 0.06022 # Year:Deer_damage_estimate 1 3.448 0.06335 # # # # ~~~~~**Site exclusion loop - jackknife sensitivity analysis ** #### # This loop removes one site from the dataset at a time and reruns the modl without it. This # is to see if there are any individual sites having overwhelming effects on the model # Use the df Ab_overall_df as made above to store results. The 'all sites' result will # be included as a control Ab_overall_df_exclusions <- Ab_overall_df Ab_overall_df_exclusions$Excluded_site <- 0 # Make a vector of sites Site_vec <- unique(Annual_aggregate_totals_6$SITE) # 266 sites # Run the loop for(i_SITE in 1:length(Site_vec)){ print(paste0("Excluding site ", Site_vec[i_SITE])) # Run model with one site missing Overall_ab_GAMM_linear_excl <- gam(Annual_count ~ Year + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, SITE != Site_vec[i_SITE])) # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Overall_ab_GAMM_linear_excl, pairwise ~ NULL, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) # If S_22 is excluded then I cannot use this as a random effect. Use an 'if' to get round this if(Site_vec[i_SITE] != 22){ newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") }else{ newX <- expand.grid(Year = c(1968,2016), fSITE = "S_1", fYear = "Y_1968") } newY <- predict(Overall_ab_GAMM_linear_excl, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Perc_change) print(Just_percs) # Merge on to Emtrends_df_2 Ab_overall_df_2 <- merge(Emtrends_df_2, Just_percs) Ab_overall_df_2$Measure <- "Abundance" Ab_overall_df_2$Excluded_site <- Site_vec[i_SITE] # Bind this on to the existing one Ab_overall_df_exclusions <- dplyr::bind_rows(Ab_overall_df_exclusions, Ab_overall_df_2) # Print progress to screen print(paste0("Total progress: ", round(i_SITE/length(Site_vec)*100), "%")) # Save csv (optional) ##write.csv(Ab_overall_df_exclusions, "Data/Jackknife_results/Ab_overall_df_exclusions.csv", row.names = FALSE) print(Sys.time()) } # Read back in if need be ##Ab_overall_df_exclusions <- read.csv("Data/Jackknife_results/Ab_overall_df_exclusions.csv") # Add on Actual_years Ab_overall_df_exclusions$SITE <- Ab_overall_df_exclusions$Excluded_site Runs <- Annual_aggregate_totals_6 %>% dplyr::select(SITE, Actual_years) %>% distinct() Exclusions <- merge(Ab_overall_df_exclusions, Runs, by = "SITE", all.x = TRUE) ggplot(Exclusions, aes(x = Actual_years, y = Perc_change)) + geom_point(alpha = 0.5, size = 2) + xlab("Years running of excluded trap") + ylab("Percentage change (1968 - 2016)") # Saved as 3 x 3 inch pdf (Fig S11a) # # # # # # # 2b) Modelling - aggregate abundance - The north and south #### # How many sites and site-years? Annual_aggregate_totals_6 %>% group_by(N_S) %>% dplyr::summarise(Number = length(unique(fSITE))) # N_S Number # North 89 # South 177 # How many sites and site years? Annual_aggregate_totals_6 %>% group_by(N_S) %>% dplyr::mutate(Site_year = paste0(fSITE,"_",fYear)) %>% dplyr::summarise(Number = length(unique(Site_year))) # N_S Number # North 1052 # South 2003 # Mean years per site were: 1052/89 # 11.8 north 2003/177 # 11.3 south # # # # ~~~ Wiggly GAMM abundance #### # Make N_S a factor Annual_aggregate_totals_6$N_S <- as.factor(Annual_aggregate_totals_6$N_S) # Run a GAMM with a non-linear year effect and region interaction Region_ab_GAMM_wiggly <- gam(Annual_count ~ s(Year, by = N_S) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_6) # Takes a while to run... # How does it look? Ab_region_viz = getViz(Region_ab_GAMM_wiggly, nsim = 0) print(plot(Ab_region_viz), pages = 1) # Random effects a little off. Clear decline in both. check(Ab_region_viz) # Residuals good enough # Plot model predictions (Fig. S3a) plot_smooth(Region_ab_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Total abundance", ylab = "Total abundance", ylim = c(0,3500), transform = "exp") # Saved as 5 x 5 inch pdf summary(Region_ab_GAMM_wiggly) # Approximate significance of smooth terms: # edf Ref.df Chi.sq p-value # s(Year):N_SNorth 1.133 1.193 5.164 0.0244 * # s(Year):N_SSouth 2.528 3.101 29.384 2.39e-06 *** # Both smoothers sig. North not strongly so AICc(Region_ab_GAMM_wiggly) # 48538.17 # # # # ~~~~~~Poptrends for abundance and region #### # To see if there are any notable periods of decline and increase, I will run a poptrend for each North_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, N_S == "North")) South_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, N_S == "South")) change(North_pop, 1968, 2016) # -24% (-41%, -2.5%) change(South_pop, 1968, 2016) # -38% (-49%, -23%) plot(North_pop) plot(South_pop) # No non-linear goings on, so I won't plot these # # # # ~~~ Linear GAMM abundance #### # Run a linear GAMM Region_ab_GAMM_linear <- gam(Annual_count ~ Year*N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_6) # Do the checks and plot Ab_region_linear_viz = getViz(Region_ab_GAMM_linear, nsim = 0) print(plot(Ab_region_linear_viz), pages = 1) # Random effects good check(Ab_region_linear_viz) # Is the N_S*Year interaction sig? anova(gam) is analogous to drop1(glm) anova(Region_ab_GAMM_linear) # Parametric Terms: # df Chi.sq p-value # Year 1 7.010 0.00811 # N_S 1 5.993 0.01436 # Year:N_S 1 5.911 0.01505 # There is a sig Year*N_S interaction AICc(Region_ab_GAMM_linear) # 48537.04 # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Region_ab_GAMM_linear, pairwise ~ N_S, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Annual_aggregate_totals_6$N_S)), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Region_ab_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(N_S, Perc_change) Just_percs #N_S Perc_change # 1 North -24.7 # 2 South -37.5 # Merge on to Emtrends_df_2 Ab_region_df <- merge(Emtrends_df_2, Just_percs) Ab_region_df$Measure <- "Abundance" # Reorder north and south Ab_region_df$N_S <- factor(Ab_region_df$N_S, levels = c("South", "North")) ggplot(data = Ab_region_df, aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Abundance") # This plot will be added to other plots and saved at the end of this script # # # # ~~~~~**Site exclusion loop - jackknife sensitivity analysis ** #### # This loop removes one site from the dataset at a time and reruns the modl without it. This # is to see if there are any individual sites having overwhelming effects on the model # Use the df Ab_region_df as made above to store results. The 'no sites excluded' result will # be included as a control Ab_region_df_exclusions <- Ab_region_df Ab_region_df_exclusions$Excluded_site <- 0 # Make a vector of sites Site_vec <- unique(Annual_aggregate_totals_6$SITE) # 266 sites # Run the loop for(i_SITE in 1:length(Site_vec)){ print(paste0("Excluding site ", Site_vec[i_SITE])) # Run model with one site missing Region_ab_GAMM_linear_excl <- gam(Annual_count ~ Year*N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, SITE != Site_vec[i_SITE])) # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Region_ab_GAMM_linear_excl, pairwise ~ N_S, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years # If S_22 is excluded then I cannot use this as a random effect. Use an 'if' to get round this if(Site_vec[i_SITE] != 22){ newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Annual_aggregate_totals_6$N_S)), fSITE = "S_22", fYear = "Y_1968") }else{ newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Annual_aggregate_totals_6$N_S)), fSITE = "S_1", fYear = "Y_1968") } newY <- predict(Region_ab_GAMM_linear_excl, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(N_S, Perc_change) print(Just_percs) # Merge on to Emtrends_df_2 Ab_region_df_2 <- merge(Emtrends_df_2, Just_percs) Ab_region_df_2$Measure <- "Abundance" Ab_region_df_2$Excluded_site <- Site_vec[i_SITE] # Bind this on to the existing one Ab_region_df_exclusions <- dplyr::bind_rows(Ab_region_df_exclusions, Ab_region_df_2) # Save csv (optional) ##write.csv(Ab_region_df_exclusions, "Data/Jackknife_results/Ab_region_df_exclusions.csv", row.names = FALSE) # Print progress to screen print(paste0(round(i_SITE/length(Site_vec)*100, digits = 1), "% done")) } # Read back in if needs be ##Ab_region_df_exclusions <- read.csv("Data/Jackknife_results/Ab_region_df_exclusions.csv") # Add on Actual_years Ab_region_df_exclusions$SITE <- Ab_region_df_exclusions$Excluded_site Runs <- Annual_aggregate_totals_6 %>% dplyr::select(SITE, Actual_years, Site_name_db) %>% distinct() Exclusions_region <- merge(Ab_region_df_exclusions, Runs, by = "SITE", all.x = TRUE) # Cut out all site-region combos where the site is not in the region being modelled Site_and_region <- Site_info_2 %>% dplyr::select(SITE, N_S) Exclusions_region_2 <- merge(Site_and_region, Exclusions_region, by = c("SITE", "N_S"), all.x = TRUE) # Get rid of NAs Exclusions_region_2 <- na.omit(Exclusions_region_2) # Plot (Fig. S11b) ggplot(Exclusions_region_2, aes(x = Actual_years, y = Perc_change)) + geom_point(alpha = 0.5, size = 2) + facet_wrap(~N_S) + ylab("Percentage change (1968 - 2016)") + xlab("Years running of excluded trap") + geom_hline(yintercept = 0, linetype = "dashed") # Saved as 3 x 6 inch pdf # Plot effect of Actual_years with error bars ggplot(data = Exclusions_region_2, aes(x = Actual_years, y = Year.trend)) + geom_point(position = position_dodge(0.3)) + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), position = position_dodge(0.3), width = 0.1) + geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Total abundance") + facet_wrap(~N_S) # Trends in the south are very robust to the removal of single sites. # In the north, the decline could be as mild as -18% (non-sig) and as severe as -40%! # With no sites removed, the trend in the north was -25%, so that's a difference of 7% in one # direction and 15% in the other. # 2c) Modelling - aggregate abundance - 7 habitats #### # How many sites and site-years? Annual_aggregate_totals_6 %>% group_by(Habitat_500m) %>% dplyr::summarise(Number = length(unique(fSITE))) #Habitat_500m Number #1 Arable 60 #2 Conifer_plantation 11 #3 Deciduous_woodland 42 #4 Improved_grassland 76 #5 Lowland_semi_natural 12 #6 Upland 12 #7 Urban 53 # Site years Annual_aggregate_totals_6 %>% group_by(Habitat_500m) %>% dplyr::mutate(Site_year = paste0(fSITE,"_",fYear)) %>% dplyr::summarise(Number = length(unique(Site_year))) # Habitat_500m Number #1 Arable 609 #2 Conifer_plantation 138 #3 Deciduous_woodland 566 #4 Improved_grassland 868 #5 Lowland_semi_natural 158 #6 Upland 155 #7 Urban 561 # ~~~ Wiggly GAMM abundance #### # Make Habitat_500m a factor Annual_aggregate_totals_6$Habitat_500m <- as.factor(Annual_aggregate_totals_6$Habitat_500m) # Run a GAMM with a non-linear year effect and region interaction Habitat_ab_GAMM_wiggly <- gam(Annual_count ~ s(Year, by = Habitat_500m) + Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_6) # Takes a while to run... # How does it look? Ab_habitat_viz = getViz(Habitat_ab_GAMM_wiggly, nsim = 0) print(plot(Ab_habitat_viz), pages = 1) # Only one random site effect off. Smooths as expected check(Ab_habitat_viz) # QQ plot dodgy anova(Habitat_ab_GAMM_wiggly) #s(Year):Habitat_500mArable 1.012 1.023 0.395 0.537307 #s(Year):Habitat_500mConifer_plantation 3.720 4.681 11.981 0.034529 #s(Year):Habitat_500mDeciduous_woodland 3.437 4.307 61.613 < 2e-16 #s(Year):Habitat_500mImproved_grassland 1.001 1.002 9.642 0.001913 #s(Year):Habitat_500mLowland_semi_natural 1.339 1.600 3.470 0.149948 #s(Year):Habitat_500mUpland 2.576 3.212 20.830 0.000165 #s(Year):Habitat_500mUrban 4.914 6.012 52.256 < 2e-16 AICc(Habitat_ab_GAMM_wiggly) # 48458.11 # Make a reduced model with no interaction effect to get significance of year*habitat interaction Reduced_Habitat_ab_GAMM_wiggly <- gam(Annual_count ~ s(Year) + Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_6) # What is the significance of dropping the Year*Habitat interaction? anova(Habitat_ab_GAMM_wiggly, Reduced_Habitat_ab_GAMM_wiggly, test= "Chisq") # Resid. Df Resid. Dev Df Deviance Pr(>Chi) #1 2713.2 47720 #2 2740.5 47863 -27.313 -142.52 < 2.2e-16 *** # # # # ~~~~~~Plotting wiggly abundance (Habitat) for paper #### # Make a new data frame to predict from. newX <- expand.grid(Year = seq(from = min(Annual_aggregate_totals_6$Year), to = max(Annual_aggregate_totals_6$Year)), Habitat_500m = levels(as.factor(Annual_aggregate_totals_6$Habitat_500m)), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random) # Get predicted values newY <- predict(Habitat_ab_GAMM_wiggly, newdata = newX, se.fit = TRUE, exclude = c("s(fYear)","s(Year,fSITE)")) # I have tested changing randoms makes no diff # Add them together addXY <- data.frame(newX, newY) # Rename them and make the CIs addXY_2 <- addXY %>% mutate(Abundance = exp(fit), lwr = exp(fit - 1.96*se.fit), upr = exp(fit + 1.96*se.fit)) # Rename and reorder Habitat levels in prep # Make it a character so it can be changed addXY_2$Habitat_500m <- as.character(addXY_2$Habitat_500m) # Correct names addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Improved_grassland"] <- "Improved grassland" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'" # Get currentlevels levels(as.factor(addXY_2$Habitat_500m)) # Order them addXY_2$Habitat_500m <- factor(addXY_2$Habitat_500m, levels = c("Arable","Broadleaf woodland","Conifer plantation", "Improved grassland","'Other semi-natural'", "Upland","Urban")) # Final plot (Fig. S4a) ggplot(data = addXY_2, aes(x = Year, y = Abundance)) + geom_hline(yintercept = mean(exp(newY$fit)), linetype = "dashed") + geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity", colour = "black") + theme_bw() + theme(panel.border = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Total annual abundance") + facet_wrap(~Habitat_500m, nrow = 2) + ylim(0,6700) # Saved as 5 x 9 inch PDF # # # # ~~~~~~Poptrends for abundance and habitat #### # TO see if there are any notable periods of decline and increase, I will run a poptrend for each Arable_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Arable")) Grass_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Improved_grassland")) Woods_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Deciduous_woodland")) Conifer_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Conifer_plantation")) Semi_nat_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Lowland_semi_natural")) Urban_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Urban")) Upland_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Upland")) # Plot Fig. S5 plot(Arable_pop, main = "Arable", ylim = c(0.3, 1.8)) plot(Grass_pop, main = "Improved grassland", ylim = c(0.3, 1.8)) plot(Woods_pop, main = "Broadleaf woodland", ylim = c(0.3, 1.8)) plot(Conifer_pop, main = "Conifer plantation", ylim = c(0.3, 1.8)) plot(Semi_nat_pop, main = "'Other semi-natural", ylim = c(0.3, 1.8)) plot(Urban_pop, main = "Urban", ylim = c(0.3, 1.8)) plot(Upland_pop, main = "Upland", ylim = c(0.3, 1.8)) # All saved as 5 x 5 inch pdfs # # # # ~~~ Linear GAMM abundance #### # Run a linear GAMM Habitat_ab_GAMM_linear <- gam(Annual_count ~ Year*Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_6) # Do the checks and plot Ab_habitat_linear_viz = getViz(Habitat_ab_GAMM_linear, nsim = 0) print(plot(Ab_habitat_linear_viz), pages = 1) # Random effects good check(Ab_habitat_linear_viz) # Residuals good enough # anova(gam) is analogous to drop1(glm) anova(Habitat_ab_GAMM_linear) # Parametric Terms: # df Chi.sq p-value #Year 1 0.443 0.506 #Habitat_500m 6 40.369 3.85e-07 #Year:Habitat_500m 6 40.381 3.83e-07 # There is a sig Year*Habitat interaction AICc(Habitat_ab_GAMM_linear) # 48512.61 # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Habitat_ab_GAMM_linear, pairwise ~ Habitat_500m, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(as.factor(Annual_aggregate_totals_6$Habitat_500m))), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Habitat_ab_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Habitat_500m, Perc_change) Just_percs # Habitat_500m Perc_change #1 Arable -7.44 #2 Conifer_plantation -40.5 #3 Deciduous_woodland -51.2 #4 Improved_grassland -28.0 #5 Lowland_semi_natural -28.5 #6 Upland -42.6 #7 Urban -44.4 # Merge on to Emtrends_df_2 Ab_habitat_df <- merge(Emtrends_df_2, Just_percs) Ab_habitat_df$Measure <- "Abundance" # Reorder and rename habitats # Turn them to a character so they can be renamed Ab_habitat_df$Habitat_500m <- as.character(Ab_habitat_df$Habitat_500m) # Correct names Ab_habitat_df$Habitat_500m[Ab_habitat_df$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation" Ab_habitat_df$Habitat_500m[Ab_habitat_df$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland" Ab_habitat_df$Habitat_500m[Ab_habitat_df$Habitat_500m == "Improved_grassland"] <- "Improved grassland" Ab_habitat_df$Habitat_500m[Ab_habitat_df$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'" # Get currentlevels levels(as.factor(Ab_habitat_df$Habitat_500m)) # Order them Ab_habitat_df$Habitat_500m <- factor(Ab_habitat_df$Habitat_500m, levels = c("Urban", "Upland", "'Other semi-natural'", "Improved grassland", "Conifer plantation", "Broadleaf woodland", "Arable")) # Plot ggplot(data = Ab_habitat_df, aes(x = Habitat_500m, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Abundance") # This plot will be added to other plots later and saved at the end of the script # # # # ~~~~~**Site exclusion loop - jackknife sensitivity analysis** #### # This loop removes one site from the dataset at a time and reruns the modl without it. This # is to see if there are any individual sites having overwhelming effects on the model # Use the df Ab_habitat_df as made above to store results. The 'no sites excluded' result will # be included as a control Ab_habitat_df_exclusions <- Ab_habitat_df Ab_habitat_df_exclusions$Excluded_site <- 0 # Make a vector of sites Site_vec <- unique(Annual_aggregate_totals_6$SITE) # 266 sites # Run the loop for(i_SITE in 1:length(Site_vec)){ print(paste0("Excluding site ", Site_vec[i_SITE])) # Run model with one site missing Habitat_ab_GAMM_linear_excl <- gam(Annual_count ~ Year*Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_6, SITE != Site_vec[i_SITE])) # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Habitat_ab_GAMM_linear_excl, pairwise ~ Habitat_500m, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years # If S_22 is excluded then I cannot use this as a random effect. Use an 'if' to get round this if(Site_vec[i_SITE] != 22){ newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(as.factor(Annual_aggregate_totals_6$Habitat_500m))), fSITE = "S_22", fYear = "Y_1968") }else{ newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(as.factor(Annual_aggregate_totals_6$Habitat_500m))), fSITE = "S_1", fYear = "Y_1968") } newY <- predict(Habitat_ab_GAMM_linear_excl, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Habitat_500m, Perc_change) print(Just_percs) # Merge on to Emtrends_df_2 Ab_habitat_df_2 <- merge(Emtrends_df_2, Just_percs) Ab_habitat_df_2$Measure <- "Abundance" Ab_habitat_df_2$Excluded_site <- Site_vec[i_SITE] # Reorder and rename habitats # Turn them to a character so they can be renamed Ab_habitat_df_2$Habitat_500m <- as.character(Ab_habitat_df_2$Habitat_500m) # Correct names Ab_habitat_df_2$Habitat_500m[Ab_habitat_df_2$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation" Ab_habitat_df_2$Habitat_500m[Ab_habitat_df_2$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland" Ab_habitat_df_2$Habitat_500m[Ab_habitat_df_2$Habitat_500m == "Improved_grassland"] <- "Improved grassland" Ab_habitat_df_2$Habitat_500m[Ab_habitat_df_2$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'" # Bind this on to the existing one Ab_habitat_df_exclusions <- dplyr::bind_rows(Ab_habitat_df_exclusions, Ab_habitat_df_2) # Save csv (optional) ##write.csv(Ab_habitat_df_exclusions, "Data/Jackknife_results/Ab_habitat_df_exclusions.csv", row.names = FALSE) # Print prgress to screen print(paste0(round(i_SITE/length(Site_vec)*100, digits = 1), "% done")) } # Read back in if needs be ##Ab_habitat_df_exclusions <- read.csv("Data/Jackknife_results/Ab_habitat_df_exclusions.csv") # Add on Actual_years Ab_habitat_df_exclusions$SITE <- Ab_habitat_df_exclusions$Excluded_site Runs <- Annual_aggregate_totals_6 %>% dplyr::select(SITE, Actual_years) %>% distinct() Exclusions_habitat <- merge(Ab_habitat_df_exclusions, Runs, by = "SITE", all.x = TRUE) # Cut out all site-region combos where the site is not in the region being modeled Site_and_hab <- Site_info_2 %>% dplyr::select(SITE, Habitat_500m) # Rename habitats to match other data frame Site_and_hab$Habitat_500m <- as.factor(Site_and_hab$Habitat_500m) levels(as.factor(Site_and_hab$Habitat_500m)) levels(Site_and_hab$Habitat_500m) <- c("Arable", "Conifer plantation", "Broadleaf woodland", "Improved grassland", "'Other semi-natural'", "Upland", "Urban") levels(as.factor(Site_and_hab$Habitat_500m)) # Merge on to the new data frame Exclusions_habitat_2 <- merge(Site_and_hab, Exclusions_habitat, by = c("SITE", "Habitat_500m"), all.x = TRUE) # Get rid of NAs Exclusions_habitat_2 <- na.omit(Exclusions_habitat_2) # Plot (Fig. S11c) ggplot(Exclusions_habitat_2, aes(x = Actual_years, y = Perc_change)) + geom_point(alpha = 0.5, size = 2) + facet_wrap(~Habitat_500m) + ylab("Percentage change (1968 - 2016)") + xlab("Years running of excluded trap") + geom_hline(yintercept = 0, linetype = "dashed") # Saved as 6 x 6 inch pdf # Plot effect of Actual_years with error bars ggplot(data = Exclusions_habitat, aes(x = Actual_years, y = Year.trend)) + geom_point(position = position_dodge(0.3)) + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), position = position_dodge(0.3), width = 0.1) + geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Total abundance") + facet_wrap(~Habitat_500m) # Of the seven habitat types, only two (Broadleaf woodland and Urban) were truly robust to removal of # single sites. The trend in 'other semi-natural' habitat could be altered quite severely by # the removal of single sites) # # # # 2d) Modelling - abundance - Habitat*region interaction #### # Data prep # Get only the woodland and grassland sites and make a new Hab_region var Annual_aggregate_totals_hab_region <- Annual_aggregate_totals_6 %>% dplyr::filter(Habitat_500m == "Deciduous_woodland" | Habitat_500m == "Improved_grassland") %>% dplyr::mutate(Hab_region = paste0(Habitat_500m,"_",N_S)) # ~~~~~**Site exclusion loop including improved grassland - jackknife sensitivity analysis ** #### # This loop removes one site from the dataset at a time and reruns the modl without it. This # is to see if there are any individual sites having overwhelming effects on the model # I will not include inproved grassland in the final model as the estimates were much to unreliable # for this habitat in the north. This section is just to demonstrate why I left improved grassland out. # Run a linear GAMM (full model with no excluded sites) Hab_region_ab_GAMM_linear <- gam(Annual_count ~ Year*Habitat_500m*N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_hab_region) # Get model predictions for first and last year newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(as.factor(Annual_aggregate_totals_hab_region$N_S))), Habitat_500m = c(levels(droplevels(as.factor(Annual_aggregate_totals_hab_region$Habitat_500m)))), fSITE = "S_1", fYear = "Y_1968") newY <- predict(Hab_region_ab_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) # Get percentage change estimates Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Habitat_500m, N_S, Perc_change) Just_percs # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Hab_region_ab_GAMM_linear, pairwise ~ N_S|Habitat_500m, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Merge on to Emtrends_df_2 Ab_hab_region_df <- merge(Emtrends_df_2, Just_percs) Ab_hab_region_df$Measure <- "Abundance" # Recreate and rename hab_regions Ab_hab_region_df$Hab_region[Ab_hab_region_df$Habitat_500m == "Deciduous_woodland" & Ab_hab_region_df$N_S == "North"] <- "Broadleaf woodland (North)" Ab_hab_region_df$Hab_region[Ab_hab_region_df$Habitat_500m == "Deciduous_woodland" & Ab_hab_region_df$N_S == "South"] <- "Broadleaf woodland (South)" Ab_hab_region_df$Hab_region[Ab_hab_region_df$Habitat_500m == "Improved_grassland" & Ab_hab_region_df$N_S == "North"] <- "Improved grassland (North)" Ab_hab_region_df$Hab_region[Ab_hab_region_df$Habitat_500m == "Improved_grassland" & Ab_hab_region_df$N_S == "South"] <- "Improved grassland (South)" # Use the df Ab_hab_region_df as made above to store results. The 'no sites excluded' result will # be included as a control Ab_hab_region_df_exclusions <- Ab_hab_region_df Ab_hab_region_df_exclusions$Excluded_site <- 0 # Make a vector of sites Site_vec <- unique(Annual_aggregate_totals_hab_region$SITE) # 118 sites # Run the loop for(i_SITE in 1:length(Site_vec)){ # Print progress to screen print(paste0("Excluding site ", Site_vec[i_SITE])) # Run model with one site excluded at a time Hab_region_ab_GAMM_linear_excl <- gam(Annual_count ~ Year*N_S*Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_hab_region, SITE != Site_vec[i_SITE])) # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Hab_region_ab_GAMM_linear_excl, pairwise ~ N_S|Habitat_500m, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years # If S_34 is excluded then I cannot use this as a random effect. Use an 'if' to get round this if(Site_vec[i_SITE] != 34){ newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(as.factor(Annual_aggregate_totals_hab_region$N_S))), Habitat_500m = c(levels(droplevels(as.factor(Annual_aggregate_totals_hab_region$Habitat_500m)))), fSITE = "S_34", fYear = "Y_1968") }else{ newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(as.factor(Annual_aggregate_totals_hab_region$N_S))), Habitat_500m = c(levels(droplevels(as.factor(Annual_aggregate_totals_hab_region$Habitat_500m)))), fSITE = "S_1", fYear = "Y_1968") } newY <- predict(Hab_region_ab_GAMM_linear_excl, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Habitat_500m, N_S, Perc_change) print(Just_percs) # Merge on to Emtrends_df_2 Ab_hab_region_df_2 <- merge(Emtrends_df_2, Just_percs) Ab_hab_region_df_2$Measure <- "Abundance" Ab_hab_region_df_2$Excluded_site <- Site_vec[i_SITE] # Bind this on to the existing one Ab_hab_region_df_exclusions <- dplyr::bind_rows(Ab_hab_region_df_exclusions, Ab_hab_region_df_2) # Recreate and rename hab_regions Ab_hab_region_df_exclusions$Hab_region[Ab_hab_region_df_exclusions$Habitat_500m == "Deciduous_woodland" & Ab_hab_region_df_exclusions$N_S == "North"] <- "Broadleaf woodland (North)" Ab_hab_region_df_exclusions$Hab_region[Ab_hab_region_df_exclusions$Habitat_500m == "Deciduous_woodland" & Ab_hab_region_df_exclusions$N_S == "South"] <- "Broadleaf woodland (South)" Ab_hab_region_df_exclusions$Hab_region[Ab_hab_region_df_exclusions$Habitat_500m == "Improved_grassland" & Ab_hab_region_df_exclusions$N_S == "North"] <- "Improved grassland (North)" Ab_hab_region_df_exclusions$Hab_region[Ab_hab_region_df_exclusions$Habitat_500m == "Improved_grassland" & Ab_hab_region_df_exclusions$N_S == "South"] <- "Improved grassland (South)" # Save csv (optional) ##write.csv(Ab_hab_region_df_exclusions, "Data/Jackknife_results/Ab_hab_region_df_exclusions.csv", row.names = FALSE) # Print progress to screen print(paste0(round(i_SITE/length(Site_vec)*100, digits = 1), "% done")) } # Read back in if need be ##Ab_hab_region_df_exclusions <- read.csv("Data/Jackknife_results/Ab_hab_region_df_exclusions.csv") # Add on Actual_years Ab_hab_region_df_exclusions$SITE <- Ab_hab_region_df_exclusions$Excluded_site Runs <- Annual_aggregate_totals_hab_region %>% dplyr::select(SITE, Actual_years) %>% distinct() Exclusions_hab_region <- merge(Ab_hab_region_df_exclusions, Runs, by = "SITE", all.x = TRUE) # Cut out all site-hab_region combos where the site is not in the region being modeled Site_and_hab_region <- Site_info_2 %>% dplyr::select(SITE, Habitat_500m, N_S) %>% dplyr::filter(Habitat_500m == "Deciduous_woodland" | Habitat_500m == "Improved_grassland") %>% dplyr::mutate(Hab_region = paste0(Habitat_500m,"_",N_S)) # Rename habitats to match other data frame Site_and_hab_region$Hab_region <- as.factor(Site_and_hab_region$Hab_region) levels(as.factor(Site_and_hab_region$Hab_region)) levels(Site_and_hab_region$Hab_region) <- c("Broadleaf woodland (North)", "Broadleaf woodland (South)", "Improved grassland (North)", "Improved grassland (South)") levels(as.factor(Site_and_hab_region$Hab_region)) # Merge on to the new data frame Exclusions_hab_region_2 <- merge(Site_and_hab_region, Exclusions_hab_region, by = c("SITE", "Hab_region"), all.x = TRUE) # Get rid of NAs Exclusions_hab_region_2 <- na.omit(Exclusions_hab_region_2) # Plot (Fig. S11d) ggplot(Exclusions_hab_region_2, aes(x = Actual_years, y = Perc_change)) + geom_point(alpha = 0.5, size = 2) + facet_wrap(~Hab_region) + xlab("Years running of excluded trap") + ylab("Percentage change (1968 - 2016)")+ geom_hline(yintercept = 0, linetype = "dashed") # Saved as 5 x 5 inch pdf # # # # ~~~ Wiggly GAMM abundance (broadleaf woodland only) #### # Data prep # Get only the broadleaf woodland sites and make a new Hab_region var Annual_aggregate_totals_hab_region <- Annual_aggregate_totals_6 %>% dplyr::filter(Habitat_500m == "Deciduous_woodland") %>% dplyr::mutate(Hab_region = paste0(Habitat_500m,"_",N_S)) # How many sites and site-years? Annual_aggregate_totals_hab_region %>% group_by(Hab_region) %>% dplyr::summarise(Number = length(unique(fSITE))) # Hab_region Number #1 Deciduous_woodland_North 18 #2 Deciduous_woodland_South 24 # Site years Annual_aggregate_totals_hab_region %>% group_by(Hab_region) %>% dplyr::mutate(Site_year = paste0(fSITE,"_",fYear)) %>% dplyr::summarise(Number = length(unique(Site_year))) # Hab_region Number #1 Deciduous_woodland_North 237 #2 Deciduous_woodland_South 329 # Make Region a factor Annual_aggregate_totals_hab_region$N_S <- as.factor(Annual_aggregate_totals_hab_region$N_S) # Run a GAMM with a non-linear year effect and region interaction Hab_region_ab_GAMM_wiggly <- gam(Annual_count ~ s(Year, by = N_S) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_hab_region) # How does it look? Ab_hab_region_viz = getViz(Hab_region_ab_GAMM_wiggly, nsim = 0) print(plot(Ab_hab_region_viz), pages = 1) # Random effects fine. Smoother shapes as expected check(Ab_hab_region_viz) # # Residuals fine # Plot model predictions (Fig. S6a) plot_smooth(Hab_region_ab_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Abundance", transform = "exp", ylim = c(0,6100), ylab = "Total abundance") # Saved as 5 x 5 inch pdf summary(Hab_region_ab_GAMM_wiggly) # All smoothers sig #Approximate significance of smooth terms: # edf Ref.df Chi.sq p-value # s(Year):N_SNorth 3.875 4.706 38.78 1.25e-06 *** # s(Year):N_SSouth 1.849 2.093 57.37 < 2e-16 *** # Make a reduced model Reduced_Hab_region_ab_GAMM_wiggly <- gam(Annual_count ~ s(Year) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_hab_region) # Is the year*region interaction significant? anova(Hab_region_ab_GAMM_wiggly, Reduced_Hab_region_ab_GAMM_wiggly, test = "Chisq") # Resid. Df Resid. Dev Df Deviance Pr(>Chi) #1 471.68 9053.1 #2 477.55 9089.0 -5.8651 -35.851 2.568e-06 *** AICc(Hab_region_ab_GAMM_wiggly) # 9258.684 # # # # ~~~ Linear GAMM abundance (broadleaf woodland only) #### # Run a linear GAMM Hab_region_ab_GAMM_linear <- gam(Annual_count ~ Year*N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), family = nb, data = Annual_aggregate_totals_hab_region) # Do the checks and plot Ab_hab_region_linear_viz = getViz(Hab_region_ab_GAMM_linear, nsim = 0) print(plot(Ab_hab_region_linear_viz), pages = 1) # Random effects a little off check(Ab_hab_region_linear_viz) # Residuals fine anova(Hab_region_ab_GAMM_linear) # Analagous to drop1 # Parametric Terms: # df Chi.sq p-value #Year 1 9.833 0.00171 #N_S 1 9.881 0.00167 #Year:N_S 1 9.836 0.00171 # There is a sig Year*Region interaction in woodlands AICc(Hab_region_ab_GAMM_linear) # 9273.784 # How do linear and non-linear compare? AICc(Hab_region_ab_GAMM_wiggly) - AICc(Hab_region_ab_GAMM_linear) # -15.09963. Non-linear is better # Plotting # First get rid of any levels clinging on Annual_aggregate_totals_hab_region <- droplevels(Annual_aggregate_totals_hab_region) # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Hab_region_ab_GAMM_linear, pairwise ~ N_S, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Annual_aggregate_totals_hab_region$N_S)), fSITE = "S_46", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Hab_region_ab_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Get it back on response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(N_S, Perc_change) Just_percs #N_S Perc_change # 1 North -34.2 # 2 South -56.0 # Merge on to Emtrends_df_2 Ab_hab_region_df <- merge(Emtrends_df_2, Just_percs) Ab_hab_region_df$Measure <- "Abundance" # Plot ggplot(data = Ab_hab_region_df, aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + xlab("Region") + ggtitle("Abundance") # This plot will be combined with other plots and saved at the end of this script # # # # ~~~~~ poptrend for broadleaf woodland north and south #### North_woods_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_hab_region, N_S == "North")) South_woods_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + s(Year, fSITE, bs = "re"), family = nb, data = subset(Annual_aggregate_totals_hab_region, N_S == "South")) # Plot (Fig. S7) plot(North_woods_pop, main = "North \n Broadleaf woodland", ylim = c(0.3, 2.6)) plot(South_woods_pop, main = "South \n Broadleaf woodland", ylim = c(0.3, 2.6)) # Each saved as a 5 x 5 inch pdf # # # # ~~~ #### # 3. Biomass change #### # ~ Data prep #### # The dfs I need are names(Annual_species_totals) names(Site_info) names(Site_year_completeness) # First get a biomass for each site-year # Cut out only the biomass from Biomass_df Biomass_df <- Biomass_df %>% dplyr::select(RIS_code, Biomass_Kinsella_direct) # Rename Annual_species_totals in prep Annual_species_totals_2 <- Annual_species_totals %>% dplyr::rename(Year = CalYear, SITE = "RIS.TrapCode", RIS_code = "Code.RISgeneric") # Merge it on Annual_species_totals_3 <- merge(Annual_species_totals_2, Biomass_df, by = "RIS_code", all.x = TRUE) # Create a Total_biomass by multiplying the biomass per species by the number of that species caught Annual_species_totals_3$Total_biomass <- Annual_species_totals_3$Annual_count*Annual_species_totals_3$Biomass_Kinsella_direct # Squash it down to one total per site-year Annual_biomass <- Annual_species_totals_3 %>% dplyr::group_by(SITE, Year) %>% dplyr::summarise(Total_biomass = sum(Total_biomass, na.rm = TRUE), Annual_count = sum(Annual_count)) # Prepare to merge on site info Site_year_completeness_2 <- Site_year_completeness Site_year_completeness_2$Site_name_db <- NULL Site_year_completeness_2$Site_name_db_nogaps <- NULL # Merge Annual_biomass_2 <- merge(Annual_biomass, Site_info, by = c("SITE")) Annual_biomass_3 <- merge(Annual_biomass_2, Site_year_completeness_2, by = c("SITE", "Year")) # Check names(Annual_biomass_3) # Now cut down data to desirable sites and site-years # First, get rid of anything outside the 1968 - 2016 range and also the Ireland and Channel Isles sites # and Annual_biomass_4 <- Annual_biomass_3 %>% dplyr::filter(Year >= 1968 & Year <= 2016) %>% dplyr::filter(Region != "Channel Islands") %>% dplyr::filter(Region != "Ireland") %>% dplyr::filter(Complete_type_Paul == "Complete") # Make a new log(biomass) variable Annual_biomass_4$Log_biomass <- log(Annual_biomass_4$Total_biomass) # Make a new variables for Actual_years Actual_years_ran <- Annual_biomass_4 %>% dplyr::group_by(SITE) %>% dplyr::summarise(Actual_years = length(unique(Year))) # Merge it on Annual_biomass_5 <- merge(Annual_biomass_4, Actual_years_ran, by = "SITE") # And cut it down to sites with at least three Actual_years running Annual_biomass_6 <- Annual_biomass_5 %>% dplyr::filter(Actual_years >= 3) # Make an fSITE number so the spaces in the site names doesn't mess things up Annual_biomass_6$fSITE <- as.factor(paste0("S_",Annual_biomass_6$SITE)) # Make a year factor for a random effect Annual_biomass_6$fYear <- as.factor(paste0("Y_", Annual_biomass_6$Year)) # # # # 3a) Modelling - Biomass - All sites #### # ~~~ Wiggly GAMM biomass #### # Run a GAMM with a non-linear year effect Overall_biomass_GAMM_wiggly <- gam(Log_biomass ~ s(Year) + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_6) # Model checks Biomass_viz = getViz(Overall_biomass_GAMM_wiggly, nsim = 0) print(plot(Biomass_viz), pages = 1) # Some of the site random effects a little off. Clear decline check(Biomass_viz) # Residuals good enough # Plot model predictions (Fig. S2b) plot_smooth(Overall_biomass_GAMM_wiggly, view = "Year", main = "Biomass", transform = "exp", ylim = c(0,80000), ylab = "Total biomass (mg)") # Saved as 5 x 5 inch pdf # newX <- expand.grid(Year = seq(from = min(Annual_biomass_6$Year), to = max(Annual_biomass_6$Year), length = 49), fSITE = "S_22", fYear = "Y_1968") # (Selecting # random effects at random. I have tested that changing random effects has no effect on predictions) newY <- predict(Overall_biomass_GAMM_wiggly, newdata = newX, se.fit = TRUE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back on response scale addXY$fit <- exp(addXY$fit) (addXY$fit[addXY$Year == 2016] - addXY$fit[addXY$Year == 1968])/ addXY$fit[addXY$Year == 1968]*100 # -39.26477 anova(Overall_biomass_GAMM_wiggly) # edf Ref.df F p-value #s(Year) 1.00 1.00 29.87 <2e-16 AICc(Overall_biomass_GAMM_wiggly) # 2335.065 # # # # ~~~ Linear GAMM biomass #### # Run a linear GAMM Overall_biomass_GAMM_linear <- gam(Log_biomass ~ Year + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_6) # Do the checks and plot Biomass_linear_viz = getViz(Overall_biomass_GAMM_linear, nsim = 0) print(plot(Biomass_linear_viz), pages = 1) # A few outlying sites check(Biomass_linear_viz) # Residuals good enough anova(Overall_biomass_GAMM_linear) #Parametric Terms: # df F p-value # Year 1 29.87 5.04e-08 AICc(Overall_biomass_GAMM_linear) # 2335.064 # How does linear and non-linear compare? AICc(Overall_biomass_GAMM_wiggly) - AICc(Overall_biomass_GAMM_linear) # 0.0005776261 # Models are equivalent so I will stick with linear # # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Overall_biomass_GAMM_linear, pairwise ~ NULL, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Overall_biomass_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Perc_change) Just_percs # -39.3% # Merge on to Emtrends_df_2 Biomass_overall_df <- merge(Emtrends_df_2, Just_percs) Biomass_overall_df$Measure <- "Biomass" ggplot(data = Biomass_overall_df, aes(x = Measure, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Biomass") # This plot will be combined with other plots and saved at the end if this script ### # Redo this but with MacGregor et al. (2019)'s sites only #### MacG_sites_data <- Annual_biomass_6 %>% dplyr::filter(SITE == 1 | SITE == 22 | SITE == 46 | SITE == 34 | SITE == 39 | SITE == 88 | SITE == 97 | SITE == 131 | SITE == 149 | SITE == 168 | SITE == 212 | SITE == 187 | SITE == 293 | SITE == 261 | SITE == 277 | SITE == 274 | SITE == 292 | SITE == 264 | SITE == 111 | SITE == 289 | SITE == 350 | SITE == 336 | SITE == 346 | SITE == 331 | SITE == 368 | SITE == 382 | SITE == 370 | SITE == 398 | SITE == 424 | SITE == 416 | SITE == 446 | SITE == 451 | SITE == 467 | SITE == 477) # Run a linear GAMM Overall_biomass_GAMM_linear_MacG <- gam(Log_biomass ~ Year + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = MacG_sites_data) anova(Overall_biomass_GAMM_linear_MacG) #Parametric Terms: # df F p-value # Year 1 10.88 0.001 # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Overall_biomass_GAMM_linear_MacG, pairwise ~ NULL, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Overall_biomass_GAMM_linear_MacG, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Perc_change) Just_percs # -29.7% A bit less severe than when including all sites # # # # Deer damage and woodland - Section added for second draft Jan 2022 #### # Run a linear GAMM BL_woodland_biomass_GAMM_linear <- gam(Log_biomass ~ Year*Deer_damage_estimate + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = subset(Annual_biomass_6, Habitat_500m == "Deciduous_woodland")) anova(BL_woodland_biomass_GAMM_linear) # Parametric Terms: # df F p-value # Year 1 11.401 0.000804 # Deer_damage_estimate 1 3.803 0.051830 # Year:Deer_damage_estimate 1 3.748 0.053544 # No significant year interaction # # # # 3b) Modelling - biomass - The north and south #### # ~~~ Wiggly GAMM biomass #### # Make N_S a factor Annual_biomass_6$N_S <- as.factor(Annual_biomass_6$N_S) # Run a GAMM with a non-linear year effect and region interaction Region_biomass_GAMM_wiggly <- gam(Log_biomass ~ s(Year, by = N_S) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_6) # How does it look? Biomass_region_viz = getViz(Region_biomass_GAMM_wiggly, nsim = 0) print(plot(Biomass_region_viz), pages = 1) # Some random effects a little off. check(Biomass_region_viz) ## Resiudals good enough # Plot model predictions (Fig. S3b) plot_smooth(Region_biomass_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Biomass", ylab = "Biomass (mg)", transform = "exp", ylim = c(0, 90000)) # Saved as 5 x 5 inch pdf summary(Region_biomass_GAMM_wiggly) # Only south sig (v. strongly so) # Approximate significance of smooth terms: # edf Ref.df F p-value #s(Year):N_SNorth 2.276 2.772 1.957 0.152 #s(Year):N_SSouth 1.858 2.120 20.415 <2e-16 *** # Get the significance of the interaction Reduced_Region_biomass_GAMM_wiggly <- gam(Log_biomass ~ s(Year) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_6) anova(Region_biomass_GAMM_wiggly, Reduced_Region_biomass_GAMM_wiggly, test = "Chisq") # Resid. Df Resid. Dev Df Deviance Pr(>Chi) # 1 2738.2 304.30 # 2 2741.6 307.99 -3.4397 -3.693 4.926e-07 *** AICc(Region_biomass_GAMM_wiggly) # 2305.6 # # # # ~~~ Linear GAMM biomass #### # Run a linear GAMM Region_biomass_GAMM_linear <- gam(Log_biomass ~ Year*N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_6) # Do the checks and plot Biomass_region_linear_viz = getViz(Region_biomass_GAMM_linear, nsim = 0) print(plot(Biomass_region_linear_viz), pages = 1) # Random effects slightly off check(Biomass_region_linear_viz) # Residuals good enough # anova(gam) is analogous to drop1(glm) anova(Region_biomass_GAMM_linear) #Parametric Terms: # df F p-value #Year 1 4.406 0.0359 #N_S 1 25.767 4.11e-07 #Year:N_S 1 25.465 4.80e-07 # There is a sig Year*N_S interaction AICc(Region_biomass_GAMM_linear) # 18414.32 # How does linear compare to non-linear? AICc(Region_biomass_GAMM_wiggly) - AICc(Region_biomass_GAMM_linear) # -0.794888 # Models are equivalent so I will stick with linear # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Region_biomass_GAMM_linear, pairwise ~ N_S, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Annual_biomass_6$N_S)), fSITE = "S_1", fYear = "Y_2016") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Region_biomass_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(N_S, Perc_change) Just_percs # N_S Perc_change #1 North -20.0 #2 South -46.3 # Merge on to Emtrends_df_2 Biomass_region_df <- merge(Emtrends_df_2, Just_percs) Biomass_region_df$Measure <- "Biomass" # Reorder north and south Biomass_region_df$N_S <- factor(Biomass_region_df$N_S, levels = c("South", "North")) ggplot(data = Biomass_region_df, aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Biomass") # This plot will be added to other plots and saved at the end of this script # # # # 3c) Modelling - biomass - 7 habitats #### # ~~~ Wiggly GAMM biomass #### # Make Habitat_500m a factor Annual_biomass_6$Habitat_500m <- as.factor(Annual_biomass_6$Habitat_500m) # Run a GAMM with a non-linear year effect and region interaction Habitat_biomass_GAMM_wiggly <- gam(Log_biomass ~ s(Year, by = Habitat_500m) + Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_6) # How does it look? Biomass_habitat_viz = getViz(Habitat_biomass_GAMM_wiggly, nsim = 0) print(plot(Biomass_habitat_viz), pages = 1) # Some random effects a little off check(Biomass_habitat_viz) # Residuals good enough summary(Habitat_biomass_GAMM_wiggly) # All smooths apart from Upland and Semi-nat sig. # edf Ref.df F p-value # s(Year):Habitat_500mArable 7.224 8.217 2.175 0.02942 * # s(Year):Habitat_500mConifer_plantation 3.868 4.849 2.818 0.01842 * # s(Year):Habitat_500mDeciduous_woodland 2.507 3.149 16.825 < 2e-16 *** # s(Year):Habitat_500mImproved_grassland 1.000 1.000 18.256 2.06e-05 *** # s(Year):Habitat_500mLowland_semi_natural 1.369 1.643 5.481 0.01283 * # s(Year):Habitat_500mUpland 2.777 3.457 4.092 0.00459 ** # s(Year):Habitat_500mUrban 4.840 5.915 7.897 < 2e-16 *** # Make a reduced model to test significance of interaction effect Reduced_Habitat_biomass_GAMM_wiggly <- gam(Log_biomass ~ s(Year) + Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_6) anova(Habitat_biomass_GAMM_wiggly, Reduced_Habitat_biomass_GAMM_wiggly, test = "Chisq") # Resid. Df Resid. Dev Df Deviance Pr(>Chi) #1 2714.8 296.8 #2 2741.8 308.2 -26.995 -11.405 3.882e-11 *** AICc(Habitat_biomass_GAMM_wiggly) # 2273.865 # # # # ~~~~~~Plotting wiggly biomass (Habitat) for paper #### # Make a new data frame to predict from. newX <- expand.grid(Year = seq(from = min(Annual_biomass_6$Year), to = max(Annual_biomass_6$Year)), Habitat_500m = levels(as.factor(Annual_biomass_6$Habitat_500m)), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random) # Get predicted values newY <- predict(Habitat_biomass_GAMM_wiggly, newdata = newX, se.fit = TRUE, exclude = c("s(fYear)","s(Year,fSITE)")) # I have tested changing randoms makes no diff # Add them together addXY <- data.frame(newX, newY) # Rename them and make the CIs addXY_2 <- addXY %>% mutate(Biomass = exp(fit), lwr = exp(fit - 1.96*se.fit), upr = exp(fit + 1.96*se.fit)) # Rename and reorder Habitat levels in prep # Make it a character so it can be changed addXY_2$Habitat_500m <- as.character(addXY_2$Habitat_500m) # Correct names addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Improved_grassland"] <- "Improved grassland" addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'" # Get current levels levels(as.factor(addXY_2$Habitat_500m)) # Order them addXY_2$Habitat_500m <- factor(addXY_2$Habitat_500m, levels = c("Arable","Broadleaf woodland","Conifer plantation", "Improved grassland","'Other semi-natural'", "Upland","Urban")) # Plot (Fig. S4b) ggplot(data = addXY_2, aes(x = Year, y = Biomass)) + geom_hline(yintercept = mean(exp(newY$fit)), linetype = "dashed") + geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity", colour = "black") + theme_bw() + theme(panel.border = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Total annual biomass") + facet_wrap(~Habitat_500m, nrow = 2) + ylim(0,165000) # Saved as 5 x 9 inch PDF # # # # ~~~ Linear GAMM biomass #### # Run a linear GAMM Habitat_biomass_GAMM_linear <- gam(Log_biomass ~ Year*Habitat_500m + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_6) # Do the checks and plot Biomass_habitat_linear_viz = getViz(Habitat_biomass_GAMM_linear, nsim = 0) print(plot(Biomass_habitat_linear_viz), pages = 1) # Random effects good check(Biomass_habitat_linear_viz) # Resiudals good enough # anova(gam) is analogous to drop1(glm) anova(Habitat_biomass_GAMM_linear) #Parametric Terms: # df F p-value # Year 1 7.102 0.00775 # Habitat_500m 6 2.845 0.00917 # Year:Habitat_500m 6 2.825 0.00961 # There is a sig Year*Habitat interaction AICc(Habitat_biomass_GAMM_linear) # 2329.442 # How does linear and non-linear compare? AICc(Habitat_biomass_GAMM_wiggly) - AICc(Habitat_biomass_GAMM_linear) # -55.57666 # Non-linear is better # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Habitat_biomass_GAMM_linear, pairwise ~ Habitat_500m, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(Annual_biomass_6$Habitat_500m)), fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Habitat_biomass_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Put back onto response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(Habitat_500m, Perc_change) Just_percs # Arable -26.5 # 2 Conifer_plantation -47.4 # 3 Deciduous_woodland -51.5 # 4 Improved_grassland -36.7 # 5 Lowland_semi_natural -43.4 # 6 Upland -27.3 # 7 Urban -45.9 # Merge on to Emtrends_df_2 Biomass_habitat_df <- merge(Emtrends_df_2, Just_percs) Biomass_habitat_df$Measure <- "Biomass" # Reorder and rename habitats # Turn them to a character so they can be renamed Biomass_habitat_df$Habitat_500m <- as.character(Biomass_habitat_df$Habitat_500m) # Correct names Biomass_habitat_df$Habitat_500m[Biomass_habitat_df$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation" Biomass_habitat_df$Habitat_500m[Biomass_habitat_df$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland" Biomass_habitat_df$Habitat_500m[Biomass_habitat_df$Habitat_500m == "Improved_grassland"] <- "Improved grassland" Biomass_habitat_df$Habitat_500m[Biomass_habitat_df$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'" # Get currentlevels levels(as.factor(Biomass_habitat_df$Habitat_500m)) # Order them Biomass_habitat_df$Habitat_500m <- factor(Biomass_habitat_df$Habitat_500m, levels = c("Urban", "Upland", "'Other semi-natural'", "Improved grassland", "Conifer plantation", "Broadleaf woodland", "Arable")) # Plot ggplot(data = Biomass_habitat_df, aes(x = Habitat_500m, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + ggtitle("Biomass") # This plot will be added to other plots and saved at the end of this script # # # # 3d) Modelling - biomass - Habitat*region interaction #### # Get only the woodland sites and make a new Hab_region var Annual_biomass_hab_region <- Annual_biomass_6 %>% dplyr::filter(Habitat_500m == "Deciduous_woodland") %>% dplyr::mutate(Hab_region = paste0(Habitat_500m,"_",N_S)) # ~~~ Wiggly GAMM biomass (broadleaf woodland only) #### # Make N_S a factor Annual_biomass_hab_region$N_S <- as.factor(Annual_biomass_hab_region$N_S) # Run a GAMM with a non-linear year region interaction Hab_region_biomass_GAMM_wiggly <- gam(Log_biomass ~ s(Year, by = N_S) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_hab_region) # How does it look? Biomass_hab_region_viz = getViz(Hab_region_biomass_GAMM_wiggly, nsim = 0) print(plot(Biomass_hab_region_viz), pages = 1) # Random effects good check(Biomass_hab_region_viz) # Residuals good # Plot model predictions (Fig. S6b) plot_smooth(Hab_region_biomass_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Biomass", transform = "exp", ylim = c(0,150000), ylab = "Total biomass (mg)") # Saved as 5 x 5 inch pdf summary(Hab_region_biomass_GAMM_wiggly) # edf Ref.df F p-value # s(Year):N_SNorth 3.545 4.306 7.821 3.25e-06 *** # s(Year):N_SSouth 1.982 2.279 34.008 < 2e-16 *** # Make a reduced model Reduced_Hab_region_biomass_GAMM_wiggly <- gam(Log_biomass ~ s(Year) + N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_hab_region) anova(Hab_region_biomass_GAMM_wiggly, Reduced_Hab_region_biomass_GAMM_wiggly, test = "Chisq") #Resid. Df Resid. Dev Df Deviance Pr(>Chi) #1 474.89 30.348 #2 479.70 32.899 -4.8117 -2.5511 8.524e-08 *** AICc(Hab_region_biomass_GAMM_wiggly) # 143.8704 # # # # ~~~ Linear GAMM biomass (broadleaf woodland only) #### # Run a linear GAMM Hab_region_biomass_GAMM_linear <- gam(Log_biomass ~ Year*N_S + s(fYear, bs = "re") + s(Year, fSITE, bs = "re"), data = Annual_biomass_hab_region) # Do the checks and plot Biomass_hab_region_linear_viz = getViz(Hab_region_biomass_GAMM_linear, nsim = 0) print(plot(Biomass_hab_region_linear_viz), pages = 1) # Random effects fine check(Biomass_hab_region_linear_viz) # Residuals fine anova(Hab_region_biomass_GAMM_linear) # Analagous to drop1 # Parametric Terms: #df F p-value #Year 1 6.093 0.0139 #N_S 1 19.759 1.09e-05 #Year:N_S 1 19.751 1.09e-05 # There is a sig Year*Region interaction AICc(Hab_region_biomass_GAMM_linear) # 160.2371 # How does linear and non-linear compare? AICc(Hab_region_biomass_GAMM_wiggly) - AICc(Hab_region_biomass_GAMM_linear) # -16.36673 # Non-linear is better # Plotting # Get the estimated year coefficients with their 95% CIs and make them a df Emtrends_df <- emtrends(Hab_region_biomass_GAMM_linear, pairwise ~ N_S, var = "Year") Emtrends_df_2 <- data.frame(Emtrends_df$emtrends) # Get perc change using predict for the first and last years newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Annual_biomass_hab_region$N_S)), fSITE = "S_46", fYear = "Y_1968") # (Selecting random effects at random. I have tested that changing random effects has no # effect on predictions) newY <- predict(Hab_region_biomass_GAMM_linear, newdata = newX, se.fit = FALSE, exclude = c("s(fYear)","s(Year,fSITE)")) addXY <- data.frame(newX, newY) # Get it back on response scale addXY$newY <- exp(addXY$newY) Perc_change <- addXY %>% pivot_wider(names_from = Year, values_from = newY) %>% rename(First = "1968", Last = "2016") %>% mutate(Perc_change = ((Last-First)/First)*100) Just_percs <- Perc_change %>% dplyr::select(N_S, Perc_change) Just_percs # N_S Perc_change # 1 North -29.3 # 2 South -60.9 # Merge on to Emtrends_df_2 Biomass_hab_region_df <- merge(Emtrends_df_2, Just_percs) Biomass_hab_region_df$Measure <- "Biomass" # Plot ggplot(data = Biomass_hab_region_df, aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + ylab("Year coefficient") + xlab("Region") + ggtitle("Biomass") # This plot will be added to other plots and saved at the end of this script # # # # ~~~ #### # Making final combined plots #### # Effect of Region on the four responses (linear) #### # Need to make a plot that is four panels wide (responses) and three panels # high (region (including all-sites)) # This will show the year coefficients from the linear models with 95% CIs. # I need the following dfs which are already made above Richness_overall_df Shannon_overall_df Ab_overall_df Biomass_overall_df Richness_region_df Shannon_region_df Ab_region_df Biomass_region_df # First need to give a new var to the overall dfs to show they are both regions combined Richness_overall_df$N_S <- "All sites" Shannon_overall_df$N_S <- "All sites" Ab_overall_df$N_S <- "All sites" Biomass_overall_df$N_S <- "All sites" # also get rid of the dud column Richness_overall_df$X1 <- NULL Shannon_overall_df$X1 <- NULL Ab_overall_df$X1 <- NULL Biomass_overall_df$X1 <- NULL # bind em all up Region_plot_df <- dplyr::bind_rows(Ab_overall_df, Biomass_overall_df, Richness_overall_df, Shannon_overall_df, Ab_region_df, Biomass_region_df, Richness_region_df, Shannon_region_df) # Change the levels order so north comes out on top Region_plot_df$N_S <- factor(Region_plot_df$N_S, levels = c("All sites", "South", "North")) # Plot them. I can't get the scales to act nicely, so I will have to plot each one separately #Abundance a <- ggplot(data = subset(Region_plot_df, Measure == "Abundance"), aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + # Leave this in/out depending on requirements ylab("Year coefficient") + ggtitle("Total abundance") + xlab(NULL) + ylim(-0.015, 0.015) # Biomass b <- ggplot(data = subset(Region_plot_df, Measure == "Biomass"), aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + ylab("Year coefficient") + ggtitle("Biomass") + xlab(NULL) + ylim(-0.019, 0.019) # Richness r <- ggplot(data = subset(Region_plot_df, Measure == "Richness"), aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + ylab("Year coefficient") + ggtitle("Species richness") + xlab(NULL) + ylim(-0.65,0.65) # Shannon d <- ggplot(data = subset(Region_plot_df, Measure == "Shannon"), aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + ylab("Year coefficient") + ggtitle("Diversity") + xlab(NULL) + ylim(-0.4,0.4) grid.arrange(a,b,r,d, nrow = 1) # Saved as 4 x 8 inch pdf (Fig. 1) # # # # Effect of Habitat on the four responses (linear) #### # Need to make a plot that is four panels wide (responses) and eight panels # high (habitat (including all-sites)) # This will show the year coefficients from the linear models with 95% CIs. # I need the following dfs which are already made above Richness_overall_df Shannon_overall_df Ab_overall_df Biomass_overall_df Richness_habitat_df Shannon_habitat_df Ab_habitat_df Biomass_habitat_df # First need to give a new var to the overall dfs to show they are both regions combined Richness_overall_df$Habitat_500m <- "All sites" Shannon_overall_df$Habitat_500m <- "All sites" Ab_overall_df$Habitat_500m <- "All sites" Biomass_overall_df$Habitat_500m <- "All sites" # also get rid of the dud column Richness_overall_df$X1 <- NULL Shannon_overall_df$X1 <- NULL Ab_overall_df$X1 <- NULL Biomass_overall_df$X1 <- NULL # bind em all up Habitat_plot_df <- dplyr::bind_rows(Ab_overall_df, Biomass_overall_df, Richness_overall_df, Shannon_overall_df, Ab_habitat_df, Biomass_habitat_df, Richness_habitat_df, Shannon_habitat_df) # change names of habitat levels # Make it a character so it can be changed Habitat_plot_df$Habitat_500m <- as.character(Habitat_plot_df$Habitat_500m) # Correct names Habitat_plot_df$Habitat_500m[Habitat_plot_df$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation" Habitat_plot_df$Habitat_500m[Habitat_plot_df$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland" Habitat_plot_df$Habitat_500m[Habitat_plot_df$Habitat_500m == "Improved_grassland"] <- "Improved grassland" Habitat_plot_df$Habitat_500m[Habitat_plot_df$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'" # Change the levels Habitat_plot_df$Habitat_500m <- factor(Habitat_plot_df$Habitat_500m, levels = c("All sites", "Urban", "Upland", "'Other semi-natural'", "Improved grassland", "Conifer plantation", "Broadleaf woodland", "Arable")) # Plot them. I can't get the scales to act nicely, so I will have to plot each one separately #Abundance a <- ggplot(data = subset(Habitat_plot_df, Measure == "Abundance"), aes(x = Habitat_500m, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + # Leave this in/out depending on requirements ylab("Year coefficient") + ggtitle("Total abundance") + xlab(NULL) + ylim(-0.02, 0.02) # Biomass b <- ggplot(data = subset(Habitat_plot_df, Measure == "Biomass"), aes(x = Habitat_500m, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + ylab("Year coefficient") + ggtitle("Biomass") + xlab(NULL) + ylim(-0.025, 0.025) # Richness r <- ggplot(data = subset(Habitat_plot_df, Measure == "Richness"), aes(x = Habitat_500m, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + ylab("Year coefficient") + ggtitle("Species richness") + xlab(NULL) + ylim(-1.1, 1.1) # Shannon d <- ggplot(data = subset(Habitat_plot_df, Measure == "Shannon"), aes(x = Habitat_500m, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + ylab("Year coefficient") + ggtitle("Diversity") + xlab(NULL) + ylim(-0.45,0.45) grid.arrange(a,b,r,d, nrow = 1) # Saved as 4 x 8 inch pdf as (Fig. 2) # # # # Effect of Region (broadleaf woodland only) on the four responses (linear) #### # Need to make a plot that is four panels wide (responses) and three panels # high (region (including all-sites)) # This will show the year coefficients from the linear models with 95% CIs. # I need the following dfs which are already made above Richness_habitat_df Shannon_habitat_df Ab_habitat_df Biomass_habitat_df Richness_hab_region_df Shannon_hab_region_df Ab_hab_region_df Biomass_hab_region_df # Cut out only the woodland sites from the habitat dfs Richness_habitat_df_woods <- Richness_habitat_df %>% dplyr::filter(Habitat_500m == "Broadleaf woodland") Shannon_habitat_df_woods <- Shannon_habitat_df %>% dplyr::filter(Habitat_500m == "Broadleaf woodland") Ab_habitat_df_woods <- Ab_habitat_df %>% dplyr::filter(Habitat_500m == "Broadleaf woodland") Biomass_habitat_df_woods <- Biomass_habitat_df %>% dplyr::filter(Habitat_500m == "Broadleaf woodland") # First need to give a new var to the overall dfs to show they are both regions combined Richness_habitat_df_woods$N_S <- "All woodland sites" Shannon_habitat_df_woods$N_S <- "All woodland sites" Ab_habitat_df_woods$N_S <- "All woodland sites" Biomass_habitat_df_woods$N_S <- "All woodland sites" # bind em all up Hab_region_plot_df <- dplyr::bind_rows(Richness_habitat_df_woods, Biomass_habitat_df_woods, Ab_habitat_df_woods, Shannon_habitat_df_woods, Ab_hab_region_df, Biomass_hab_region_df, Richness_hab_region_df, Shannon_hab_region_df) # Change the levels order so north comes out on top Hab_region_plot_df$N_S <- factor(Hab_region_plot_df$N_S, levels = c("All woodland sites", "South", "North")) # Plot them. I can't get the scales to act nicely, so I will have to plot each one separately #Abundance a <- ggplot(data = subset(Hab_region_plot_df, Measure == "Abundance"), aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + # Leave this in/out depending on requirements ylab("Year coefficient") + ggtitle("Total abundance") + xlab(NULL) + ylim(-0.028, 0.028) # Biomass b <- ggplot(data = subset(Hab_region_plot_df, Measure == "Biomass"), aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + ylab("Year coefficient") + ggtitle("Biomass") + xlab(NULL) + ylim(-0.026, 0.026) # Richness r <- ggplot(data = subset(Hab_region_plot_df, Measure == "Richness"), aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + ylab("Year coefficient") + ggtitle("Species richness") + xlab(NULL) + ylim(-1.3, 1.3) # Shannon d <- ggplot(data = subset(Hab_region_plot_df, Measure == "Shannon"), aes(x = N_S, y = Year.trend)) + geom_point() + geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1 geom_hline(yintercept = 0, linetype = 2) + coord_flip() + theme_bw() + theme(panel.border = element_blank()) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + theme(axis.line = element_line(colour = "black")) + theme(axis.text.y = element_blank()) + ylab("Year coefficient") + ggtitle("Diversity") + xlab(NULL) + ylim(-0.6, 0.6) grid.arrange(a,b,r,d, nrow = 1) # Saved as 4 x 8 inch pdf as (Fig. 3) # # # # # End of script