# Moth declines are most severe in broadleaf woodlands despite a net gain in habitat availability - 
# Blumgart et al., 2022

# Script_2_General_analysis

# This script looks at how four response variables have changed over time from 1968 - 2016.
# These response variables are: abundance, biomass, species richness and diversity. It also looks
# at how these changes vary between habitats, regions and regions within habitat. Originally, 
# the interaction between region and two habitat types was going to be analysed but a jackknife
# analysis showed that trends in Improved grasslands in the north were highly sensitive to the 
# exclusion of single sites, so improved grassland was not included in the habitat-region analysis, 
# leaving only broadleaf woodland. The jackknife analysis is also presented here. 

# This script also assesses the effect of estmated deer damage at woodland sites on the trend in 
# the four response variables. 

# The following things will be analysed:

# 1. Species richness and diversity for Hill numbers 0 and 1 (i.e. richness and Shannon diversity)
#         change across:
#         a) All sites
#         b) The north and south
#         c) The 7 habitat categories
#         d) in broadleaf woodland in the north/south

# 2. Total abundance change across:
#         a) All sites
#         b) The north and south
#         c) The 7 habitat categories
#         d) in broadleaf woodland in the north/south

# 3. Biomass change across:
#         a) All sites
#         b) The north and south
#         c) The 7 habitat categories
#         d) in broadleaf woodland in the north/south


# Libraries and functions ####

library(dplyr) # For data manipulation
library(tidyr) # For data manipulation
library(ggplot2) # For plotting
library(mgcv) # For GAMMs
library(lme4) # For GLMERs
library(poptrend) # For detecting significant short-term trends
library(emmeans) # For post hoc tests and marginal means
library(MASS) # for glm.nb
library(itsadug) # for visualising GAMMs
library(mgcViz) # for visualising GAMMs
library(MuMIn) # For AICc
library(gridExtra) # For multiple panel ggplots

# # # 

# Data import ####

# For species richness I need the diversity stats (prepared in Script_1_Making_diversity_stats_data_frame)
Diversity_stats <- read.csv("Data/Diversity_stats_complete.csv")

# For abundance and biomass I need the annual totals for each species
Annual_species_totals <- read.csv("Data/All_years_all_species_all_sites.csv")

# For abundance I only need one total abundance value per site-year so this can be summarised down
Annual_aggregate_totals <- 
  Annual_species_totals %>%
  dplyr::group_by(CalYear, RIS.TrapCode) %>%
  dplyr::summarise(Annual_count = sum(Annual_count))

# Import the biomass for each species (from Kinsella et al., 2020)
Biomass_df <- read.csv("Data/Biomass_df.csv")

# Site_info
Site_info <- read.csv("Data/Site_info.csv")

# Site_year_completeness info
Site_year_completeness <- read.csv("Data/Site_year_info.csv")

# All I need from this is Paul's complete codes (these are determined manually by the Rothamsted Insect 
# Survey) 
Site_year_completeness <- 
  Site_year_completeness %>%
  dplyr::select(SITE, Year, Complete_type_Paul)

# # # 

# # # 


# 1 Species richness and diversity ####

# 1. Species richness and diversity for Hill numbers 0 - 1 (ie richness and Shannon diversity)
#         change across:
#         a) All sites
#         b) The north and south
#         c) The 7 habitat categories
#         d) Habitat*region interaction (broadleaf woodland only)

# 1. Data prep ####

# Get variable names 
names(Diversity_stats)
names(Site_info)
names(Site_year_completeness)

# Prepare to merge these together
Site_info_2 <- Site_info
Site_info_2$Site_name <- NULL

# Merge
Diversity_stats_2 <- merge(Diversity_stats, Site_info_2, by = c("SITE"))
Diversity_stats_3 <- merge(Diversity_stats_2, Site_year_completeness, by = c("SITE", "Year"))

# Check
names(Diversity_stats_3)

# Cut down data to the sites and years needed
Diversity_stats_4 <- 
  Diversity_stats_3 %>%
  dplyr::filter(Year >= 1968 & Year <= 2016) %>%
  dplyr::filter(Region != "Channel Islands") %>% # Get rid of Channel Isles and Ireland sites
  dplyr::filter(Region != "Ireland") %>%
  dplyr::filter(Complete_type_Paul == "Complete") # Use 'Complete' site-years only

# Make a new variable for Actual_years ran (between 1968 and 2016)
Actual_years_ran <-
  Diversity_stats_4 %>%
  dplyr::group_by(SITE) %>%
  dplyr::summarise(Actual_years = length(unique(Year)))

# Merge it on
Diversity_stats_5 <- merge(Diversity_stats_4, Actual_years_ran, by = "SITE")

# And cut it down to sites with at least three Actual_years running 
Diversity_stats_6 <- 
  Diversity_stats_5 %>%
  dplyr::filter(Actual_years >= 3)

# Make a site number factor
Diversity_stats_6$fSITE <- as.factor(paste0("S_",Diversity_stats_6$SITE))

# Make a year factor for a random effect
Diversity_stats_6$fYear <- as.factor(paste0("Y_", Diversity_stats_6$Year))

# Make sure Habitat_500m is a factor rather than a character
Diversity_stats_6$Habitat_500m <- as.factor(Diversity_stats_6$Habitat_500m)

# Also get rid of site-years that have NA for any of the diversity indices as these will be
# poorly sampled
Diversity_stats_7 <- 
  Diversity_stats_6 %>%
  dplyr::filter(!is.na(Est_sp_richness)) %>%
  dplyr::filter(!is.na(Est_Shannon)) %>%
  dplyr::filter(Est_Shannon != Inf)#

# Data prep complete

# # # 

# # # 

# 1a) Modelling - Species richness and diversity - All sites ####

# ~~~ Wiggly GAMM richness ####

# Run a GAMM with a non-linear year effect, a random intercept for site and a random 
# slope for year within site
Overall_rich_GAMM_wiggly <- gam(Est_sp_richness ~ s(Year) + 
                                    s(fYear, bs = "re") + 
                                    s(Year, fSITE, bs = "re"), 
                                  data = Diversity_stats_7)

# Have a look
Rich_viz = getViz(Overall_rich_GAMM_wiggly, nsim = 0)
print(plot(Rich_viz), pages = 1) # Random effects good. Richness peaks in 1990s.
check(Rich_viz) # Residuals good

# Is the year effect significant?
anova(Overall_rich_GAMM_wiggly)
#                  edf  Ref.df      F  p-value
#s(Year)         5.300   5.577  3.966 0.000635

# Yes, year is significant

# Get the AICc 
AICc(Overall_rich_GAMM_wiggly) # 28869.86

# Plot predictions for Supporting Info (Fig. S2c)
plot_smooth(Overall_rich_GAMM_wiggly, view = "Year", main = "Species richness", 
            ylab = "Species richness", ylim = c(0, 220)) 
# Saved as 5 x 5 inch pdf

# What is the percentage change?
newX <- expand.grid(Year = seq(from = min(Diversity_stats_7$Year), to = max(Diversity_stats_7$Year),
                               length = 49), fSITE = "S_1", fYear = "Y_2016") # (Selecting 
# random effects at random. I have tested that changing random effects has no effect on predictions)
newY <- predict(Overall_rich_GAMM_wiggly, newdata = newX, se.fit = TRUE, 
                exclude = c("s(fSITE)", "s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)
(addXY$fit[addXY$Year == 2016] - addXY$fit[addXY$Year == 1968])/
  addXY$fit[addXY$Year == 1968]*100
# -6.04% (this is from the non-linear model and is just for interest - will not be reported)

# # # 

# ~~~ Linear GAMM richness ####

# Run a linear GAMM
Overall_rich_GAMM_linear <- gam(Est_sp_richness ~ Year + 
                                  s(fYear, bs = "re") +
                                  s(Year, fSITE, bs = "re"),
                                data = Diversity_stats_7)

# Do the checks and plot
Rich_linear_viz = getViz(Overall_rich_GAMM_linear, nsim = 0)
print(plot(Rich_linear_viz), pages = 1) # Random effects good. 
check(Rich_linear_viz) # Residuals good

# Is year effect significant?
anova(Overall_rich_GAMM_linear)

#Parametric Terms:
#      df    F p-value
# Year  1 0.18   0.672

# Year has no effect

# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Overall_rich_GAMM_linear, pairwise ~ NULL, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Overall_rich_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(Perc_change) 
Just_percs # -1.20%

# Merge on to Emtrends_df_2
Richness_overall_df <- merge(Emtrends_df_2, Just_percs)

Richness_overall_df$Measure <- "Richness"

# Plot
ggplot(data = Richness_overall_df, aes(x = Measure, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Richness")
# This plot will be added to other plots and saved at the end of this script


# How do the linear vs non-linear models compare?
AICc(Overall_rich_GAMM_wiggly) # 28869.86
AICc(Overall_rich_GAMM_linear) # 28873.81

AICc(Overall_rich_GAMM_wiggly) - AICc(Overall_rich_GAMM_linear) # = -3.95
# Non-linear is better

# # # 

# ~~~~~~ Effect of deer damage in woodland on richness ####

# Run a linear GAMM for woodland only
BL_woodland_rich_GAMM_linear <- gam(Est_sp_richness ~ Year*Deer_damage_estimate + 
                                  s(fYear, bs = "re") +
                                  s(Year, fSITE, bs = "re"),
                                data = subset(Diversity_stats_7,  Habitat_500m == "Deciduous_woodland"))

anova(BL_woodland_rich_GAMM_linear)
# Parametric Terms:
#                           df     F p-value
# Year                       1 1.140   0.286
# Deer_damage_estimate       1 0.128   0.720
# Year:Deer_damage_estimate  1 0.091   0.764

# No significant interaction

# # #

# ~~~ Wiggly GAMM Shannon diversity ####

# Run a GAMM with a non-linear year effect
Overall_Shan_GAMM_wiggly <- gam(Est_Shannon ~ s(Year) + 
                                  s(fYear, bs = "re") +
                                  s(Year, fSITE, bs = "re"), 
                                data = Diversity_stats_7)

# Model chekcs
Shan_viz = getViz(Overall_Shan_GAMM_wiggly, nsim = 0)
print(plot(Shan_viz), pages = 1) # Random effects good enough. Diversity peaks around 1990s then plateaus.
check(Shan_viz) # Resisuals good enough

# Plot model predictions (Fig. S2d)
plot_smooth(Overall_Shan_GAMM_wiggly, view = "Year", main = "Shannon diversity", 
            ylab = "Diversity (effect common species)", ylim = c(0, 62))
# Saved as 5 x 5 inch pdf

# Is year effect sig?
anova(Overall_Shan_GAMM_wiggly)
#                   edf  Ref.df      F p-value
# s(Year)         3.828   4.031  4.052 0.00284

# Get AICc
AICc(Overall_Shan_GAMM_wiggly) # 23448.33

newX <- expand.grid(Year = seq(from = min(Diversity_stats_7$Year), to = max(Diversity_stats_7$Year),
                               length = 49), fSITE = "S_1", fYear = "Y_2016") # (Selecting 
# random effects at random. I have tested that changing random effects has no effect on predictions)
newY <- predict(Overall_Shan_GAMM_wiggly, newdata = newX, se.fit = TRUE, 
                exclude = c("s(fSITE)", "s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)
(addXY$fit[addXY$Year == 2016] - addXY$fit[addXY$Year == 1968])/
  addXY$fit[addXY$Year == 1968]*100
# 6.6% non-linear change (will not be reported)

# # #


# ~~~ Linear GAMM Shannon diversity ####

# Run a linear GAMM
Overall_Shan_GAMM_linear <- gam(Est_Shannon ~ Year + 
                                  s(fYear, bs = "re") +
                                  s(Year, fSITE, bs = "re"),
                                data = Diversity_stats_7)

# Do the checks and plot
Shan_linear_viz = getViz(Overall_Shan_GAMM_linear, nsim = 0)
print(plot(Shan_linear_viz), pages = 1) # Random effects ok. One outlying year
check(Shan_linear_viz) # Residuals good enough

# Is year effect significant?
anova(Overall_Shan_GAMM_linear)
#Parametric Terms:
#      df     F p-value
# Year  1 7.455 0.00637

# Get AICc
AICc(Overall_Shan_GAMM_linear) # 23449.32


# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Overall_Shan_GAMM_linear, pairwise ~ NULL, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc hange using predict for teh first and last years
newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Overall_Shan_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(Perc_change)
Just_percs # 10.4%

# Merge on to Emtrends_df_2
Shannon_overall_df <- merge(Emtrends_df_2, Just_percs)

Shannon_overall_df$Measure <- "Shannon"

ggplot(data = Shannon_overall_df, aes(x = Measure, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Shannon diversity")
# This plot will be added to other plots and saved at the end of this script

# How do linear and non-linear AICcs compare?

AICc(Overall_Shan_GAMM_wiggly) # 23448.33
AICc(Overall_Shan_GAMM_linear) # 23449.32
AICc(Overall_Shan_GAMM_wiggly) - AICc(Overall_Shan_GAMM_linear) # = -0.9889182

# Models are equivalent so I will go with linear

# # #

# ~~~~~~ Effect of deer damage in woodland on diversity ####

# Run a linear GAMM
BL_woodland_Shan_GAMM_linear <- gam(Est_Shannon ~ Year*Deer_damage_estimate + 
                                  s(fYear, bs = "re") +
                                  s(Year, fSITE, bs = "re"),
                                data = subset(Diversity_stats_7,  Habitat_500m == "Deciduous_woodland"))

anova(BL_woodland_Shan_GAMM_linear)
# Parametric Terms:
#                           df     F p-value
# Year                       1 0.436   0.509
# Deer_damage_estimate       1 1.518   0.219
# Year:Deer_damage_estimate  1 1.597   0.207

# No significant interaction

# # #

# 1b) Modelling - Species richness and diversity - The north and south ####

# ~~~ Wiggly GAMM richness ####

# Make N_S a factor
Diversity_stats_7$N_S <- as.factor(Diversity_stats_7$N_S)

# Run a GAMM with a non-linear year effect and region interaction
Region_rich_GAMM_wiggly <- gam(Est_sp_richness ~ 
                                 s(Year, by = N_S) + 
                                 N_S + 
                                 s(fYear, bs = "re") + 
                                 s(Year, fSITE, bs = "re"),
                               data = Diversity_stats_7) # Takes a while to run...

# How does it look?
Rich_region_viz = getViz(Region_rich_GAMM_wiggly, nsim = 0)
print(plot(Rich_region_viz), pages = 1) # Random effects very good. Trend in north and south contrasting
check(Rich_region_viz) # Residuals good

# Plot model predictions (Fig. S3c)
plot_smooth(Region_rich_GAMM_wiggly, view = "Year",  plot_all = c("N_S"), main = "Species richness", 
            ylab = "Species richness", ylim = c(0, 250))
# Saved as 5 x 5 inch pdf 

# Is the effect of year within each region significant?
anova(Region_rich_GAMM_wiggly)
#                      edf  Ref.df      F p-value
# s(Year):N_SNorth   1.798   2.198  3.503  0.0326
# s(Year):N_SSouth   1.366   1.519  1.391  0.1469

# Trend in north is sig

# Get AICc
AICc(Region_rich_GAMM_wiggly) # 28851.79


# # #


# ~~~ Linear GAMM richness ####

# Run a linear GAMM
Region_rich_GAMM_linear <- gam(Est_sp_richness ~ Year*N_S + 
                                 s(fYear, bs = "re") +
                                 s(Year, fSITE, bs = "re"),
                               data = Diversity_stats_7)

# Do the checks and plot
Rich_region_linear_viz = getViz(Region_rich_GAMM_linear, nsim = 0)
print(plot(Rich_region_linear_viz), pages = 1) # Random effects good
check(Rich_region_linear_viz) # Residuals good

# Is the N_S*Year interaction sig? anova(gam) is analogous to drop1(glm) for parametric terms
anova(Region_rich_GAMM_linear)
# Parametric Terms:
#          df      F  p-value
# Year      1  4.027 0.044884
# N_S       1 14.656 0.000132
# Year:N_S  1 13.388 0.000258

# There is a sig Year*N_S interaction

# Get AICc
AICc(Region_rich_GAMM_linear) # 28852.79

# Is linear or wiggly better?
AICc(Region_rich_GAMM_wiggly) - AICc(Region_rich_GAMM_linear) #  -0.9988898
# They are equivalent so I will go with linear

# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Region_rich_GAMM_linear, pairwise ~ N_S, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for teh first and last years
newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(as.factor(Diversity_stats_7$N_S))), 
                    fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Region_rich_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(N_S, Perc_change) 
Just_percs
#  N_S   Perc_change
# North        9.08
# South       -4.28

# Merge on to Emtrends_df_2
Richness_region_df <- merge(Emtrends_df_2, Just_percs)

Richness_region_df$Measure <- "Richness"

# Reorder north and south

Richness_region_df$N_S <- factor(Richness_region_df$N_S, levels = c("South", "North"))

# Plot
ggplot(data = Richness_region_df, aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Richness")
# This plot will be added to other plots and saved at the end of this script


# # # 

# ~~~ Wiggly GAMM Shannon diversity ####

# Make N_S a factor
Diversity_stats_7$N_S <- as.factor(Diversity_stats_7$N_S)

# Run a GAMM with a non-linear year effect and region interaction
Region_Shan_GAMM_wiggly <- gam(Est_Shannon ~ 
                                 s(Year, by = N_S) + 
                                 N_S + 
                                 s(fYear, bs = "re") + 
                                 s(Year, fSITE, bs = "re"),
                               data = Diversity_stats_7) # Takes a while to run...

# How does it look?
Shan_region_viz = getViz(Region_Shan_GAMM_wiggly, nsim = 0)
print(plot(Shan_region_viz), pages = 1) # Random effects good
check(Shan_region_viz) # Residuals good but with one outlier.
# The outlier is SITE 259, Santon Downham in 1985 (Conifer_plantation). Seems like a normal site-year
# with high species richness so I've kept it in.

# Plot model predictions (Fig. S3d)
plot_smooth(Region_Shan_GAMM_wiggly, view = "Year",  plot_all = c("N_S"), 
            main = "Shannon diversity", ylab = "Diversity (effective common species)", ylim = c(0, 70))
# Saved as 5 x 5 inch pdf

# Is year significant within each region?
anova(Region_Shan_GAMM_wiggly)
#Approximate significance of smooth terms:
#                      edf  Ref.df      F  p-value
# s(Year):N_SNorth   4.078   4.580  4.908 0.000421
# s(Year):N_SSouth   7.765   8.330  3.163 0.001414

# Yes, sig in both north and south

# Get the AICc
AICc(Region_Shan_GAMM_wiggly) # 23430.11


# Find the significance of the interaction effect by running a model with no interaction and comparing
Reduced_Region_Shan_GAMM_wiggly <- gam(Est_Shannon ~ 
                                         s(Year) + 
                                         N_S + 
                                         s(fYear, bs = "re") + 
                                         s(Year, fSITE, bs = "re"),
                                       data = Diversity_stats_7)

anova(Region_Shan_GAMM_wiggly, Reduced_Region_Shan_GAMM_wiggly, test = "Chisq")
#       Resid. Df Resid. Dev     Df Deviance  Pr(>Chi)    
# 1    2729.9     314785                               
# 2    2737.5     318101 -7.5766    -3316 0.0002288 ***


# # # 


# ~~~ Linear GAMM Shannon diversity ####

# Run a linear GAMM
Region_Shan_GAMM_linear <- gam(Est_Shannon ~ Year*N_S + 
                                 s(fYear, bs = "re") +
                                 s(Year, fSITE, bs = "re"),
                               data = Diversity_stats_7)

# Do the checks and plot
Shan_region_linear_viz = getViz(Region_Shan_GAMM_linear, nsim = 0)
print(plot(Shan_region_linear_viz), pages = 1) # Random effects ok
check(Shan_region_linear_viz) # Residuals good enough

# Is the N_S*Year interaction sig? anova(gam) is analogous to drop1(glm)
anova(Region_Shan_GAMM_linear)
#  Parametric Terms:
#          df      F  p-value
# Year      1 11.639 0.000655
# N_S       1  4.536 0.033268
# Year:N_S  1  4.135 0.042105

# There is a sig Year*N_S interaction (only just!)

# Get AICc
AICc(Region_Shan_GAMM_linear) # 23444.05

# Plotting

# How does non-linear and linear compare?
AICc(Region_Shan_GAMM_wiggly) - AICc(Region_Shan_GAMM_linear) # -13.93758
# The non-linear is better


# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Region_Shan_GAMM_linear, pairwise ~ N_S, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for teh first and last years
newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Diversity_stats_7$N_S)), 
                    fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Region_Shan_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(N_S, Perc_change) 
Just_percs
#  N_S   Perc_change
# 1 North       20.7 
# 2 South        6.97

# Merge on to Emtrends_df_2
Shannon_region_df <- merge(Emtrends_df_2, Just_percs)

Shannon_region_df$Measure <- "Shannon"

# Reorder north and south

Shannon_region_df$N_S <- factor(Shannon_region_df$N_S, levels = c("South", "North"))

ggplot(data = Shannon_region_df, aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Shannon diversity")
# This plot will be added to other plots and saved at the end of this script


# # # 

# 1c) Modelling - Species richness and diversity - 7 habitats ####

# ~~~ Wiggly GAMM richness ####

# Make Habitat_500m a factor
Diversity_stats_7$Habitat_500m <- as.factor(Diversity_stats_7$Habitat_500m)

# Run a GAMM with a non-linear year effect and habitat interaction
Habitat_rich_GAMM_wiggly <- gam(Est_sp_richness ~ 
                                 s(Year, by = Habitat_500m) + 
                                  Habitat_500m + 
                                 s(fYear, bs = "re") + 
                                 s(Year, fSITE, bs = "re"),
                               data = Diversity_stats_7) # Takes a while to run...

# How does it look?
Rich_habitat_viz = getViz(Habitat_rich_GAMM_wiggly, nsim = 0)
print(plot(Rich_habitat_viz), pages = 1) # Random effects good. Clear decline in woodland
check(Rich_habitat_viz) # Residuals good

anova(Habitat_rich_GAMM_wiggly)
#                                               edf  Ref.df      F  p-value
# s(Year):Habitat_500mArable                 2.488   3.086  2.140   0.0942
# s(Year):Habitat_500mConifer_plantation     1.000   1.000  0.004   0.9510
# s(Year):Habitat_500mDeciduous_woodland     2.170   2.696  9.734 1.18e-05
# s(Year):Habitat_500mImproved_grassland     2.369   2.893  2.215   0.0808
# s(Year):Habitat_500mLowland_semi_natural   3.749   4.656  1.436   0.2243
# s(Year):Habitat_500mUpland                 1.000   1.000  1.546   0.2138
# s(Year):Habitat_500mUrban                  3.385   4.195  2.156   0.0684

# Woodland is the only sig smoother

# Get the AICc
AICc(Habitat_rich_GAMM_wiggly) # 28846.92

# Find the significance of the interaction effect by running a model with no interaction and comparing
Reduced_Habitat_rich_GAMM_wiggly <- gam(Est_sp_richness ~ 
                                  s(Year) + 
                                  Habitat_500m + 
                                  s(fYear, bs = "re") + 
                                  s(Year, fSITE, bs = "re"),
                                data = Diversity_stats_7)


anova(Habitat_rich_GAMM_wiggly, Reduced_Habitat_rich_GAMM_wiggly, test = "Chisq")
#         Resid. Df Resid. Dev      Df Deviance  Pr(>Chi)    
#1    2720.0    1849719                               
#2    2737.2    1880036 -17.211   -30317 0.0002738 ***

# # #

# ~~~~~~Plotting wiggly species richness (Habitat) for paper ####

# Make a new data frame to predict from. 
newX <- expand.grid(Year = seq(from = min(Diversity_stats_7$Year), to = max(Diversity_stats_7$Year)),
                    Habitat_500m = levels(as.factor(Diversity_stats_7$Habitat_500m)),
                    fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random)

# Get predicted values
newY <- predict(Habitat_rich_GAMM_wiggly, newdata = newX, se.fit = TRUE, 
                exclude = c("s(fYear)","s(Year,fSITE)")) # I have tested changing randoms makes no diff

# Add them together
addXY <- data.frame(newX, newY)

# Rename them and make the CIs
addXY_2 <-
  addXY %>%
  mutate(Richness = (fit),
         lwr = (fit - 1.96*se.fit), 
         upr = (fit + 1.96*se.fit))

# Rename and reorder Habitat levels in prep

# Make it a character so it can be changed
addXY_2$Habitat_500m <- as.character(addXY_2$Habitat_500m)

# Correct names
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Improved_grassland"] <- "Improved grassland"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'"
# Get currentlevels
levels(as.factor(addXY_2$Habitat_500m))
# Order them
addXY_2$Habitat_500m <- factor(addXY_2$Habitat_500m, 
                               levels = c("Arable","Broadleaf woodland","Conifer plantation",
                                          "Improved grassland","'Other semi-natural'",
                                          "Upland","Urban"))

# Final plot (Fig. S4c)
ggplot(data = addXY_2, aes(x = Year, y = Richness)) +
  geom_hline(yintercept =  mean(newY$fit), linetype = "dashed") +
  geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity", colour = "black") +
  theme_bw() +
  theme(panel.border = element_blank()) +
  #theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Species richness") +
  #theme(strip.background = element_rect(color="black", fill="white", size=0.5, linetype="blank")) +
  facet_wrap(~Habitat_500m, nrow = 2) +
  ylim(0, 300)
# Saved as 5 x 9 inch PDF 



# # # 


# ~~~ Linear GAMM richness ####

# Run a linear GAMM
Habitat_rich_GAMM_linear <- gam(Est_sp_richness ~ Year*Habitat_500m + 
                                 s(fYear, bs = "re") +
                                 s(Year, fSITE, bs = "re"),
                               data = Diversity_stats_7)

# Do the checks and plot
Rich_habitat_linear_viz = getViz(Habitat_rich_GAMM_linear, nsim = 0)
print(plot(Rich_habitat_linear_viz), pages = 1) # Random effects good
check(Rich_habitat_linear_viz) # Residuals good

anova(Habitat_rich_GAMM_linear) # Analogous to drop1
# Parametric Terms:
#                    df     F  p-value
# Year               1 0.020    0.888
# Habitat_500m       6 5.351 1.67e-05
# Year:Habitat_500m  6 5.057 3.61e-05

# There is a sig Year*Habitat interaction

# Get the AICc
AICc(Habitat_rich_GAMM_linear) # 28853.66

# How does linear compare to non-linear?
AICc(Habitat_rich_GAMM_wiggly) - AICc(Habitat_rich_GAMM_linear) # -6.73521
# Non-linear is better

# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Habitat_rich_GAMM_linear, pairwise ~ Habitat_500m, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for teh first and last years
newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(Diversity_stats_7$Habitat_500m)), 
                    fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Habitat_rich_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(Habitat_500m, Perc_change) 
Just_percs
#  Habitat_500m         Perc_change
# Arable                     0.571
# Conifer_plantation        -0.649
# Deciduous_woodland       -14.3  
# Improved_grassland         2.05 
# Lowland_semi_natural       7.78 
# Upland                    15.8  
#7Urban                      2.97 

# Merge on to Emtrends_df_2
Richness_habitat_df <- merge(Emtrends_df_2, Just_percs)

Richness_habitat_df$Measure <- "Richness"

# Reorder and rename habitats
# Turn them to a character so they can be renamed
Richness_habitat_df$Habitat_500m <- as.character(Richness_habitat_df$Habitat_500m)
# Correct names
Richness_habitat_df$Habitat_500m[Richness_habitat_df$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation"
Richness_habitat_df$Habitat_500m[Richness_habitat_df$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland"
Richness_habitat_df$Habitat_500m[Richness_habitat_df$Habitat_500m == "Improved_grassland"] <- "Improved grassland"
Richness_habitat_df$Habitat_500m[Richness_habitat_df$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'"
# Get currentlevels
levels(as.factor(Richness_habitat_df$Habitat_500m))
# Order them
Richness_habitat_df$Habitat_500m <- factor(Richness_habitat_df$Habitat_500m, 
                                           levels = c("Urban", "Upland", "'Other semi-natural'",
                                                      "Improved grassland", "Conifer plantation",
                                                      "Broadleaf woodland", "Arable"))

# plot
ggplot(data = Richness_habitat_df, aes(x = Habitat_500m, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  xlab("Habitat") +
  ggtitle("Richness")
# This plot will be combined with other plots and saved at the end of this script

# # # 

# ~~~ Wiggly GAMM Shannon diversity ####

# Make Habitat_500m a factor
Diversity_stats_7$Habitat_500m <- as.factor(Diversity_stats_7$Habitat_500m)

# Run a GAMM with a non-linear year effect and region interaction
Habitat_Shan_GAMM_wiggly <- gam(Est_Shannon ~ 
                                  s(Year, by = Habitat_500m) + 
                                  Habitat_500m + 
                                  s(fYear, bs = "re") + 
                                  s(Year, fSITE, bs = "re"),
                                data = Diversity_stats_7) # Takes a while to run...

# How does it look?
Shan_habitat_viz = getViz(Habitat_Shan_GAMM_wiggly, nsim = 0)
print(plot(Shan_habitat_viz), pages = 1) # Random effects just about ok. Clear decline in woodland
check(Rich_habitat_viz) # fine

# Get significance of year smooth within each habitat
anova(Habitat_Shan_GAMM_wiggly)
#Approximate significance of smooth terms:
#                                             edf  Ref.df      F  p-value
#s(Year):Habitat_500mArable                 1.561   1.891  8.273 0.001083
#s(Year):Habitat_500mConifer_plantation     2.544   3.225  0.635 0.647169
#s(Year):Habitat_500mDeciduous_woodland     1.562   1.904  7.855 0.001136
#s(Year):Habitat_500mImproved_grassland     2.465   3.040  5.294 0.001169
#s(Year):Habitat_500mLowland_semi_natural   1.596   1.973  1.718 0.214665
#s(Year):Habitat_500mUpland                 2.892   3.589  1.568 0.185199
#s(Year):Habitat_500mUrban                  1.000   1.000 12.177 0.000491

# Get AICc
AICc(Habitat_Shan_GAMM_wiggly) #  23397.53



# ~~~~~~Plotting wiggly diveristy (Habitat) for paper ####

# Make a new data frame to predict from. 
newX <- expand.grid(Year = seq(from = min(Diversity_stats_7$Year), to = max(Diversity_stats_7$Year)),
                    Habitat_500m = levels(as.factor(Diversity_stats_7$Habitat_500m)),
                    fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random)

# Get predicted values
newY <- predict(Habitat_Shan_GAMM_wiggly, newdata = newX, se.fit = TRUE, 
                exclude = c("s(fYear)","s(Year,fSITE)")) # I have tested changing randoms makes no diff

# Add them together
addXY <- data.frame(newX, newY)

# Rename them and make the CIs
addXY_2 <-
  addXY %>%
  mutate(Diversity = (fit),
         lwr = (fit - 1.96*se.fit), 
         upr = (fit + 1.96*se.fit))

# Rename and reorder Habitat levels in prep

# Make it a character so it can be changed
addXY_2$Habitat_500m <- as.character(addXY_2$Habitat_500m)

# Correct names
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Improved_grassland"] <- "Improved grassland"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'"
# Get currentlevels
levels(as.factor(addXY_2$Habitat_500m))
# Order them
addXY_2$Habitat_500m <- factor(addXY_2$Habitat_500m, 
                               levels = c("Arable","Broadleaf woodland","Conifer plantation",
                                          "Improved grassland","'Other semi-natural'",
                                          "Upland","Urban"))

# Final plot (Fig. S4d)
ggplot(data = addXY_2, aes(x = Year, y = Diversity)) +
  geom_hline(yintercept =  mean(newY$fit), linetype = "dashed") +
  geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity", colour = "black") +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Diversity (effective common species)") +
  facet_wrap(~Habitat_500m, nrow = 2) +
  ylim(0, 90)
# Saved as 5 x 9 inch PDF 

# # # 

# ~~~ Linear GAMM Shannon diversity ####

# Run a linear GAMM
Habitat_Shan_GAMM_linear <- gam(Est_Shannon ~ Year*Habitat_500m + 
                                  s(fYear, bs = "re") +
                                  s(Year, fSITE, bs = "re"),
                                data = Diversity_stats_7)

# Do the checks and plot
Shan_habitat_linear_viz = getViz(Habitat_Shan_GAMM_linear, nsim = 0)
print(plot(Shan_habitat_linear_viz), pages = 1) # Some random effects a little off
check(Shan_habitat_linear_viz) # Residuals good enough

# Is there a sig year*habitat interaction?
anova(Habitat_Shan_GAMM_linear) # Analagous to drop1
#Parametric Terms:
#                   df      F  p-value
#Year               1 13.319 0.000268
#Habitat_500m       6  8.971 9.99e-10
#Year:Habitat_500m  6  8.606 2.70e-09


# There is a sig Year*Habitat interaction

# Get the AICc
AICc(Habitat_Shan_GAMM_linear) # 23401.16

# How does non-linear compare to linear
AICc(Habitat_Shan_GAMM_wiggly) - AICc(Habitat_Shan_GAMM_linear) # -3.624516
#Non-linear is better


# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Habitat_Shan_GAMM_linear, pairwise ~ Habitat_500m, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for teh first and last years
newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(Diversity_stats_7$Habitat_500m)), 
                    fSITE = "S_1", fYear = "Y_2016") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Habitat_Shan_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(Habitat_500m, Perc_change) 
Just_percs
# Habitat_500m         Perc_change
#1 Arable                     24.8 
#2 Conifer_plantation          3.80
#3 Deciduous_woodland        -15.1 
#4 Improved_grassland         16.4 
#5 Lowland_semi_natural       19.1 
#6 Upland                     28.1 
#7 Urban                      23.6 

# Merge on to Emtrends_df_2
Shannon_habitat_df <- merge(Emtrends_df_2, Just_percs)

Shannon_habitat_df$Measure <- "Shannon"

# Reorder and rename habitats
# Turn them to a character so they can be renamed
Shannon_habitat_df$Habitat_500m <- as.character(Shannon_habitat_df$Habitat_500m)
# Correct names
Shannon_habitat_df$Habitat_500m[Shannon_habitat_df$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation"
Shannon_habitat_df$Habitat_500m[Shannon_habitat_df$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland"
Shannon_habitat_df$Habitat_500m[Shannon_habitat_df$Habitat_500m == "Improved_grassland"] <- "Improved grassland"
Shannon_habitat_df$Habitat_500m[Shannon_habitat_df$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'"
# Get currentlevels
levels(as.factor(Shannon_habitat_df$Habitat_500m))
# Order them
Shannon_habitat_df$Habitat_500m <- factor(Shannon_habitat_df$Habitat_500m, 
                                           levels = c("Urban", "Upland", "'Other semi-natural'",
                                                      "Improved grassland", "Conifer plantation",
                                                      "Broadleaf woodland", "Arable"))

# Plot
ggplot(data = Shannon_habitat_df, aes(x = Habitat_500m, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  xlab("Habitat") +
  ggtitle("Shannon")
# This plot will be added to other plots and saved at  the end of this script

# # # 


# 1d) Modelling - Species richness and diversity - Habitat*region interaction ####

# Data prep

# Get only the woodland sites and make a new Hab_region var
Diversity_stats_hab_region <-
  Diversity_stats_7 %>%
  dplyr::filter(Habitat_500m == "Deciduous_woodland") %>%
  dplyr::mutate(Hab_region = paste0(Habitat_500m,"_",N_S))

# ~~~ Wiggly GAMM richness (broadleaf woodland only) ####

# Make N_S a factor
Diversity_stats_hab_region$N_S <- as.factor(Diversity_stats_hab_region$N_S)

# Run a GAMM with a non-linear year effect and region interaction
Hab_region_rich_GAMM_wiggly <- gam(Est_sp_richness ~ 
                                     s(Year, by = N_S) +
                                     N_S + 
                                     s(fYear, bs = "re") + 
                                     s(Year, fSITE, bs = "re"),
                                   data = Diversity_stats_hab_region) 

# How does it look?
Rich_hab_region_viz = getViz(Hab_region_rich_GAMM_wiggly, nsim = 0)
print(plot(Rich_hab_region_viz), pages = 1) # Random effects ok
check(Rich_hab_region_viz) # Residuals fine

# Plot model predictions (Fig. S6c)
plot_smooth(Hab_region_rich_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Species richness",
            ylim = c(0,270), ylab = "Species richness")

# Saved as 5 x 5 inch pdf


summary(Hab_region_rich_GAMM_wiggly) # Both smoothers sig
#Approximate significance of smooth terms:
#                         edf Ref.df      F  p-value    
# s(Year):N_SNorth  5.705  6.705  3.202  0.00270 ** 
# s(Year):N_SSouth  2.380  2.916  8.777 2.38e-05 ***


# Get the AICc
AICc(Hab_region_rich_GAMM_wiggly) # 5349.431

# Get significance of interaction by comparing full to reduced model
Reduced_Hab_region_rich_GAMM_wiggly <- gam(Est_sp_richness ~ 
                                             s(Year) +
                                             N_S + 
                                             s(fYear, bs = "re") + 
                                             s(Year, fSITE, bs = "re"),
                                           data = Diversity_stats_hab_region) 

anova(Hab_region_rich_GAMM_wiggly, Reduced_Hab_region_rich_GAMM_wiggly, test = "Chisq")
#    Resid. Df Resid. Dev      Df Deviance Pr(>Chi)  
# 1    491.82     329107                          
# 2    497.26     335338 -5.4415  -6230.8   0.1116

# No sig interaction between region and year for broadleaf woodlands

# # # 

# ~~~ Linear GAMM richness  (broadleaf woodland only) ####

# Run a linear GAMM
Hab_region_rich_GAMM_linear <- gam(Est_sp_richness ~ Year*N_S + 
                                     s(fYear, bs = "re") +
                                     s(Year, fSITE, bs = "re"),
                                   data = Diversity_stats_hab_region)

# Do the checks and plot
Rich_hab_region_linear_viz = getViz(Hab_region_rich_GAMM_linear, nsim = 0)
print(plot(Rich_hab_region_linear_viz), pages = 1) # Random effects a little off
check(Rich_hab_region_linear_viz) # Residuals good enough

anova(Hab_region_rich_GAMM_linear) # Analagous to drop1
#Parametric Terms:
#          df     F p-value
#Year      1 5.503  0.0194
#N_S       1 1.084  0.2983
#Year:N_S  1 0.865  0.3528
# There is no sig Year*Region interaction for woodlands

# Get AICc
AICc(Hab_region_rich_GAMM_linear) # 5358.454


# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Hab_region_rich_GAMM_linear, pairwise ~ N_S, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016),
                    N_S = c(levels(Diversity_stats_hab_region$N_S)),
                    fSITE = "S_46", fYear = "Y_1993") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Hab_region_rich_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(N_S, Perc_change) 
Just_percs
#   N_S   Perc_change
# 1 North       -13.1
# 2 South       -14.4

# Merge on to Emtrends_df_2
Richness_hab_region_df <- merge(Emtrends_df_2, Just_percs)

Richness_hab_region_df$Measure <- "Richness"

# Plot
ggplot(data = Richness_hab_region_df, aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  xlab("Region") +
  ggtitle("Richness")
# This plot will be added to other plots and saved at the end of this script

# # # 


# ~~~ Wiggly GAMM Shannon diversity  (broadleaf woodland only) ####

# Make N_S a factor
Diversity_stats_hab_region$N_S <- as.factor(Diversity_stats_hab_region$N_S)

# Run a GAMM with a non-linear year effect and region interaction
Hab_region_Shan_GAMM_wiggly <- gam(Est_Shannon ~ 
                                     s(Year, by = N_S) +
                                     N_S + 
                                     s(fYear, bs = "re") + 
                                     s(Year, fSITE, bs = "re"),
                                   data = Diversity_stats_hab_region) # Quick to run now for some reason

# How does it look?
Shan_hab_region_viz = getViz(Hab_region_Shan_GAMM_wiggly, nsim = 0)
print(plot(Shan_hab_region_viz), pages = 1) # Random effects ok. Smoother shapes wigglier than expected
check(Shan_hab_region_viz)

# Plot (Fig. S6d)
plot_smooth(Hab_region_Shan_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Shannon diversity",
            ylim = c(0,100), ylab = "Diversity (effective common species)")
# Saved as 5 x 5 inch pdf


summary(Hab_region_Shan_GAMM_wiggly) 
#Approximate significance of smooth terms:
#                     edf Ref.df      F p-value    
#s(Year):N_SNorth  1.832  2.232  0.766   0.5294    
#s(Year):N_SSouth  8.387  8.803  5.311 6.45e-07 ***

# Get AICc
AICc(Hab_region_Shan_GAMM_wiggly) #  4465.368

# Find significance of interaction effect by comparing full and reuduced model
Reduced_Hab_region_Shan_GAMM_wiggly <- gam(Est_Shannon ~ 
                                             s(Year) +
                                             N_S + 
                                             s(fYear, bs = "re") + 
                                             s(Year, fSITE, bs = "re"),
                                           data = Diversity_stats_hab_region)


anova(Hab_region_Shan_GAMM_wiggly, Reduced_Hab_region_Shan_GAMM_wiggly, test = "Chisq")
#       Resid. Df Resid. Dev      Df Deviance  Pr(>Chi)    
#1    489.06      68440                               
#2    494.49      73040 -5.4302  -4600.2 4.137e-06 ***


# # # 

# ~~~ Linear GAMM shannon diversity (broadleaf woodland only) ####

# Run a linear GAMM
Hab_region_Shan_GAMM_linear <- gam(Est_Shannon ~ Year*N_S + 
                                     s(fYear, bs = "re") +
                                     s(Year, fSITE, bs = "re"),
                                   data = Diversity_stats_hab_region)

# Do the checks and plot
Shan_hab_region_linear_viz = getViz(Hab_region_Shan_GAMM_linear, nsim = 0)
print(plot(Shan_hab_region_linear_viz), pages = 1) # Random effects just about ok
check(Shan_hab_region_linear_viz) # Residuals good enough

anova(Hab_region_Shan_GAMM_linear) # Analagous to drop1
#Parametric Terms:
#          df     F p-value
#Year      1 0.200  0.6547
#N_S       1 5.714  0.0172
#Year:N_S  1 5.362  0.0210

# There is a sig Year*Region interaction in woodland

# Get AICc
AICc(Hab_region_Shan_GAMM_linear) # 4482.204

# How does linear compare to non-linear?
AICc(Hab_region_Shan_GAMM_wiggly) - AICc(Hab_region_Shan_GAMM_linear) # -16.83565
# Non-linear is better
  

# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Hab_region_Shan_GAMM_linear, pairwise ~ N_S, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016),
                    N_S = c(levels(Diversity_stats_hab_region$N_S)),
                    fSITE = "S_46", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Hab_region_Shan_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(N_S, Perc_change) 
Just_percs
# N_S   Perc_change
# 1 North       -3.73
# 2 South      -19.0 

# Merge on to Emtrends_df_2
Shannon_hab_region_df <- merge(Emtrends_df_2, Just_percs)

Shannon_hab_region_df$Measure <- "Shannon"

# Plot
ggplot(data = Shannon_hab_region_df, aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  xlab("Region") +
  ggtitle("Shannon diversity")
# This plot will be added to other plots and saved at the end of this script


# # # 

# ~~~~~ ####

# 2. Aggregate abundance change ####

# 2. Aggregate abundance change across:
#         a) All sites
#         b) The north and south
#         c) The 7 habitat categories
#         d) in broadleaf woodland/improved grassland in the north/south

# In addition to the poptends and GAMMs, I will also model with linear model above for direct
# comparison

# ~ Data prep ####

# The dfs I need are
names(Annual_aggregate_totals)
names(Site_info)
names(Site_year_completeness)

# Prepare to merge these together
Site_info_2 <- Site_info
Site_info_2$Site_name <- NULL

Site_year_completeness_2 <- Site_year_completeness
Site_year_completeness_2$Site_name_db <- NULL
Site_year_completeness_2$Site_name_db_nogaps <- NULL

# Some renaming
Annual_aggregate_totals <-
  Annual_aggregate_totals %>%
  dplyr::rename(Year = CalYear, SITE = RIS.TrapCode)

# Merge
Annual_aggregate_totals_2 <- merge(Annual_aggregate_totals, Site_info_2, by = c("SITE"))
Annual_aggregate_totals_3 <- merge(Annual_aggregate_totals_2, Site_year_completeness_2, by = c("SITE", "Year"))

# Check 
names(Annual_aggregate_totals_3)

# Now cut down data to desirable sites and site-years
# Get rid of anything outside the 1968 - 2016 range and also the Ireland and Channel Isles sites
# and keep only "Complete" site-years
Annual_aggregate_totals_4 <- 
  Annual_aggregate_totals_3 %>%
  dplyr::filter(Year >= 1968 & Year <= 2016) %>%
  dplyr::filter(Region != "Channel Islands") %>%
  dplyr::filter(Region != "Ireland") %>%
  dplyr::filter(Complete_type_Paul == "Complete") 

# Make a new variables for Actual_years
Actual_years_ran <-
  Annual_aggregate_totals_4 %>%
  dplyr::group_by(SITE) %>%
  dplyr::summarise(Actual_years = length(unique(Year)))

# Merge it on
Annual_aggregate_totals_5 <- merge(Annual_aggregate_totals_4, Actual_years_ran, by = "SITE")

# And cut it down to sites with at least three Actual_years running 
Annual_aggregate_totals_6 <- 
  Annual_aggregate_totals_5 %>%
  dplyr::filter(Actual_years >= 3)

# Make Annual_count an interger too
Annual_aggregate_totals_6$Annual_count <- as.integer(round(Annual_aggregate_totals_6$Annual_count))

# Make an fSITE number so the spaces in the site names doesn't mess things up
Annual_aggregate_totals_6$fSITE <- as.factor(paste0("S_",Annual_aggregate_totals_6$SITE))

# Make a year factor for a random effect
Annual_aggregate_totals_6$fYear <- as.factor(paste0("Y_", Annual_aggregate_totals_6$Year))

# Get some summary statistics.

# How many moths in all?
sum(Annual_aggregate_totals_6$Annual_count) # 8,829,484 moths


# # #

# 2a) Modelling - aggregate abundance - All sites ####

# How many sites and site-years?
Annual_aggregate_totals_6 %>% #   group_by(Habitat_500m) %>%
  dplyr::summarise(Number = length(unique(fSITE))) # 266 sites

# How many sites and site years?
Annual_aggregate_totals_6 %>% #   group_by(Habitat_500m) %>%
  dplyr::mutate(Site_year = paste0(fSITE,"_",fYear)) %>%
  dplyr::summarise(Number = length(unique(Site_year))) # 3055 site-years

3055/266 # mean of 11.5 years per site

# ~~~ Wiggly GAMM abundance ####


# Run a GAMM with a non-linear year effect
Overall_ab_GAMM_wiggly <- gam(Annual_count ~ s(Year) + 
                                  s(fYear, bs = "re") +
                                  s(Year, fSITE, bs = "re"),
                                family = nb,
                                data = Annual_aggregate_totals_6) # Takes a while because of the nb

# Model checks
Ab_viz = getViz(Overall_ab_GAMM_wiggly, nsim = 0)
print(plot(Ab_viz), pages = 1) # One of the site random effects a little off. Clear decline
check(Ab_viz) # QQ plot not great

# Plot model predictions (Fig. S2a)
plot_smooth(Overall_ab_GAMM_wiggly, view = "Year", main = "Total abundance", transform = "exp", ylim = c(0,3000), ylab = "Total abundance") # Quite linear
# Saved as 5 x 5 inch pdf


newX <- expand.grid(Year = seq(from = min(Annual_aggregate_totals_6$Year), to = max(Annual_aggregate_totals_6$Year),
                               length = 49), fSITE = "S_22", fYear = "Y_1968") # (Selecting 
# random effects at random. I have tested that changing random effects has no effect on predictions)
newY <- predict(Overall_ab_GAMM_wiggly, newdata = newX, se.fit = TRUE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)
addXY_2 <-
  addXY %>%
  mutate(fit = exp(fit),
         lwr = exp(fit - 1.96*se.fit), 
         upr = exp(fit + 1.96*se.fit))
  
Abundance_perc_change <- (addXY_2$fit[addXY_2$Year == 2016] - addXY_2$fit[addXY_2$Year == 1968])/
  addXY_2$fit[addXY_2$Year == 1968]*100
Abundance_perc_change
# -33.91189% 

anova(Overall_ab_GAMM_wiggly)
#                 edf  Ref.df   Chi.sq  p-value
#s(Year)         1.010   1.011    19.67 1.01e-05

AICc(Overall_ab_GAMM_wiggly) # 48543.94


# # # 

#


# # #

# ~~~ Linear GAMM abundance ####

# Run a linear GAMM
Overall_ab_GAMM_linear <- gam(Annual_count ~ Year + 
                                  s(fYear, bs = "re") +
                                  s(Year, fSITE, bs = "re"),
                                family = nb,
                                data = Annual_aggregate_totals_6)

# Do the checks and plot
Ab_linear_viz = getViz(Overall_ab_GAMM_linear, nsim = 0)
print(plot(Ab_linear_viz), pages = 1) # A few outlying sites
check(Ab_linear_viz) # QQ plot dodgy

summary(Overall_ab_GAMM_linear)
#Parametric coefficients:
#               Estimate Std. Error z value Pr(>|z|)    
#  (Intercept) 24.783324   3.879914   6.388 1.69e-10 ***
#  Year        -0.008629   0.001948  -4.430 9.43e-06 ***

# Year has sig pos effect
anova(Overall_ab_GAMM_linear)
# Parametric Terms:
#      df Chi.sq  p-value
# Year  1  19.62 9.43e-06

# AICcs
AICc(Overall_ab_GAMM_linear) # 48541.55


# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Overall_ab_GAMM_linear, pairwise ~ NULL, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Overall_ab_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

# Put back onto response scale
addXY$newY <- exp(addXY$newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(Perc_change)
Just_percs # -33.9%

# Merge on to Emtrends_df_2
Ab_overall_df <- merge(Emtrends_df_2, Just_percs)

Ab_overall_df$Measure <- "Abundance"

ggplot(data = Ab_overall_df, aes(x = Measure, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Total abundance")
# This plot will be combined with other plots and saved at  the end of this script

# # #

# ~~~~~~ Effect of deer damage in woodland on abundance ####

# Run a linear GAMM
BL_woodland_ab_GAMM_linear <- gam(Annual_count ~ Year*Deer_damage_estimate + 
                                s(fYear, bs = "re") +
                                s(Year, fSITE, bs = "re"),
                              family = nb,
                              data = subset(Annual_aggregate_totals_6,  Habitat_500m == "Deciduous_woodland"))

anova(BL_woodland_ab_GAMM_linear)
#   Parametric Terms:
#                           df Chi.sq p-value
# Year                       1 10.455 0.00122
# Deer_damage_estimate       1  3.531 0.06022
# Year:Deer_damage_estimate  1  3.448 0.06335

# # #

#  ~~~~~**Site exclusion loop - jackknife sensitivity analysis ** ####

# This loop removes one site from the dataset at a time and reruns the modl without it. This
# is to see if there are any individual sites having overwhelming effects on the model

# Use the df Ab_overall_df as made above to store results. The 'all sites' result will
# be included as a control
Ab_overall_df_exclusions <- Ab_overall_df

Ab_overall_df_exclusions$Excluded_site <- 0


# Make a vector of sites 
Site_vec <- unique(Annual_aggregate_totals_6$SITE) # 266 sites

# Run the loop
for(i_SITE in 1:length(Site_vec)){
  
  print(paste0("Excluding site ", Site_vec[i_SITE]))
  
  # Run model with one site missing
  Overall_ab_GAMM_linear_excl <- gam(Annual_count ~ Year + 
                                  s(fYear, bs = "re") +
                                  s(Year, fSITE, bs = "re"),
                                family = nb,
                                data = subset(Annual_aggregate_totals_6, SITE != Site_vec[i_SITE]))
  
  # Get the estimated year coefficients with their 95% CIs and make them a df
  Emtrends_df <- emtrends(Overall_ab_GAMM_linear_excl, pairwise ~ NULL, var = "Year")
  Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)
  
  # Get perc change using predict for the first and last years
  newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") 
  # (Selecting random effects at random. I have tested that changing random effects has no 
  #       effect on predictions)
  
  # If S_22 is excluded then I cannot use this as a random effect. Use an 'if' to get round this
  if(Site_vec[i_SITE] != 22){
    newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") 
  }else{
    newX <- expand.grid(Year = c(1968,2016), fSITE = "S_1", fYear = "Y_1968") 
  }
  
  newY <- predict(Overall_ab_GAMM_linear_excl, newdata = newX, se.fit = FALSE, 
                  exclude = c("s(fYear)","s(Year,fSITE)"))
  addXY <- data.frame(newX, newY)
  
  # Put back onto response scale
  addXY$newY <- exp(addXY$newY)
  
  Perc_change <- 
    addXY %>%
    pivot_wider(names_from = Year, values_from = newY) %>%
    rename(First = "1968", Last = "2016") %>%
    mutate(Perc_change = ((Last-First)/First)*100)
  
  Just_percs <- 
    Perc_change %>%
    dplyr::select(Perc_change)
  print(Just_percs)
  
  # Merge on to Emtrends_df_2
  Ab_overall_df_2 <- merge(Emtrends_df_2, Just_percs)
  
  Ab_overall_df_2$Measure <- "Abundance"
  Ab_overall_df_2$Excluded_site <- Site_vec[i_SITE]
 
  
  # Bind this on to the existing one
  Ab_overall_df_exclusions <- dplyr::bind_rows(Ab_overall_df_exclusions, 
                                               Ab_overall_df_2)
  
  # Print progress to screen
  print(paste0("Total progress: ", round(i_SITE/length(Site_vec)*100), "%"))
  
  
  # Save csv (optional)
  ##write.csv(Ab_overall_df_exclusions, "Data/Jackknife_results/Ab_overall_df_exclusions.csv", row.names = FALSE)
  
  print(Sys.time())
  
  
}


# Read back in if need be
##Ab_overall_df_exclusions <- read.csv("Data/Jackknife_results/Ab_overall_df_exclusions.csv")

# Add on Actual_years
Ab_overall_df_exclusions$SITE <- Ab_overall_df_exclusions$Excluded_site
Runs <- Annual_aggregate_totals_6 %>% dplyr::select(SITE, Actual_years) %>% distinct()
Exclusions <- merge(Ab_overall_df_exclusions, Runs, by = "SITE", all.x = TRUE)

ggplot(Exclusions, aes(x = Actual_years, y = Perc_change)) +
  geom_point(alpha = 0.5, size = 2) + 
  xlab("Years running of excluded trap") +
  ylab("Percentage change (1968 - 2016)")
# Saved as 3 x 3 inch pdf (Fig S11a)

# # # 

# # #

# 2b) Modelling - aggregate abundance - The north and south ####

# How many sites and site-years?
Annual_aggregate_totals_6 %>%
  group_by(N_S) %>%
  dplyr::summarise(Number = length(unique(fSITE))) 
# N_S   Number
# North     89
# South    177

# How many sites and site years?
Annual_aggregate_totals_6 %>%
  group_by(N_S) %>%
  dplyr::mutate(Site_year = paste0(fSITE,"_",fYear)) %>%
  dplyr::summarise(Number = length(unique(Site_year)))
# N_S   Number
# North   1052
# South   2003

# Mean years per site were:
1052/89 # 11.8 north
2003/177 # 11.3 south

# # #

# ~~~ Wiggly GAMM abundance ####

# Make N_S a factor
Annual_aggregate_totals_6$N_S <- as.factor(Annual_aggregate_totals_6$N_S)

# Run a GAMM with a non-linear year effect and region interaction
Region_ab_GAMM_wiggly <- gam(Annual_count ~ 
                                 s(Year, by = N_S) + 
                                 N_S + 
                                 s(fYear, bs = "re") + 
                                 s(Year, fSITE, bs = "re"),
                                 family = nb,
                               data = Annual_aggregate_totals_6) # Takes a while to run...

# How does it look?
Ab_region_viz = getViz(Region_ab_GAMM_wiggly, nsim = 0)
print(plot(Ab_region_viz), pages = 1) # Random effects a little off. Clear decline in both.
check(Ab_region_viz) # Residuals good enough

# Plot model predictions (Fig. S3a)
plot_smooth(Region_ab_GAMM_wiggly, view = "Year",  plot_all = c("N_S"), main = "Total abundance", 
            ylab = "Total abundance", ylim = c(0,3500), transform = "exp")
# Saved as 5 x 5 inch pdf 

summary(Region_ab_GAMM_wiggly)
#             Approximate significance of smooth terms:
# edf  Ref.df    Chi.sq  p-value    
# s(Year):N_SNorth   1.133   1.193     5.164   0.0244 *  
# s(Year):N_SSouth   2.528   3.101    29.384 2.39e-06 ***

 # Both smoothers sig. North not strongly so

AICc(Region_ab_GAMM_wiggly) # 48538.17


# # # 

# ~~~~~~Poptrends for abundance and region ####

# To see if there are any notable periods of decline and increase, I will run a poptrend for each

North_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                      s(Year, fSITE, bs = "re"), 
                    family = nb, data = subset(Annual_aggregate_totals_6, N_S == "North"))

South_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                      s(Year, fSITE, bs = "re"), 
                    family = nb, data = subset(Annual_aggregate_totals_6, N_S == "South"))

change(North_pop, 1968, 2016) # -24% (-41%, -2.5%)
change(South_pop, 1968, 2016) # -38% (-49%, -23%)

plot(North_pop)
plot(South_pop)
# No non-linear goings on, so I won't plot these

# # #


# ~~~ Linear GAMM abundance ####

# Run a linear GAMM
Region_ab_GAMM_linear <- gam(Annual_count ~ Year*N_S + 
                                 s(fYear, bs = "re") +
                                 s(Year, fSITE, bs = "re"),
                                 family = nb,
                               data = Annual_aggregate_totals_6)

# Do the checks and plot
Ab_region_linear_viz = getViz(Region_ab_GAMM_linear, nsim = 0)
print(plot(Ab_region_linear_viz), pages = 1) # Random effects good
check(Ab_region_linear_viz)

# Is the N_S*Year interaction sig? anova(gam) is analogous to drop1(glm)
anova(Region_ab_GAMM_linear)
# Parametric Terms:
#          df Chi.sq p-value
# Year      1  7.010 0.00811
# N_S       1  5.993 0.01436
# Year:N_S  1  5.911 0.01505

# There is a sig Year*N_S interaction

AICc(Region_ab_GAMM_linear) # 48537.04


# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Region_ab_GAMM_linear, pairwise ~ N_S, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Annual_aggregate_totals_6$N_S)), 
                    fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Region_ab_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

# Put back onto response scale
addXY$newY <- exp(addXY$newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(N_S, Perc_change) 
Just_percs
#N_S   Perc_change
# 1 North       -24.7
# 2 South       -37.5

# Merge on to Emtrends_df_2
Ab_region_df <- merge(Emtrends_df_2, Just_percs)

Ab_region_df$Measure <- "Abundance"

# Reorder north and south

Ab_region_df$N_S <- factor(Ab_region_df$N_S, levels = c("South", "North"))

ggplot(data = Ab_region_df, aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Abundance")
# This plot will be added to other plots and saved at the end of this script

# # # 


#  ~~~~~**Site exclusion loop - jackknife sensitivity analysis ** ####

# This loop removes one site from the dataset at a time and reruns the modl without it. This
# is to see if there are any individual sites having overwhelming effects on the model

# Use the df Ab_region_df as made above to store results. The 'no sites excluded' result will
# be included as a control
Ab_region_df_exclusions <- Ab_region_df

Ab_region_df_exclusions$Excluded_site <- 0

# Make a vector of sites 
Site_vec <- unique(Annual_aggregate_totals_6$SITE) # 266 sites

# Run the loop
for(i_SITE in 1:length(Site_vec)){
  
  print(paste0("Excluding site ", Site_vec[i_SITE]))
  
  # Run model with one site missing
  Region_ab_GAMM_linear_excl <- gam(Annual_count ~ Year*N_S + 
                                       s(fYear, bs = "re") +
                                       s(Year, fSITE, bs = "re"),
                                     family = nb,
                                     data = subset(Annual_aggregate_totals_6, SITE != Site_vec[i_SITE]))
  
  
  # Get the estimated year coefficients with their 95% CIs and make them a df
  Emtrends_df <- emtrends(Region_ab_GAMM_linear_excl, pairwise ~ N_S, var = "Year")
  Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)
  
  # Get perc change using predict for the first and last years
  
  # If S_22 is excluded then I cannot use this as a random effect. Use an 'if' to get round this
  if(Site_vec[i_SITE] != 22){
    newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Annual_aggregate_totals_6$N_S)), 
                        fSITE = "S_22", fYear = "Y_1968") 
  }else{
    newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Annual_aggregate_totals_6$N_S)), 
                        fSITE = "S_1", fYear = "Y_1968")
  }
  
  newY <- predict(Region_ab_GAMM_linear_excl, newdata = newX, se.fit = FALSE, 
                  exclude = c("s(fYear)","s(Year,fSITE)"))
  addXY <- data.frame(newX, newY)
  
  # Put back onto response scale
  addXY$newY <- exp(addXY$newY)
  
  Perc_change <- 
    addXY %>%
    pivot_wider(names_from = Year, values_from = newY) %>%
    rename(First = "1968", Last = "2016") %>%
    mutate(Perc_change = ((Last-First)/First)*100)
  
  Just_percs <- 
    Perc_change %>%
    dplyr::select(N_S, Perc_change)
  print(Just_percs)
  
  # Merge on to Emtrends_df_2
  Ab_region_df_2 <- merge(Emtrends_df_2, Just_percs)
  
  Ab_region_df_2$Measure <- "Abundance"
  Ab_region_df_2$Excluded_site <- Site_vec[i_SITE]
  

  # Bind this on to the existing one
  Ab_region_df_exclusions <- dplyr::bind_rows(Ab_region_df_exclusions, 
                                               Ab_region_df_2)
  
  # Save csv (optional)
  ##write.csv(Ab_region_df_exclusions, "Data/Jackknife_results/Ab_region_df_exclusions.csv", row.names = FALSE)
  
  # Print progress to screen
  print(paste0(round(i_SITE/length(Site_vec)*100, digits = 1), "% done"))
  

}

# Read back in if needs be
##Ab_region_df_exclusions <- read.csv("Data/Jackknife_results/Ab_region_df_exclusions.csv")

# Add on Actual_years
Ab_region_df_exclusions$SITE <- Ab_region_df_exclusions$Excluded_site
Runs <- Annual_aggregate_totals_6 %>% dplyr::select(SITE, Actual_years, Site_name_db) %>% distinct()
Exclusions_region <- merge(Ab_region_df_exclusions, Runs, by = "SITE", all.x = TRUE)

# Cut out all site-region combos where the site is not in the region being modelled
Site_and_region <- 
  Site_info_2 %>%
  dplyr::select(SITE, N_S)

Exclusions_region_2 <- merge(Site_and_region, Exclusions_region, by = c("SITE", "N_S"), all.x = TRUE)

# Get rid of NAs
Exclusions_region_2 <- na.omit(Exclusions_region_2)

# Plot (Fig. S11b)
ggplot(Exclusions_region_2, aes(x = Actual_years, y = Perc_change)) +
  geom_point(alpha = 0.5, size = 2) + 
  facet_wrap(~N_S) +
  ylab("Percentage change (1968 - 2016)") +
  xlab("Years running of excluded trap") +
  geom_hline(yintercept = 0, linetype = "dashed")
# Saved as 3 x 6 inch pdf 
  

# Plot effect of Actual_years with error bars
ggplot(data = Exclusions_region_2, aes(x = Actual_years, y = Year.trend)) +
  geom_point(position = position_dodge(0.3)) + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), position = position_dodge(0.3), width = 0.1) +
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Total abundance") +
  facet_wrap(~N_S)
# Trends in the south are very robust to the removal of single sites. 
# In the north, the decline could be as mild as -18% (non-sig) and as severe as -40%! 
# With no sites removed, the trend in the north was -25%, so that's a difference of 7% in one
# direction and 15% in the other. 


# 2c) Modelling - aggregate abundance - 7 habitats ####

# How many sites and site-years?
Annual_aggregate_totals_6 %>%
  group_by(Habitat_500m) %>%
  dplyr::summarise(Number = length(unique(fSITE))) 
#Habitat_500m         Number
#1 Arable                   60
#2 Conifer_plantation       11
#3 Deciduous_woodland       42
#4 Improved_grassland       76
#5 Lowland_semi_natural     12
#6 Upland                   12
#7 Urban                    53

# Site years
Annual_aggregate_totals_6 %>%
  group_by(Habitat_500m) %>%
  dplyr::mutate(Site_year = paste0(fSITE,"_",fYear)) %>%
  dplyr::summarise(Number = length(unique(Site_year)))
#   Habitat_500m         Number
#1 Arable                  609
#2 Conifer_plantation      138
#3 Deciduous_woodland      566
#4 Improved_grassland      868
#5 Lowland_semi_natural    158
#6 Upland                  155
#7 Urban                   561


# ~~~ Wiggly GAMM abundance ####

# Make Habitat_500m a factor
Annual_aggregate_totals_6$Habitat_500m <- as.factor(Annual_aggregate_totals_6$Habitat_500m)

# Run a GAMM with a non-linear year effect and region interaction
Habitat_ab_GAMM_wiggly <- gam(Annual_count ~ 
                               s(Year, by = Habitat_500m) + 
                                Habitat_500m + 
                               s(fYear, bs = "re") + 
                               s(Year, fSITE, bs = "re"),
                             family = nb,
                             data = Annual_aggregate_totals_6) # Takes a while to run...

# How does it look?
Ab_habitat_viz = getViz(Habitat_ab_GAMM_wiggly, nsim = 0)
print(plot(Ab_habitat_viz), pages = 1) # Only one random site effect off. Smooths as expected
check(Ab_habitat_viz) # QQ plot dodgy

anova(Habitat_ab_GAMM_wiggly)
#s(Year):Habitat_500mArable                 1.012   1.023     0.395 0.537307
#s(Year):Habitat_500mConifer_plantation     3.720   4.681    11.981 0.034529
#s(Year):Habitat_500mDeciduous_woodland     3.437   4.307    61.613  < 2e-16
#s(Year):Habitat_500mImproved_grassland     1.001   1.002     9.642 0.001913
#s(Year):Habitat_500mLowland_semi_natural   1.339   1.600     3.470 0.149948
#s(Year):Habitat_500mUpland                 2.576   3.212    20.830 0.000165
#s(Year):Habitat_500mUrban                  4.914   6.012    52.256  < 2e-16

AICc(Habitat_ab_GAMM_wiggly) # 48458.11

# Make a reduced model with no interaction effect to get significance of year*habitat interaction
Reduced_Habitat_ab_GAMM_wiggly <- gam(Annual_count ~ 
                                s(Year) + 
                                Habitat_500m + 
                                s(fYear, bs = "re") + 
                                s(Year, fSITE, bs = "re"),
                              family = nb,
                              data = Annual_aggregate_totals_6)

# What is the significance of dropping the Year*Habitat interaction?
anova(Habitat_ab_GAMM_wiggly, Reduced_Habitat_ab_GAMM_wiggly, test= "Chisq")
#     Resid. Df Resid. Dev      Df Deviance  Pr(>Chi)    
#1    2713.2      47720                               
#2    2740.5      47863 -27.313  -142.52 < 2.2e-16 ***

# # #

# ~~~~~~Plotting wiggly abundance (Habitat) for paper ####

# Make a new data frame to predict from. 
newX <- expand.grid(Year = seq(from = min(Annual_aggregate_totals_6$Year), to = max(Annual_aggregate_totals_6$Year)),
                    Habitat_500m = levels(as.factor(Annual_aggregate_totals_6$Habitat_500m)),
                    fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random)

# Get predicted values
newY <- predict(Habitat_ab_GAMM_wiggly, newdata = newX, se.fit = TRUE, 
                exclude = c("s(fYear)","s(Year,fSITE)")) # I have tested changing randoms makes no diff

# Add them together
addXY <- data.frame(newX, newY)

# Rename them and make the CIs
addXY_2 <-
  addXY %>%
  mutate(Abundance = exp(fit),
         lwr = exp(fit - 1.96*se.fit), 
         upr = exp(fit + 1.96*se.fit))

# Rename and reorder Habitat levels in prep

# Make it a character so it can be changed
addXY_2$Habitat_500m <- as.character(addXY_2$Habitat_500m)

# Correct names
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Improved_grassland"] <- "Improved grassland"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'"
# Get currentlevels
levels(as.factor(addXY_2$Habitat_500m))
# Order them
addXY_2$Habitat_500m <- factor(addXY_2$Habitat_500m, 
                                     levels = c("Arable","Broadleaf woodland","Conifer plantation",
                                                "Improved grassland","'Other semi-natural'",
                                                "Upland","Urban"))

# Final plot (Fig. S4a)
ggplot(data = addXY_2, aes(x = Year, y = Abundance)) +
  geom_hline(yintercept =  mean(exp(newY$fit)), linetype = "dashed") +
  geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity", colour = "black") +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Total annual abundance") +
  facet_wrap(~Habitat_500m, nrow = 2) +
  ylim(0,6700)
# Saved as 5 x 9 inch PDF


# # # 

# ~~~~~~Poptrends for abundance and habitat ####

# TO see if there are any notable periods of decline and increase, I will run a poptrend for each

Arable_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                      s(Year, fSITE, bs = "re"), 
                    family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Arable"))

Grass_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                       s(Year, fSITE, bs = "re"), 
                     family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Improved_grassland"))

Woods_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                       s(Year, fSITE, bs = "re"), 
                     family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Deciduous_woodland"))

Conifer_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                       s(Year, fSITE, bs = "re"), 
                     family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Conifer_plantation"))

Semi_nat_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                       s(Year, fSITE, bs = "re"), 
                     family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Lowland_semi_natural"))

Urban_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                       s(Year, fSITE, bs = "re"), 
                     family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Urban"))

Upland_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                       s(Year, fSITE, bs = "re"), 
                     family = nb, data = subset(Annual_aggregate_totals_6, Habitat_500m == "Upland"))

# Plot Fig. S5
plot(Arable_pop, main = "Arable", ylim = c(0.3, 1.8))
plot(Grass_pop, main = "Improved grassland", ylim = c(0.3, 1.8))
plot(Woods_pop, main = "Broadleaf woodland", ylim = c(0.3, 1.8))
plot(Conifer_pop, main = "Conifer plantation", ylim = c(0.3, 1.8))
plot(Semi_nat_pop, main = "'Other semi-natural", ylim = c(0.3, 1.8))
plot(Urban_pop, main = "Urban", ylim = c(0.3, 1.8))
plot(Upland_pop, main = "Upland", ylim = c(0.3, 1.8))

# All saved as 5 x 5 inch pdfs


# # # 


# ~~~ Linear GAMM abundance ####

# Run a linear GAMM
Habitat_ab_GAMM_linear <- gam(Annual_count ~ Year*Habitat_500m + 
                               s(fYear, bs = "re") +
                               s(Year, fSITE, bs = "re"),
                             family = nb,
                             data = Annual_aggregate_totals_6)

# Do the checks and plot
Ab_habitat_linear_viz = getViz(Habitat_ab_GAMM_linear, nsim = 0)
print(plot(Ab_habitat_linear_viz), pages = 1) # Random effects good
check(Ab_habitat_linear_viz) # Residuals good enough

# anova(gam) is analogous to drop1(glm)
anova(Habitat_ab_GAMM_linear)
# Parametric Terms:
#                   df Chi.sq  p-value
#Year               1  0.443    0.506
#Habitat_500m       6 40.369 3.85e-07
#Year:Habitat_500m  6 40.381 3.83e-07

# There is a sig Year*Habitat interaction

AICc(Habitat_ab_GAMM_linear) # 48512.61

# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Habitat_ab_GAMM_linear, pairwise ~ Habitat_500m, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(as.factor(Annual_aggregate_totals_6$Habitat_500m))), 
                    fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Habitat_ab_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

# Put back onto response scale
addXY$newY <- exp(addXY$newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(Habitat_500m, Perc_change) 
Just_percs
#  Habitat_500m         Perc_change
#1 Arable                     -7.44
#2 Conifer_plantation        -40.5 
#3 Deciduous_woodland        -51.2 
#4 Improved_grassland        -28.0 
#5 Lowland_semi_natural      -28.5 
#6 Upland                    -42.6 
#7 Urban                     -44.4 

# Merge on to Emtrends_df_2
Ab_habitat_df <- merge(Emtrends_df_2, Just_percs)

Ab_habitat_df$Measure <- "Abundance"

# Reorder and rename habitats
# Turn them to a character so they can be renamed
Ab_habitat_df$Habitat_500m <- as.character(Ab_habitat_df$Habitat_500m)
# Correct names
Ab_habitat_df$Habitat_500m[Ab_habitat_df$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation"
Ab_habitat_df$Habitat_500m[Ab_habitat_df$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland"
Ab_habitat_df$Habitat_500m[Ab_habitat_df$Habitat_500m == "Improved_grassland"] <- "Improved grassland"
Ab_habitat_df$Habitat_500m[Ab_habitat_df$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'"
# Get currentlevels
levels(as.factor(Ab_habitat_df$Habitat_500m))
# Order them
Ab_habitat_df$Habitat_500m <- factor(Ab_habitat_df$Habitat_500m, 
                                           levels = c("Urban", "Upland", "'Other semi-natural'",
                                                      "Improved grassland", "Conifer plantation",
                                                      "Broadleaf woodland", "Arable"))

# Plot
ggplot(data = Ab_habitat_df, aes(x = Habitat_500m, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Abundance")
 # This plot will be added to other plots later and saved at the end of the script



# # # 

#  ~~~~~**Site exclusion loop - jackknife sensitivity analysis** ####

# This loop removes one site from the dataset at a time and reruns the modl without it. This
# is to see if there are any individual sites having overwhelming effects on the model

# Use the df Ab_habitat_df as made above to store results. The 'no sites excluded' result will
# be included as a control
Ab_habitat_df_exclusions <- Ab_habitat_df

Ab_habitat_df_exclusions$Excluded_site <- 0

# Make a vector of sites 
Site_vec <- unique(Annual_aggregate_totals_6$SITE) # 266 sites

# Run the loop
for(i_SITE in 1:length(Site_vec)){
  
  print(paste0("Excluding site ", Site_vec[i_SITE]))
  
  # Run model with one site missing
  Habitat_ab_GAMM_linear_excl <- gam(Annual_count ~ Year*Habitat_500m + 
                                       s(fYear, bs = "re") +
                                       s(Year, fSITE, bs = "re"),
                                     family = nb,
                                     data = subset(Annual_aggregate_totals_6, SITE != Site_vec[i_SITE]))

  
  # Get the estimated year coefficients with their 95% CIs and make them a df
  Emtrends_df <- emtrends(Habitat_ab_GAMM_linear_excl, pairwise ~ Habitat_500m, var = "Year")
  Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)
  
  # Get perc change using predict for the first and last years
  
  # If S_22 is excluded then I cannot use this as a random effect. Use an 'if' to get round this
  if(Site_vec[i_SITE] != 22){
    newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(as.factor(Annual_aggregate_totals_6$Habitat_500m))), 
                        fSITE = "S_22", fYear = "Y_1968") 
  }else{
    newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(as.factor(Annual_aggregate_totals_6$Habitat_500m))), 
                        fSITE = "S_1", fYear = "Y_1968")
  }
  
  newY <- predict(Habitat_ab_GAMM_linear_excl, newdata = newX, se.fit = FALSE, 
                  exclude = c("s(fYear)","s(Year,fSITE)"))
  addXY <- data.frame(newX, newY)
  
  # Put back onto response scale
  addXY$newY <- exp(addXY$newY)
  
  Perc_change <- 
    addXY %>%
    pivot_wider(names_from = Year, values_from = newY) %>%
    rename(First = "1968", Last = "2016") %>%
    mutate(Perc_change = ((Last-First)/First)*100)
  
  Just_percs <- 
    Perc_change %>%
    dplyr::select(Habitat_500m, Perc_change)
  print(Just_percs)
  
  # Merge on to Emtrends_df_2
  Ab_habitat_df_2 <- merge(Emtrends_df_2, Just_percs)
  
  Ab_habitat_df_2$Measure <- "Abundance"
  Ab_habitat_df_2$Excluded_site <- Site_vec[i_SITE]
  
  # Reorder and rename habitats
  # Turn them to a character so they can be renamed
  Ab_habitat_df_2$Habitat_500m <- as.character(Ab_habitat_df_2$Habitat_500m)
  # Correct names
  Ab_habitat_df_2$Habitat_500m[Ab_habitat_df_2$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation"
  Ab_habitat_df_2$Habitat_500m[Ab_habitat_df_2$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland"
  Ab_habitat_df_2$Habitat_500m[Ab_habitat_df_2$Habitat_500m == "Improved_grassland"] <- "Improved grassland"
  Ab_habitat_df_2$Habitat_500m[Ab_habitat_df_2$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'"
  
  # Bind this on to the existing one
  Ab_habitat_df_exclusions <- dplyr::bind_rows(Ab_habitat_df_exclusions, 
                                               Ab_habitat_df_2)
  
  # Save csv (optional)
  ##write.csv(Ab_habitat_df_exclusions, "Data/Jackknife_results/Ab_habitat_df_exclusions.csv", row.names = FALSE)
  
  # Print prgress to screen
  print(paste0(round(i_SITE/length(Site_vec)*100, digits = 1), "% done"))
  

}


# Read back in if needs be
##Ab_habitat_df_exclusions <- read.csv("Data/Jackknife_results/Ab_habitat_df_exclusions.csv")

# Add on Actual_years
Ab_habitat_df_exclusions$SITE <- Ab_habitat_df_exclusions$Excluded_site
Runs <- Annual_aggregate_totals_6 %>% dplyr::select(SITE, Actual_years) %>% distinct()
Exclusions_habitat <- merge(Ab_habitat_df_exclusions, Runs, by = "SITE", all.x = TRUE)

# Cut out all site-region combos where the site is not in the region being modeled
Site_and_hab <- 
  Site_info_2 %>%
  dplyr::select(SITE, Habitat_500m)

# Rename habitats to match other data frame
Site_and_hab$Habitat_500m <- as.factor(Site_and_hab$Habitat_500m)
levels(as.factor(Site_and_hab$Habitat_500m))
levels(Site_and_hab$Habitat_500m) <- c("Arable", "Conifer plantation", "Broadleaf woodland", "Improved grassland", 
                                       "'Other semi-natural'", "Upland", "Urban")
levels(as.factor(Site_and_hab$Habitat_500m))

# Merge on to the new data frame
Exclusions_habitat_2 <- merge(Site_and_hab, Exclusions_habitat, by = c("SITE", "Habitat_500m"), all.x = TRUE)

# Get rid of NAs
Exclusions_habitat_2 <- na.omit(Exclusions_habitat_2)


# Plot (Fig. S11c)
ggplot(Exclusions_habitat_2, aes(x = Actual_years, y = Perc_change)) +
  geom_point(alpha = 0.5, size = 2) + 
  facet_wrap(~Habitat_500m) +
  ylab("Percentage change (1968 - 2016)") +
  xlab("Years running of excluded trap") +
  geom_hline(yintercept = 0, linetype = "dashed")
# Saved as 6 x 6 inch pdf

# Plot effect of Actual_years with error bars
ggplot(data = Exclusions_habitat, aes(x = Actual_years, y = Year.trend)) +
  geom_point(position = position_dodge(0.3)) + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), position = position_dodge(0.3), width = 0.1) +
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Total abundance") +
  facet_wrap(~Habitat_500m)
# Of the seven habitat types, only two (Broadleaf woodland and Urban) were truly robust to removal of 
# single sites. The trend in 'other semi-natural' habitat could be altered quite severely by
# the removal of single sites)


# # #


# 2d) Modelling - abundance - Habitat*region interaction ####

# Data prep

# Get only the woodland and grassland sites and make a new Hab_region var
Annual_aggregate_totals_hab_region <-
  Annual_aggregate_totals_6 %>%
  dplyr::filter(Habitat_500m == "Deciduous_woodland" | Habitat_500m == "Improved_grassland") %>%
  dplyr::mutate(Hab_region = paste0(Habitat_500m,"_",N_S))


#  ~~~~~**Site exclusion loop including improved grassland - jackknife sensitivity analysis  ** ####

# This loop removes one site from the dataset at a time and reruns the modl without it. This
# is to see if there are any individual sites having overwhelming effects on the model

# I will not include inproved grassland in the final model as the estimates were much to unreliable
# for this habitat in the north. This section is just to demonstrate why I left improved grassland out.

# Run a linear GAMM (full model with no excluded sites)
Hab_region_ab_GAMM_linear <- gam(Annual_count ~ Year*Habitat_500m*N_S + 
                                   s(fYear, bs = "re") +
                                   s(Year, fSITE, bs = "re"),
                                 family = nb,
                                 data = Annual_aggregate_totals_hab_region)

# Get model predictions for first and last year
newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(as.factor(Annual_aggregate_totals_hab_region$N_S))), 
                    Habitat_500m = c(levels(droplevels(as.factor(Annual_aggregate_totals_hab_region$Habitat_500m)))),
                    fSITE = "S_1", fYear = "Y_1968")

newY <- predict(Hab_region_ab_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))

addXY <- data.frame(newX, newY)

# Put back onto response scale
addXY$newY <- exp(addXY$newY)

# Get percentage change estimates
Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(Habitat_500m, N_S, Perc_change)
Just_percs

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Hab_region_ab_GAMM_linear, pairwise ~ N_S|Habitat_500m, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Merge on to Emtrends_df_2
Ab_hab_region_df <- merge(Emtrends_df_2, Just_percs)

Ab_hab_region_df$Measure <- "Abundance"

# Recreate and rename hab_regions
Ab_hab_region_df$Hab_region[Ab_hab_region_df$Habitat_500m == "Deciduous_woodland" &
                              Ab_hab_region_df$N_S == "North"] <- "Broadleaf woodland (North)"
Ab_hab_region_df$Hab_region[Ab_hab_region_df$Habitat_500m == "Deciduous_woodland" &
                              Ab_hab_region_df$N_S == "South"] <- "Broadleaf woodland (South)"
Ab_hab_region_df$Hab_region[Ab_hab_region_df$Habitat_500m == "Improved_grassland" &
                              Ab_hab_region_df$N_S == "North"] <- "Improved grassland (North)"
Ab_hab_region_df$Hab_region[Ab_hab_region_df$Habitat_500m == "Improved_grassland" &
                              Ab_hab_region_df$N_S == "South"] <- "Improved grassland (South)"

# Use the df Ab_hab_region_df as made above to store results. The 'no sites excluded' result will
# be included as a control
Ab_hab_region_df_exclusions <- Ab_hab_region_df

Ab_hab_region_df_exclusions$Excluded_site <- 0


# Make a vector of sites 
Site_vec <- unique(Annual_aggregate_totals_hab_region$SITE) # 118 sites

# Run the loop
for(i_SITE in 1:length(Site_vec)){
  
  # Print progress to screen
  print(paste0("Excluding site ", Site_vec[i_SITE]))
  
  # Run model with one site excluded at a time
  Hab_region_ab_GAMM_linear_excl <- gam(Annual_count ~ Year*N_S*Habitat_500m + 
                                      s(fYear, bs = "re") +
                                      s(Year, fSITE, bs = "re"),
                                    family = nb,
                                    data = subset(Annual_aggregate_totals_hab_region, SITE != Site_vec[i_SITE]))
  
  
  # Get the estimated year coefficients with their 95% CIs and make them a df
  Emtrends_df <- emtrends(Hab_region_ab_GAMM_linear_excl, pairwise ~ N_S|Habitat_500m, var = "Year")
  Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)
  
  # Get perc change using predict for the first and last years
  
  # If S_34 is excluded then I cannot use this as a random effect. Use an 'if' to get round this
  if(Site_vec[i_SITE] != 34){
    newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(as.factor(Annual_aggregate_totals_hab_region$N_S))), 
                        Habitat_500m = c(levels(droplevels(as.factor(Annual_aggregate_totals_hab_region$Habitat_500m)))),
                        fSITE = "S_34", fYear = "Y_1968")
  }else{
    newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(as.factor(Annual_aggregate_totals_hab_region$N_S))), 
                        Habitat_500m = c(levels(droplevels(as.factor(Annual_aggregate_totals_hab_region$Habitat_500m)))),
                        fSITE = "S_1", fYear = "Y_1968")
  }
  
  newY <- predict(Hab_region_ab_GAMM_linear_excl, newdata = newX, se.fit = FALSE, 
                  exclude = c("s(fYear)","s(Year,fSITE)"))
  addXY <- data.frame(newX, newY)
  
  # Put back onto response scale
  addXY$newY <- exp(addXY$newY)
  
  Perc_change <- 
    addXY %>%
    pivot_wider(names_from = Year, values_from = newY) %>%
    rename(First = "1968", Last = "2016") %>%
    mutate(Perc_change = ((Last-First)/First)*100)
  
  Just_percs <- 
    Perc_change %>%
    dplyr::select(Habitat_500m, N_S, Perc_change)
  print(Just_percs)
  
  # Merge on to Emtrends_df_2
  Ab_hab_region_df_2 <- merge(Emtrends_df_2, Just_percs)
  
  Ab_hab_region_df_2$Measure <- "Abundance"
  Ab_hab_region_df_2$Excluded_site <- Site_vec[i_SITE]
  
  # Bind this on to the existing one
  Ab_hab_region_df_exclusions <- dplyr::bind_rows(Ab_hab_region_df_exclusions, 
                                              Ab_hab_region_df_2)
  
  # Recreate and rename hab_regions
  Ab_hab_region_df_exclusions$Hab_region[Ab_hab_region_df_exclusions$Habitat_500m == "Deciduous_woodland" &
                                           Ab_hab_region_df_exclusions$N_S == "North"] <- "Broadleaf woodland (North)"
  Ab_hab_region_df_exclusions$Hab_region[Ab_hab_region_df_exclusions$Habitat_500m == "Deciduous_woodland" &
                                           Ab_hab_region_df_exclusions$N_S == "South"] <- "Broadleaf woodland (South)"
  Ab_hab_region_df_exclusions$Hab_region[Ab_hab_region_df_exclusions$Habitat_500m == "Improved_grassland" &
                                           Ab_hab_region_df_exclusions$N_S == "North"] <- "Improved grassland (North)"
  Ab_hab_region_df_exclusions$Hab_region[Ab_hab_region_df_exclusions$Habitat_500m == "Improved_grassland" &
                                           Ab_hab_region_df_exclusions$N_S == "South"] <- "Improved grassland (South)"
  
  # Save csv (optional)
  ##write.csv(Ab_hab_region_df_exclusions, "Data/Jackknife_results/Ab_hab_region_df_exclusions.csv", row.names = FALSE)
  
  # Print progress to screen
  print(paste0(round(i_SITE/length(Site_vec)*100, digits = 1), "% done"))
  
}

# Read back in if need be
##Ab_hab_region_df_exclusions <- read.csv("Data/Jackknife_results/Ab_hab_region_df_exclusions.csv")

# Add on Actual_years
Ab_hab_region_df_exclusions$SITE <- Ab_hab_region_df_exclusions$Excluded_site
Runs <- 
  Annual_aggregate_totals_hab_region %>% 
  dplyr::select(SITE, Actual_years) %>% 
  distinct()
Exclusions_hab_region <- merge(Ab_hab_region_df_exclusions, Runs, by = "SITE", all.x = TRUE)

# Cut out all site-hab_region combos where the site is not in the region being modeled
Site_and_hab_region <- 
  Site_info_2 %>%
  dplyr::select(SITE, Habitat_500m, N_S) %>%
  dplyr::filter(Habitat_500m == "Deciduous_woodland" | Habitat_500m == "Improved_grassland") %>%
  dplyr::mutate(Hab_region = paste0(Habitat_500m,"_",N_S))

# Rename habitats to match other data frame
Site_and_hab_region$Hab_region <- as.factor(Site_and_hab_region$Hab_region)
levels(as.factor(Site_and_hab_region$Hab_region))
levels(Site_and_hab_region$Hab_region) <- c("Broadleaf woodland (North)", "Broadleaf woodland (South)", "Improved grassland (North)",
                                            "Improved grassland (South)")
levels(as.factor(Site_and_hab_region$Hab_region))

# Merge on to the new data frame
Exclusions_hab_region_2 <- merge(Site_and_hab_region, Exclusions_hab_region, by = c("SITE", "Hab_region"), all.x = TRUE)

# Get rid of NAs
Exclusions_hab_region_2 <- na.omit(Exclusions_hab_region_2)


# Plot (Fig. S11d)
ggplot(Exclusions_hab_region_2, aes(x = Actual_years, y = Perc_change)) +
  geom_point(alpha = 0.5, size = 2) + 
  facet_wrap(~Hab_region) +
  xlab("Years running of excluded trap") +
  ylab("Percentage change (1968 - 2016)")+
  geom_hline(yintercept = 0, linetype = "dashed")
# Saved as 5 x 5 inch pdf



# # # 

# ~~~ Wiggly GAMM abundance (broadleaf woodland only) ####

# Data prep

# Get only the broadleaf woodland sites and make a new Hab_region var
Annual_aggregate_totals_hab_region <-
  Annual_aggregate_totals_6 %>%
  dplyr::filter(Habitat_500m == "Deciduous_woodland") %>%
  dplyr::mutate(Hab_region = paste0(Habitat_500m,"_",N_S))

# How many sites and site-years?
Annual_aggregate_totals_hab_region %>%
  group_by(Hab_region) %>%
  dplyr::summarise(Number = length(unique(fSITE))) 
#   Hab_region               Number
#1 Deciduous_woodland_North     18
#2 Deciduous_woodland_South     24

# Site years
Annual_aggregate_totals_hab_region %>%
  group_by(Hab_region) %>%
  dplyr::mutate(Site_year = paste0(fSITE,"_",fYear)) %>%
  dplyr::summarise(Number = length(unique(Site_year)))
#   Hab_region               Number
#1 Deciduous_woodland_North    237
#2 Deciduous_woodland_South    329


# Make Region a factor
Annual_aggregate_totals_hab_region$N_S <- as.factor(Annual_aggregate_totals_hab_region$N_S)

# Run a GAMM with a non-linear year effect and region interaction
Hab_region_ab_GAMM_wiggly <- gam(Annual_count ~ 
                                   s(Year, by = N_S) +
                                   N_S + 
                                   s(fYear, bs = "re") + 
                                   s(Year, fSITE, bs = "re"),
                                 family = nb,
                                 data = Annual_aggregate_totals_hab_region) 

# How does it look?
Ab_hab_region_viz = getViz(Hab_region_ab_GAMM_wiggly, nsim = 0)
print(plot(Ab_hab_region_viz), pages = 1) # Random effects fine. Smoother shapes as expected
check(Ab_hab_region_viz) # # Residuals fine

# Plot model predictions (Fig. S6a)
plot_smooth(Hab_region_ab_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Abundance", 
            transform = "exp", ylim = c(0,6100), ylab = "Total abundance")
# Saved as 5 x 5 inch pdf

summary(Hab_region_ab_GAMM_wiggly) # All smoothers sig
#Approximate significance of smooth terms:
#  edf Ref.df  Chi.sq p-value    
#  s(Year):N_SNorth  3.875  4.706   38.78 1.25e-06 ***
#  s(Year):N_SSouth  1.849  2.093   57.37  < 2e-16 ***


# Make a reduced model
Reduced_Hab_region_ab_GAMM_wiggly <- gam(Annual_count ~ 
                                   s(Year) +
                                   N_S + 
                                   s(fYear, bs = "re") + 
                                   s(Year, fSITE, bs = "re"),
                                 family = nb,
                                 data = Annual_aggregate_totals_hab_region)

# Is the year*region interaction significant?
anova(Hab_region_ab_GAMM_wiggly, Reduced_Hab_region_ab_GAMM_wiggly, test = "Chisq")
#     Resid. Df Resid. Dev      Df Deviance  Pr(>Chi)    
#1    471.68     9053.1                               
#2    477.55     9089.0 -5.8651  -35.851 2.568e-06 ***

AICc(Hab_region_ab_GAMM_wiggly) # 9258.684

# # # 

# ~~~ Linear GAMM abundance (broadleaf woodland only) ####

# Run a linear GAMM
Hab_region_ab_GAMM_linear <- gam(Annual_count ~ Year*N_S + 
                                   s(fYear, bs = "re") +
                                   s(Year, fSITE, bs = "re"),
                                 family = nb,
                                 data = Annual_aggregate_totals_hab_region)

# Do the checks and plot
Ab_hab_region_linear_viz = getViz(Hab_region_ab_GAMM_linear, nsim = 0)
print(plot(Ab_hab_region_linear_viz), pages = 1) # Random effects a little off
check(Ab_hab_region_linear_viz) # Residuals fine

anova(Hab_region_ab_GAMM_linear) # Analagous to drop1
# Parametric Terms:
#          df Chi.sq p-value
#Year      1  9.833 0.00171
#N_S       1  9.881 0.00167
#Year:N_S  1  9.836 0.00171

# There is a sig Year*Region interaction in woodlands

AICc(Hab_region_ab_GAMM_linear) # 9273.784

# How do linear and non-linear compare?
AICc(Hab_region_ab_GAMM_wiggly) - AICc(Hab_region_ab_GAMM_linear)
# -15.09963. Non-linear is better

# Plotting

# First get rid of any levels clinging on
Annual_aggregate_totals_hab_region <- droplevels(Annual_aggregate_totals_hab_region)

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Hab_region_ab_GAMM_linear, pairwise ~ N_S, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016),
                    N_S = c(levels(Annual_aggregate_totals_hab_region$N_S)),
                    fSITE = "S_46", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Hab_region_ab_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

# Get it back on response scale
addXY$newY <- exp(addXY$newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(N_S, Perc_change) 
Just_percs
#N_S   Perc_change
# 1 North       -34.2
# 2 South       -56.0

# Merge on to Emtrends_df_2
Ab_hab_region_df <- merge(Emtrends_df_2, Just_percs)

Ab_hab_region_df$Measure <- "Abundance"

# Plot
ggplot(data = Ab_hab_region_df, aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  xlab("Region") +
  ggtitle("Abundance")
# This plot will be combined with other plots and saved at the end of this script

# # # 

# ~~~~~ poptrend for broadleaf woodland north and south ####

North_woods_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                      s(Year, fSITE, bs = "re"), 
                    family = nb, data = subset(Annual_aggregate_totals_hab_region, N_S == "North"))

South_woods_pop <- ptrend(Annual_count ~ trend(Year, tempRE = TRUE, type = "smooth", k = 16) + 
                      s(Year, fSITE, bs = "re"), 
                    family = nb, data = subset(Annual_aggregate_totals_hab_region, N_S == "South"))

# Plot (Fig. S7)
plot(North_woods_pop, main = "North \n Broadleaf woodland", ylim = c(0.3, 2.6))
plot(South_woods_pop, main = "South \n Broadleaf woodland", ylim = c(0.3, 2.6))
# Each saved as a 5 x 5 inch pdf


# # #

# ~~~ ####

# 3. Biomass change ####

# ~ Data prep ####

# The dfs I need are
names(Annual_species_totals) 
names(Site_info)
names(Site_year_completeness)

# First get a biomass for each site-year

# Cut out only the biomass from Biomass_df
Biomass_df <- 
  Biomass_df %>%
  dplyr::select(RIS_code, Biomass_Kinsella_direct)

# Rename Annual_species_totals in prep
Annual_species_totals_2 <- 
  Annual_species_totals %>%
  dplyr::rename(Year = CalYear, SITE = "RIS.TrapCode", RIS_code = "Code.RISgeneric")

# Merge it on
Annual_species_totals_3 <- merge(Annual_species_totals_2, Biomass_df, by = "RIS_code", all.x = TRUE)

# Create a Total_biomass by multiplying the biomass per species by the number of that species caught
Annual_species_totals_3$Total_biomass <- Annual_species_totals_3$Annual_count*Annual_species_totals_3$Biomass_Kinsella_direct

# Squash it down to one total per site-year
Annual_biomass <-
  Annual_species_totals_3 %>%
  dplyr::group_by(SITE, Year) %>%
  dplyr::summarise(Total_biomass = sum(Total_biomass, na.rm = TRUE), 
                   Annual_count = sum(Annual_count))

# Prepare to merge on site info
Site_year_completeness_2 <- Site_year_completeness
Site_year_completeness_2$Site_name_db <- NULL
Site_year_completeness_2$Site_name_db_nogaps <- NULL

# Merge
Annual_biomass_2 <- merge(Annual_biomass, Site_info, by = c("SITE"))
Annual_biomass_3 <- merge(Annual_biomass_2, Site_year_completeness_2, by = c("SITE", "Year"))

# Check 
names(Annual_biomass_3)

# Now cut down data to desirable sites and site-years
# First, get rid of anything outside the 1968 - 2016 range and also the Ireland and Channel Isles sites
# and 
Annual_biomass_4 <- 
  Annual_biomass_3 %>%
  dplyr::filter(Year >= 1968 & Year <= 2016) %>%
  dplyr::filter(Region != "Channel Islands") %>%
  dplyr::filter(Region != "Ireland") %>%
  dplyr::filter(Complete_type_Paul == "Complete") 

# Make a new log(biomass) variable
Annual_biomass_4$Log_biomass <- log(Annual_biomass_4$Total_biomass)

# Make a new variables for Actual_years
Actual_years_ran <-
  Annual_biomass_4 %>%
  dplyr::group_by(SITE) %>%
  dplyr::summarise(Actual_years = length(unique(Year)))

# Merge it on
Annual_biomass_5 <- merge(Annual_biomass_4, Actual_years_ran, by = "SITE")

# And cut it down to sites with at least three Actual_years running 
Annual_biomass_6 <- 
  Annual_biomass_5 %>%
  dplyr::filter(Actual_years >= 3)

# Make an fSITE number so the spaces in the site names doesn't mess things up
Annual_biomass_6$fSITE <- as.factor(paste0("S_",Annual_biomass_6$SITE))

# Make a year factor for a random effect
Annual_biomass_6$fYear <- as.factor(paste0("Y_", Annual_biomass_6$Year))

# # # 

# 3a) Modelling - Biomass - All sites ####

# ~~~ Wiggly GAMM biomass ####

# Run a GAMM with a non-linear year effect
Overall_biomass_GAMM_wiggly <- gam(Log_biomass ~ s(Year) + 
                                s(fYear, bs = "re") +
                                s(Year, fSITE, bs = "re"),
                              data = Annual_biomass_6) 

# Model checks
Biomass_viz = getViz(Overall_biomass_GAMM_wiggly, nsim = 0)
print(plot(Biomass_viz), pages = 1) # Some of the site random effects a little off. Clear decline
check(Biomass_viz) # Residuals good enough

# Plot model predictions (Fig. S2b)
plot_smooth(Overall_biomass_GAMM_wiggly, view = "Year", main = "Biomass", transform = "exp", 
            ylim = c(0,80000), ylab = "Total biomass (mg)") 
# Saved as 5 x 5 inch pdf
#
newX <- expand.grid(Year = seq(from = min(Annual_biomass_6$Year), to = max(Annual_biomass_6$Year),
                               length = 49), fSITE = "S_22", fYear = "Y_1968") # (Selecting 
# random effects at random. I have tested that changing random effects has no effect on predictions)
newY <- predict(Overall_biomass_GAMM_wiggly, newdata = newX, se.fit = TRUE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)
# Put back on response scale
addXY$fit <- exp(addXY$fit)

(addXY$fit[addXY$Year == 2016] - addXY$fit[addXY$Year == 1968])/
  addXY$fit[addXY$Year == 1968]*100
# -39.26477

anova(Overall_biomass_GAMM_wiggly)
#                 edf Ref.df     F p-value
#s(Year)         1.00   1.00 29.87  <2e-16


AICc(Overall_biomass_GAMM_wiggly) # 2335.065


# # #

# ~~~ Linear GAMM biomass ####

# Run a linear GAMM
Overall_biomass_GAMM_linear <- gam(Log_biomass ~ Year + 
                                s(fYear, bs = "re") +
                                s(Year, fSITE, bs = "re"),
                              data = Annual_biomass_6)

# Do the checks and plot
Biomass_linear_viz = getViz(Overall_biomass_GAMM_linear, nsim = 0)
print(plot(Biomass_linear_viz), pages = 1) # A few outlying sites
check(Biomass_linear_viz) # Residuals good enough

anova(Overall_biomass_GAMM_linear)
#Parametric Terms:
#       df     F  p-value
# Year  1 29.87 5.04e-08

AICc(Overall_biomass_GAMM_linear) # 2335.064

# How does linear and non-linear compare?
AICc(Overall_biomass_GAMM_wiggly) - AICc(Overall_biomass_GAMM_linear) # 0.0005776261
# Models are equivalent so I will stick with linear

#

# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Overall_biomass_GAMM_linear, pairwise ~ NULL, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Overall_biomass_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

# Put back onto response scale
addXY$newY <- exp(addXY$newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(Perc_change)
Just_percs # -39.3%

# Merge on to Emtrends_df_2
Biomass_overall_df <- merge(Emtrends_df_2, Just_percs)

Biomass_overall_df$Measure <- "Biomass"

ggplot(data = Biomass_overall_df, aes(x = Measure, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Biomass")
# This plot will be combined with other plots and saved at the end if this script

###

# Redo this but with MacGregor et al. (2019)'s sites only ####

MacG_sites_data <- 
  Annual_biomass_6 %>%
  dplyr::filter(SITE == 1 | SITE == 22 | SITE == 46 | SITE == 34 | SITE == 39 |
                  SITE == 88 | SITE == 97 | SITE == 131 | SITE == 149 | SITE == 168 |
                  SITE == 212 | SITE == 187 | SITE == 293 | SITE == 261 | SITE == 277 |
                  SITE == 274 | SITE == 292 | SITE == 264 | SITE == 111 | SITE == 289 |
                  SITE == 350 | SITE == 336 | SITE == 346 | SITE == 331 | SITE == 368 |
                  SITE == 382 | SITE == 370 | SITE == 398 | SITE == 424 | SITE == 416 |
                  SITE == 446 | SITE == 451 | SITE == 467 | SITE == 477)

# Run a linear GAMM
Overall_biomass_GAMM_linear_MacG <- gam(Log_biomass ~ Year + 
                                     s(fYear, bs = "re") +
                                     s(Year, fSITE, bs = "re"),
                                   data = MacG_sites_data)

anova(Overall_biomass_GAMM_linear_MacG)
#Parametric Terms:
# df     F p-value
# Year  1 10.88   0.001

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Overall_biomass_GAMM_linear_MacG, pairwise ~ NULL, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016), fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Overall_biomass_GAMM_linear_MacG, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

# Put back onto response scale
addXY$newY <- exp(addXY$newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(Perc_change)
Just_percs # -29.7% A bit less severe than when including all sites

# # #

# Deer damage and woodland - Section added for second draft Jan 2022 ####

# Run a linear GAMM
BL_woodland_biomass_GAMM_linear <- gam(Log_biomass ~ Year*Deer_damage_estimate + 
                                     s(fYear, bs = "re") +
                                     s(Year, fSITE, bs = "re"),
                                   data = subset(Annual_biomass_6, Habitat_500m == "Deciduous_woodland"))

anova(BL_woodland_biomass_GAMM_linear)
# Parametric Terms:
#                           df      F  p-value
# Year                       1 11.401 0.000804
# Deer_damage_estimate       1  3.803 0.051830
# Year:Deer_damage_estimate  1  3.748 0.053544

# No significant year interaction

# # #

# 3b) Modelling - biomass - The north and south ####

# ~~~ Wiggly GAMM biomass ####

# Make N_S a factor
Annual_biomass_6$N_S <- as.factor(Annual_biomass_6$N_S)

# Run a GAMM with a non-linear year effect and region interaction
Region_biomass_GAMM_wiggly <- gam(Log_biomass ~ 
                               s(Year, by = N_S) + 
                               N_S + 
                               s(fYear, bs = "re") + 
                               s(Year, fSITE, bs = "re"),
                             data = Annual_biomass_6) 

# How does it look?
Biomass_region_viz = getViz(Region_biomass_GAMM_wiggly, nsim = 0)
print(plot(Biomass_region_viz), pages = 1) # Some random effects a little off. 
check(Biomass_region_viz) ## Resiudals good enough

# Plot model predictions (Fig. S3b)
plot_smooth(Region_biomass_GAMM_wiggly, view = "Year",  plot_all = c("N_S"),
            main = "Biomass", ylab = "Biomass (mg)", transform = "exp", ylim = c(0, 90000))
# Saved as 5 x 5 inch pdf


summary(Region_biomass_GAMM_wiggly) # Only south sig (v. strongly so)
# Approximate significance of smooth terms:
#   edf  Ref.df      F  p-value    
#s(Year):N_SNorth   2.276   2.772  1.957   0.152    
#s(Year):N_SSouth   1.858   2.120 20.415  <2e-16 ***

# Get the significance of the interaction
Reduced_Region_biomass_GAMM_wiggly <- gam(Log_biomass ~ 
                                            s(Year) + 
                                            N_S + 
                                            s(fYear, bs = "re") + 
                                            s(Year, fSITE, bs = "re"),
                                          data = Annual_biomass_6) 

anova(Region_biomass_GAMM_wiggly, Reduced_Region_biomass_GAMM_wiggly, test = "Chisq")
#     Resid. Df Resid. Dev      Df Deviance  Pr(>Chi)    
# 1    2738.2     304.30                               
# 2    2741.6     307.99 -3.4397   -3.693 4.926e-07 ***


AICc(Region_biomass_GAMM_wiggly) # 2305.6


# # # 


# ~~~ Linear GAMM biomass ####

# Run a linear GAMM
Region_biomass_GAMM_linear <- gam(Log_biomass ~ Year*N_S + 
                               s(fYear, bs = "re") +
                               s(Year, fSITE, bs = "re"),
                             data = Annual_biomass_6)

# Do the checks and plot
Biomass_region_linear_viz = getViz(Region_biomass_GAMM_linear, nsim = 0)
print(plot(Biomass_region_linear_viz), pages = 1) # Random effects slightly off
check(Biomass_region_linear_viz) # Residuals good enough

# anova(gam) is analogous to drop1(glm)
anova(Region_biomass_GAMM_linear)
#Parametric Terms:
#         df      F  p-value
#Year      1  4.406   0.0359
#N_S       1 25.767 4.11e-07
#Year:N_S  1 25.465 4.80e-07

# There is a sig Year*N_S interaction

AICc(Region_biomass_GAMM_linear) # 18414.32

# How does linear compare to non-linear?
AICc(Region_biomass_GAMM_wiggly) - AICc(Region_biomass_GAMM_linear) # -0.794888
# Models are equivalent so I will stick with linear


# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Region_biomass_GAMM_linear, pairwise ~ N_S, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016), N_S = c(levels(Annual_biomass_6$N_S)), 
                    fSITE = "S_1", fYear = "Y_2016") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Region_biomass_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

# Put back onto response scale
addXY$newY <- exp(addXY$newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(N_S, Perc_change) 
Just_percs
# N_S   Perc_change
#1 North       -20.0
#2 South       -46.3


# Merge on to Emtrends_df_2
Biomass_region_df <- merge(Emtrends_df_2, Just_percs)

Biomass_region_df$Measure <- "Biomass"

# Reorder north and south

Biomass_region_df$N_S <- factor(Biomass_region_df$N_S, levels = c("South", "North"))

ggplot(data = Biomass_region_df, aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Biomass")
# This plot will be added to other plots and saved at the end of this script

# # # 

# 3c) Modelling - biomass - 7 habitats ####

# ~~~ Wiggly GAMM biomass ####

# Make Habitat_500m a factor
Annual_biomass_6$Habitat_500m <- as.factor(Annual_biomass_6$Habitat_500m)

# Run a GAMM with a non-linear year effect and region interaction
Habitat_biomass_GAMM_wiggly <- gam(Log_biomass ~ 
                                s(Year, by = Habitat_500m) + 
                                Habitat_500m + 
                                s(fYear, bs = "re") + 
                                s(Year, fSITE, bs = "re"),
                              data = Annual_biomass_6)

# How does it look?
Biomass_habitat_viz = getViz(Habitat_biomass_GAMM_wiggly, nsim = 0)
print(plot(Biomass_habitat_viz), pages = 1) # Some random effects a little off
check(Biomass_habitat_viz) # Residuals good enough

summary(Habitat_biomass_GAMM_wiggly) # All smooths apart from Upland and Semi-nat sig.
#                                             edf  Ref.df      F  p-value    
# s(Year):Habitat_500mArable                 7.224   8.217  2.175  0.02942 *  
# s(Year):Habitat_500mConifer_plantation     3.868   4.849  2.818  0.01842 *  
# s(Year):Habitat_500mDeciduous_woodland     2.507   3.149 16.825  < 2e-16 ***
# s(Year):Habitat_500mImproved_grassland     1.000   1.000 18.256 2.06e-05 ***
# s(Year):Habitat_500mLowland_semi_natural   1.369   1.643  5.481  0.01283 *  
# s(Year):Habitat_500mUpland                 2.777   3.457  4.092  0.00459 ** 
# s(Year):Habitat_500mUrban                  4.840   5.915  7.897  < 2e-16 ***

# Make a reduced model to test significance of interaction effect
Reduced_Habitat_biomass_GAMM_wiggly <- gam(Log_biomass ~ 
                                     s(Year) + 
                                     Habitat_500m + 
                                     s(fYear, bs = "re") + 
                                     s(Year, fSITE, bs = "re"),
                                   data = Annual_biomass_6) 

anova(Habitat_biomass_GAMM_wiggly, Reduced_Habitat_biomass_GAMM_wiggly, test = "Chisq")
#   Resid. Df Resid. Dev      Df Deviance  Pr(>Chi)    
#1    2714.8      296.8                               
#2    2741.8      308.2 -26.995  -11.405 3.882e-11 ***

AICc(Habitat_biomass_GAMM_wiggly) # 2273.865


# # #

# ~~~~~~Plotting wiggly biomass (Habitat) for paper ####

# Make a new data frame to predict from. 
newX <- expand.grid(Year = seq(from = min(Annual_biomass_6$Year), to = max(Annual_biomass_6$Year)),
                    Habitat_500m = levels(as.factor(Annual_biomass_6$Habitat_500m)),
                    fSITE = "S_22", fYear = "Y_1968") # (Selecting random effects at random)

# Get predicted values
newY <- predict(Habitat_biomass_GAMM_wiggly, newdata = newX, se.fit = TRUE, 
                exclude = c("s(fYear)","s(Year,fSITE)")) # I have tested changing randoms makes no diff

# Add them together
addXY <- data.frame(newX, newY)

# Rename them and make the CIs
addXY_2 <-
  addXY %>%
  mutate(Biomass = exp(fit),
         lwr = exp(fit - 1.96*se.fit), 
         upr = exp(fit + 1.96*se.fit))

# Rename and reorder Habitat levels in prep

# Make it a character so it can be changed
addXY_2$Habitat_500m <- as.character(addXY_2$Habitat_500m)

# Correct names
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Improved_grassland"] <- "Improved grassland"
addXY_2$Habitat_500m[addXY_2$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'"
# Get current levels
levels(as.factor(addXY_2$Habitat_500m))
# Order them
addXY_2$Habitat_500m <- factor(addXY_2$Habitat_500m, 
                               levels = c("Arable","Broadleaf woodland","Conifer plantation",
                                          "Improved grassland","'Other semi-natural'",
                                          "Upland","Urban"))

# Plot (Fig. S4b)
ggplot(data = addXY_2, aes(x = Year, y = Biomass)) +
  geom_hline(yintercept =  mean(exp(newY$fit)), linetype = "dashed") +
  geom_smooth(aes(ymin = lwr, ymax = upr), stat = "identity", colour = "black") +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Total annual biomass") +
  facet_wrap(~Habitat_500m, nrow = 2) +
  ylim(0,165000)
# Saved as 5 x 9 inch PDF


# # # 


# ~~~ Linear GAMM biomass ####

# Run a linear GAMM
Habitat_biomass_GAMM_linear <- gam(Log_biomass ~ Year*Habitat_500m + 
                                s(fYear, bs = "re") +
                                s(Year, fSITE, bs = "re"),
                              data = Annual_biomass_6)

# Do the checks and plot
Biomass_habitat_linear_viz = getViz(Habitat_biomass_GAMM_linear, nsim = 0)
print(plot(Biomass_habitat_linear_viz), pages = 1) # Random effects good 
check(Biomass_habitat_linear_viz) # Resiudals good enough

# anova(gam) is analogous to drop1(glm)
anova(Habitat_biomass_GAMM_linear)
#Parametric Terms:
#                  df      F  p-value
# Year               1 7.102 0.00775
# Habitat_500m       6 2.845 0.00917
# Year:Habitat_500m  6 2.825 0.00961

# There is a sig Year*Habitat interaction

AICc(Habitat_biomass_GAMM_linear) # 2329.442

# How does linear and non-linear compare?
AICc(Habitat_biomass_GAMM_wiggly) - AICc(Habitat_biomass_GAMM_linear) # -55.57666
# Non-linear is better

# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Habitat_biomass_GAMM_linear, pairwise ~ Habitat_500m, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016), Habitat_500m = c(levels(Annual_biomass_6$Habitat_500m)), 
                    fSITE = "S_22", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Habitat_biomass_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

# Put back onto response scale
addXY$newY <- exp(addXY$newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(Habitat_500m, Perc_change) 
Just_percs
#   Arable                     -26.5
# 2 Conifer_plantation         -47.4
# 3 Deciduous_woodland         -51.5
# 4 Improved_grassland         -36.7
# 5 Lowland_semi_natural       -43.4
# 6 Upland                     -27.3
# 7 Urban                      -45.9


# Merge on to Emtrends_df_2
Biomass_habitat_df <- merge(Emtrends_df_2, Just_percs)

Biomass_habitat_df$Measure <- "Biomass"

# Reorder and rename habitats
# Turn them to a character so they can be renamed
Biomass_habitat_df$Habitat_500m <- as.character(Biomass_habitat_df$Habitat_500m)
# Correct names
Biomass_habitat_df$Habitat_500m[Biomass_habitat_df$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation"
Biomass_habitat_df$Habitat_500m[Biomass_habitat_df$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland"
Biomass_habitat_df$Habitat_500m[Biomass_habitat_df$Habitat_500m == "Improved_grassland"] <- "Improved grassland"
Biomass_habitat_df$Habitat_500m[Biomass_habitat_df$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'"
# Get currentlevels
levels(as.factor(Biomass_habitat_df$Habitat_500m))
# Order them
Biomass_habitat_df$Habitat_500m <- factor(Biomass_habitat_df$Habitat_500m, 
                                     levels = c("Urban", "Upland", "'Other semi-natural'",
                                                "Improved grassland", "Conifer plantation",
                                                "Broadleaf woodland", "Arable"))

# Plot
ggplot(data = Biomass_habitat_df, aes(x = Habitat_500m, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  ggtitle("Biomass")
# This plot will be added to other plots and saved at the end of this script

# # # 


# 3d) Modelling - biomass - Habitat*region interaction ####

# Get only the woodland sites and make a new Hab_region var
Annual_biomass_hab_region <-
  Annual_biomass_6 %>%
  dplyr::filter(Habitat_500m == "Deciduous_woodland") %>%
  dplyr::mutate(Hab_region = paste0(Habitat_500m,"_",N_S))

# ~~~ Wiggly GAMM biomass (broadleaf woodland only) ####

# Make N_S a factor
Annual_biomass_hab_region$N_S <- as.factor(Annual_biomass_hab_region$N_S)

# Run a GAMM with a non-linear year region interaction
Hab_region_biomass_GAMM_wiggly <- gam(Log_biomass ~ 
                                        s(Year, by = N_S) +
                                        N_S + 
                                        s(fYear, bs = "re") + 
                                        s(Year, fSITE, bs = "re"),
                                      data = Annual_biomass_hab_region) 

# How does it look?
Biomass_hab_region_viz = getViz(Hab_region_biomass_GAMM_wiggly, nsim = 0)
print(plot(Biomass_hab_region_viz), pages = 1) # Random effects good
check(Biomass_hab_region_viz) # Residuals good

# Plot model predictions (Fig. S6b)
plot_smooth(Hab_region_biomass_GAMM_wiggly, view = "Year", plot_all = c("N_S"), main = "Biomass", 
            transform = "exp", ylim = c(0,150000), ylab = "Total biomass (mg)")
# Saved as 5 x 5 inch pdf


summary(Hab_region_biomass_GAMM_wiggly)
#                       edf Ref.df      F p-value    
# s(Year):N_SNorth  3.545  4.306  7.821 3.25e-06 ***
# s(Year):N_SSouth  1.982  2.279 34.008  < 2e-16 ***

# Make a reduced model
Reduced_Hab_region_biomass_GAMM_wiggly <- gam(Log_biomass ~ 
                                                s(Year) +
                                                N_S + 
                                                s(fYear, bs = "re") + 
                                                s(Year, fSITE, bs = "re"),
                                              data = Annual_biomass_hab_region) 

anova(Hab_region_biomass_GAMM_wiggly, Reduced_Hab_region_biomass_GAMM_wiggly, test = "Chisq")
#Resid. Df Resid. Dev      Df Deviance  Pr(>Chi)    
#1    474.89     30.348                               
#2    479.70     32.899 -4.8117  -2.5511 8.524e-08 ***

AICc(Hab_region_biomass_GAMM_wiggly) #  143.8704


# # # 

# ~~~ Linear GAMM biomass  (broadleaf woodland only) ####

# Run a linear GAMM
Hab_region_biomass_GAMM_linear <- gam(Log_biomass ~ Year*N_S + 
                                        s(fYear, bs = "re") +
                                        s(Year, fSITE, bs = "re"),
                                      data = Annual_biomass_hab_region)

# Do the checks and plot
Biomass_hab_region_linear_viz = getViz(Hab_region_biomass_GAMM_linear, nsim = 0)
print(plot(Biomass_hab_region_linear_viz), pages = 1) # Random effects fine
check(Biomass_hab_region_linear_viz) # Residuals fine


anova(Hab_region_biomass_GAMM_linear) # Analagous to drop1
# Parametric Terms:
#df      F  p-value
#Year      1  6.093   0.0139
#N_S       1 19.759 1.09e-05
#Year:N_S  1 19.751 1.09e-05

# There is a sig Year*Region interaction

AICc(Hab_region_biomass_GAMM_linear) # 160.2371

# How does linear and non-linear compare?
AICc(Hab_region_biomass_GAMM_wiggly) - AICc(Hab_region_biomass_GAMM_linear) #  -16.36673
# Non-linear is better


# Plotting

# Get the estimated year coefficients with their 95% CIs and make them a df
Emtrends_df <- emtrends(Hab_region_biomass_GAMM_linear, pairwise ~ N_S, var = "Year")
Emtrends_df_2 <- data.frame(Emtrends_df$emtrends)

# Get perc change using predict for the first and last years
newX <- expand.grid(Year = c(1968,2016), 
                    N_S = c(levels(Annual_biomass_hab_region$N_S)),
                    fSITE = "S_46", fYear = "Y_1968") 
# (Selecting random effects at random. I have tested that changing random effects has no 
#       effect on predictions)
newY <- predict(Hab_region_biomass_GAMM_linear, newdata = newX, se.fit = FALSE, 
                exclude = c("s(fYear)","s(Year,fSITE)"))
addXY <- data.frame(newX, newY)

# Get it back on response scale
addXY$newY <- exp(addXY$newY)

Perc_change <- 
  addXY %>%
  pivot_wider(names_from = Year, values_from = newY) %>%
  rename(First = "1968", Last = "2016") %>%
  mutate(Perc_change = ((Last-First)/First)*100)

Just_percs <- 
  Perc_change %>%
  dplyr::select(N_S, Perc_change) 
Just_percs
#    N_S   Perc_change
# 1 North       -29.3
# 2 South       -60.9

# Merge on to Emtrends_df_2
Biomass_hab_region_df <- merge(Emtrends_df_2, Just_percs)

Biomass_hab_region_df$Measure <- "Biomass"

# Plot
ggplot(data = Biomass_hab_region_df, aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  ylab("Year coefficient") +
  xlab("Region") +
  ggtitle("Biomass")
# This plot will be added to other plots and saved at the end of this script

# # # 


# ~~~ ####

# Making final combined plots ####

# Effect of Region on the four responses (linear) ####

# Need to make a plot that is four panels wide (responses) and three panels 
# high (region (including all-sites))

# This will show the year coefficients from the linear models with 95% CIs.

# I need the following dfs which are already made above
Richness_overall_df
Shannon_overall_df
Ab_overall_df
Biomass_overall_df

Richness_region_df
Shannon_region_df
Ab_region_df
Biomass_region_df

# First need to give a new var to the overall dfs to show they are both regions combined
Richness_overall_df$N_S <- "All sites"
Shannon_overall_df$N_S <- "All sites"
Ab_overall_df$N_S <- "All sites"
Biomass_overall_df$N_S <- "All sites"

# also get rid of the dud column
Richness_overall_df$X1 <- NULL
Shannon_overall_df$X1 <- NULL
Ab_overall_df$X1 <- NULL
Biomass_overall_df$X1 <- NULL

# bind em all up
Region_plot_df <- dplyr::bind_rows(Ab_overall_df, Biomass_overall_df, Richness_overall_df, Shannon_overall_df,
  Ab_region_df, Biomass_region_df, Richness_region_df, Shannon_region_df)

# Change the levels order so north comes out on top
Region_plot_df$N_S <- factor(Region_plot_df$N_S, levels = c("All sites", "South", "North"))

# Plot them. I can't get the scales to act nicely, so I will have to plot each one separately

#Abundance
a <- ggplot(data = subset(Region_plot_df, Measure == "Abundance"), aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) + # Leave this in/out depending on requirements
  ylab("Year coefficient") +
  ggtitle("Total abundance") +
  xlab(NULL) +
  ylim(-0.015, 0.015)

# Biomass
b <- ggplot(data = subset(Region_plot_df, Measure == "Biomass"), aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) +
  ylab("Year coefficient") +
  ggtitle("Biomass") +
  xlab(NULL) +
  ylim(-0.019, 0.019)

# Richness
r <- ggplot(data = subset(Region_plot_df, Measure == "Richness"), aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) +
  ylab("Year coefficient") +
  ggtitle("Species richness") +
  xlab(NULL) +
  ylim(-0.65,0.65)

# Shannon
d <- ggplot(data = subset(Region_plot_df, Measure == "Shannon"), aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) +
  ylab("Year coefficient") +
  ggtitle("Diversity") +
  xlab(NULL) +
  ylim(-0.4,0.4)

grid.arrange(a,b,r,d, nrow = 1)

# Saved as 4 x 8 inch pdf (Fig. 1)

# # # 

# Effect of Habitat on the four responses (linear) ####

# Need to make a plot that is four panels wide (responses) and eight panels 
# high (habitat (including all-sites))

# This will show the year coefficients from the linear models with 95% CIs.

# I need the following dfs which are already made above
Richness_overall_df
Shannon_overall_df
Ab_overall_df
Biomass_overall_df

Richness_habitat_df
Shannon_habitat_df
Ab_habitat_df
Biomass_habitat_df

# First need to give a new var to the overall dfs to show they are both regions combined
Richness_overall_df$Habitat_500m <- "All sites"
Shannon_overall_df$Habitat_500m <- "All sites"
Ab_overall_df$Habitat_500m <- "All sites"
Biomass_overall_df$Habitat_500m <- "All sites"

# also get rid of the dud column
Richness_overall_df$X1 <- NULL
Shannon_overall_df$X1 <- NULL
Ab_overall_df$X1 <- NULL
Biomass_overall_df$X1 <- NULL

# bind em all up
Habitat_plot_df <- dplyr::bind_rows(Ab_overall_df, Biomass_overall_df, Richness_overall_df, Shannon_overall_df,
                                    Ab_habitat_df, Biomass_habitat_df, Richness_habitat_df, Shannon_habitat_df)

# change names of habitat levels

# Make it a character so it can be changed
Habitat_plot_df$Habitat_500m <- as.character(Habitat_plot_df$Habitat_500m)

# Correct names
Habitat_plot_df$Habitat_500m[Habitat_plot_df$Habitat_500m == "Conifer_plantation"] <- "Conifer plantation"
Habitat_plot_df$Habitat_500m[Habitat_plot_df$Habitat_500m == "Deciduous_woodland"] <- "Broadleaf woodland"
Habitat_plot_df$Habitat_500m[Habitat_plot_df$Habitat_500m == "Improved_grassland"] <- "Improved grassland"
Habitat_plot_df$Habitat_500m[Habitat_plot_df$Habitat_500m == "Lowland_semi_natural"] <- "'Other semi-natural'"

# Change the levels 
Habitat_plot_df$Habitat_500m <- factor(Habitat_plot_df$Habitat_500m, 
                                       levels = c("All sites", "Urban", "Upland", "'Other semi-natural'", 
                                                  "Improved grassland", "Conifer plantation", "Broadleaf woodland",
                                                  "Arable"))

# Plot them. I can't get the scales to act nicely, so I will have to plot each one separately

#Abundance
a <- ggplot(data = subset(Habitat_plot_df, Measure == "Abundance"), aes(x = Habitat_500m, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) + # Leave this in/out depending on requirements
  ylab("Year coefficient") +
  ggtitle("Total abundance") +
  xlab(NULL) +
  ylim(-0.02, 0.02)

# Biomass
b <- ggplot(data = subset(Habitat_plot_df, Measure == "Biomass"), aes(x = Habitat_500m, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) +
  ylab("Year coefficient") +
  ggtitle("Biomass") +
  xlab(NULL) +
  ylim(-0.025, 0.025)

# Richness
r <- ggplot(data = subset(Habitat_plot_df, Measure == "Richness"), aes(x = Habitat_500m, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) +
  ylab("Year coefficient") +
  ggtitle("Species richness") +
  xlab(NULL) +
  ylim(-1.1, 1.1)

# Shannon
d <- ggplot(data = subset(Habitat_plot_df, Measure == "Shannon"), aes(x = Habitat_500m, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) +
  ylab("Year coefficient") +
  ggtitle("Diversity") +
  xlab(NULL) +
  ylim(-0.45,0.45)

grid.arrange(a,b,r,d, nrow = 1)

# Saved as 4 x 8 inch pdf as (Fig. 2)


# # # 

# Effect of Region (broadleaf woodland only) on the four responses (linear) ####

# Need to make a plot that is four panels wide (responses) and three panels 
# high (region (including all-sites))

# This will show the year coefficients from the linear models with 95% CIs.

# I need the following dfs which are already made above
Richness_habitat_df
Shannon_habitat_df
Ab_habitat_df
Biomass_habitat_df

Richness_hab_region_df
Shannon_hab_region_df
Ab_hab_region_df
Biomass_hab_region_df

# Cut out only the woodland sites from the habitat dfs
Richness_habitat_df_woods <- Richness_habitat_df %>% dplyr::filter(Habitat_500m == "Broadleaf woodland")
Shannon_habitat_df_woods <- Shannon_habitat_df %>% dplyr::filter(Habitat_500m == "Broadleaf woodland")
Ab_habitat_df_woods <- Ab_habitat_df %>% dplyr::filter(Habitat_500m == "Broadleaf woodland")
Biomass_habitat_df_woods <- Biomass_habitat_df %>% dplyr::filter(Habitat_500m == "Broadleaf woodland")


# First need to give a new var to the overall dfs to show they are both regions combined
Richness_habitat_df_woods$N_S <- "All woodland sites"
Shannon_habitat_df_woods$N_S <- "All woodland sites"
Ab_habitat_df_woods$N_S <- "All woodland sites"
Biomass_habitat_df_woods$N_S <- "All woodland sites"

# bind em all up
Hab_region_plot_df <- dplyr::bind_rows(Richness_habitat_df_woods, Biomass_habitat_df_woods,
                                       Ab_habitat_df_woods, Shannon_habitat_df_woods,
                                   Ab_hab_region_df, Biomass_hab_region_df, 
                                   Richness_hab_region_df, Shannon_hab_region_df)

# Change the levels order so north comes out on top
Hab_region_plot_df$N_S <- factor(Hab_region_plot_df$N_S, levels = c("All woodland sites", "South", "North"))

# Plot them. I can't get the scales to act nicely, so I will have to plot each one separately

#Abundance
a <- ggplot(data = subset(Hab_region_plot_df, Measure == "Abundance"), aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) + # Leave this in/out depending on requirements
  ylab("Year coefficient") +
  ggtitle("Total abundance") +
  xlab(NULL) +
  ylim(-0.028, 0.028)


# Biomass
b <- ggplot(data = subset(Hab_region_plot_df, Measure == "Biomass"), aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) +
  ylab("Year coefficient") +
  ggtitle("Biomass") +
  xlab(NULL) +
  ylim(-0.026, 0.026)

# Richness
r <- ggplot(data = subset(Hab_region_plot_df, Measure == "Richness"), aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) +
  ylab("Year coefficient") +
  ggtitle("Species richness") +
  xlab(NULL) +
  ylim(-1.3, 1.3)

# Shannon
d <- ggplot(data = subset(Hab_region_plot_df, Measure == "Shannon"), aes(x = N_S, y = Year.trend)) +
  geom_point() + 
  geom_errorbar(aes(ymax = upper.CL, ymin = lower.CL), width = 0.1) + #, size = 1
  geom_hline(yintercept = 0, linetype = 2) + 
  coord_flip() +
  theme_bw() +
  theme(panel.border = element_blank()) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.line = element_line(colour = "black")) +
  theme(axis.text.y = element_blank()) +
  ylab("Year coefficient") +
  ggtitle("Diversity") +
  xlab(NULL) +
  ylim(-0.6, 0.6)

grid.arrange(a,b,r,d, nrow = 1)
# Saved as 4 x 8 inch pdf as (Fig. 3)

# # #

# 

# End of script