# Define the required packages
required_packages <- c("ape", "readr","emmeans", "dplyr", "tidyverse", "cowplot", "car", "multcomp", "phytools")

# Install missing packages
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Load the packages
lapply(required_packages, library, character.only = TRUE)

#Set WD - to wherever data is downloaded
setwd("C:/Users/wi920208/OneDrive - University of Reading/Work/PhD/Range Filling MS") 

####Import .csv####
result_df <- read_csv("RangeFillingData.csv")

####Summary statistics####
min_species <- result_df[result_df$Percent == min(result_df$Percent), "Species"]
min_species
min(result_df$Percent)

max_species <- result_df[result_df$Percent == max(result_df$Percent), "Species"]
max_species
max(result_df$Percent)


####Load tree####
Tree<-read.tree("Tree.tre")
Tree$tip.label <- gsub("_", " ", Tree$tip.label)

####Reorder Factors####
result_df$Lecty<-factor(result_df$Lecty, levels = c("Polylectic", "Oligolectic","Clepto- and Social Parasite"))
result_df$Overwintering<-factor(result_df$Overwintering, levels = c("Prepupa", "Adult within cocoon","Adult within brood cell","Adult (female only)"))
result_df$Habitat.Breadth<-factor(result_df$Habitat.Breadth, levels = c("1", "2","3","4","5"))

####Set up data for phylolm model####
# Convert tibble to data frame
result_df <- as.data.frame(result_df) # Table S1
# Set row names to the Species column
rownames(result_df) <- result_df$Species

####Test relationship between traits and climate envelope size####
# Test for phylogenetic signal in Range Filling Ability
# Make sure the tree and data are matched
species_to_use <- intersect(Tree$tip.label, rownames(result_df))

# Subset the tree and data
Tree_subset <- drop.tip(Tree, setdiff(Tree$tip.label, species_to_use))
trait_vector <- log(result_df[species_to_use, "Suitable.Habitat.Area"])

# Run phylogenetic signal test (Pagel's lambda)
phylosig(Tree_subset, trait_vector, method="lambda", test=TRUE) # No evidence of phylogenetic signal

# Run models
fit.climate.envelope_lm <- lm(Suitable.Habitat.Area~log(Body.Size) + Overwintering + Lecty + Habitat.Breadth, 
                              data = result_df)

summary(fit.climate.envelope_lm) #Table S2
vif(fit.climate.envelope_lm)

####Test relationship between traits and range filling####
# Run model
fit_gamma <- glm(Percent ~ log(Body.Size) + Overwintering + Lecty + Habitat.Breadth, 
                 data = result_df, family = Gamma(link = "log"))
summary(fit_gamma)
vif(fit_gamma)


####Plot body size predictions####
newdata_bs <- expand.grid(
  Habitat.Breadth = levels(result_df$Habitat.Breadth),
  Overwintering = levels(result_df$Overwintering),
  Lecty = levels(result_df$Lecty),
  Body.Size = seq(min(result_df$Body.Size), max(result_df$Body.Size), by = 0.01))

# Make predictions with confidence intervals
body_size_predictions <- predict(fit_gamma, 
                                 newdata = newdata_bs, 
                                 type = "response",
                                 se.fit = TRUE)

body_size_predictionsPreds <- as.data.frame(body_size_predictions$fit)
body_size_predictionsSE <- as.data.frame(body_size_predictions$se.fit)
body_size_Plot <- cbind(body_size_predictionsPreds,body_size_predictionsSE, newdata_bs)
names(body_size_Plot)[1] <- "Pred"
names(body_size_Plot)[2] <- "se"

body_size_Plot$upr <- body_size_Plot$Pred + body_size_Plot$se
body_size_Plot$lwr <- body_size_Plot$Pred - body_size_Plot$se

body_size_Plot <- body_size_Plot %>% group_by(Body.Size) %>% 
  summarise(Pred = mean(Pred),
            upr = mean(upr),
            lwr = mean(lwr))

#Calculate predicted change from body size = 2.5mm to 5mm
body_size_Plot_2mm <- subset(body_size_Plot, Body.Size == 2.50)
body_size_Plot_5mm <- subset(body_size_Plot, Body.Size == 5.00)
body_size_2mm_5mm <- rbind(body_size_Plot_2mm, body_size_Plot_5mm)
body_size_2mm_5mm

####Plot lecty predictions####
newdata_lec <- expand.grid(
  Habitat.Breadth = levels(result_df$Habitat.Breadth),
  Overwintering = levels(result_df$Overwintering),
  Lecty = levels(result_df$Lecty),
  Body.Size = median(result_df$Body.Size))

# Make predictions with confidence intervals
lecty_predictions <- predict(fit_gamma, 
                             newdata = newdata_lec, 
                             type = "response",
                             se.fit = TRUE)

lecty_predictionsPreds <- as.data.frame(lecty_predictions$fit)
lecty_predictionsSE <- as.data.frame(lecty_predictions$se.fit)
lecty_predictionsSE <- na.omit(lecty_predictionsSE)
lecty_Plot <- cbind(lecty_predictionsPreds,body_size_predictionsSE, newdata_lec)
names(lecty_Plot)[1] <- "Pred"
names(lecty_Plot)[2] <- "se"

lecty_Plot$upr <- lecty_Plot$Pred + lecty_Plot$se
lecty_Plot$lwr <- lecty_Plot$Pred - lecty_Plot$se

lecty_Plot <- lecty_Plot %>% group_by(Lecty) %>% 
  summarise(Pred = mean(Pred),
            se = mean(se),
            upr = mean(upr),
            lwr = mean(lwr)
  )
lecty_Plot

# Pairwise Comparisons for Lecty
lecty_emm <- emmeans(fit_gamma, ~ Lecty, data = result_df)
lecty_pairs <- pairs(lecty_emm, adjust = "Tukey")
lecty_pairs

####Plot habitat breadth predictions####
newdata_hb <- expand.grid(
  Habitat.Breadth = levels(result_df$Habitat.Breadth),
  Overwintering = levels(result_df$Overwintering),
  Lecty = levels(result_df$Lecty),
  Body.Size = median(result_df$Body.Size))

# Make predictions with confidence intervals
hb_predictions <- predict(fit_gamma, 
                             newdata = newdata_hb, 
                             type = "response",
                             se.fit = TRUE)

hb_predictionsPreds <- as.data.frame(hb_predictions$fit)
hb_predictionsSE <- as.data.frame(hb_predictions$se.fit)
hb_predictionsSE <- na.omit(hb_predictionsSE)
hb_Plot <- cbind(hb_predictionsPreds,hb_predictionsSE, newdata_lec)
names(hb_Plot)[1] <- "Pred"
names(hb_Plot)[2] <- "se"

hb_Plot$upr <- hb_Plot$Pred + hb_Plot$se
hb_Plot$lwr <- hb_Plot$Pred - hb_Plot$se

hb_Plot <- hb_Plot %>% group_by(Habitat.Breadth) %>% 
  summarise(Pred = mean(Pred),
            se = mean(se),
            upr = mean(upr),
            lwr = mean(lwr)
  )
hb_Plot

# Pairwise Comparisons for Habitat Breadth
hb_emm <- emmeans(fit_gamma, ~ Habitat.Breadth, data = result_df)
hb_pairs <- pairs(hb_emm, adjust = "Tukey")
hb_pairs

####Plot Figures####
# Figure 1 - Low ignorance area

# Figure 2 - Range filling percentages by species
ggplot(result_df, aes(x = Percent, y = fct_reorder(Species, Percent))) +
  geom_point() +
  theme_bw(base_size = 15) +
  theme(axis.text.y = element_text(face = "italic")) +
  labs(x = "Percentage of climate envelope containing presence record",
       y = "Species")

# Figure 3
A <- ggplot()+
  geom_line(data = body_size_Plot, aes(x = Body.Size, y = Pred))+
  geom_line(data = body_size_Plot, aes(x = Body.Size, y = upr), linetype = "dashed")+
  geom_line(data = body_size_Plot, aes(x = Body.Size, y = lwr), linetype = "dashed")+
  geom_point(data = result_df, aes(x = Body.Size, y = Percent))+
  theme_bw(base_size = 15)+
  labs(x = "Body Size (mm)",
       y = "Percentage of Climate\n Envelope Filled")


B <- ggplot() +
  geom_pointrange(data = lecty_Plot, aes(x = Lecty, y = Pred, ymax = upr, ymin = lwr), linetype = "dashed") +
  geom_point(data = result_df, aes(x = Lecty, y = Percent, shape = Lecty), position = position_jitter(width = 0.4)) +
  theme_bw(base_size = 15) +
  theme(legend.position = "bottom") +
  labs(x = "Lecty",
       y = "Percentage of Climate\n Envelope Filled") +
  geom_segment(aes(x = 1, xend = 2, y = 135, yend = 135)) +
  geom_segment(aes(x = 1, xend = 1, y = 137, yend = 133)) +
  geom_segment(aes(x = 2, xend = 2, y = 137, yend = 133)) +
  annotate("text", x = 1.5, y = 140, label = "p = 0.015", size = 3)


C <- ggplot(hb_Plot) +
  geom_pointrange(aes(x = Habitat.Breadth, y = Pred, ymax = upr, ymin = lwr), linetype = "dashed") +
  geom_point(data = result_df, aes(x = Habitat.Breadth, y = Percent, shape = Habitat.Breadth), position = position_jitter(width = 0.4)) +
  theme_bw(base_size = 15) +
  theme(legend.position = "bottom") +
  labs(x = "Habitat Breadth",
       y = "Percentage of Climate\n Envelope Filled") +
  geom_segment(aes(x = 1, xend = 3, y = 135, yend = 135)) +
  geom_segment(aes(x = 1, xend = 1, y = 137, yend = 133)) +
  geom_segment(aes(x = 3, xend = 3, y = 137, yend = 133)) +
  annotate("text", x = 2, y = 140, label = "p = 0.046", size = 3)+
  scale_shape_discrete(name = "Habitat Breadth")

plot_grid(A, B, C, ncol = 1, labels = c("A", "B", "C"), label_x = 0.08, label_y = 0.95, hjust = 1, vjust = 1)         

# Figure S1
plot(Tree, cex = 0.6)