# Code used in Pinho et al 2021 ("Hibernation slows epigenetic aging in yellow-bellied marmots") #### Epigenetic Clock (10 fold) #### library(glmnet) load(file = "Marmot_probes_sesame_normalized149.Rdata") betas <- t(normalized_betas_sesame) sample_sheet <- read.csv("SampleSheet_Pinho_etal2021.csv") ages <-sample_sheet[,"Age",drop=FALSE] colnames(ages)<- c("Age") rownames(ages) <- rownames(betas) ages$ID <- sample_sheet$AnimalID # randomize order of samples order <-sample(1:149, 149, replace = FALSE) ages <- ages[order,] betas <- betas[order,] ages$Prediction <- NA Important_markers <- as.data.frame(matrix(ncol = 4, nrow = 0)) names(Important_markers) <- c("coeficient","CG","lambda") for (i in c(14*c(1:11))){ ifelse(i==154, betasLoop <-betas[-c(141:149),], betasLoop <-betas[-c((i-13):i),]) ifelse(i==154, agesLoop <- ages$Age[-c(141:149)], agesLoop <- ages$Age[-c((i-13):i)]) cvfit = cv.glmnet(betasLoop, agesLoop, nfolds = 10, alpha= .5) Final_coeficients <-as.matrix(coef(cvfit, s= "lambda.min")) Final_coeficients <-as.data.frame(Final_coeficients[which(Final_coeficients!=0),]) Final_coeficients$CG <- rownames(Final_coeficients) Final_coeficients$lambda <- cvfit$lambda.min Important_markers<- rbind(Important_markers, Final_coeficients) if (i<154){ ages$Prediction[c((i-13):i)] = predict(cvfit, newx = betas[c((i-13):i),,drop=FALSE], type = "response", s = "lambda.min") } if (i==154){ ages$Prediction[(141:149)] = predict(cvfit, newx = betas[c(141:149),,drop=FALSE], type = "response", s = "lambda.min") } } cor.test(ages$Age,ages$Prediction)# 0.98 # "Important_markers" used to make Table S1 # Final EC model (Table S2) cvfit = cv.glmnet(betas, ages$Age, nfolds = 10, alpha= .5) Final_coeficients <-as.matrix(coef(cvfit, s= "lambda.min")) Final_coeficients <-as.data.frame(Final_coeficients[which(Final_coeficients!=0),]) cvfit$lambda.min #### GAMM analysis #### rm(list=ls()) library(mgcv) ALL <- read.csv("SampleSheet_Pinho_etal2021.csv") #### Model ALL$newdate <- ifelse(ALL$Trap_jday<122, ALL$Trap_jday-121+365, ALL$Trap_jday-121) M<- gamm(Epigenetic_state ~ s(Age, k=18, bs = "cr")+ s(newdate, k=-1, bs = "cc") + ti(Age, newdate, bs=c("cr","cc")),random = list(AnimalID =~ 1), data=ALL, method = "REML") summary(M$gam) summary(M$lme) par(mfrow = c(1, 1)) coef(M$gam) plot(M$gam, select = c(1)) plot(M$gam, select = c(2), ylim=c(-1,1)) # GAMM Validation par(mfrow = c(2, 2)) gam.check(M$gam) par(mfrow = c(1, 1)) acf(resid(M$lme)) pacf(resid(M$lme)) E <- resid(M$lme, type = "normalized") F <- fitted(M$lme) plot(x = F, y = E, xlab = "Fitted values",ylab = "Residuals", cex = 0.3) plot(x = ALL$Age, y = E, xlab = "Age", ylab = "Residuals", cex = 0.3) plot(x = ALL$newdate, y = E, xlab = "Age", ylab = "Residuals", cex = 0.3) plot(x = ALL$Trap_jday, y = E, xlab = "Age", ylab = "Residuals", cex = 0.3) concurvity(M$gam,full = TRUE) concurvity(M$gam,full = FALSE) plot(ALL$Age,ALL$Trap_jday) #### simulations ## packages library(mgcv) library(tidyverse) library(patchwork) ## data ALL <- read.csv("SampleSheet_Pinho_etal2021.csv") ALL <- ALL %>% mutate( age_day = Age * 365, newdate = ifelse(Trap_jday < 122, Trap_jday - 121 + 365, Trap_jday - 121), yr_day = age_day - trunc(Age) * 365 ) ## function to simulate methylation accumulation as a function of age and season epi_st_fn <- function(age_yr, age_day, b_day, sum_meth, win_meth) { sum_len <- 150 # length of active season win_len <- 365 - sum_len # length of hibernation sum_days <- sum_len + 110 - b_day # number of active days left after birthday # to add curvature to accumulation rate modify following line age_ef <- rep(1, 13) ages <- seq_len(age_yr) age_meth <- sum( (sum_len * sum_meth + win_len * win_meth) * age_ef[seq_len(age_yr)]) day_meth <- ifelse( age_day <= sum_days, age_day * sum_meth * age_ef[age_yr + 1], ifelse(age_day > (win_len + sum_days), (win_len * win_meth + (age_day - win_len) * sum_meth) * age_ef[age_yr + 1], ((age_day - sum_days) * win_meth + sum_days * sum_meth) * age_ef[age_yr + 1] ) ) out <- age_meth + day_meth out } ## plotting simulated methylation rate m_rate <- 0.004 sim_plot <- expand.grid( aday = 1:365, age = 0:12, type = c("linear", "seasonal") ) sim_plot <- sim_plot %>% rowwise() %>% mutate( sim = ifelse( type == "linear", epi_st_fn(age, aday, 183, m_rate, m_rate)-4, epi_st_fn(age, aday, 183, m_rate * 365 / 150, 0)-4 ), age_day = age * 365 + aday ) ggplot(sim_plot, aes (x = age_day, y = sim)) + geom_line() + facet_wrap(~ type) + ylab("Simulated methylation") + scale_x_continuous("Age", breaks = 1 + (0:12) * 365, labels = 0:12) + theme_classic() ggsave("simulation_pattern.png") ## setting simulations nsim <- 1000 sim_out_d <- data.frame( s1p = NA, s2p = NA ) m_sim1_d <- m_sim2_d <- list() for (i in 1:nsim) { print(i) iddev <- rnorm(length(unique(ALL$AnimalID)), 0, 0.563) names(iddev) <- unique(ALL$AnimalID) sim_dat <- ALL %>% rowwise() %>% mutate( id_dev = iddev[match(AnimalID, names(iddev))], err_dev = rnorm(1, 0, 0.532), epi1 = epi_st_fn(trunc(Age), yr_day, pup_emerjdate, m_rate, m_rate), epi2 = epi_st_fn(trunc(Age), yr_day, pup_emerjdate, m_rate * 365 / 150, 0), sim1 = epi1 + id_dev + err_dev, sim2 = epi2 + id_dev + err_dev ) s1 <- gamm(sim1 ~ s(Age, k = 18, bs = "cr") + s(newdate, k = -1, bs = "cc"), random = list(AnimalID = ~1), data = sim_dat) s2 <- gamm(sim2 ~ s(Age, k = 18, bs = "cr") + s(newdate, k = -1, bs = "cc"), random = list(AnimalID = ~1), data = sim_dat) sim_out_d[i, "s1p"] <- summary(s1$gam)$s.table[2, 4] sim_out_d[i, "s2p"] <- summary(s2$gam)$s.table[2, 4] m_sim1_d[[i]] <- s1 m_sim2_d[[i]] <- s2 } # proportion of simulation with a significant seasonal pattern # first number gives type 1 error and second one gives power estimation apply(sim_out_d, 2, function(x) sum(x < 0.05) / (nrow(sim_out_d))) # Getting p-value for which 0.05 type I error rate is achieved quantile(sim_out_d[, 1], 0.05) #### Analyses per site #### ## GAM rm(list=ls()) library(mgcv) sample_sheet <- read.csv("SampleSheet_Pinho_etal2021.csv") sample_sheet$newdate <- ifelse(sample_sheet$Trap_jday<122, sample_sheet$Trap_jday-121+365, sample_sheet$Trap_jday-121) load(file = "Marmot_probes_sesame_normalized149.Rdata") betas <- t(normalized_betas_sesame) sites <- as.data.frame(colnames(betas)) names(sites)<- "Site" sites$GAM_edf_Age <- NA sites$GAM_ref.df_Age <- NA sites$GAM_F_Age <- NA sites$GAM_Sig_age <- NA sites$GAM_edf_Jdate <- NA sites$GAM_ref.df_Jdate <- NA sites$GAM_F_Jdate <- NA sites$GAM_Sig_jdate <- NA sites$GAM_res_df <- NA for (i in c(1:nrow(sites))){ Methyl_level <- betas[,which(colnames(betas)==as.character(sites$Site[i])), drop=FALSE] data_site <- as.data.frame(cbind(Methyl_level, sample_sheet$Age, sample_sheet$newdate, as.data.frame(as.character(sample_sheet$AnimalID)))) names(data_site) <- c("beta","Age","newdate", "AnimalID") M <- gam(beta ~ s(Age, k=-1, bs = "cr")+ s(newdate, k=-1, bs = "cc"), data= data_site) Sum <- summary.gam(M, digits=200) sites$GAM_edf_Age[i] <- Sum$s.table[,1, drop = FALSE][1] sites$GAM_ref.df_Age[i] <- Sum$s.table[,2, drop = FALSE][1] sites$GAM_F_Age[i] <- Sum$s.table[,3, drop = FALSE][1] sites$GAM_Sig_age[i] <- Sum$s.table[,4, drop = FALSE][1] sites$GAM_edf_Jdate[i] <- Sum$s.table[,1, drop = FALSE][2] sites$GAM_ref.df_Jdate[i] <- Sum$s.table[,2, drop = FALSE][2] sites$GAM_F_Jdate[i] <- Sum$s.table[,3, drop = FALSE][2] sites$GAM_Sig_jdate[i] <- Sum$s.table[,4, drop = FALSE][2] sites$GAM_res_df[i] <- M$df.residual } ##replacing zero p values with approximation sites$GAM_Sig_age <- ifelse( sites$GAM_Sig_age == 0, pf(sites$GAM_F_Age, sites$GAM_ref.df_Age, sites$GAM_res_df, lower.tail= FALSE), sites$GAM_Sig_age) sites$GAM_Sig_jdate <- ifelse( sites$GAM_Sig_jdate == 0, pf(sites$GAM_F_Jdate, sites$GAM_ref.df_Jdate, sites$GAM_res_df, lower.tail= FALSE), sites$GAM_Sig_jdate) ## EWAS rm(list=ls()) library(lmerTest) sample_sheet <- read.csv("SampleSheet_Pinho_etal2021.csv") sample_sheet$newdate <- ifelse(sample_sheet$Trap_jday<122, sample_sheet$Trap_jday-121+365, sample_sheet$Trap_jday-121) load(file = "Marmot_probes_sesame_normalized149.Rdata") betas <- t(normalized_betas_sesame) sites <- as.data.frame(colnames(betas)) names(sites)<- "Site" sites$EWAS_estimate <- NA sites$EWAS_Std.Error <- NA sites$EWAS_tvalue <- NA sites$EWAS_pval <- NA for (i in c(1:nrow(sites))){ Methyl_level <- betas[,which(colnames(betas)==as.character(sites$Site[i])), drop=FALSE] data_site <- as.data.frame(cbind(Methyl_level, sample_sheet$Age)) names(data_site) <- c("beta","Age") M <- lm(beta ~ Age, data= data_site) Sum <- summary(M) sites$EWAS_estimate[i] <- Sum$coefficients[2,1] sites$EWAS_Std.Error[i] <- Sum$coefficients[2,2] sites$EWAS_tvalue[i] <- Sum$coefficients[2,3] sites$EWAS_pval[i] <- Sum$coefficients[2,4] }