### Load cognition data cog <- read_excel(paste0(clinPATH,"/psychometrics.xlsx")) %>% select(ID, psy_date, mocatots, craftdre, MEMUNITS, srtfree, asscmem, minttots, ANIMALS, VEG, BOSTON, tma, tmb, trailb, TRAILA, digsym, switchmixed, DIGIF, DIGIB, digforct, digbacct, lettnum) %>% mutate(psy_Date = as.Date(psy_date))%>% select (-psy_date)%>% mutate(MEMUNITS = case_when(MEMUNITS <= 25 ~ MEMUNITS),switchmixed = case_when(switchmixed <= 52 ~ switchmixed)) full_data <- cog %>% mutate(BOSTON = as.numeric(BOSTON), minttots = as.numeric(minttots), eqMINT = case_when ( minttots == 32 ~ 30, minttots == 31 ~ 29, minttots == 30 ~ 28, minttots == 29 ~ 27, minttots == 28 ~ 26, minttots == 27 ~ 25, minttots == 26 ~ 24, minttots == 25 ~ 22, minttots == 24 ~ 21, minttots == 23 ~ 20, minttots == 22 ~ 18, minttots == 21 ~ 17, minttots == 20 ~ 16, minttots == 19 ~ 15, minttots == 18 ~ 14, minttots == 17 ~ 13, minttots == 16 ~ 12, minttots == 15 ~ 11, minttots == 14 ~ 11, minttots == 13 ~ 10, minttots == 12 ~ 9, minttots == 11 ~ 9, minttots == 10 ~ 8, minttots == 9 ~ 8, minttots == 8 ~ 7, minttots == 7 ~ 7, minttots == 6 ~ 6, minttots == 5 ~ 6, minttots == 4 ~ 5, minttots == 3 ~ 4, minttots == 2 ~ 3, minttots == 1 ~ 2, minttots == 0 ~ 1, is.na(minttots) ~ BOSTON), MEMUNITS = as.numeric(MEMUNITS), craftdre = as.numeric(craftdre),eqCRFTDel = case_when ( craftdre == 25 ~ 24, craftdre == 24 ~ 23, craftdre == 23 ~ 22, craftdre == 22 ~ 21, craftdre == 21 ~ 20, craftdre == 20 ~ 19, craftdre == 19 ~ 18, craftdre == 18 ~ 16, craftdre == 17 ~ 15, craftdre == 16 ~ 14, craftdre == 15 ~ 13, craftdre == 14 ~ 12, craftdre == 13 ~ 11, craftdre == 12 ~ 10, craftdre == 11 ~ 9, craftdre == 10 ~ 8, craftdre == 9 ~ 8, craftdre == 8 ~ 7, craftdre == 7 ~ 6, craftdre == 6 ~ 5, craftdre == 5 ~ 5, craftdre == 4 ~ 4, craftdre == 3 ~ 3, craftdre == 2 ~ 3, craftdre == 1 ~ 1, craftdre == 0 ~ 0, is.na(craftdre) ~ MEMUNITS), DIGIF = as.numeric(DIGIF), digforct = as.numeric(digforct), eqDIGFCT = case_when ( digforct == 14 ~ 12, digforct == 13 ~ 12, digforct == 12 ~ 11, digforct == 11 ~ 11, digforct == 10 ~ 10, digforct == 9 ~ 9, digforct == 8 ~ 9, digforct == 7 ~ 8, digforct == 6 ~ 7, digforct == 5 ~ 6, digforct == 4 ~ 5, digforct == 3 ~ 3, digforct == 2 ~ 2, digforct == 1 ~ 1, digforct == 0 ~ 0, is.na(digforct) ~ DIGIF), DIGIB = as.numeric(DIGIB), digbacct = as.numeric(digbacct), eqDIGBCT = case_when ( digbacct == 14 ~ 12, digbacct == 13 ~ 12, digbacct == 12 ~ 12, digbacct == 11 ~ 11, digbacct == 10 ~ 10, digbacct == 9 ~ 9, digbacct == 8 ~ 8, digbacct == 7 ~ 7, digbacct == 6 ~ 6, digbacct == 5 ~ 5, digbacct == 4 ~ 4, digbacct == 3 ~ 3, digbacct == 2 ~ 2, digbacct == 1 ~ 1, digbacct == 0 ~ 0, is.na(digbacct) ~ DIGIB), tma = as.numeric(tma), tmb = as.numeric(tmb), TRAILA = as.numeric(TRAILA), trailb = as.numeric(trailb), tmax = case_when( tma > 149 ~ 150, tma < 150 ~ tma), TRAILAx = case_when( TRAILA > 149 ~ 150, TRAILA < 150 ~ TRAILA), tmbx = case_when( tmb > 300 ~ 300, tmb < 300 ~ tmb), TRAILBx = case_when( trailb > 299 ~ 300, trailb < 300 ~ trailb),TRAILA_Final = case_when ( is.na(TRAILAx) ~ tmax, TRAILAx > 0 ~ TRAILAx), TRAILB_Final = case_when ( is.na(TRAILBx) ~ tmbx, TRAILBx > 0 ~ TRAILBx))%>% select(ID, psy_Date, srtfree, ANIMALS, TRAILB_Final, digsym) full_data<- full_data%>% mutate(year = format(psy_Date, format = "%Y"))%>% filter(year > 2003) m_TRAILB_Final <- mean(full_data$TRAILB_Final, na.rm = T); sd_TRAILB_Final <- sd(full_data$TRAILB_Final, na.rm = T); full_data$z_TRAILB_Final <- -1*(full_data$TRAILB_Final - m_TRAILB_Final)/sd_TRAILB_Final m_ANIMALS <- mean(full_data$ANIMALS, na.rm = T); sd_ANIMALS <- sd(full_data$ANIMALS, na.rm = T); full_data$z_ANIMALS <- (full_data$ANIMALS - m_ANIMALS)/sd_ANIMALS m_srtfree <- mean(full_data$srtfree, na.rm = T); sd_srtfree <- sd(full_data$srtfree, na.rm = T); full_data$z_srtfree <- (full_data$srtfree - m_srtfree)/sd_srtfree m_digsym <- mean(full_data$digsym, na.rm = T); sd_digsym <- sd(full_data$digsym, na.rm = T); full_data$z_digsym <- (full_data$digsym - m_digsym)/sd_digsym full_data$K1_na_count <- apply(full_data [, c("z_srtfree", "z_digsym", "z_ANIMALS", "z_TRAILB_Final")], 1, function(x) sum(is.na(x))) index_srtfree <- grep("z_srtfree", colnames(full_data)) index_animals <- grep("z_ANIMALS", colnames(full_data)) index_tmb <- grep("z_TRAILB_Final", colnames(full_data)) index_digsym <- grep("z_digsym", colnames(full_data)) full_data$K1 <- rowMeans(full_data[, c(index_srtfree,index_animals,index_digsym, index_tmb)], na.rm = TRUE) full_data$K1 = ifelse(full_data$K1_na_count <3, full_data$K1, NA) desc <- data.frame(t(describe(full_data$K1))) colnames(desc) <- "K1" full_data <- full_data %>% mutate(K1 = case_when(K1 <= (desc$K1[3]+3.5*desc$K1[4]) & K1 >= (desc$K1[3]-3.5*desc$K1[4]) ~ K1))%>% select(ID, psy_Date, year, K1)%>% filter(year > 2003) mri <- read_excel(paste0(dfPATH,"mri_3t.xlsx")) %>% mutate(mri_Date = as.Date(MR_Date), CortSig = LOAD_CorticalSignature_Thickness)%>% select(ID, mri_Date, CortSig)%>% drop_na(CortSig) # Read in TAU tau <- read_excel(paste0(dfPATH,"tau.xlsx")) %>% mutate(tau_Date = as.Date(PET_Date)) %>% select(ID, tau_Date, Tauopathy) %>% drop_na(Tauopathy) # Read in PIB pib <- read_excel(paste0(dfPATH,"pib_mrfreepet.xlsx")) %>% mutate(ab_Date = as.Date(PET_Date), Tracer = "pib") %>% select(ID, ab_Date, Centiloid = MRFreePET_Centiloid, Tracer) %>% drop_na(Centiloid) av <- read_excel(paste0(dfPATH,"av45_mrfreepet.xlsx")) %>% mutate(ab_Date = as.Date(PET_Date), Tracer = "av") %>% select(ID, ab_Date, Centiloid = MRFreePET_Centiloid, Tracer) %>% drop_na(Centiloid) fbb <- read_excel(paste0(dfPATH,"fbb_mrfreepet.xlsx")) %>% mutate(ab_Date = as.Date(PET_Date), Tracer = "fbb") %>% select(ID, ab_Date, Centiloid = MRFreePET_Centiloid, Tracer) %>% drop_na(Centiloid) # merge the types of amyloid-PET ab <- rbind(pib, av, fbb) # Remove Outliers desc <- data.frame(t(describe(mri$CortSig))) mri <- mri %>% mutate(CortSig = case_when(CortSig >= (desc$X1[3]-3*desc$X1[4]) & CortSig <= (desc$X1[3]+3*desc$X1[4])~ CortSig)) desc <- data.frame(t(describe(tau$Tauopathy))) tau <- tau %>% mutate(Tauopathy = case_when(Tauopathy <= (desc$X1[3]+3*desc$X1[4]) & Tauopathy >= (desc$X1[3]-3*desc$X1[4]) ~ Tauopathy)) desc <- data.frame(t(describe(ab$Centiloid))) ab <- ab %>% mutate(Centiloid = case_when(Centiloid <= (desc$X1[3]+3*desc$X1[4]) & Centiloid >= (desc$X1[3]-3*desc$X1[4]) ~ Centiloid)) rm(desc, pib, av) # add closest cognition to each biomarker and then subset to the most complete first visit based on cognition values missing (and time must be within two years of biomarker) ab <- merge(ab, full_data, by = "ID", all.x = TRUE) %>% #ab_pos drop_na(Centiloid) %>% mutate(abs_diff=as.numeric(abs(ab_Date-psy_Date)), abs_diff = replace_na(abs_diff, 0))%>% group_by(ID, ab_Date) %>% slice_min(abs_diff) %>% ungroup() %>% filter(abs_diff <= (365.25*2)) %>% select(-abs_diff) ab$na_count <- apply(ab [, c("K1")], 1, function(x) sum(is.na(x))) ab <- ab %>% arrange(na_count) %>% group_by(ID, psy_Date) %>% slice_min(na_count) %>% ungroup() %>% arrange(ab_Date) %>% group_by(ID) %>% slice_min(ab_Date) %>% ungroup() tau <- merge(tau, full_data, by = "ID", all.x = TRUE) %>% #ab_pos drop_na(Tauopathy) %>% mutate(abs_diff=as.numeric(abs(tau_Date-psy_Date)), abs_diff = replace_na(abs_diff, 0)) %>% group_by(ID, tau_Date) %>% slice_min(abs_diff) %>% ungroup() %>% filter(abs_diff <= (365.25*2)) %>% select(-abs_diff) tau$na_count <- apply(tau [, c("K1")], 1, function(x) sum(is.na(x))) tau <- tau %>% arrange(na_count) %>% group_by(ID, psy_Date) %>% slice_min(na_count) %>% ungroup() %>% arrange(tau_Date) %>% group_by(ID) %>% slice_min(tau_Date) %>% ungroup() mri <- merge(mri, full_data, by = "ID", all.x = TRUE) %>% #ab_pos drop_na(CortSig) %>% mutate(abs_diff=as.numeric(abs(mri_Date-psy_Date)), abs_diff = replace_na(abs_diff, 0)) %>% group_by(ID, mri_Date) %>% slice_min(abs_diff) %>% ungroup() %>% filter(abs_diff <= (365.25*2)) %>% select(-abs_diff) mri$na_count <- apply(mri [, c("K1")], 1, function(x) sum(is.na(x))) mri <- mri %>% arrange(na_count) %>% group_by(ID, psy_Date) %>% slice_min(na_count) %>% ungroup() %>% arrange(mri_Date) %>% group_by(ID) %>% slice_min(mri_Date) %>% ungroup() # Add apoe, Age, sex, EDUC demo <- read_excel(paste0(clinPATH,"demographics.xlsx")) %>% mutate(birth_Date = as.Date(BIRTH) )%>% distinct(ID, birth_Date, EDUC, race, sex) %>% mutate(sex = as.numeric(str_replace_all(sex, c("M" = "1", "F" = "2")))) demo <- read_excel(paste0(genPATH,"apoe.xlsx")) %>% mutate(ID = id)%>% distinct(ID, apoe) %>% mutate(apoe = as.numeric(str_replace_all(apoe, c("22" = "0", "24" = "1", "44" = "1", "34" = "1", "23" = "0", "33" = "0")))) %>% merge(., demo, by = "ID") ab <- merge(ab, demo, by = "ID", all.x = TRUE) ab <- ab %>% mutate(Age = as.numeric(round(psy_Date - birth_Date)/365)) tau <- merge(tau, demo, by = "ID", all.x = TRUE) tau <- tau %>% mutate(Age = as.numeric(round(psy_Date - birth_Date)/365)) mri <- merge(mri, demo, by = "ID", all.x = TRUE) mri <- mri %>% mutate(Age = as.numeric(round(psy_Date - birth_Date)/365)) full_data <- merge(full_data, demo, by = "ID", all.x = TRUE) %>% mutate(Age = as.numeric(round(psy_Date - birth_Date)/365)) #### Plots #### # set data aside data_corr_ab <- ab %>% drop_na(Centiloid) %>% mutate(AgeS = as.numeric(scale(Age)), EDUCS = as.numeric(scale (EDUC, scale = FALSE)))%>% drop_na(AgeS, EDUCS, sex, apoe, K1) K1 <- pcor.test(data_corr_ab$Centiloid, data_corr_ab$K1, list(data_corr_ab$AgeS, data_corr_ab$EDUCS, data_corr_ab$sex, data_corr_ab$apoe), semi = FALSE, conf.level = 0.95, nrep = 1000, method = "pearson") K1$p.value K1$estimate K1$conf.int[1:2] describe(data_corr_ab$K1) ## Calculate the mean of K1 mean(data_corr_ab$K1, na.rm = TRUE) ## Calculate the standard error of K1 sd(data_corr_ab$K1, na.rm = TRUE) / sqrt(length(data_corr_ab$K1[!is.na(data_corr_ab$K1)])) ## data_corr_tau <- tau%>% drop_na(Tauopathy) %>% mutate(AgeS = scale(Age), EDUCS = scale (EDUC, scale = FALSE)) %>% drop_na(AgeS, EDUCS, sex, apoe, K1) K1 <- pcor.test(data_corr_tau$Tauopathy, data_corr_tau$K1, list(data_corr_tau$AgeS, data_corr_tau$EDUCS, data_corr_tau$sex, data_corr_tau$apoe), semi = FALSE, conf.level = 0.95, nrep = 1000, method = "pearson") K1$p.value K1$estimate K1$conf.int[1:2] describe(data_corr_tau$K1) ## Calculate the mean of K1 mean(data_corr_tau$K1, na.rm = TRUE) ## Calculate the standard error of K1 sd(data_corr_tau$K1, na.rm = TRUE) / sqrt(length(data_corr_tau$K1[!is.na(data_corr_tau$K1)])) ## data_corr_mri <- mri %>% drop_na(CortSig) %>% mutate(AgeS = scale(Age), EDUCS = scale (EDUC, scale = FALSE)) %>% drop_na(AgeS, EDUCS, sex, apoe, K1) K1 <- pcor.test(data_corr_mri$CortSig, data_corr_mri$K1, list(data_corr_mri$AgeS, data_corr_mri$EDUCS, data_corr_mri$sex, data_corr_mri$apoe), semi = FALSE, conf.level = 0.95, nrep = 1000, method = "pearson") K1$p.value K1$estimate K1$conf.int[1:2] describe(data_corr_mri$K1) ## Calculate the mean of K1 mean(data_corr_mri$K1, na.rm = TRUE) ## Calculate the standard error of K1 sd(data_corr_mri$K1, na.rm = TRUE) / sqrt(length(data_corr_mri$K1[!is.na(data_corr_mri$K1)])) # line plots with partial correlations added ab_plot <- data_corr_ab %>% ggplot(aes(x = Centiloid, y = K1)) + geom_point(alpha = 0.3, color = "steelblue", size = 1.5) + geom_smooth(method = "lm", color = "firebrick", fill = "firebrick", se = TRUE, linewidth = 1) + labs( x = "Amyloid PET (Centiloid)", y = "PACC Score (z-score)", title = "Association Between Amyloid Burden and Cognition" ) + coord_cartesian(ylim = c(-2.5, 3)) + # Adjust limits without clipping smooth theme_classic(base_size = 14) + theme( plot.title = element_text(size = 16, face = "bold", hjust = 0.5), axis.title = element_text(size = 14), axis.text = element_text(size = 12) ) tau_plot <- data_corr_tau %>% ggplot(aes(x = Tauopathy, y = K1)) + geom_point(alpha = 0.3, color = "steelblue", size = 1.5) + geom_smooth(method = "lm", color = "firebrick", fill = "firebrick", se = TRUE, linewidth = 1) + labs( x = "Tauopathy (SUVR)", y = "PACC Score (z-score)", title = "Association Between Tau Burden and Cognition" ) + coord_cartesian(ylim = c(-2.5, 3)) + # Keeps smoothing line intact theme_classic(base_size = 14) + theme( plot.title = element_text(size = 16, face = "bold", hjust = 0.5), axis.title = element_text(size = 14), axis.text = element_text(size = 12) ) mri_plot <- data_corr_mri %>% ggplot(aes(x = CortSig, y = K1)) + geom_point(alpha = 0.3, color = "steelblue", size = 1.5) + geom_smooth(method = "lm", color = "firebrick", fill = "firebrick", se = TRUE, linewidth = 1) + labs( x = "Cortical Thickness (mm)", y = "PACC Score (z-score)", title = "Association Between Cortical Thickness and Cognition" ) + coord_cartesian(ylim = c(-2.5, 3)) + # Prevents clipping of smoother theme_classic(base_size = 14) + theme( plot.title = element_text(size = 16, face = "bold", hjust = 0.5), axis.title = element_text(size = 14), axis.text = element_text(size = 12) ) ################################################### ## Load CDR data to allow for stratification cdr <- read_excel(paste0(clinPATH,"b4_cdr.xlsx"))%>% select(ID, TESTDATE, cdr) %>% mutate(CDR_Date = as.Date(TESTDATE, origin = "1899-12-30"))%>% select(-TESTDATE) %>% mutate(cdr_cat = as.factor(ifelse(cdr == 0, "Unimpaired", "Impaired"))) %>% mutate(cdr = as.factor(cdr)) ## Merge CDR wtih data_corr_ab data_corr_ab_cdr <- merge(data_corr_ab, cdr, by = "ID", all.x = TRUE) %>% mutate(abs_diff=as.numeric(abs(ab_Date-CDR_Date)), abs_diff = replace_na(abs_diff, 0)) %>% group_by(ID, ab_Date) %>% slice_min(abs_diff) %>% ungroup() %>% filter(abs_diff <= (365.25*2)) %>% select(-abs_diff) ## Adjust CDR factors such that 1, 2, and 3 are coded as "1+" data_corr_ab_cdr <- data_corr_ab_cdr %>% mutate(cdr = case_when( cdr == "1" ~ "1+", cdr == "2" ~ "1+", cdr == "3" ~ "1+", TRUE ~ as.character(cdr) # Keep 0 and 0.5 as they are )) %>% mutate(cdr = factor(cdr, levels = c("0", "0.5", "1+"))) ## Linear model for CDR stratification lm_ab_cdr <- lm(K1 ~ Centiloid*cdr + AgeS + EDUCS + sex + apoe, data = data_corr_ab_cdr) library(emmeans) emtrends(lm_ab_cdr, pairwise ~ cdr, var = "Centiloid") ## Plot ggplot(data_corr_ab_cdr, aes(x = Centiloid, y = K1)) + geom_point(alpha = 0.2, color = "gray40") + geom_smooth(method = "lm", se = TRUE, color = "#e41a1c", fill = "#e41a1c", linewidth = 1.2) + facet_wrap(~ cdr, labeller = as_labeller(c("0" = "CDR 0", "0.5" = "CDR 0.5", "1+" = "CDR 1+"))) + labs( x = "Amyloid PET (Centiloid)", y = "PACC Score (z-score)", title = "Association Between Amyloid PET and Cognition by CDR Group" ) + theme_minimal(base_size = 12) + theme( strip.text = element_text(size = 12, face = "bold"), panel.grid.minor = element_blank() ) ## Merge CDR wtih data_corr_tau data_corr_tau_cdr <- merge(data_corr_tau, cdr, by = "ID", all.x = TRUE) %>% mutate(abs_diff=as.numeric(abs(tau_Date-CDR_Date)), abs_diff = replace_na(abs_diff, 0)) %>% group_by(ID, tau_Date) %>% slice_min(abs_diff) %>% ungroup() %>% filter(abs_diff <= (365.25*2)) %>% select(-abs_diff) ## Adjust CDR factors such that 1, 2, and 3 are coded as "1+" data_corr_tau_cdr <- data_corr_tau_cdr %>% mutate(cdr = case_when( cdr == "1" ~ "1+", cdr == "2" ~ "1+", cdr == "3" ~ "1+", TRUE ~ as.character(cdr) # Keep 0 and 0.5 as they are )) %>% mutate(cdr = factor(cdr, levels = c("0", "0.5", "1+"))) ## Linear model for CDR stratification lm_tau_cdr <- lm(K1 ~ Tauopathy*cdr + AgeS + EDUCS + sex + apoe, data = data_corr_tau_cdr) emtrends(lm_tau_cdr, pairwise ~ cdr, var = "Tauopathy") ## Plot ggplot(data_corr_tau_cdr, aes(x = Tauopathy, y = K1, color = as.factor(cdr))) + geom_point(alpha = 0.2) + geom_smooth(method = "lm", se = TRUE, linewidth = 1.2) + scale_color_manual( values = c("0" = "#1b9e77", "0.5" = "#d95f02", "1+" = "#7570b3"), labels = c("CDR 0", "CDR 0.5", "CDR 1+"), name = "CDR Group" ) + labs( x = "Tau PET (Tauopathy)", y = "PACC Score (z-score)", title = "PACC ~ Tau PET by Clinical Status" ) + theme_minimal(base_size = 12) + theme( legend.position = "top", panel.grid.minor = element_blank() ) ## Merge CDR wtih data_corr_mri data_corr_mri_cdr <- merge(data_corr_mri, cdr, by = "ID", all.x = TRUE) %>% mutate(abs_diff=as.numeric(abs(mri_Date-CDR_Date)), abs_diff = replace_na(abs_diff, 0)) %>% group_by(ID, mri_Date) %>% slice_min(abs_diff) %>% ungroup() %>% filter(abs_diff <= (365.25*2)) %>% select(-abs_diff) ## Adjust CDR factors such that 1, 2, and 3 are coded as "1+" data_corr_mri_cdr <- data_corr_mri_cdr %>% mutate(cdr = case_when( cdr == "1" ~ "1+", cdr == "2" ~ "1+", cdr == "3" ~ "1+", TRUE ~ as.character(cdr) # Keep 0 and 0.5 as they are )) %>% mutate(cdr = factor(cdr, levels = c("0", "0.5", "1+"))) ## Linear model for CDR stratification lm_mri_cdr <- lm(K1 ~ CortSig*cdr + AgeS + EDUCS + sex + apoe, data = data_corr_mri_cdr) emtrends(lm_mri_cdr, pairwise ~ cdr, var = "CortSig") ## Plot ggplot(data_corr_mri_cdr, aes(x = CortSig, y = K1, color = as.factor(cdr))) + geom_point(alpha = 0.2) + geom_smooth(method = "lm", se = TRUE, linewidth = 1.2) + scale_color_manual( values = c("0" = "#1b9e77", "0.5" = "#d95f02", "1+" = "#7570b3"), labels = c("CDR 0", "CDR 0.5", "CDR 1+"), name = "CDR Group" ) + labs( x = "Cortical Signature", y = "PACC Score (z-score)", title = "PACC ~ Cortical Signature by Clinical Status" ) + theme_minimal(base_size = 12) + theme( legend.position = "top", panel.grid.minor = element_blank() ) ## Instead of plotting the simple slopes, lets take the original pearson corerlation plot and just add the cdr stratification to the data points ## MRI mri_plot <- data_corr_mri_cdr %>% ggplot(aes(x = CortSig, y = K1, color = as.factor(cdr))) + # color by CDR group geom_point(alpha = 0.7, size = 1.8) + geom_smooth(method = "lm", color = "black", fill = "gray", se = TRUE, linewidth = 1) + labs( x = "Cortical Signature", y = "PACC Score (z-score)", color = "CDR", title = "Association between PACC and Cortical Signature" ) + coord_cartesian(ylim = c(-2.5, 3)) + theme_classic(base_size = 14) + theme( plot.title = element_text(size = 16, face = "bold", hjust = 0.5), axis.title = element_text(size = 14), axis.text = element_text(size = 12), legend.position = "right" ) + scale_color_manual( values = c( "0" = "#73AF48FF", # Green "0.5" = "#38A6A5FF", # Teal "1+" = "#E17C05FF" # Orange ) ) ## Amyloid ab_plot <- data_corr_ab_cdr %>% ggplot(aes(x = Centiloid, y = K1, color = as.factor(cdr))) + # color by CDR geom_point(alpha = 0.7, size = 1.8) + geom_smooth(method = "lm", color = "black", fill = "gray", se = TRUE, linewidth = 1) + labs( x = "Amyloid PET - Centiloid", y = "PACC Score (z-score)", color = "CDR", title = "Association between PACC and Amyloid Centiloid" ) + coord_cartesian(ylim = c(-2.5, 3)) + theme_classic(base_size = 14) + theme( plot.title = element_text(size = 16, face = "bold", hjust = 0.5), axis.title = element_text(size = 14), axis.text = element_text(size = 12), legend.position = "right" ) + scale_color_manual( values = c( "0" = "#73AF48FF", # Green "0.5" = "#38A6A5FF", # Teal "1+" = "#E17C05FF" # Orange ) ) ## Tau tau_plot <- data_corr_tau_cdr %>% ggplot(aes(x = Tauopathy, y = K1, color = as.factor(cdr))) + # color by CDR geom_point(alpha = 0.7, size = 1.8) + geom_smooth(method = "lm", color = "black", fill = "gray", se = TRUE, linewidth = 1) + labs( x = "Tau PET - Tauopathy", y = "PACC Score (z-score)", color = "CDR", title = "Association between PACC and Tauopathy" ) + coord_cartesian(ylim = c(-2.5, 3)) + theme_classic(base_size = 14) + theme( plot.title = element_text(size = 16, face = "bold", hjust = 0.5), axis.title = element_text(size = 14), axis.text = element_text(size = 12), legend.position = "right" ) + scale_color_manual( values = c( "0" = "#73AF48FF", # Green "0.5" = "#38A6A5FF", # Teal "1+" = "#E17C05FF" # Orange ) ) ##################################################### ## Create a density plot for all PACC data by CDR score ## Merge CDR wtih PACC data full_data_cdr <- merge(full_data, cdr, by = "ID", all.x = TRUE) %>% mutate(abs_diff=as.numeric(abs(psy_Date-CDR_Date)), abs_diff = replace_na(abs_diff, 0)) %>% group_by(ID, psy_Date) %>% slice_min(abs_diff) %>% ungroup() %>% filter(abs_diff <= (365.25*2)) %>% select(-abs_diff) %>% filter(!is.na(K1)) %>% mutate(K1neg = -K1) %>% filter(!is.na(cdr)) ## Adjust CDR factors such that 1, 2, and 3 are coded as "1+" full_data_cdr <- full_data_cdr %>% mutate(cdr = case_when( cdr == "2" ~ "2+", cdr == "3" ~ "2+", TRUE ~ as.character(cdr) # Keep 0 and 0.5 as they are )) %>% mutate(cdr = factor(cdr, levels = c("0", "0.5", "1", "2+"))) ## Density plot custom_colors <- paletteer_d("rcartocolor::Prism")[c(5, 3, 7, 9)] # Means mean_PACC <- full_data_cdr %>% dplyr::ungroup() %>% dplyr::group_by(cdr) %>% dplyr::summarize(K1neg = mean(K1neg, na.rm = TRUE)) ## Plot ggplot(full_data_cdr, aes(x = K1neg, fill = as.factor(cdr))) + geom_density(alpha = 0.7) + # Corrected geom_vline line: geom_vline(data = mean_PACC, aes(xintercept = K1neg, color = as.factor(cdr)), linetype = "dashed", linewidth = .75, show.legend = FALSE) + labs(title = "PACC by CDR Score", x = "PACC Score (z-score)", y = "Density", fill = "CDR Score") + scale_fill_manual(values = custom_colors, name = "CDR Score", breaks = c("0", "0.5", "1", "2+"), labels = c("0", "0.5", "1", "2+")) + scale_color_manual(values = custom_colors, guide = "none") + # Added guide = "none" for color legend theme_minimal() + theme(plot.title = element_text(hjust = 0.5)) ## Create some stats the align with density plot library(lme4) model <- lmer(K1 ~ cdr + scale(Age, scale=FALSE) + scale(EDUC, scale=FALSE) + sex + apoe + (1 | ID), data = full_data_cdr) emm <- emmeans(model, ~cdr, df.method = "kenward-roger", pbkrtest.limit = 12000) emm.p <- pairs(emm) emm_df <- as.data.frame(emm) # ## Barplot # ggplot(emm_df, aes(x = cdr, y = emmean, fill = cdr)) + # geom_col(width = 0.6, color = "black", show.legend = FALSE) + # geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) + # scale_fill_manual(values = custom_colors) + # labs( # title = "Model-Adjusted PACC Scores by CDR Level", # x = "CDR Score", # y = "Estimated PACC Score (adjusted for covariates)" # ) + # theme_minimal() + # theme(plot.title = element_text(hjust = 0.5)) ## Violin plot with data displayed library(ggforce) # Your ggplot code to generate the plot my_plot <- ggplot(full_data_cdr, aes(x = cdr, y = K1)) + geom_violin(aes(fill = cdr), trim = FALSE, alpha = 0.3, color = NA) + geom_sina( aes(color = cdr), size = 1.5, alpha = 0.7, method = "density", maxwidth = 0.95 ) + # Optional: boxplot overlay for summary stats geom_boxplot( width = 0.1, outlier.shape = NA, alpha = 0.7, color = "black" ) + # Estimated marginal means (EMMs) geom_point( data = emm_df, aes(x = cdr, y = emmean), color = "black", shape = 18, size = 3, inherit.aes = FALSE ) + # Confidence intervals for EMMs geom_errorbar( data = emm_df, aes(x = cdr, ymin = lower.CL, ymax = upper.CL), width = 0.2, inherit.aes = FALSE ) + scale_fill_manual(values = custom_colors) + scale_color_manual(values = custom_colors) + labs( title = "Observed and Model-Adjusted PACC Scores by CDR Status", x = "CDR Score", y = "PACC Score (z-score)" ) + theme_minimal() + theme( plot.title = element_text(hjust = 0.5, size = 16), axis.title.x = element_text(size = 14), axis.title.y = element_text(size = 14), axis.text.x = element_text(size = 12), axis.text.y = element_text(size = 12), legend.position = "none" )