## Load scan_type data with header info kari_raw <- read_csv(paste0(inPATH,"KARI_scan_mapping_DF25.csv")) ## Trim Kari kari <- kari_raw %>% mutate(AcquisitionDateTime = as.POSIXct(AcquisitionDateTime, format = "%Y-%m-%d %H:%M:%S")) %>% mutate(MRdate = as.Date(AcquisitionDateTime), Time = format(AcquisitionDateTime, format = "%H:%M:%S")) %>% filter(grepl("MR", Modality)) %>% drop_na(MRdate) %>% select(MAPID=subject_label, experiment_label, scan_type, MRdate) ## Collapse across experiment_label while retaining the MRdate column mri <- kari %>% group_by(experiment_label) %>% distinct(experiment_label, .keep_all = TRUE) %>% select(MAPID, experiment_label, MRdate) %>% rename(ID = MAPID, Session = experiment_label, Date = MRdate) %>% ungroup() ## Add PET data ## Amyloid av45 <- read_excel(paste0(dfPATH,"av45.xlsx")) av45 <- av45 %>% mutate(PET_Date = as.Date(PET_Date)) %>% group_by(PET_Session) %>% distinct(PET_Session, .keep_all = TRUE) %>% select(ID, PET_Session, PET_Date) %>% rename(Session = PET_Session, Date = PET_Date) %>% ungroup() fbb <- read_excel(paste0(dfPATH,"fbb.xlsx")) fbb <- fbb %>% mutate(PET_Date = as.Date(PET_Date)) %>% group_by(PET_Session) %>% distinct(PET_Session, .keep_all = TRUE) %>% select(ID, PET_Session, PET_Date) %>% rename(Session = PET_Session, Date = PET_Date) %>% ungroup() pib <- read_excel(paste0(dfPATH,"pib.xlsx")) pib <- pib %>% mutate(PET_Date = as.Date(PET_Date)) %>% group_by(PET_Session) %>% distinct(PET_Session, .keep_all = TRUE) %>% select(ID, PET_Session, PET_Date) %>% rename(Session = PET_Session, Date = PET_Date) %>% ungroup() ## Combine ID and PET_Session for each av45, fbb, and pib amy <- bind_rows( av45 %>% select(ID, Session, Date), pib %>% select(ID, Session, Date), fbb %>% select(ID, Session, Date) ) ## Tau tau <- read_excel(paste0(dfPATH,"tau.xlsx")) tau <- tau %>% mutate(PET_Date = as.Date(PET_Date)) %>% group_by(PET_Session) %>% distinct(PET_Session, .keep_all = TRUE) %>% select(ID, PET_Session, PET_Date) %>% rename(Session = PET_Session, Date = PET_Date) %>% ungroup() ## FDG fdg <- read_excel(paste0(dfPATH,"fdg.xlsx")) fdg <- fdg %>% mutate(PET_Date = as.Date(PET_Date)) %>% group_by(PET_Session) %>% distinct(PET_Session, .keep_all = TRUE) %>% select(ID, PET_Session, PET_Date) %>% rename(Session = PET_Session, Date = PET_Date) %>% ungroup() ## Combine all rows from mri, amy, tau, and fdg combined_df <- bind_rows(mri, amy, tau, fdg) # Remove duplicate Session values, keeping only the first occurrence combined_df <- combined_df %>% distinct(Session, .keep_all = TRUE) ## Demographics dem <- read_excel(paste0(clinPATH,"demographics.xlsx")) %>% select(ID, SES, BIRTH, DEATH, ENTDATE, EDUC, HAND, grantc, race, sex) ## Clinical cdr <- read_excel(paste0(clinPATH,"b4_cdr.xlsx"))%>% select(ID, TESTDATE, cdr)%>% mutate(CDR_Date = as.Date(TESTDATE, origin = "1899-12-30"))%>% select(-TESTDATE) ## APOE apoe <- read_excel(paste0(genPATH,"apoe.xlsx"))%>% select(ID=id, apoe) ############################################################################ ############################################################################ ## Add CDR data from clinical ## Ensure Date columns are in Date format cdr$CDR_Date <- as.Date(cdr$CDR_Date) ## Create an empty column for the closest Date and CDR combined_df_cdr <- combined_df combined_df_cdr$ClosestCDR <- NA combined_df_cdr$CDR <- NA # Loop through each row in combined_df_cdr for (i in 1:nrow(combined_df_cdr)) { img_ID <- combined_df_cdr$ID[i] ## Test if img_ID is in clin$ID if (img_ID %in% cdr$ID) { } else { next } # Subset clin for the current participant subset_cdr <- cdr[cdr$ID == combined_df_cdr$ID[i], ] # Find the index of the closest Date closest_index <- which.min(abs(subset_cdr$CDR_Date - combined_df_cdr$Date[i])) # Assign the closest Date and CDR to the data frame combined_df_cdr$ClosestCDR[i] <- subset_cdr$CDR_Date[closest_index] combined_df_cdr$CDR[i] <- subset_cdr$cdr[closest_index] } combined_df_cdr$ClosestCDR <- as.Date(combined_df_cdr$ClosestCDR) combined_df_cdr <- combined_df_cdr %>% drop_na(CDR) ## Calculate time between CDR date and MR date combined_df_cdr$CDR_gap <- as.numeric(difftime(combined_df_cdr$Date, combined_df_cdr$ClosestCDR, units = "days") / 365.25) ## Remove rows where CDR is more than 2 years from MR date combined_df_cdr <- combined_df_cdr %>% filter(abs(CDR_gap) <= 2) ############################################################################ ############################################################################ ## Calculate baseline age combined_df_cdr_baseage <- combined_df_cdr %>% left_join(dem, by = "ID") %>% mutate(Age = as.numeric(difftime(Date, BIRTH, units = "days")/365.25)) %>% group_by(ID) %>% slice_min(Date, with_ties = FALSE) %>% mutate(CDR_factor = case_when(CDR == 0 ~ "Unimpaired", CDR >= 0.5 ~ "Impaired")) %>% ungroup() ## Calculate Age min, max, and standard deviation age_range <- range(combined_df_cdr_baseage$Age) age_sd <- sd(combined_df_cdr_baseage$Age) mean_age <- mean(combined_df_cdr_baseage$Age) ## Create age mean and sd for each CDR factor age_cdr_unimpaired <- combined_df_cdr_baseage %>% filter(CDR_factor == "Unimpaired") %>% summarise(mean_age = mean(Age), sd_age = sd(Age), n = n()) age_cdr_impaired <- combined_df_cdr_baseage %>% filter(CDR_factor == "Impaired") %>% summarise(mean_age = mean(Age), sd_age = sd(Age), n = n()) ## Calculate stats for Age Age_T <- t.test(Age ~ CDR_factor, data = combined_df_cdr_baseage) Age_T$CD <- cohens_d(combined_df_cdr_baseage$Age ~ combined_df_cdr_baseage$CDR_factor)$Cohens_d ## Pull Highest CDR for each ID combined_df_cdr_max <- combined_df_cdr %>% group_by(ID) %>% slice_max(order_by = CDR, with_ties = FALSE) %>% ungroup() ## Combine demographic data combined_df_cdr_max_dem <- combined_df_cdr_max %>% left_join(dem, by = c("ID" = "ID")) %>% mutate(Age = as.numeric(difftime(Date, BIRTH, units = "days")/365.25)) %>% mutate(CDR_factor = case_when(CDR == 0 ~ "Unimpaired", CDR >= 0.5 ~ "Impaired")) ## Recode handedness to R=Right, L=left, and O=Other combined_df_cdr_max_dem <- combined_df_cdr_max_dem %>% mutate(HAND = case_when( HAND %in% c("L", "R") ~ HAND, # Keep "L" and "R" as is HAND %in% c("?", "B", "O") ~ "O", # Change "?", "B", "O", and others to "O" TRUE ~ "O" # Any other value not covered in the above conditions will be changed to "O" )) ## Combine apoe combined_df_cdr_max_dem <- combined_df_cdr_max_dem %>% left_join(apoe, by = c("ID" = "ID")) combined_df_cdr_max_dem$apoe <- as.character(combined_df_cdr_max_dem$apoe) ## If apoe column contains a 4, then set that cell to "e4p", otherwise set it to "e4n" combined_df_cdr_max_dem$apoe_factor <- ifelse(grepl("4", combined_df_cdr_max_dem$apoe), "e4p", "e4n") ## Clean data further for stats table.df <- combined_df_cdr_max_dem %>% select(ID, Date, ClosestCDR, CDR, CDR_factor, Age, sex, HAND, EDUC, race, apoe, apoe_factor ) %>% mutate(sex=as.factor(sex), HAND=as.factor(HAND), apoe=as.factor(apoe), CDR_factor=as.factor(CDR_factor)) %>% group_by(CDR_factor) ## if table.df$race is not "White", "Black", or "ASIAN" then set it to "Other" table.df$race <- as.factor(ifelse(table.df$race %in% c("White", "Black", "ASIAN"), table.df$race, "Other")) ## Summary stats summary_stats_mean_SD <- table.df %>% summarise( count = n(), Age_mean = mean(Age, na.rm = TRUE), Age_SD = sd(Age, na.rm = TRUE), EDUC_mean = mean(EDUC, na.rm = TRUE), EDUC_SD = sd(EDUC, na.rm = TRUE), sex_F_sum = sum(sex == "F"), sex_M_sum = sum(sex == "M"), sex_F_per = sum(sex == "F") / n(), sex_M_per = sum(sex == "M") / n(), HAND_R_sum = sum(HAND == "R"), HAND_L_sum = sum(HAND == "L"), HAND_O_sum = sum(HAND == "O"), HAND_R_per = sum(HAND == "R") / n(), HAND_L_per = sum(HAND == "L") / n(), HAND_O_per = sum(HAND == "O") / n(), race_W_sum = sum(race == "White"), race_W_per = sum(race == "White") / n(), race_B_sum = sum(race == "Black"), race_B_per = sum(race == "Black") / n(), race_A_sum = sum(race == "ASIAN"), race_A_per = sum(race == "ASIAN") / n(), race_O_sum = sum(race == "Other"), race_O_per = sum(race == "Other") / n(), apoe_e4p_sum = sum(apoe_factor == "e4p"), apoe_e4n_sum = sum(apoe_factor == "e4n"), apoe_e4p_per = sum(apoe_factor == "e4p") / n(), apoe_e4n_per = sum(apoe_factor == "e4n") / n(), ) summary_stats_mean_SD <- t(summary_stats_mean_SD) # T-tests for continuous variables EDUC_T <- t.test(EDUC ~ CDR_factor, data = table.df) EDUC_T$CD <- cohens_d(table.df$EDUC ~ table.df$CDR_factor)$Cohens_d # Chi-square tests for factors sex_chi2_table <- table(table.df$sex, table.df$CDR_factor) sex_chi2 <- chisq.test(sex_chi2_table) sex_phi <- sqrt(sex_chi2$statistic / sum(sex_chi2_table)) sex_CV <- sqrt(sex_chi2$statistic / sum(sex_chi2_table) * (min(dim(sex_chi2_table)) - 1)) HAND_chi2_table <- table(table.df$HAND, table.df$CDR_factor) HAND_chi2 <- chisq.test(HAND_chi2_table) HAND_phi <- sqrt(HAND_chi2$statistic / sum(HAND_chi2_table)) HAND_CV <- sqrt(HAND_chi2$statistic / sum(HAND_chi2_table) * (min(dim(HAND_chi2_table)) - 1)) race_chi2_table <- table(table.df$race, table.df$CDR_factor) race_chi2 <- chisq.test(race_chi2_table) race_phi <- sqrt(race_chi2$statistic / sum(race_chi2_table)) race_CV <- sqrt(race_chi2$statistic / sum(race_chi2_table) * (min(dim(race_chi2_table)) - 1)) apoe_chi2_table <- table(table.df$apoe_factor, table.df$CDR_factor) apoe_chi2 <- chisq.test(apoe_chi2_table) apoe_phi <- sqrt(apoe_chi2$statistic / sum(apoe_chi2_table)) apoe_CV <- sqrt(apoe_chi2$statistic / sum(apoe_chi2_table) * (min(dim(apoe_chi2_table)) - 1)) # Combine all results into a single data frame summary_stats_p_ES <- bind_rows( data.frame( variable = c("Scan_Age", "EDUC"), p_value = c(Age_T$p.value, EDUC_T$p.value), ES = c(round(Age_T$CD,digits=4),round(EDUC_T$CD,digits=4)) ), data.frame( variable = c("sex", "HAND", "race", "apoe"), p_value = c(sex_chi2$p.value, HAND_chi2$p.value, race_chi2$p.value, apoe_chi2$p.value), ES = c(round(sex_phi,digits=4), round(HAND_phi,digits=4), round(race_phi,digits=4), round(apoe_phi,digits=4)) ) ) ## Remove scientific notation from p-values summary_stats_p_ES$p_value2 <- format(summary_stats_p_ES$p_value, scientific = FALSE) ## count number of table.df$sex == M when table.df$CDR_factor == Impaired summary_stats_p_ES$sex_M_imp <- sum(table.df$sex == "M" & table.df$CDR_factor == "Impaired") ## Count number of table.df$sex == F when table.df$CDR_factor == Impaired summary_stats_p_ES$sex_F_imp <- sum(table.df$sex == "F" & table.df$CDR_factor == "Impaired")