## Load packages ################################################################
list.of.packages <- c("rstatix", "dplyr", "tidyr","tidyverse", "effectsize", "ggalluvial", "ggpubr", "readxl", "gganimate", "paletteer", "Cairo")
#################################################################################
## Load scan_type data with header info
kari_raw <- read_csv(paste0(inPATH,"KARI_scan_mapping_DF25.csv"))

## Trim Kari
kari <- kari_raw %>% 
  mutate(AcquisitionDateTime = as.POSIXct(AcquisitionDateTime, format = "%Y-%m-%d %H:%M:%S")) %>% 
  mutate(MRdate = as.Date(AcquisitionDateTime), Time = format(AcquisitionDateTime, format = "%H:%M:%S")) %>%
  filter(grepl("MR", Modality)) %>%
  drop_na(MRdate) %>%
  select(MAPID=subject_label, experiment_label, scan_type, MRdate) 

## Collapse across experiment_label while retaining the MRdate column
mri <- kari %>%
  group_by(experiment_label) %>%
  distinct(experiment_label, .keep_all = TRUE) %>%
  select(MAPID, experiment_label, MRdate) %>%
  rename(ID = MAPID, Session = experiment_label, Date = MRdate) %>%
  ungroup()

## Demographics
dem <- read_excel(paste0(clinPATH,"demographics.xlsx")) %>%
  select(ID, SES, BIRTH, DEATH, ENTDATE, EDUC, HAND, grantc, race, sex)

## Add PET data
## Amyloid
av45mrf <- read_excel(paste0(dfPATH,"av45_mrfreepet.xlsx"))
av45mrf <- av45mrf %>%
  mutate(PET_Date = as.Date(PET_Date)) %>%
  group_by(PET_Session) %>%
  distinct(PET_Session, .keep_all = TRUE) %>%
  select(ID, PET_Session, PET_Date, MRFreePET_Centiloid) %>%
  rename(Centiloid = MRFreePET_Centiloid) %>%
  filter(!is.na(Centiloid)) %>%
  ungroup()

fbbmrf <- read_excel(paste0(dfPATH,"fbb_mrfreepet.xlsx"))
fbbmrf <- fbbmrf %>%
  mutate(PET_Date = as.Date(PET_Date)) %>%
  group_by(PET_Session) %>%
  distinct(PET_Session, .keep_all = TRUE) %>%
  select(ID, PET_Session, PET_Date, MRFreePET_Centiloid) %>%
  rename(Centiloid = MRFreePET_Centiloid) %>%
  filter(!is.na(Centiloid)) %>%
  ungroup()

pibmrf <- read_excel(paste0(dfPATH,"pib_mrfreepet.xlsx"))
pibmrf <- pibmrf %>%
  mutate(PET_Date = as.Date(PET_Date)) %>%
  group_by(PET_Session) %>%
  distinct(PET_Session, .keep_all = TRUE) %>%
  select(ID, PET_Session, PET_Date, MRFreePET_Centiloid) %>%
  rename(Centiloid = MRFreePET_Centiloid) %>%
  filter(!is.na(Centiloid)) %>%
  ungroup()

## Combine ID and PET_Session for each av45, fbb, and pib
amy <- bind_rows(
  av45mrf %>% select(ID, PET_Session, PET_Date, Centiloid),
  pibmrf %>% select(ID, PET_Session, PET_Date, Centiloid),
  fbbmrf %>% select(ID, PET_Session, PET_Date, Centiloid)
)

## Clinical
cdr <- read_excel(paste0(clinPATH,"b4_cdr.xlsx"))%>%
  select(ID, TESTDATE, cdr) %>%
  mutate(CDR_Date = as.Date(TESTDATE, origin = "1899-12-30"))%>%
  select(-TESTDATE) %>%
  mutate(cdr_cat = as.factor(ifelse(cdr == 0, "Unimpaired", "Impaired")))
  
## APOE
apoe <- read_excel(paste0(genPATH,"apoe.xlsx"))%>%
  select(ID=id, apoe)
############################################################################
############################################################################
## Combine data of interest (sometimes mri and amyloid, other times its not)
combined_df <- amy %>%
  rename(Date = PET_Date, Session = PET_Session)

## Add CDR data from clinical
## Ensure Date columns are in Date format
combined_df$Date <- as.Date(combined_df$Date)
cdr$CDR_Date <- as.Date(cdr$CDR_Date)

## Create an empty column for the closest Date and CDR
kari_cdr <- combined_df
kari_cdr$ClosestCDR <- NA
kari_cdr$CDR <- NA

# Loop through each row in kari_cdr
for (i in 1:nrow(kari_cdr)) {
  img_ID <- kari_cdr$ID[i]
  ## Test if img_ID is in clin$ID
  if (img_ID %in% cdr$ID) {
  } else {
    next
  }
  # Subset clin for the current participant
  subset_cdr <- cdr[cdr$ID == kari_cdr$ID[i], ]
  
  # Find the index of the closest Date
  closest_index <- which.min(abs(subset_cdr$CDR_Date - kari_cdr$Date[i]))
  
  # Assign the closest Date and CDR to the data frame
  kari_cdr$ClosestCDR[i] <- subset_cdr$CDR_Date[closest_index]
  kari_cdr$CDR[i] <- subset_cdr$cdr[closest_index]
}

kari_cdr$ClosestCDR <- as.Date(kari_cdr$ClosestCDR)
kari_cdr <- kari_cdr %>% drop_na(CDR)

## Calculate time between CDR date and MR date
kari_cdr$CDR_gap <- as.numeric(difftime(kari_cdr$Date, kari_cdr$ClosestCDR, units = "days") / 365.25)

## Remove rows where CDR is more than 2 years from MR date
kari_cdr <- kari_cdr %>% filter(abs(CDR_gap) <= 2)

############################################################################
############################################################################
# Process the data to categorize CDR trends
result <- kari_cdr %>%
  group_by(ID) %>%
  arrange(Date) %>%
  mutate(CDR_change = CDR - lag(CDR)) %>%
  summarise(
    increase_count = sum(CDR_change > 0, na.rm = TRUE),
    decrease_count = sum(CDR_change < 0, na.rm = TRUE),
    stable_count = sum(CDR_change == 0, na.rm = TRUE),
    trend = case_when(
      increase_count > 0 & decrease_count == 0 ~ "increases",
      decrease_count > 0 & increase_count == 0 ~ "decreases",
      increase_count > 0 & decrease_count > 0 ~ "fluctuates",
      TRUE ~ "stable"
  )
)

## Remove rows where the trend column is no "stable"
result_stable_CC <- result %>%
  filter(trend == "stable") %>%
  filter(stable_count == 0) %>%
  left_join(kari_cdr %>% 
  select(ID, CDR), by = "ID")

result_stable_CC_table <- result_stable_CC %>%
  distinct(ID, CDR) %>%
  group_by(CDR) %>%
  summarise(Count = n())

## Remove rows where the trend column is stable but the stable_count is greater than 0
result_stable_long <- result %>%
  filter(trend == "stable") %>%
  filter(stable_count != 0) %>%
  left_join(kari_cdr %>% 
  select(ID, CDR), by = "ID")

result_stable_long_table <- result_stable_long %>%
  distinct(ID, CDR) %>%
  group_by(CDR) %>%
  summarise(Count = n())

## Add trend column back to kari_cdr
kari_cdr <- kari_cdr %>%
  left_join(result, by = "ID")

# Condense the data by only keeping rows where the CDR value changes from the previous timepoint
kari_cdr_condensed <- kari_cdr %>%
  group_by(ID) %>%
  arrange(ID, Date) %>%  # Ensure the data is ordered by date
  filter(CDR != lag(CDR, default = first(CDR)) | row_number() == 1) %>%  # Keep rows where CDR is different or the first row
  mutate(timepoint_condensed = row_number())  # Re-assign new timepoints based on distinct CDR changes

## Categorize the different types of CDR change in the data
kari_cdr_types <- kari_cdr_condensed %>%
  summarize(CDR_sequence = paste(CDR, collapse = " -> "),
            trend = first(trend)) %>%  # Use first() to get the trend for each ID
  ungroup()

# Merge the change type back to the original data
kari_cdr <- kari_cdr %>%
  left_join(kari_cdr_types %>% 
  select(ID, CDR_sequence), by = "ID")

increase0to1 <- kari_cdr %>% 
  filter(CDR_sequence == "0 -> 0.5 -> 1")

# Count the number of participants (ID) for each unique CDR_sequence
cdr_sequence_counts <- kari_cdr_types %>%
  count(CDR_sequence, trend) %>%
  arrange(desc(n)) 

## Try Alluvial plot
## Reformat data so that each participant has a CDR for first, max, and last timepoint
kari_cdr_FML <- kari_cdr %>%
  # Group by ID
  group_by(ID) %>%
  # Summarize to get first, max, and last CDR scores
  summarise(
    first_CDR = CDR[which.min(ClosestCDR)],  # First CDR score based on earliest Date
    max_CDR = max(CDR, na.rm = TRUE),     # Maximum CDR score
    last_CDR = CDR[which.max(ClosestCDR)],    # Last CDR score based on latest Date
    max_Cent = max(Centiloid, na.rm = TRUE),  # Maximum Centiloid value
    .groups = 'drop'                              # Drop the grouping
  )


## Set first_CDR, max_CDR, and last_CDR as factors with the levels in the order of 0.0, 0.5, 1.0, 2.0
kari_cdr_FML$first_CDR <- factor(kari_cdr_FML$first_CDR, levels = c(0, 0.5, 1, 2))
kari_cdr_FML$max_CDR <- factor(kari_cdr_FML$max_CDR, levels = c(0, 0.5, 1, 2))
kari_cdr_FML$last_CDR <- factor(kari_cdr_FML$last_CDR, levels = c(0, 0.5, 1, 2))
kari_cdr_FML$max_Cent <- factor(ifelse(kari_cdr_FML$max_Cent > 18.41, "A+", "A-"))

## Remove rows with the same CDR for first, max, and last timepoints
kari_cdr_FML_converters <- kari_cdr_FML %>%
 filter(first_CDR != max_CDR | max_CDR != last_CDR)

data <- kari_cdr_FML_converters %>% 
  group_by(ID)%>%
  gather(TimePoint, CDR, first_CDR:last_CDR) %>%
  mutate(TimePoint = as.factor(TimePoint), 
         CDR = factor(CDR, levels = c("0", "0.5", "1", "2")), ID = as.factor(ID), 
         TimePoint = str_replace_all(TimePoint, c("first_CDR"= "Initial CDR", "max_CDR"= "Highest CDR", "last_CDR"= "Final CDR")), 
         TimePoint = factor(TimePoint, levels = c("Initial CDR", "Highest CDR", "Final CDR")),)

# Try simplified version of the plot...
ggplot(data, aes(x = TimePoint, stratum = CDR, alluvium = ID)) +
  geom_alluvium(aes(fill = max_Cent), stat = "alluvium", aes.bind = "alluvia") +
  scale_fill_manual(name = "Amyloid Status", values = c("A+" = "#6F4070FF", "A-" = "#EDAD08FF"), na.translate = FALSE) +
  geom_stratum(alpha=0) +
  geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 4) +
  labs(title = "Changes in CDR Scores Throughout the Study", y = "Number of Participants", x="") +
  theme(panel.border = element_blank(), axis.line = element_line(),
        plot.title = element_text(hjust = 0.5),  
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        axis.text = element_text(size = 12), axis.title = element_text(size=12), legend.position = "right")

## #0F8554FF, #1D6996FF, #CC503EFF, #5F4690FF

paletteer_d("rcartocolor::Prism")

kari_cdr_FML2 <- kari_cdr %>%
  # Group by ID
  group_by(ID) %>%
  # Summarize to get first, max, and last CDR scores
  summarise(
    first_CDR = CDR[which.min(ClosestCDR)],  # First CDR score based on earliest Date
    max_CDR = max(CDR, na.rm = TRUE),     # Maximum CDR score
    last_CDR = CDR[which.max(ClosestCDR)],    # Last CDR score based on latest Date
    max_Cent = max(Centiloid, na.rm = TRUE),  # Maximum Centiloid value
    .groups = 'drop'                              # Drop the grouping
  )


## Set first_CDR, max_CDR, and last_CDR as factors with the levels in the order of 0.0, 0.5, 1.0, 2.0
kari_cdr_FML2$first_CDR <- factor(kari_cdr_FML2$first_CDR, levels = c(2, 1, 0.5, 0))
kari_cdr_FML2$max_CDR <- factor(kari_cdr_FML2$max_CDR, levels = c(2, 1, 0.5, 0))
kari_cdr_FML2$last_CDR <- factor(kari_cdr_FML2$last_CDR, levels = c(2, 1, 0.5, 0))
kari_cdr_FML2$max_Cent <- factor(ifelse(kari_cdr_FML2$max_Cent > 18.4, "A+", "A-"))

## Remove rows with the same CDR for first, max, and last timepoints
kari_cdr_FML_converters2 <- kari_cdr_FML2 %>%
 filter(first_CDR != max_CDR | max_CDR != last_CDR)

## Attempt 2
data2 <- kari_cdr_FML_converters2 %>% 
  group_by(ID)%>%
  mutate(colorx = as.factor(max_Cent)) %>%
  gather(TimePoint, CDR, first_CDR:max_Cent) %>%
  mutate(TimePoint = as.factor(TimePoint), 
         CDR = factor(CDR, levels = c("2", "1", "0.5", "0", "A+", "A-")), ID = as.factor(ID), 
         TimePoint = str_replace_all(TimePoint, c("first_CDR"= "Initial CDR", "max_CDR"= "Highest CDR", "last_CDR"= "Final CDR", "max_Cent" = "Amyloid")), 
         TimePoint = factor(TimePoint, levels = c("Initial CDR", "Highest CDR", "Final CDR", "Amyloid")),)


ggplot(data2, aes(x = TimePoint, stratum = CDR, alluvium = ID)) +
  geom_alluvium(aes(fill = colorx), stat = "alluvium", aes.bind = "alluvia", alpha=.5) +
  geom_stratum(aes(fill = after_stat(stratum)), alpha=1) +
  scale_fill_manual(name = "Amyloid Status", values = c("A+" = "#6F4070FF", "A-" = "#EDAD08FF"), na.translate = FALSE) +
  geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 4) +
  labs(title = "Changes in CDR Scores by Amyloid-PET Positivity", y = "Number of Participants", x="") +
  theme(panel.border = element_blank(), axis.line = element_line(),
        plot.title = element_text(hjust = 0.5),  
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent",colour = NA),
        axis.text = element_text(size = 12), axis.title = element_text(size=12), legend.position = "none")