---
title: "smart_nature_med"
output: html_document
---

### libraries
```{r}
library(gplots) 
library(pheatmap)
library(RColorBrewer)
library(tidyr) 
library(tidyverse)
library(readxl)
library(janitor)
library(stringr)
library(ggstatsplot)
library(palmerpenguins)
library(sjPlot)
library(interactions)
library(emmeans)
library(jtools)
library(car)
library(ggeffects)
library(lme4)
library(foreign)
library(haven)
require(gridExtra)
require(cowplot)
```

### theme
```{r}
theme_Publication <- function(base_size=14, base_family="Arial") {
      library(grid)
      library(ggthemes)
      (theme_foundation(base_size=base_size, base_family=base_family)
       + theme(plot.title = element_text(face = "bold",
                                         size = rel(1.2), hjust = 0.5),
               text = element_text(),
               panel.background = element_rect(colour = "white"),
               plot.background = element_rect(colour = "white"),
               panel.border = element_rect(colour = NA),
               axis.title = element_text(face = "bold",size = rel(1)),
               axis.title.y = element_text(angle=90,vjust =2),
               axis.title.x = element_text(vjust = -0.2),
               axis.text = element_text(), 
               axis.line = element_line(colour="black"),
               axis.ticks = element_line(),
               panel.grid.major = element_line(colour="#f0f0f0"),
               panel.grid.minor = element_blank(),
               legend.key = element_rect(colour = NA),
               legend.position = "bottom",
               legend.direction = "horizontal",
               legend.key.size= unit(0.2, "cm"),
               legend.margin = unit(0, "cm"),
               legend.title = element_text(face="italic"),
               plot.margin=unit(c(10,5,5,5),"mm"),
               strip.background=element_rect(colour="#f0f0f0",fill="#f0f0f0"),
               strip.text = element_text(face="bold")
          ))
      
}

scale_fill_Publication <- function(...){
      library(scales)
      discrete_scale("fill","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)

}

scale_colour_Publication <- function(...){
      library(scales)
      discrete_scale("colour","Publication",manual_pal(values = c("#386cb0","#fdb462","#7fc97f","#ef3b2c","#662506","#a6cee3","#fb9a99","#984ea3","#ffff33")), ...)

}
```

### data
```{r}
clinical <- read_csv("~/Desktop/smart/paper/dfs/SMART_MADRS_SHAPS_Aug25.csv") %>% 
  clean_names() %>%
  mutate(id = as.factor(screening_visit_arm_1_1_record_id))

treatment <- read_csv("~/Desktop/smart/paper/dfs/trt_assignment_090425.csv") %>% 
  clean_names() %>%
  mutate(id = as.factor(id))

dose <- read_excel("~/Desktop/smart/paper/dfs/SMARTDataTx_9.4.25.xlsx") %>% 
  clean_names() %>%
  mutate(id = as.factor(smart_id))

treatment
dose
clinical

df1 <- left_join(clinical, treatment, "id")
pre_filtered <- left_join(df1, dose, "id")


pre_filtered <- pre_filtered %>%
  mutate(consistent_group = 
           (associated_medication == "BUPROPION" & bup == 1) |
           (associated_medication == "SERTRALINE" & ser == 1)) %>%
  mutate(
    both_vs_none = 0,  
    both_vs_none = case_when(
      bup == 1 & ser == 1 ~  1,
      bup == 0 & ser == 0 ~ -1,
      is.na(bup) & ser == 0 ~ -1,
      is.na(ser) & bup == 0 ~ -1,
      is.na(bup) & is.na(ser) ~ -1,
      TRUE ~ both_vs_none 
    ),
    both_vs_none_cat = factor(both_vs_none)
  ) %>% 
  mutate(received_tx = associated_medication == "BUPROPION") %>%
    mutate(
    consistent_4groups = case_when(
      associated_medication == "BUPROPION" & bup == 1 ~ 1,
      associated_medication == "BUPROPION" & bup == 0 ~ 2,
      associated_medication == "SERTRALINE" & ser == 1 ~ 3,
      associated_medication == "SERTRALINE" & ser == 0 ~ 4,
      TRUE ~ NA_real_ 
    ),
    consistent_4groups = factor(
      consistent_4groups,
      levels = 1:4,
      labels = c("BUP_correct", "BUP_inconsistent", "SER_correct", "SER_inconsistent")
    )
  )

smart <- pre_filtered %>% 
  filter(!is.na(madrs_w0)) %>% 
  filter(screening_visit_arm_1_1_record_id != "SMART034") %>% 
  filter(screening_visit_arm_1_1_record_id != "SMART063") %>% 
  filter(screening_visit_arm_1_1_record_id != "SMART089")

smart

smart <- smart %>%
  mutate(
    bup = if_else(is.na(bup), 0, bup),   # treat NA as 0
    ser = if_else(is.na(ser), 0, ser),   # treat NA as 0
    group_4 = case_when(
      bup == 1 & ser == 1 ~ "BUP+/SER+",
      bup == 1 & ser == 0 ~ "BUP+/SER-",
      bup == 0 & ser == 1 ~ "BUP-/SER+",
      bup == 0 & ser == 0 ~ "BUP-/SER-",
      TRUE ~ NA_character_  # fallback for any unexpected cases
    )
  )
```

```{r}
merged_data_tall <- smart %>%
  pivot_longer(
    cols = c(madrs_w0, madrs_w1, madrs_w2, madrs_w3, madrs_w4, madrs_w6, madrs_w8,
             shaps_w0, shaps_w1, shaps_w2, shaps_w3, shaps_w4, shaps_w6, shaps_w8),
    names_to = c(".value", "assessment"),   
    names_pattern = "(madrs|shaps)_w(\\d+)"
  ) %>%
  mutate(
    assessment = as.numeric(assessment)  
  )
```

```{r}
mdl1 <- lmer(madrs ~ age + sex + assessment + consistent_group + (assessment | id), data = merged_data_tall)
summary(mdl1)
confint(mdl1, method = "Wald")
anova(mdl1)
fixef(mdl1) 
coef(mdl1)

mdl2 <- lmer(madrs ~ age + sex + assessment * consistent_group + (assessment | id), data = merged_data_tall)

mdl3 <- lmer(madrs ~ age + sex + assessment + both_vs_none + (assessment | id), data = merged_data_tall)
mdl4 <- lmer(madrs ~ age + sex + assessment * both_vs_none + (assessment | id), data = merged_data_tall)

mdl5 <- lmer(madrs ~ age + sex + assessment * both_vs_none_cat + (assessment | id), data = merged_data_tall)

mdl6 <- lmer(madrs ~ age + sex + assessment + received_tx + (assessment | id), data = merged_data_tall)
mdl7 <- lmer(madrs ~ age + sex + assessment * received_tx + (assessment | id), data = merged_data_tall)

mdl8 <- lmer(madrs ~ age + sex + assessment + consistent_4groups + (assessment | id), data = merged_data_tall)
mdl9 <- lmer(madrs ~ age + sex + assessment * consistent_4groups + (assessment | id), data = merged_data_tall)

mdl10 <- lmer(shaps ~ age + sex + assessment + both_vs_none + (assessment | id), data = merged_data_tall)
mdl11 <- lmer(shaps ~ age + sex + assessment * both_vs_none + (assessment | id), data = merged_data_tall)

merged_data_tall$consistent_4groups
merged_data_tall$both_vs_none
```

```{r}
merged_data_tall <- merged_data_tall %>% 
  mutate(both_vs_none = as.factor(both_vs_none))

ids_with_madrs_0_and_1 <- merged_data_tall %>%
  filter(assessment %in% c(0, 1) & !is.na(madrs)) %>%
  group_by(id) %>%
  summarise(has_both = all(c(0, 1) %in% assessment), .groups = "drop") %>%
  filter(has_both) %>%
  pull(id)

merged_data_tall <- merged_data_tall %>%
  filter(id %in% ids_with_madrs_0_and_1)

mdl <- lmer(madrs ~ assessment + both_vs_none + age + sex + (1 | id),
            data = merged_data_tall)

plot_data <- merged_data_tall %>%
  group_by(both_vs_none, assessment) %>%
  summarise(
    mean_madrs = mean(madrs, na.rm = TRUE),
    se = 1.96 * sd(madrs, na.rm = TRUE) / sqrt(48),  # n = 48
    .groups = "drop"
  )

```


```{r}
# figure paper; marker
custom_colors <- c(
  "-1" = "#A6CEE3",  # light blue
  "0"  = "#1F78B4",  # medium blue
  "1"  = "#08306B"   # dark navy
)

offsets <- c("-1" = -0.1, "0" = 0, "1" = 0.1)
plot_data <- plot_data %>%
  mutate(assessment_offset = assessment + offsets[as.character(both_vs_none)])

ggplot(plot_data, aes(
  x = assessment_offset,
  y = mean_madrs,
  color = both_vs_none,
  group = both_vs_none
)) + 
  geom_line(linewidth = 1, alpha = 0.95) +
  geom_errorbar(
    aes(ymin = mean_madrs - se, ymax = mean_madrs + se),
    width = 0.08,
    linewidth = 0.7,
    alpha = 0.8
  ) +
  scale_color_manual(
    name = "Marker",
    values = custom_colors,
    labels = c("BUP- & SER-", "One marker", "BUP+ & SER+")
  ) +
  scale_x_continuous(
    breaks = unique(plot_data$assessment),
    labels = paste0("", unique(plot_data$assessment))
  ) +
  coord_cartesian(ylim = c(7.5, 30)) +
  labs(
    x = "Assessment",
    y = "MADRS Score",
    title = "Depression Trajectories Across Treatment"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    legend.key.height = unit(0.4, "cm"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  )

# ggsave("~/Downloads/smart_fig.png", height = 4, width = 6)
```


```{r}
merged_data_tall <- merged_data_tall %>% 
  mutate(consistent_group = as.factor(consistent_group))

ids_with_madrs_0_and_1 <- merged_data_tall %>%
  filter(assessment %in% c(0, 1) & !is.na(madrs)) %>%
  group_by(id) %>%
  summarise(has_both = all(c(0, 1) %in% assessment), .groups = "drop") %>%
  filter(has_both) %>%
  pull(id)

merged_data_tall <- merged_data_tall %>%
  filter(id %in% ids_with_madrs_0_and_1)

mdl <- lmer(madrs ~ assessment + consistent_group + (1 | id),
            data = merged_data_tall)

plot_data <- merged_data_tall %>%
  group_by(consistent_group, assessment) %>%
  summarise(
    mean_madrs = mean(madrs, na.rm = TRUE),
    se = 1.96 * sd(madrs, na.rm = TRUE) / sqrt(48),  # n = 48
    .groups = "drop"
  )
```

# consistent
```{r}
# figure paper; consistent
custom_colors <- c(
  "TRUE" = "#F4A582",  # soft coral
  "FALSE"  = "#CA0020"   # deep crimson
)

offsets <- c(
  "FALSE" = -0.05,
  "TRUE"  = 0.05
)

plot_data <- plot_data %>%
  mutate(
    consistent_group = as.character(consistent_group),
    assessment_offset = assessment + offsets[consistent_group]
  )

ggplot(plot_data, aes(
  x = assessment_offset,
  y = mean_madrs,
  color = consistent_group,
  group = consistent_group
)) + 
  geom_line(linewidth = 1, alpha = 0.95) +
  geom_errorbar(
    aes(ymin = mean_madrs - se, ymax = mean_madrs + se),
    width = 0.08,
    linewidth = 0.7,
    alpha = 0.8
  ) +
  scale_color_manual(
    name = "Group",
    values = custom_colors,
    labels = c("FALSE" = "Inconsistent", "TRUE" = "Consistent")
  ) +
  scale_x_continuous(
    breaks = unique(plot_data$assessment),
    labels = unique(plot_data$assessment)
  ) +
  labs(
    x = "Assessment",
    y = "MADRS Score",
    title = "Depression Trajectories Across Treatment"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    legend.key.height = unit(0.4, "cm"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  )

ggsave("~/Desktop/smart/paper/figs/treatment_traj.png", height = 4, width = 6)
```

# drug

```{r}
merged_data_tall <- merged_data_tall %>% 
  mutate(associated_medication = as.factor(associated_medication))

ids_with_madrs_0_and_1 <- merged_data_tall %>%
  filter(assessment %in% c(0, 1) & !is.na(madrs)) %>%
  group_by(id) %>%
  summarise(has_both = all(c(0, 1) %in% assessment), .groups = "drop") %>%
  filter(has_both) %>%
  pull(id)

merged_data_tall <- merged_data_tall %>%
  filter(id %in% ids_with_madrs_0_and_1)

mdl <- lmer(madrs ~ assessment + associated_medication + (1 | id),
            data = merged_data_tall)

plot_data <- merged_data_tall %>%
  group_by(associated_medication, assessment) %>%
  summarise(
    mean_madrs = mean(madrs, na.rm = TRUE),
    se = 1.96 * sd(madrs, na.rm = TRUE) / sqrt(48),  # n = 48
    .groups = "drop"
  )
```

```{r}
# figure paper; medication
merged_data_tall <- merged_data_tall %>% 
  mutate(associated_medication = factor(associated_medication,
                                        levels = c("SERTRALINE", "BUPROPION")))

ids_with_madrs_0_and_1 <- merged_data_tall %>%
  filter(assessment %in% c(0, 1) & !is.na(madrs)) %>%
  group_by(id) %>%
  summarise(has_both = all(c(0, 1) %in% assessment), .groups = "drop") %>%
  filter(has_both) %>%
  pull(id)

merged_data_tall <- merged_data_tall %>%
  filter(id %in% ids_with_madrs_0_and_1)

mdl <- lmer(madrs ~ assessment + associated_medication + age + sex + (1 | id),
            data = merged_data_tall)

plot_data <- merged_data_tall %>%
  group_by(associated_medication, assessment) %>%
  summarise(
    mean_madrs = mean(madrs, na.rm = TRUE),
    se = 1.96 * sd(madrs, na.rm = TRUE) / sqrt(48),  # fixed n = 48 for CI width
    .groups = "drop"
  )

custom_colors <- c(
  "SERTRALINE" = "palegreen3",
  "BUPROPION"  = "darkgreen"
)

offsets <- c("SERTRALINE" = -0.05, "BUPROPION" = 0.05)
plot_data <- plot_data %>%
  mutate(assessment_offset = assessment + offsets[as.character(associated_medication)])

ggplot(plot_data, aes(
  x = assessment_offset,
  y = mean_madrs,
  color = associated_medication,
  group = associated_medication
)) +
  geom_line(linewidth = 1.1, alpha = 0.95) +
  geom_errorbar(
    aes(ymin = mean_madrs - se, ymax = mean_madrs + se),
    width = 0.08,
    linewidth = 0.7,
    alpha = 0.8
  ) +
  scale_color_manual(
    name = "Medication",
    values = custom_colors,
    labels = c("Sertraline", "Bupropion")
  ) +
  scale_x_continuous(
    breaks = unique(plot_data$assessment),
    labels = paste0("T", unique(plot_data$assessment))
  ) +
  coord_cartesian(ylim = c(7.5, 30)) +
  labs(
    x = "Assessment",
    y = "MADRS Score",
    title = "Depression Trajectories by Medication Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    legend.key.height = unit(0.4, "cm"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  )

merged_data_tall %>% 
  summarise(n_unique_ids = n_distinct(id))

ggsave("~/Desktop/smart/paper/figs/treatment_drug.png", height = 4, width = 6)
```

### tables
```{r}
# table 1
# group
smart %>%
  count(group_4)

# sex
smart %>%
  group_by(group_4, sex) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(group_4) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(smart$group_4, smart$sex)
chisq.test(table_chi)

# age
smart %>%
  group_by(group_4) %>%
  summarise(
    mean = mean(age, na.rm = TRUE),
    sd   = sd(age, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

model <- aov(age ~ group_4, data = smart)
summary(model)

# race

# income

# marital status

# dose
smart <- smart %>%
  mutate(
    final_dose_received = str_remove(final_dose_received, " MG"),
    final_dose_num = as.numeric(final_dose_received)
  )

smart %>%
  group_by(associated_medication) %>%
  summarise(
    mean = mean(final_dose_num, na.rm = TRUE),
    sd   = sd(final_dose_num, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

# bup
smart %>%
  group_by(group_4, associated_medication) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(group_4) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(smart$group_4, smart$associated_medication)
chisq.test(table_chi)

# consistent
smart %>%
  group_by(group_4, consistent_group) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(group_4) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(smart$group_4, smart$consistent_group)
chisq.test(table_chi)
```

# marker variables
```{r}
randomization <- read_csv("~/Desktop/smart/paper/dfs/smart_randomization.csv") %>% 
  clean_names()

randomization

smart_markers <- left_join(smart, randomization, "id")

# shaps
smart_markers %>%
  group_by(group_4) %>%
  summarise(
    mean = mean(shaps_score, na.rm = TRUE),
    sd   = sd(shaps_score, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

model <- aov(shaps_score ~ group_4, data = smart_markers)
summary(model)

# rsfc
smart_markers %>%
  group_by(group_4) %>%
  summarise(
    mean = mean(rsrc_nacc_racc, na.rm = TRUE),
    sd   = sd(rsrc_nacc_racc, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

model <- aov(rsrc_nacc_racc ~ group_4, data = smart_markers)
summary(model)

# response bias 
smart_markers <- smart_markers %>% 
  mutate(response_bias_prt_sigdet = as.numeric(response_bias_prt_sigdet))

smart_markers %>%
  group_by(group_4) %>%
  summarise(
    mean = mean(response_bias_prt_sigdet, na.rm = TRUE),
    sd   = sd(response_bias_prt_sigdet, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

model <- aov(response_bias_prt_sigdet ~ group_4, data = smart_markers)
summary(model)

# reward sensitivity 
smart_markers <- smart_markers %>% 
  mutate(reward_sens_prt_matlab = as.numeric(reward_sens_prt_matlab))

smart_markers %>%
  group_by(group_4) %>%
  summarise(
    mean = mean(reward_sens_prt_matlab, na.rm = TRUE),
    sd   = sd(reward_sens_prt_matlab, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

model <- aov(reward_sens_prt_matlab ~ group_4, data = smart_markers)
summary(model)

# hrsd
smart_markers %>%
  group_by(group_4) %>%
  summarise(
    mean = mean(hrsd_total, na.rm = TRUE),
    sd   = sd(hrsd_total, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

model <- aov(hrsd_total ~ group_4, data = smart_markers)
summary(model)

# interference flanker
smart_markers <- smart_markers %>% 
  mutate(interference_flanker_flanker_acc = as.numeric(interference_flanker_flanker_acc))

smart_markers %>%
  group_by(group_4) %>%
  summarise(
    mean = mean(interference_flanker_flanker_acc, na.rm = TRUE),
    sd   = sd(interference_flanker_flanker_acc, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

model <- aov(interference_flanker_flanker_acc ~ group_4, data = smart_markers)
summary(model)

# employment
smart_markers %>%
  group_by(group_4, employment_binary_0_unemployed_1_employed) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(group_4) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(smart_markers$group_4, smart_markers$employment_binary_0_unemployed_1_employed)
chisq.test(table_chi)

# neuroticism
smart_markers %>%
  group_by(group_4) %>%
  summarise(
    mean = mean(neuroticism_score, na.rm = TRUE),
    sd   = sd(neuroticism_score, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

model <- aov(neuroticism_score ~ group_4, data = smart_markers)
summary(model)
```

## comparison to embarc
```{r}
embarc_ids <- read_csv("~/Desktop/smart/paper/dfs/ID_final.csv") %>% 
  clean_names() %>% 
  select(-x1)

embarc_paper <- read_sav("~/Desktop/smart/paper/dfs/EMBARC_BUP_paper_data_6August2019_forJASP_N87_Analysis_YA.sav") %>% 
  clean_names()

embarc_imputed <- read_csv("~/Desktop/smart/paper/dfs/EMBARC_imputed_data.csv") %>% 
  clean_names() %>% 
  mutate(id = as.factor(project_specific_id))

embarc_ids
embarc_paper
embarc_imputed

df1 <- left_join(embarc_ids, embarc_paper, "id")
embarc <- left_join(df1, embarc_imputed, "id")

embarc
```

```{r}
smart_markers <- smart_markers %>%
  mutate(
    associated_medication = case_when(
      associated_medication == "BUPROPION" ~ 1,
      associated_medication == "SERTRALINE" ~ 0,
      TRUE ~ NA_real_     
    )
  )

smart_comparison <- smart_markers %>% 
  select(id, sex, age, associated_medication, shaps_score) %>% 
  mutate(study = 1)

embarc <- embarc %>%
  mutate(bup_use = as.integer(bup_use))

embarc_comparison <- embarc %>% 
  select(id, sex = gendernum, age = age.x, associated_medication = bup_use, shaps_score = shaps_continuous) %>% 
  mutate(study = 0)

comparison <- bind_rows(smart_comparison, embarc_comparison)
```


```{r}
# embarc is 0; smart is 1
# sex; need to mutate so sexes are coded comparably
# smart; 1 male 0 female
# embarc; (1 male 2 female)
comparison %>%
  group_by(study, sex) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(study) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(comparison$study, comparison$sex)
chisq.test(table_chi)

# age
comparison %>%
  group_by(study) %>%
  summarise(
    mean = mean(age, na.rm = TRUE),
    sd   = sd(age, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

t.test(age ~ study, data = comparison, var.equal = FALSE)

# shaps
comparison %>%
  group_by(study) %>%
  summarise(
    mean = mean(shaps_score, na.rm = TRUE),
    sd   = sd(shaps_score, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

t.test(shaps_score ~ study, data = comparison, var.equal = FALSE)
comparison$shaps_score

# sex 
combined <- comparison %>%
  mutate(
    sex_unified = case_when(
      study == 0 & sex == -0.5 ~ 0,
      study == 0 & sex == 0.5  ~ 1,
      study == 1 ~ sex,         # keep original coding for study 1
      TRUE ~ NA_real_
    )
  )

combined %>%
  group_by(study, sex_unified) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(study) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(combined$study, combined$sex_unified)
chisq.test(table_chi)
```

```{r}
# making smart and embarc comparison dfs
smart_markers <- smart_markers %>% 
  mutate(response_bias_prt_sigdet = as.numeric(response_bias_prt_sigdet)) %>% 
  mutate(reward_sens_prt_matlab = as.numeric(reward_sens_prt_matlab)) %>% 
  mutate(interference_flanker_flanker_acc = as.numeric(interference_flanker_flanker_acc)) 

smart_across <- smart_markers %>% 
  select(id, rsfc = rsrc_nacc_racc, response_bias = response_bias_prt_sigdet, 
         reward_sens = reward_sens_prt_matlab, 
         dep_baseline = hrsd_total, flanker = interference_flanker_flanker_acc,
         employment = employment_binary_0_unemployed_1_employed,
         neo = neuroticism_score) %>% 
  mutate(study = 1)

embarc_across <- embarc %>% 
  select(id, rsfc = drug_xresp_bi_nac_cseed_r_ac_ceffect_p001fdr, response_bias = resp_bias_tot_sess1, 
         reward_sens = reward_sensitivity, dep_baseline = w0_hamd, flanker = flanker_acc, 
         employment = demo_employ_status, neo = neo2_score_ne) %>% 
  mutate(study = 0)

across <- bind_rows(smart_across, embarc_across)

# rsfc
across %>%
  group_by(study) %>%
  summarise(
    mean = mean(rsfc, na.rm = TRUE),
    sd   = sd(rsfc, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

t.test(rsfc ~ study, data = across, var.equal = FALSE)

# response_bias
across %>%
  group_by(study) %>%
  summarise(
    mean = mean(response_bias, na.rm = TRUE),
    sd   = sd(response_bias, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

t.test(response_bias ~ study, data = across, var.equal = FALSE)

# reward_sens
across %>%
  group_by(study) %>%
  summarise(
    mean = mean(reward_sens, na.rm = TRUE),
    sd   = sd(reward_sens, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

t.test(reward_sens ~ study, data = across, var.equal = FALSE)

# dep_baseline
across %>%
  group_by(study) %>%
  summarise(
    mean = mean(dep_baseline, na.rm = TRUE),
    sd   = sd(dep_baseline, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

t.test(dep_baseline ~ study, data = across, var.equal = FALSE)

# flanker
across %>%
  group_by(study) %>%
  summarise(
    mean = mean(flanker, na.rm = TRUE),
    sd   = sd(flanker, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

t.test(flanker ~ study, data = across, var.equal = FALSE)

# neo
across %>%
  group_by(study) %>%
  summarise(
    mean = mean(neo, na.rm = TRUE),
    sd   = sd(neo, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

t.test(neo ~ study, data = across, var.equal = FALSE)
```

```{r}
smart_across_2 <- df %>% 
  select(id, race, marital = demog_marital_latn_adapted, edu = demog_edu_latn_adapted) %>% 
  mutate(study = 1)

embarc_across_2 <- embarc %>% 
  select(id, race = racenum, marital = demo_maritial_status, edu = demo_educa_years) %>%
  mutate(study = 0)

across_2 <- bind_rows(smart_across_2, embarc_across_2)

# race
across_2 <- across_2 %>%
  mutate(
    race_new = case_when(
      study == 0 & race == 0.5 ~ 1,
      study == 0 & race != 0.5 ~ 0,
      study == 1 & race == 1   ~ 1,
      study == 1 & race != 1   ~ 0,
      TRUE ~ NA_real_  
    )
  )

across_2 %>%
  group_by(study, race_new) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(study) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(across_2$study, across_2$race_new)
chisq.test(table_chi)

# marital
across_2 <- across_2 %>%
  mutate(
    marital_new = case_when(
      study == 0 & marital == 0.5 ~ 1,
      study == 0 & marital != 0.5 ~ 0,
      study == 1 & marital == 1   ~ 1,
      study == 1 & marital != 1   ~ 0,
      TRUE ~ NA_real_  # optional, handles any other cases
    )
  )

across_2 %>%
  group_by(study, marital_new) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(study) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(across_2$study, across_2$marital_new)
chisq.test(table_chi)

# education
across_2 <- across_2 %>%
  mutate(
    edu_new = case_when(
      study == 0 & edu == 18 ~ 1,
      study == 0 & edu != 18 ~ 0,
      study == 1 & edu == 0  ~ 1,
      study == 1 & edu != 0  ~ 0,
      TRUE ~ NA_real_  # optional, handles other cases
    )
  )

across_2 %>%
  group_by(study, edu_new) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(study) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(across_2$study, across_2$edu_new)
chisq.test(table_chi)

# employment status
smart_across_3 <- smart_markers %>% 
  select(id, employ = employment_binary_0_unemployed_1_employed) %>% 
  mutate(study = 1)

embarc_across_3 <- embarc %>% 
  select(id, employ = demo_employ_status) %>%
  mutate(study = 0)

across_3 <- bind_rows(smart_across_3, embarc_across_3)

across_3 <- across_3 %>%
  mutate(
    employ_new = case_when(
      study == 0 & employ == 0.5 ~ 1,
      study == 0 & employ != 0.5 ~ 0,
      study == 1 ~ employ,
      TRUE ~ NA_real_
    )
  )

across_3 %>%
  group_by(study, employ_new) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(study) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(across_3$study, across_3$employ_new)
chisq.test(table_chi)
```

## response for 3 groups
```{r}
smart %>%
  mutate(both_vs_none_label =
           ifelse(both_vs_none == -1, "None", "Both")) %>% 
  group_by(both_vs_none_label) %>% 
  summarise(
    n = n(),
    responders = sum(madrs_response == 1, na.rm = TRUE),
    percent = round(100 * mean(madrs_response, na.rm = TRUE), 1)
  )

custom_colors <- c(
  "BUP- & SER-" = "#A6CEE3",  # light blue
  "One marker"  = "#1F78B4",  # medium blue
  "BUP+ & SER+"  = "#08306B"   # dark navy
)

group_labels <- c(
  "-1" = "BUP- & SER-",
  "0"  = "One marker",
  "1"  = "BUP+ & SER+"
)

plot_df <- smart %>%
  mutate(both_vs_none = as.character(both_vs_none)) %>%
  group_by(both_vs_none) %>%
  summarise(
    percent = mean(madrs_response, na.rm = TRUE) * 100,
    n = n(),
    responders = sum(madrs_response == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(both_vs_none = factor(both_vs_none, levels = names(group_labels), labels = group_labels[names(group_labels) %in% both_vs_none]))

ggplot(plot_df, aes(x = both_vs_none, y = percent, fill = both_vs_none)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = paste0(round(percent,0), "%")), 
            vjust = -0.5, size = 5) +
  scale_fill_manual(values = custom_colors, name = "Marker") +
  labs(
    x = "Marker Group",
    y = "MADRS Response Rate",
    title = ""
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    legend.key.height = unit(0.4, "cm"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  ) +
  coord_cartesian(ylim = c(0, 80)) +
  theme(
  legend.position = "none",
  axis.ticks.x = element_blank()
) +
  labs(x = "")

ggsave("~/Desktop/smart/paper/figs/smart_response.png", height = 4, width = 6)
```

```{r}
# figure paper; medication
merged_data_tall <- merged_data_tall %>% 
  mutate(associated_medication = factor(associated_medication,
                                        levels = c("SERTRALINE", "BUPROPION")))

ids_with_madrs_0_and_1 <- merged_data_tall %>%
  filter(assessment %in% c(0, 1) & !is.na(madrs)) %>%
  group_by(id) %>%
  summarise(has_both = all(c(0, 1) %in% assessment), .groups = "drop") %>%
  filter(has_both) %>%
  pull(id)

merged_data_tall <- merged_data_tall %>%
  filter(id %in% ids_with_madrs_0_and_1)

mdl <- lmer(madrs ~ assessment + associated_medication + age + sex + (1 | id),
            data = merged_data_tall)

plot_data <- merged_data_tall %>%
  group_by(associated_medication, assessment) %>%
  summarise(
    mean_madrs = mean(madrs, na.rm = TRUE),
    se = 1.96 * sd(madrs, na.rm = TRUE) / sqrt(48),  # fixed n = 48 for CI width
    .groups = "drop"
  )

custom_colors <- c(
  "SERTRALINE" = "palegreen3",
  "BUPROPION"  = "darkgreen"
)

offsets <- c("SERTRALINE" = -0.05, "BUPROPION" = 0.05)
plot_data <- plot_data %>%
  mutate(assessment_offset = assessment + offsets[as.character(associated_medication)])

ggplot(plot_data, aes(
  x = assessment_offset,
  y = mean_madrs,
  color = associated_medication,
  group = associated_medication
)) +
  geom_line(linewidth = 1.1, alpha = 0.95) +
  geom_errorbar(
    aes(ymin = mean_madrs - se, ymax = mean_madrs + se),
    width = 0.08,
    linewidth = 0.7,
    alpha = 0.8
  ) +
  scale_color_manual(
    name = "Medication",
    values = custom_colors,
    labels = c("Sertraline", "Bupropion")
  ) +
  scale_x_continuous(
    breaks = unique(plot_data$assessment),
    labels = paste0("T", unique(plot_data$assessment))
  ) +
  coord_cartesian(ylim = c(7.5, 30)) +
  labs(
    x = "Assessment",
    y = "MADRS Score",
    title = "Depression Trajectories by Medication Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    legend.key.height = unit(0.4, "cm"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  )

merged_data_tall %>% 
  summarise(n_unique_ids = n_distinct(id))
```


## response for medication
```{r}
custom_colors <- c(
  "SERTRALINE" = "palegreen3",
  "BUPROPION"  = "darkgreen"
)

group_labels <- c(
  "SERTRALINE" = "Sertraline",
  "BUPROPION"  = "Bupropion"
)

plot_df <- smart %>%
  filter(associated_medication %in% c("SERTRALINE", "BUPROPION")) %>%
  group_by(associated_medication) %>%
  summarise(
    percent = mean(madrs_response, na.rm = TRUE) * 100,
    n = n(),
    responders = sum(madrs_response == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    associated_medication = factor(associated_medication,
                                   levels = names(group_labels),
                                   labels = group_labels)
  )

ggplot(plot_df, aes(x = associated_medication, y = percent, fill = associated_medication)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = paste0(round(percent, 0), "%")), vjust = -0.5, size = 5) +
  scale_fill_manual(values = c("Sertraline" = "palegreen3", "Bupropion" = "darkgreen")) +
  labs(x = NULL, y = "MADRS Response Rate") +
  coord_cartesian(ylim = c(0, 80)) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    #axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  )

ggsave("~/Desktop/smart/paper/figs/smart_response_med.png", height = 4, width = 6)
```


## response for consistent marker
```{r}
custom_colors <- c(
  "Consistent" = "#F4A582",   
  "Inconsistent" = "#CA0020"   
)

group_labels <- c(
  "FALSE" = "Inconsistent",
  "TRUE" = "Consistent"
)

plot_df <- smart %>%
  filter(consistent_group %in% c(TRUE, FALSE)) %>%
  group_by(consistent_group) %>%
  summarise(
    percent = mean(madrs_response, na.rm = TRUE) * 100,
    n = n(),
    responders = sum(madrs_response == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    consistent_group = factor(as.character(consistent_group),
                              levels = names(group_labels),
                              labels = group_labels)
  )

ggplot(plot_df, aes(x = consistent_group, y = percent, fill = consistent_group)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = paste0(round(percent, 0), "%")), vjust = -0.5, size = 5) +
  scale_fill_manual(values = custom_colors) +
  labs(x = NULL, y = "MADRS Response Rate") +
  coord_cartesian(ylim = c(0, 80)) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    axis.ticks.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  )

ggsave("~/Desktop/smart/paper/figs/smart_marker_consistent.png", height = 4, width = 6)
```

## attrition
```{r}
clinical <- clinical %>%
  mutate(attrition = case_when(
    is.na(madrs_w0) ~ 1,  # missing baseline MADRS = attrited
    screening_visit_arm_1_1_record_id %in% c("SMART034", "SMART063", "SMART089") ~ 1,  # specific IDs = attrited
    TRUE ~ 0  # everyone else stayed
  ))

clinical %>% 
  group_by(attrition) %>%
  summarise(n = n())

# sex
clinical %>%
  group_by(attrition, sex) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(attrition) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(clinical$attrition, clinical$sex)
chisq.test(table_chi)

# age
clinical %>%
  group_by(attrition) %>%
  summarise(
    mean = mean(age, na.rm = TRUE),
    sd   = sd(age, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

t.test(age ~ attrition, data = clinical, var.equal = FALSE)

# shaps screener
clinical %>%
  group_by(attrition) %>%
  summarise(
    mean = mean(screening_visit_arm_1_1_shaps_sum, na.rm = TRUE),
    sd   = sd(screening_visit_arm_1_1_shaps_sum, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

t.test(screening_visit_arm_1_1_shaps_sum ~ attrition, data = clinical, var.equal = FALSE)

clinical %>%
  group_by(attrition) %>%
  summarise(
    mean = mean(screening_visit_arm_1_1_shaps_sum, na.rm = TRUE),
    sd   = sd(screening_visit_arm_1_1_shaps_sum, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )

clinical

clinical %>%
  group_by(attrition) %>%
  summarise(
    mean = mean(screening_visit_arm_1_1_shaps_sum, na.rm = TRUE),
    sd   = sd(screening_visit_arm_1_1_shaps_sum, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(mean, sd),
    names_to = "stat",
    values_to = "value"
  )
```

```{r}
merged_data_tall <- merged_data_tall %>% 
  mutate(
    aligned4 = case_when(

      # ---- BUPROPION ----
      associated_medication == "BUPROPION" & group_4 %in% c("bup+ser+", "bup+ser-") ~ "aligned_bup",
      associated_medication == "BUPROPION" & group_4 %in% c("bup-ser+", "bup-ser-") ~ "not_aligned_bup",

      # ---- SERTRALINE ----
      associated_medication == "SERTRALINE" & group_4 %in% c("bup+ser+", "bup-ser+") ~ "aligned_ser",
      associated_medication == "SERTRALINE" & group_4 %in% c("bup+ser-", "bup-ser-") ~ "not_aligned_ser",

      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    aligned4 = factor(
      aligned4,
      levels = c("aligned_bup", "aligned_ser", "not_aligned_bup", "not_aligned_ser")
    )
  )

```

```{r}
smart %>% 
  count(associated_medication)

treatment %>% 
  count(ser)

treatment %>% 
  count(bup)
```

## more demo
```{r}
more_demo <- read_csv("~/Desktop/smart/paper/dfs/smart_demographics.csv") %>% 
  clean_names() %>%
  mutate(id = as.factor(record_id))

table_1_cont <- left_join(smart, more_demo, "id")

more_demo$race
# 
df <- table_1_cont %>%
  rowwise() %>%
  mutate(
    race = case_when(
      demog_race_specify_latn_adapted_1b2e4a == 1 ~ 10,
      demog_race_latn_adapted_f9f59a_1 == 1 ~ 1,
      demog_race_latn_adapted_f9f59a_2 == 1 ~ 2,
      demog_race_latn_adapted_f9f59a_3 == 1 ~ 3,
      demog_race_latn_adapted_f9f59a_4 == 1 ~ 4,
      demog_race_latn_adapted_f9f59a_5 == 1 ~ 5,
      demog_race_latn_adapted_f9f59a_6 == 1 ~ 6,
      demog_race_latn_adapted_f9f59a_7 == 1 ~ 7,
      demog_race_latn_adapted_f9f59a_8 == 1 ~ 8,
      demog_race_latn_adapted_f9f59a_9 == 1 ~ 9,
      demog_race_asian_latn_adapted_a15d44_1 == 1 ~ 1,
      demog_race_asian_latn_adapted_a15d44_2 == 1 ~ 2,
      demog_race_asian_latn_adapted_a15d44_3 == 1 ~ 3,
      demog_race_asian_latn_adapted_a15d44_4 == 1 ~ 4,
      demog_race_asian_latn_adapted_a15d44_5 == 1 ~ 5,
      demog_race_asian_latn_adapted_a15d44_6 == 1 ~ 6,
      demog_race_asian_latn_adapted_a15d44_7 == 1 ~ 7,
      demog_race_pac_island_latn_adapted_47e53e_1 == 1 ~ 1,
      demog_race_pac_island_latn_adapted_47e53e_2 == 1 ~ 2,
      demog_race_pac_island_latn_adapted_47e53e_3 == 1 ~ 3,
      demog_race_pac_island_latn_adapted_47e53e_4 == 1 ~ 4,
      TRUE ~ NA_integer_
    )
  ) %>%
  ungroup()

df %>%
  group_by(group_4, race) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(group_4) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(df$group_4, df$race)
chisq.test(table_chi)

# marital status
table_1_cont %>%
  group_by(group_4, demog_marital_latn_adapted) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(group_4) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(table_1_cont$group_4, table_1_cont$demog_marital_latn_adapted)
chisq.test(table_chi)

# education
table_1_cont %>%
  group_by(group_4, demog_edu_latn_adapted) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(group_4) %>%
  mutate(
    percent = n / sum(n) * 100
  ) %>%
  ungroup()

table_chi <- table(table_1_cont$group_4, table_1_cont$demog_edu_latn_adapted)
chisq.test(table_chi)

more_demo$demog_edu_latn_adapted

table_1_cont %>% 
  filter(is.na(demog_edu_latn_adapted)) %>% 
  select(screening_visit_arm_1_1_record_id)

df %>% 
  filter(!is.na(race)) %>% 
  select(screening_visit_arm_1_1_record_id)
```

### merging figures
```{r}
# figure paper; marker
custom_colors <- c(
  "-1" = "#A6CEE3",  # light blue
  "0"  = "#1F78B4",  # medium blue
  "1"  = "#08306B"   # dark navy
)

offsets <- c("-1" = -0.1, "0" = 0, "1" = 0.1)
plot_data <- plot_data %>%
  mutate(assessment_offset = assessment + offsets[as.character(both_vs_none)])

fig1 <- ggplot(plot_data, aes(
  x = assessment_offset,
  y = mean_madrs,
  color = both_vs_none,
  group = both_vs_none
)) + 
  geom_line(linewidth = 1, alpha = 0.95) +
  geom_errorbar(
    aes(ymin = mean_madrs - se, ymax = mean_madrs + se),
    width = 0.08,
    linewidth = 0.7,
    alpha = 0.8
  ) +
  scale_color_manual(
    name = "Marker",
    values = custom_colors,
    labels = c("BUP- & SER-", "One marker", "BUP+ & SER+")
  ) +
  scale_x_continuous(
    breaks = unique(plot_data$assessment),
    labels = paste0("", unique(plot_data$assessment))
  ) +
  coord_cartesian(ylim = c(7.5, 30)) +
  labs(
    x = "Assessment",
    y = "MADRS Score",
    title = ""
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    legend.key.height = unit(0.4, "cm"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  ) +
  annotate("text", x = 7, y = 10, label = "*", size = 6) +
  annotate("text", x = 7, y = 13.5, label = "#", size = 4)

fig1

# ggsave("~/Downloads/smart_fig.png", height = 4, width = 6)

smart %>%
  mutate(both_vs_none_label =
           ifelse(both_vs_none == -1, "None", "Both")) %>% 
  group_by(both_vs_none_label) %>% 
  summarise(
    n = n(),
    responders = sum(madrs_response == 1, na.rm = TRUE),
    percent = round(100 * mean(madrs_response, na.rm = TRUE), 1)
  )

custom_colors <- c(
  "BUP-/SER-" = "#A6CEE3",  # light blue
  "One marker"  = "#1F78B4",  # medium blue
  "BUP+/SER+"  = "#08306B"   # dark navy
)

group_labels <- c(
  "-1" = "BUP-/SER-",
  "0"  = "One marker",
  "1"  = "BUP+/SER+"
)

# Prepare plot data
plot_df <- smart %>%
  mutate(both_vs_none = as.character(both_vs_none)) %>%
  group_by(both_vs_none) %>%
  summarise(
    percent = mean(madrs_response, na.rm = TRUE) * 100,
    n = n(),
    responders = sum(madrs_response == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(both_vs_none = factor(both_vs_none, levels = names(group_labels), labels = group_labels[names(group_labels) %in% both_vs_none]))

fig2 <- ggplot(plot_df, aes(x = both_vs_none, y = percent, fill = both_vs_none)) +
  geom_col(width = 0.85) +
  geom_text(aes(label = paste0(round(percent,0), "%")), 
            vjust = -0.5, size = 3) +
  scale_fill_manual(values = custom_colors, name = "Marker") +
  labs(
    x = "Marker Group",
    y = "MADRS Response Rate",
    title = ""
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    legend.key.height = unit(0.4, "cm"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  ) +
  coord_cartesian(ylim = c(0, 80)) +
  theme(
  legend.position = "none",
  axis.ticks.x = element_blank()
) +
  labs(x = "")

grid.arrange(fig1, fig2, ncol=2)

g <- arrangeGrob(fig1, fig2, ncol=2) #generates g
p3 <- g + theme(axis.text.y=element_blank(), axis.title.y=element_blank())
g=plot_grid(fig1, fig2, align = "v", ncol = 2, rel_heights = c(10/10, 10/10))

ggsave("~/Downloads/test.png", height = 4, width = 8)
```

```{r}
base_font <- 14  
axis_title_size <- 14
axis_text_size <- 10
legend_text_size <- 10
plot_title_size <- 14

fig1 <- fig1 +
  theme(
    axis.title = element_text(size = axis_title_size, face = "bold"),
    axis.text = element_text(size = axis_text_size),
    legend.text = element_text(size = legend_text_size),
    legend.position = "top",
    plot.title = element_text(size = plot_title_size),
    plot.margin = margin(5, 10, 5, 10)
  ) 

fig2 <- fig2 +
  theme(
    axis.title = element_text(size = axis_title_size, face = "bold"),
    axis.text = element_text(size = axis_text_size),
    legend.text = element_text(size = legend_text_size),
    plot.title = element_text(size = plot_title_size),
    plot.margin = margin(5, 10, 5, 10)
  )

combined <- plot_grid(
  fig1, fig2,
  ncol = 2,
  align = "h",
  rel_widths = c(1, 1)  
)

# Display
combined

combined <- plot_grid(
  fig1, fig2,
  ncol = 2,
  align = "h",
  rel_widths = c(1.5, 1),
  labels = c("A", "B"),    
  label_size = 16,
  label_fontfamily = "Helvetica",
  label_fontface = "bold"
)

# Display
combined

ggsave("~/Downloads/test.png", combined, height = 4, width = 9) 
```


```{r}
custom_colors <- c(
  "TRUE" = "#F4A582",  
  "FALSE"  = "#CA0020"  
)

offsets <- c(
  "FALSE" = -0.05,
  "TRUE"  = 0.05
)

plot_data <- plot_data %>%
  mutate(
    consistent_group = as.character(consistent_group),
    assessment_offset = assessment + offsets[consistent_group]
  )

fig1 <- 
ggplot(plot_data, aes(
  x = assessment_offset,
  y = mean_madrs,
  color = consistent_group,
  group = consistent_group
)) + 
  geom_line(linewidth = 1, alpha = 0.95) +
  geom_errorbar(
    aes(ymin = mean_madrs - se, ymax = mean_madrs + se),
    width = 0.08,
    linewidth = 0.7,
    alpha = 0.8
  ) +
  scale_color_manual(
    name = "Group",
    values = custom_colors,
    labels = c("FALSE" = "Inconsistent", "TRUE" = "Consistent")
  ) +
  scale_x_continuous(
    breaks = unique(plot_data$assessment),
    labels = unique(plot_data$assessment)
  ) +
  labs(
    x = "Assessment",
    y = "MADRS Score",
    title = ""
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.position = "top",
    legend.key.height = unit(0.4, "cm"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  )

group_labels <- c("FALSE" = "Inconsistent", "TRUE" = "Consistent")

plot_df <- smart %>%
  filter(consistent_group %in% c(TRUE, FALSE)) %>%
  group_by(consistent_group) %>%
  summarise(
    percent = mean(madrs_response, na.rm = TRUE) * 100,
    n = n(),
    responders = sum(madrs_response == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    consistent_group = factor(as.character(consistent_group),
                              levels = names(group_labels),
                              labels = group_labels)
  )

custom_colors <- c(
  "Consistent" = "#F4A582", 
  "Inconsistent" = "#CA0020"   
)

group_labels <- c(
  "FALSE" = "Inconsistent",
  "TRUE" = "Consistent"
)

plot_df <- smart %>%
  filter(consistent_group %in% c(TRUE, FALSE)) %>%
  group_by(consistent_group) %>%
  summarise(
    percent = mean(madrs_response, na.rm = TRUE) * 100,
    n = n(),
    responders = sum(madrs_response == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    consistent_group = factor(as.character(consistent_group),
                              levels = names(group_labels),
                              labels = group_labels)
  )

fig2 <- ggplot(plot_df, aes(x = consistent_group, y = percent, fill = consistent_group)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = paste0(round(percent, 0), "%")), vjust = -0.5, size = 4) +
  scale_fill_manual(values = custom_colors) +
  labs(x = NULL, y = "MADRS Response Rate") +
  coord_cartesian(ylim = c(0, 80)) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    axis.ticks.x = element_blank(),
    legend.position = "",
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  )

combined <- plot_grid(
  fig1, fig2,
  ncol = 2,
  align = "h",
  rel_widths = c(1.5, 1),
  labels = c("A", "B"),
  label_size = 16,
  label_fontfamily = "Helvetica",
  label_fontface = "bold"
)

combined

ggsave("~/Downloads/consistent_group_combined.png", combined, height = 4, width = 9)
```

```{r}
# figure paper; medication
merged_data_tall <- merged_data_tall %>% 
  mutate(associated_medication = factor(associated_medication,
                                        levels = c("SERTRALINE", "BUPROPION")))

ids_with_madrs_0_and_1 <- merged_data_tall %>%
  filter(assessment %in% c(0, 1) & !is.na(madrs)) %>%
  group_by(id) %>%
  summarise(has_both = all(c(0, 1) %in% assessment), .groups = "drop") %>%
  filter(has_both) %>%
  pull(id)

merged_data_tall <- merged_data_tall %>%
  filter(id %in% ids_with_madrs_0_and_1)

mdl <- lmer(madrs ~ assessment + associated_medication + age + sex + (1 | id),
            data = merged_data_tall)

plot_data <- merged_data_tall %>%
  group_by(associated_medication, assessment) %>%
  summarise(
    mean_madrs = mean(madrs, na.rm = TRUE),
    se = 1.96 * sd(madrs, na.rm = TRUE) / sqrt(48),  # fixed n = 48 for CI width
    .groups = "drop"
  )

custom_colors <- c(
  "SERTRALINE" = "palegreen3",
  "BUPROPION"  = "darkgreen"
)

offsets <- c("SERTRALINE" = -0.05, "BUPROPION" = 0.05)
plot_data <- plot_data %>%
  mutate(assessment_offset = assessment + offsets[as.character(associated_medication)])

fig1 <- ggplot(plot_data, aes(
  x = assessment_offset,
  y = mean_madrs,
  color = associated_medication,
  group = associated_medication
)) +
  geom_line(linewidth = 1.1, alpha = 0.95) +
  geom_errorbar(
    aes(ymin = mean_madrs - se, ymax = mean_madrs + se),
    width = 0.08,
    linewidth = 0.7,
    alpha = 0.8
  ) +
  scale_color_manual(
    name = "Medication",
    values = custom_colors,
    labels = c("Sertraline", "Bupropion")
  ) +
  scale_x_continuous(
    breaks = unique(plot_data$assessment),
    labels = paste0("T", unique(plot_data$assessment))
  ) +
  coord_cartesian(ylim = c(7.5, 30)) +
  labs(
    x = "Assessment",
    y = "MADRS Score",
    title = ""
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.position = "top",
    legend.key.height = unit(0.4, "cm"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  )

merged_data_tall %>% 
  summarise(n_unique_ids = n_distinct(id))

ggsave("~/Desktop/smart/paper/figs/treatment_drug.png", height = 4, width = 6)

custom_colors <- c(
  "SERTRALINE" = "palegreen3",
  "BUPROPION"  = "darkgreen"
)

group_labels <- c(
  "SERTRALINE" = "Sertraline",
  "BUPROPION"  = "Bupropion"
)

plot_df <- smart %>%
  filter(associated_medication %in% c("SERTRALINE", "BUPROPION")) %>%
  group_by(associated_medication) %>%
  summarise(
    percent = mean(madrs_response, na.rm = TRUE) * 100,
    n = n(),
    responders = sum(madrs_response == 1, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    associated_medication = factor(associated_medication,
                                   levels = names(group_labels),
                                   labels = group_labels)
  )

fig2 <- ggplot(plot_df, aes(x = associated_medication, y = percent, fill = associated_medication)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = paste0(round(percent, 0), "%")), vjust = -0.5, size = 4) +
  scale_fill_manual(values = c("Sertraline" = "palegreen3", "Bupropion" = "darkgreen")) +
  labs(x = NULL, y = "MADRS Response Rate") +
  coord_cartesian(ylim = c(0, 80)) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    #axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "none",
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  )

combined <- plot_grid(
  fig1, fig2,
  ncol = 2,
  align = "h",
  rel_widths = c(1.5, 1),
  labels = c("A", "B"),
  label_size = 16,
  label_fontfamily = "Helvetica",
  label_fontface = "bold"
)

combined

ggsave("~/Downloads/medication_group_combined.png", combined, height = 4, width = 9)
```

```{r}
smart <- smart %>%
  mutate(
    bup = if_else(is.na(bup), 0, bup),
    ser = if_else(is.na(ser), 0, ser),
    
    group_4 = case_when(
      bup == 1 & ser == 1 ~ "BUP+/SER+",
      bup == 1 & ser == 0 ~ "BUP+/SER-",
      bup == 0 & ser == 1 ~ "BUP-/SER+",
      bup == 0 & ser == 0 ~ "BUP-/SER-",
      TRUE ~ NA_character_
    ),
    
    bup_consistency = case_when(
      bup == 1 & associated_medication == "BUPROPION" ~ "consistent",
      bup == 0 & associated_medication == "BUPROPION" ~ "inconsistent",
      TRUE ~ NA_character_
    ),
    
    ser_consistency = case_when(
      ser == 1 & associated_medication == "SERTRALINE" ~ "consistent",
      ser == 0 & associated_medication == "SERTRALINE" ~ "inconsistent",
      TRUE ~ NA_character_
    )
  )

```


```{r}
# figure paper; 2x2 grouping 
merged_data_tall <- merged_data_tall %>% 
  mutate(group_4 = factor(
    group_4,
    levels = c("BUP+/SER+", "BUP+/SER-", "BUP-/SER+", "BUP-/SER-")
  ))

ids_with_madrs_0_and_1 <- merged_data_tall %>%
  filter(assessment %in% c(0, 1) & !is.na(madrs)) %>%
  group_by(id) %>%
  summarise(has_both = all(c(0, 1) %in% assessment), .groups = "drop") %>%
  filter(has_both) %>%
  pull(id)

merged_data_tall <- merged_data_tall %>%
  filter(id %in% ids_with_madrs_0_and_1)

mdl <- lmer(
  madrs ~ assessment + group_4 + age + sex + (1 | id),
  data = merged_data_tall
)

plot_data <- merged_data_tall %>%
  group_by(group_4, assessment) %>%
  summarise(
    mean_madrs = mean(madrs, na.rm = TRUE),
    se = 1.96 * sd(madrs, na.rm = TRUE) / sqrt(48),
    .groups = "drop"
  )

custom_colors <- c(
 "BUP-/SER-" = "#A6CEE3",   # light blue
  "BUP-/SER+" = "#B2DF8A",   # light green
  "BUP+/SER-" = "#1F78B4",   # medium blue
  "BUP+/SER+" = "#08306B"    # dark navy
)

offsets <- c(
  "BUP+/SER+" = -0.06,
  "BUP+/SER-" = -0.02,
  "BUP-/SER+" =  0.02,
  "BUP-/SER-" =  0.06
)

plot_data <- plot_data %>%
  mutate(assessment_offset = assessment + offsets[as.character(group_4)])

fig1 <- ggplot(plot_data, aes(
  x = assessment_offset,
  y = mean_madrs,
  color = group_4,
  group = group_4
)) +
  geom_line(linewidth = 1.1, alpha = 0.95) +
  geom_errorbar(
    aes(ymin = mean_madrs - se, ymax = mean_madrs + se),
    width = 0.08,
    linewidth = 0.7,
    alpha = 0.8
  ) +
  scale_color_manual(
    name = "Medication Group",
    values = custom_colors) +
  scale_x_continuous(
    breaks = unique(plot_data$assessment),
    labels = paste0("T", unique(plot_data$assessment))
  ) +
  coord_cartesian(ylim = c(7.5, 30)) +
  labs(
    x = "Assessment",
    y = "MADRS Score",
    title = ""
  ) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 14, color = "black"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    legend.position = "top",
    legend.key.height = unit(0.4, "cm"),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5, margin = margin(b = 10)),
    plot.margin = margin(10, 20, 10, 20)
  ) +
    coord_cartesian(ylim = c(6, 31))

plot_df <- smart %>%
  filter(!is.na(group_4)) %>%  # keep only valid groups
  group_by(group_4) %>%
  summarise(
    n = n(),
    responders = sum(madrs_response == 1, na.rm = TRUE),
    percent = round(100 * mean(madrs_response, na.rm = TRUE), 1),
    .groups = "drop"
  )

custom_colors <- c(
  "BUP-/SER-" = "#A6CEE3",   # light blue
  "BUP-/SER+" = "#B2DF8A",   # light green
  "BUP+/SER-" = "#1F78B4",   # medium blue
  "BUP+/SER+" = "#08306B"    # dark navy
)

group_labels <- c(
  "BUP-/SER-" = "BUP-/SER-",
  "BUP-/SER+" = "BUP-/SER+",
  "BUP+/SER-" = "BUP+/SER-",
  "BUP+/SER+" = "BUP+/SER+"
)

plot_df <- plot_df %>%
  mutate(group_4 = factor(group_4, levels = names(group_labels), labels = group_labels))

fig2 <- ggplot(plot_df, aes(x = group_4, y = percent, fill = group_4)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = paste0(round(percent, 0), "%")), vjust = -0.5, size = 4) +
  scale_fill_manual(values = custom_colors) +
  labs(x = NULL, y = "MADRS Response Rate", title = "") +
  coord_cartesian(ylim = c(0, 100)) +
  theme_minimal(base_size = 14) +
  theme(
    text = element_text(family = "Helvetica", color = "black"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.line = element_line(color = "black", linewidth = 0.6),
    axis.ticks = element_line(color = "black", linewidth = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 8_, color = "black"),
    axis.ticks.x = element_blank(),
    legend.position = "none",
    plot.margin = margin(10, 20, 10, 20)
  )

combined <- plot_grid(
  fig1, fig2,
  ncol = 2,
  align = "h",
  rel_widths = c(1.5, 1),
  labels = c("A", "B"),
  label_size = 16,
  label_fontfamily = "Helvetica",
  label_fontface = "bold"
)

combined

ggsave("~/Downloads/2x2_combined.png", combined, height = 4, width = 9)
```