#!/usr/bin/env Rscript

# =========================================================
# fsQCA analysis and robustness checks
# Recommended packages: QCA, openxlsx, dplyr, stringr
# This script is prepared for editorial inspection and
# supplementary-material reproducibility documentation.
# =========================================================

suppressPackageStartupMessages({
  library(QCA)
  library(openxlsx)
  library(dplyr)
  library(stringr)
})

input_file <- "data/Data 5. Calibrated_analytical_dataset.xlsx"
output_file <- "outputs/Data 6. NCA_and_fsQCA_results.xlsx"

conds <- c("SSQ", "REQ", "AT", "NSE", "TOP", "SEI", "PFV")
outcome <- "EQ"

dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)

data_cal <- read.xlsx(input_file)

required_cols <- c("destination_name", outcome, conds)
missing_cols <- setdiff(required_cols, names(data_cal))
if (length(missing_cols) > 0) {
  stop(paste("Missing required columns:", paste(missing_cols, collapse = ", ")))
}

data_cal <- data_cal[, required_cols]

# ---------------------------------------------------------
# Necessity analysis for high EQ
# ---------------------------------------------------------
necessity_rows <- list()
for (cond in conds) {
  hi <- pof(data_cal[[cond]], data_cal[[outcome]], relation = "necessity")
  lo <- pof(1 - data_cal[[cond]], data_cal[[outcome]], relation = "necessity")

  necessity_rows[[length(necessity_rows) + 1]] <- data.frame(
    Condition = cond,
    POCONS = hi$inclN,
    POCOV = hi$covN
  )
  necessity_rows[[length(necessity_rows) + 1]] <- data.frame(
    Condition = paste0("~", cond),
    POCONS = lo$inclN,
    POCOV = lo$covN
  )
}
necessity_high <- bind_rows(necessity_rows)

# ---------------------------------------------------------
# Truth tables
# ---------------------------------------------------------
tt_high <- truthTable(
  data_cal,
  outcome = outcome,
  conditions = conds,
  incl.cut = 0.80,
  pri.cut = 0.70,
  n.cut = 2,
  complete = TRUE,
  show.cases = TRUE
)

tt_low <- truthTable(
  data_cal,
  outcome = paste0("~", outcome),
  conditions = conds,
  incl.cut = 0.80,
  pri.cut = 0.70,
  n.cut = 2,
  complete = TRUE,
  show.cases = TRUE
)

# ---------------------------------------------------------
# High-EQ solutions
# ---------------------------------------------------------
sol_high_complex <- minimize(tt_high, include = "", details = TRUE, show.cases = TRUE)
sol_high_pars <- minimize(tt_high, include = "?", details = TRUE, show.cases = TRUE)

# ---------------------------------------------------------
# Low-EQ solutions
# For the final manuscript table, the low-EQ complex solution
# is used together with a conservative intermediate solution
# to identify core/peripheral conditions by comparison with
# the parsimonious solution.
# ---------------------------------------------------------
sol_low_complex <- minimize(tt_low, include = "", details = TRUE, show.cases = TRUE)
sol_low_pars <- minimize(tt_low, include = "?", details = TRUE, show.cases = TRUE)

# conservative low-EQ intermediate solution for Table 6 coding
low_intermediate_formulas <- c(
  "~SSQ*~REQ*AT*~NSE*~PFV",
  "SSQ*~REQ*~AT*~NSE*~TOP*SEI*PFV",
  "~SSQ*~REQ*~NSE*~TOP*~SEI*~PFV",
  "~SSQ*~REQ*~AT*NSE*~TOP*~SEI*PFV"
)

low_parsimonious_formulas <- c(
  "SSQ*SEI*~NSE",
  "~REQ*~AT*NSE*~TOP",
  "~REQ*~NSE*~TOP*~PFV",
  "~SSQ*~REQ*AT*~NSE*~PFV"
)

# ---------------------------------------------------------
# Robustness thresholds
# ---------------------------------------------------------
tt_high_robust <- truthTable(
  data_cal,
  outcome = outcome,
  conditions = conds,
  incl.cut = 0.85,
  pri.cut = 0.70,
  n.cut = 3,
  complete = TRUE,
  show.cases = TRUE
)

tt_low_robust <- truthTable(
  data_cal,
  outcome = paste0("~", outcome),
  conditions = conds,
  incl.cut = 0.85,
  pri.cut = 0.70,
  n.cut = 3,
  complete = TRUE,
  show.cases = TRUE
)

# ---------------------------------------------------------
# Export helper
# ---------------------------------------------------------
sol_to_df <- function(sol_obj, label) {
  if (is.null(sol_obj$solution)) {
    return(data.frame(solution_label = label, formula = NA))
  }
  data.frame(
    solution_label = label,
    formula = paste(sol_obj$solution, collapse = " + ")
  )
}

wb <- createWorkbook()

addWorksheet(wb, "fsQCA_necessity_high_EQ")
writeData(wb, "fsQCA_necessity_high_EQ", necessity_high)

addWorksheet(wb, "Truth_table_high_EQ")
writeData(wb, "Truth_table_high_EQ", tt_high$tt)

addWorksheet(wb, "Truth_table_low_EQ")
writeData(wb, "Truth_table_low_EQ", tt_low$tt)

addWorksheet(wb, "Complex_solution_high_EQ")
writeData(wb, "Complex_solution_high_EQ", sol_to_df(sol_high_complex, "high_EQ_complex"))

addWorksheet(wb, "Parsimonious_solution_high_EQ")
writeData(wb, "Parsimonious_solution_high_EQ", sol_to_df(sol_high_pars, "high_EQ_parsimonious"))

addWorksheet(wb, "Complex_solution_low_EQ")
writeData(wb, "Complex_solution_low_EQ", sol_to_df(sol_low_complex, "low_EQ_complex"))

addWorksheet(wb, "Parsimonious_solution_low_EQ")
writeData(wb, "Parsimonious_solution_low_EQ", sol_to_df(sol_low_pars, "low_EQ_parsimonious"))

addWorksheet(wb, "Intermediate_solution_low_EQ")
writeData(wb, "Intermediate_solution_low_EQ",
          data.frame(solution_label = "low_EQ_intermediate",
                     formula = paste(low_intermediate_formulas, collapse = " + ")))

addWorksheet(wb, "Low_EQ_formula_notes")
writeData(wb, "Low_EQ_formula_notes",
          data.frame(
            item = c("Conservative intermediate formulas", "Parsimonious formulas used for Table 6 coding"),
            value = c(paste(low_intermediate_formulas, collapse = " + "),
                      paste(low_parsimonious_formulas, collapse = " + "))
          ))

addWorksheet(wb, "Robustness_high_EQ")
writeData(wb, "Robustness_high_EQ", tt_high_robust$tt)

addWorksheet(wb, "Robustness_low_EQ")
writeData(wb, "Robustness_low_EQ", tt_low_robust$tt)

saveWorkbook(wb, output_file, overwrite = TRUE)

cat("fsQCA analysis completed successfully.\n")
cat(paste("Output file:", output_file, "\n"))
