####Model A

# install.packages(c("readxl","dplyr","stringr"))   
library(readxl)
library(dplyr)
library(stringr)

# ---------------------------------------------------------------------------
# Model A: rank the selected "Start ..." boundaries using the matrix stored
# below the "Order" header row of the "Model A (only boundaries)" sheet.
# Produces `result`, a tibble with columns Rank, Event, BeforeCount, SumProb.
# ---------------------------------------------------------------------------

# 1) Path to the Excel workbook (relative to the current working directory)
file_path <- "SI04.xlsx"

# 2) Read the entire "Model A (only boundaries)" sheet without headers, so the
#    header row can be located by its content instead of a fixed position
raw <- read_excel(
  path      = file_path,
  sheet     = "Model A (only boundaries)",
  col_names = FALSE
)

# 3) Find the header row where column A == "Order"
#    (which() ignores NA comparisons, so empty cells in column A are skipped)
hdr <- which(raw[[1]] == "Order")
if (length(hdr) != 1) stop("Could not locate a unique 'Order' row in column A.")

# Guard the row range used below: (hdr + 1):nrow(raw) would count backwards
# if the header happened to be the last row of the sheet.
if (hdr >= nrow(raw)) stop("No data rows found below the 'Order' header row.")
body_rows <- (hdr + 1):nrow(raw)

# 4) Extract event labels (columns B -> end) from that header row
event_labels <- raw[hdr, -1] %>%
  unlist() %>%
  as.character() %>%
  str_trim()

# 5) Pull the block of values below that header (everything except column A)
data_block <- raw[body_rows, -1]

# 6) Pull row labels (column A of those same rows)
row_labels <- raw[body_rows, 1] %>%
  unlist() %>%
  as.character() %>%
  str_trim()

# 7) Coerce the data block to a numeric matrix.
#    across(everything(), ...) replaces the superseded mutate_all(); cells
#    that cannot be parsed as numbers become NA (with a coercion warning).
Pmat_numeric <- data_block %>%
  mutate(across(everything(), as.numeric)) %>%
  as.matrix()

# 8) Assign row and column names so the matrix can be indexed by label
rownames(Pmat_numeric) <- row_labels
colnames(Pmat_numeric) <- event_labels

# 9) Specify exactly the Start boundaries to keep
keep_starts <- c(
  "Start Huaricanga","Start Huaricoto","Start Huaynuna","Start Piruro",
  "Start Acshicupoto","Start Shillacoto","Start Hualcayan","Start Caral",
  "Start Kotosh","Start La Galgada","Start Pampa de las Llamas",
  "Start Buena Vista","Start El Paraiso","Start Bahia Seca",
  "Start Taukachi-Konkan","Start Huacaloma","Start Gramalote","Start Chavin"
)

# 10) Check they all exist, both as row labels and as column labels.
#     The subset in step 11 indexes both dimensions by name, so a label that
#     is absent from the header row would otherwise fail there with an
#     uninformative "subscript out of bounds" error.
missing <- setdiff(keep_starts, row_labels)
if (length(missing) > 0) {
  stop("These Start‐labels are missing:\n", paste(missing, collapse = ", "))
}
missing_cols <- setdiff(keep_starts, event_labels)
if (length(missing_cols) > 0) {
  stop(
    "These Start-labels are missing from the header row:\n",
    paste(missing_cols, collapse = ", ")
  )
}

# 11) Subset to the Start x Start numeric matrix (drop = FALSE keeps it 2-D)
Pstart <- Pmat_numeric[keep_starts, keep_starts, drop = FALSE]

# 12) Compute ranking metrics per row:
#     BeforeCount = number of cells in the row that exceed 0.50
#     SumProb     = sum of the row's cells
#     NA cells are ignored in both.
#     NOTE(review): presumably each cell is P(row event precedes column event)
#     and the diagonal is 0/NA in the sheet - confirm against the workbook,
#     since diagonal cells are included in both metrics.
BeforeCount <- rowSums(Pstart > 0.50, na.rm = TRUE)
SumProb     <- rowSums(Pstart, na.rm = TRUE)

# 13) Assemble, rank (BeforeCount first, SumProb as tie-breaker), and display
result <- tibble(
  Event       = keep_starts,
  BeforeCount = BeforeCount,
  SumProb     = SumProb
) %>%
  arrange(desc(BeforeCount), desc(SumProb)) %>%
  mutate(Rank = row_number()) %>%
  select(Rank, Event, BeforeCount, SumProb)

print(result)

# View() needs an interactive data viewer; skip it under Rscript / batch runs.
if (interactive()) {
  View(result)
}

####Model D

# install.packages(c("readxl","dplyr","stringr","ggplot2"))
library(readxl)
library(dplyr)
library(stringr)
library(ggplot2)

# ---------------------------------------------------------------------------
# Model D: rank the selected "Start ..." boundaries using the matrix stored
# below the "Order" header row of the Model D sheet.
# Produces `result`, a tibble with columns Rank, Event, BeforeCount, SumProb.
# ---------------------------------------------------------------------------

# 1) Path to the Excel workbook (relative to the current working directory)
file_path <- "SI04.xlsx"

# 2) Read the entire Model D sheet without headers, so the header row can be
#    located by its content instead of a fixed position.
#    NOTE(review): the sheet name below is spelled "bondaries"; presumably
#    that matches the tab name in the workbook - confirm before correcting it.
raw <- read_excel(
  path      = file_path,
  sheet     = "Model D (only bondaries)",
  col_names = FALSE
)

# 3) Find the header row where column A == "Order"
#    (which() ignores NA comparisons, so empty cells in column A are skipped)
hdr <- which(raw[[1]] == "Order")
if (length(hdr) != 1) stop("Could not locate a unique 'Order' row in column A.")

# Guard the row range used below: (hdr + 1):nrow(raw) would count backwards
# if the header happened to be the last row of the sheet.
if (hdr >= nrow(raw)) stop("No data rows found below the 'Order' header row.")
body_rows <- (hdr + 1):nrow(raw)

# 4) Extract event labels (columns B -> end) from that header row
event_labels <- raw[hdr, -1] %>%
  unlist() %>%
  as.character() %>%
  str_trim()

# 5) Pull the block of values below that header (everything except column A)
data_block <- raw[body_rows, -1]

# 6) Pull row labels (column A of those same rows)
row_labels <- raw[body_rows, 1] %>%
  unlist() %>%
  as.character() %>%
  str_trim()

# 7) Coerce the data block to a numeric matrix.
#    across(everything(), ...) replaces the superseded mutate_all(); cells
#    that cannot be parsed as numbers become NA (with a coercion warning).
Pmat_numeric <- data_block %>%
  mutate(across(everything(), as.numeric)) %>%
  as.matrix()

# 8) Assign row and column names so the matrix can be indexed by label
rownames(Pmat_numeric) <- row_labels
colnames(Pmat_numeric) <- event_labels

# 9) Specify exactly the Start boundaries to keep
keep_starts <- c(
  "Start Huaricanga","Start Huaynuna","Start Piruro",
  "Start Acshicupoto","Start Hualcayan","Start Caral",
  "Start La Galgada","Start Pampa de las Llamas",
  "Start Buena Vista","Start El Paraiso",
  "Start Taukachi-Konkan","Start Gramalote","Start Chavin"
)

# 10) Check they all exist, both as row labels and as column labels.
#     The subset in step 11 indexes both dimensions by name, so a label that
#     is absent from the header row would otherwise fail there with an
#     uninformative "subscript out of bounds" error.
missing <- setdiff(keep_starts, row_labels)
if (length(missing) > 0) {
  stop("These Start‐labels are missing:\n", paste(missing, collapse = ", "))
}
missing_cols <- setdiff(keep_starts, event_labels)
if (length(missing_cols) > 0) {
  stop(
    "These Start-labels are missing from the header row:\n",
    paste(missing_cols, collapse = ", ")
  )
}

# 11) Subset to the Start x Start numeric matrix (drop = FALSE keeps it 2-D)
Pstart <- Pmat_numeric[keep_starts, keep_starts, drop = FALSE]

# 12) Compute ranking metrics per row:
#     BeforeCount = number of cells in the row that exceed 0.50
#     SumProb     = sum of the row's cells
#     NA cells are ignored in both.
#     NOTE(review): presumably each cell is P(row event precedes column event)
#     and the diagonal is 0/NA in the sheet - confirm against the workbook,
#     since diagonal cells are included in both metrics.
BeforeCount <- rowSums(Pstart > 0.50, na.rm = TRUE)
SumProb     <- rowSums(Pstart, na.rm = TRUE)

# 13) Assemble, rank (BeforeCount first, SumProb as tie-breaker), and display
result <- tibble(
  Event       = keep_starts,
  BeforeCount = BeforeCount,
  SumProb     = SumProb
) %>%
  arrange(desc(BeforeCount), desc(SumProb)) %>%
  mutate(Rank = row_number()) %>%
  select(Rank, Event, BeforeCount, SumProb)

print(result)

# View() needs an interactive data viewer; skip it under Rscript / batch runs.
if (interactive()) {
  View(result)
}