#### Model A library(readxl) library(dplyr) library(tidyr) library(purrr) library(pracma) library(ggplot2) #Read and clean the HPD Excel file (first sheet, headers present) df <- read_excel( path = "HDP_bounaries_model_A.xlsx", sheet = 1, col_names = TRUE ) %>% mutate( raw_lo = pmin(Lower95.4, Upper95.4), raw_hi = pmax(Lower95.4, Upper95.4) ) %>% transmute( Boundary, Lower95.4 = -abs(raw_hi), # older = more negative Upper95.4 = -abs(raw_lo) ) %>% drop_na(Lower95.4, Upper95.4) # Check if (any(df$Lower95.4 > df$Upper95.4, na.rm=TRUE)) stop("Inverted bounds detected!") # KDE + Monte Carlo parameters n_iter <- 10000 # resamples bw <- 100 # KDE bandwidth (years) n_trough <- 5 # expected number of density minima get_troughs <- function(x, bw, n) { d <- density(x, bw = bw, n = 4096) idx <- which(diff(sign(diff(d$y))) > 0) + 1 t <- sort(d$x[idx]); length(t) <- n; t } # Run Monte Carlo KDE set.seed(42) minima_mat <- t(replicate(n_iter, { draws <- runif(nrow(df), df$Lower95.4, df$Upper95.4) get_troughs(draws, bw, n_trough) })) # Summarise phase-breaks phase_breaks <- as_tibble(minima_mat) %>% summarise(across( everything(), list( med = ~ median(.x, na.rm = TRUE), lo = ~ quantile(.x, 0.025, na.rm = TRUE), hi = ~ quantile(.x, 0.975, na.rm = TRUE) ), .names = "{col}_{fn}")) %>% pivot_longer( cols = everything(), names_to = c("phase","stat"), names_pattern = "(V[0-9]+)_(.*)" , values_to = "cal_BCE" ) %>% pivot_wider(names_from = stat, values_from = cal_BCE) %>% mutate(phase = factor(phase, levels = paste0("V", 1:n_trough))) %>% arrange(phase) cat("Phase-break medians ±95% HPD (cal BCE):\n") print(phase_breaks, digits = 0) # Build KDE envelope (10k draws) env_n <- min(10000, n_iter) env_idx <- sample.int(n_iter, env_n) dens_x <- sort(unique(unlist( map(env_idx, ~ density( runif(nrow(df), df$Lower95.4, df$Upper95.4), bw = bw )$x) ))) dens_mat <- map_dfc(env_idx, ~ { d <- density(runif(nrow(df), df$Lower95.4, df$Upper95.4), bw = bw) approx(d$x, d$y, xout = dens_x)$y }) kde_summary <- tibble( x = dens_x, median = apply(dens_mat, 1, median, na.rm = TRUE), lower95 = apply(dens_mat, 1, quantile, probs = 0.025, na.rm = TRUE), upper95 = apply(dens_mat, 1, quantile, probs = 0.975, na.rm = TRUE) ) # Plot with 1000-year ticks and custom y-axis # Oldest (most negative cal BCE) on left, y-axis breaks every 0.00025 ggplot(kde_summary, aes(x = x)) + geom_ribbon(aes(ymin = lower95, ymax = upper95), alpha = .25, na.rm = TRUE) + geom_line(aes(y = median), size = 0.8, na.rm = TRUE) + geom_vline(xintercept = phase_breaks$med, linetype = "dashed") + scale_x_continuous( "cal BCE", breaks = seq( floor(min(kde_summary$x)/1000) * 1000, ceiling(max(kde_summary$x)/1000) * 1000, by = 1000 ), labels = function(x) abs(x) ) + scale_y_continuous( "Kernel density (a.u.)", limits = c(0, max(kde_summary$upper95, na.rm = TRUE)), breaks = seq(0, max(kde_summary$upper95, na.rm = TRUE), by = 0.00025) ) + theme_minimal(base_size = 12) + theme(panel.grid = element_blank()) + ggtitle("Model A \nMonte Carlo KDE Phase Break Distributions") cat("\nSuggested phase-breaks:\n") for (i in seq_len(nrow(phase_breaks))) { cat(sprintf( "• Phase %d boundary: %d (95%% HPD %d–%d) cal BCE\n", i, round(phase_breaks$med[i]), round(phase_breaks$lo[i]), round(phase_breaks$hi[i]) )) } #### Model D library(readr) # read_lines, read_delim, parse_number library(dplyr) # %>%, mutate, summarise library(tidyr) # pivot_longer, pivot_wider library(purrr) # map, map_dfc library(pracma) # density minima logic library(ggplot2) # plotting library(stringr) # string utilities # Robustly read the header and detect delimiter --------------- csv_path <- "HDP_bounaries_model_D.csv" raw <- read_lines(csv_path, skip_empty_rows = FALSE) hdr <- raw[!str_detect(raw, "^\\s*$|^#")][1] if (is.na(hdr)) stop("No usable header line found in CSV.") hdr_clean <- hdr %>% str_replace("^\uFEFF", "") %>% # drop BOM str_replace_all('^"|"$', "") %>% # drop outer quotes str_trim() delim <- case_when( str_count(hdr_clean, ",") >= 2 ~ ",", str_count(hdr_clean, ";") >= 2 ~ ";", str_count(hdr_clean, "\t") >= 2 ~ "\t", TRUE ~ "," ) tokens <- str_split(hdr_clean, delim)[[1]] %>% str_trim() cat("Detected header tokens:", paste(tokens, collapse=", "), "\n") cat("Using delimiter:", delim, "\n\n") required <- c("Boundary","Lower95.4","Upper95.4") if (!all(required %in% tokens)) { stop("Required columns missing. Detected: ", paste(tokens, collapse=", ")) } # Read the full CSV (treat quotes literally) file = csv_path, delim = delim, quote = "", # so commas always split fields trim_ws = TRUE, comment = "#", col_types = cols() # guess column types ) # Clean and standardise column names names(df) <- names(df) %>% str_replace("^\uFEFF","") %>% str_replace_all('^"|"$',"") %>% str_trim() rename_map <- c( "Name" = "Boundary", "Label" = "Boundary", "Boundary" = "Boundary", "Lower95.4" = "Lower95.4", "Upper95.4" = "Upper95.4" ) to_rename <- rename_map[names(rename_map) %in% names(df)] df <- df %>% rename(!!!to_rename) if (!all(required %in% names(df))) { stop("Still missing required columns: ", paste(setdiff(required, names(df)), collapse=", ")) } # Parse HPDs to numeric and flip sign for cal BCE hpd <- df %>% select(Boundary, Lower95.4, Upper95.4) %>% mutate( Lower95.4 = parse_number(as.character(Lower95.4)), Upper95.4 = parse_number(as.character(Upper95.4)) ) %>% { bad <- filter(., is.na(Lower95.4) | is.na(Upper95.4)) if (nrow(bad)) { stop("Failed to parse HPDs for: ", paste(bad$Boundary, collapse=", ")) } . } %>% mutate( Lower95.4 = -abs(Lower95.4), Upper95.4 = -abs(Upper95.4) ) cat("Parsed HPD table:\n"); print(hpd); cat("\n") # Monte-Carlo + KDE parameters n_iter <- 10000 # number of random draws bw <- 100 # KDE bandwidth in years n_breaks <- 5 # number of phase-breaks expected get_minima <- function(x, bw, n) { d <- density(x, bw=bw, n=4096) idx <- which(diff(sign(diff(d$y))) > 0) + 1 m <- sort(d$x[idx]) length(m) <- n m } # 6. Run Monte-Carlo KDE set.seed(42) minima_mat <- replicate(n_iter, get_minima(runif(nrow(hpd), hpd$Lower95.4, hpd$Upper95.4), bw, n_breaks) ) minima_mat <- t(minima_mat) # each row = one simulation # 7. Summarise phase-breaks (median ±95% HPD) --------------------- phase_breaks <- as_tibble(minima_mat) %>% summarise(across( everything(), list( med = ~median(.x, na.rm=TRUE), lo = ~quantile(.x, 0.025, na.rm=TRUE), hi = ~quantile(.x, 0.975, na.rm=TRUE) ), .names = "{col}_{fn}" )) %>% pivot_longer( cols = everything(), names_to = c("phase","stat"), names_pattern = "(V[0-9]+)_(.*)", values_to = "cal_BCE" ) %>% pivot_wider(names_from=stat, values_from=cal_BCE) %>% mutate( phase = factor(phase, levels=paste0("V", 1:n_breaks)) ) %>% arrange(phase) cat("Phase-break medians ±95% HPD (cal BCE):\n") print(phase_breaks, digits=0) #Build a KDE envelope (10k draws) env_n <- min(10000, n_iter) env_idxs <- sample(n_iter, env_n) dens_x <- sort(unique(unlist( map(env_idxs, ~ density( runif(nrow(hpd), hpd$Lower95.4, hpd$Upper95.4), bw=bw )$x) ))) dens_mat <- map_dfc(env_idxs, ~ { d <- density(runif(nrow(hpd), hpd$Lower95.4, hpd$Upper95.4), bw=bw) approx(d$x, d$y, xout=dens_x)$y }) kde_summary <- tibble( x = dens_x, median = apply(dens_mat, 1, median, na.rm=TRUE), lower95 = apply(dens_mat, 1, quantile, probs=0.025, na.rm=TRUE), upper95 = apply(dens_mat, 1, quantile, probs=0.975, na.rm=TRUE) ) %>% filter(!is.na(median)) ggplot(kde_summary, aes(x = x)) + geom_ribbon(aes(ymin = lower95, ymax = upper95), alpha = .25, na.rm = TRUE) + geom_line(aes(y = median), size = 0.8, na.rm = TRUE) + geom_vline(xintercept = phase_breaks$med, linetype = "dashed") + scale_x_continuous( "cal BCE", breaks = seq( floor(min(kde_summary$x)/1000) * 1000, ceiling(max(kde_summary$x)/1000) * 1000, by = 1000 ), labels = function(x) abs(x) ) + scale_y_continuous( "Kernel density (a.u.)", limits = c(0, max(kde_summary$upper95, na.rm = TRUE)), breaks = seq(0, max(kde_summary$upper95, na.rm = TRUE), by = 0.00025) ) + theme_minimal(base_size = 12) + theme(panel.grid = element_blank()) + theme(panel.grid = element_blank()) + ggtitle("Model D \nMonte Carlo KDE Phase Break Distributions") cat("\nSuggested phase-breaks:\n") for (i in seq_len(nrow(phase_breaks))) { cat(sprintf( "• Phase %d boundary: %d (95%% HPD %d–%d) cal BCE\n", i, round(phase_breaks$med[i]), round(phase_breaks$lo[i]), round(phase_breaks$hi[i]) )) }