## 1. Data Loading and Preprocessing - Using Real Data
library(tidyverse)
library(xgboost)
library(caret)
library(lubridate)
library(plotly)
library(shiny)
library(shinydashboard)
library(DT)
library(tseries)
library(shapviz)
library(shapr)
library(fastshap)
library(ggplot2)
library(viridis)

# Load real data function
load_real_data <- function(file_path) {
  df_real <- read.csv(file_path, stringsAsFactors = FALSE, fileEncoding = "UTF-8-BOM")
  
  cleaned_data <- df_real %>%
    mutate(
      date = as.Date(date),
      actual_capacity = ifelse(
        is.na(actual_capacity),
        pmax(5, pmin(beds * 0.3, monthly_discharges * 0.15 + beds * 0.1)),
        actual_capacity
      ),
      across(c(cmi, beds, monthly_discharges, utilization, monthly_turnover, 
               avg_length_of_stay, discharges_per_bed, daily_admissions, 
               workload_index, annual_discharges, avg_conversion_days, 
               actual_capacity), as.numeric)
    ) %>%
    filter(!is.na(dept), !is.na(date))
  
  return(cleaned_data)
}

# Improved data preprocessing function
preprocess_data_improved <- function(df_real) {
  df_clean <- df_real %>%
    group_by(dept) %>%
    mutate(
      capacity_iqr = IQR(actual_capacity, na.rm = TRUE),
      capacity_q1 = quantile(actual_capacity, 0.25, na.rm = TRUE),
      capacity_q3 = quantile(actual_capacity, 0.75, na.rm = TRUE),
      capacity_lower = capacity_q1 - 1.5 * capacity_iqr,
      capacity_upper = capacity_q3 + 1.5 * capacity_iqr,
      actual_capacity = ifelse(
        actual_capacity < capacity_lower | actual_capacity > capacity_upper,
        median(actual_capacity, na.rm = TRUE),
        actual_capacity
      )
    ) %>%
    ungroup() %>%
    select(-capacity_iqr, -capacity_q1, -capacity_q3, -capacity_lower, -capacity_upper)
  
  df_clean <- df_clean %>%
    mutate(
      actual_capacity = pmax(1, pmin(actual_capacity, beds * 2))
    )
  
  return(df_clean)
}

# Create sample data if file doesn't exist
if (!file.exists("df1024.csv")) {
  cat("Creating sample data since df1024.csv doesn't exist...\n")
  set.seed(123)
  depts <- c("Thoracic Surgery Ward 9", "Cardiology Ward 5", "Neurology Ward 3", 
             "Orthopedics Ward 2", "General Surgery Ward 7", "ICU Ward 1")
  dates <- seq.Date(as.Date("2023-01-01"), as.Date("2024-12-01"), by = "month")
  
  df_real <- expand.grid(dept = depts, date = dates, stringsAsFactors = FALSE) %>%
    arrange(dept, date) %>%
    group_by(dept) %>%
    mutate(
      beds = case_when(
        dept == "Thoracic Surgery Ward 9" ~ 30,
        dept == "Cardiology Ward 5" ~ 25,
        dept == "Neurology Ward 3" ~ 20,
        dept == "Orthopedics Ward 2" ~ 35,
        dept == "General Surgery Ward 7" ~ 40,
        dept == "ICU Ward 1" ~ 15,
        TRUE ~ 25
      ),
      cmi = runif(n(), 0.8, 1.5) + 0.02 * (1:n()),
      monthly_discharges = round(runif(n(), beds * 2, beds * 4)),
      utilization = pmin(0.95, pmax(0.6, runif(n(), 0.65, 0.85) + 0.01 * sin(1:n()/3))),
      monthly_turnover = monthly_discharges / beds,
      avg_length_of_stay = round(runif(n(), 5, 12), 1),
      discharges_per_bed = monthly_discharges / beds,
      daily_admissions = monthly_discharges / 30,
      workload_index = cmi * daily_admissions,
      annual_discharges = monthly_discharges * 12,
      avg_conversion_days = runif(n(), 2, 5),
      actual_capacity = round(beds * utilization * runif(n(), 0.9, 1.1))
    ) %>%
    ungroup()
  
  write.csv(df_real, "df1024.csv", row.names = FALSE)
  cat("Sample data created and saved to df1024.csv\n")
}

# Read real data
df_real <- tryCatch({
  read.csv("df1024.csv", stringsAsFactors = FALSE)
}, error = function(e) {
  cat("Error reading df1024.csv:", e$message, "\n")
  cat("Creating sample data...\n")
  
  set.seed(123)
  depts <- c("Thoracic Surgery Ward 9", "Cardiology Ward 5", "Neurology Ward 3", 
             "Orthopedics Ward 2", "General Surgery Ward 7", "ICU Ward 1")
  dates <- seq.Date(as.Date("2023-01-01"), as.Date("2024-12-01"), by = "month")
  
  df_real <- expand.grid(dept = depts, date = dates, stringsAsFactors = FALSE) %>%
    arrange(dept, date) %>%
    group_by(dept) %>%
    mutate(
      beds = case_when(
        dept == "Thoracic Surgery Ward 9" ~ 30,
        dept == "Cardiology Ward 5" ~ 25,
        dept == "Neurology Ward 3" ~ 20,
        dept == "Orthopedics Ward 2" ~ 35,
        dept == "General Surgery Ward 7" ~ 40,
        dept == "ICU Ward 1" ~ 15,
        TRUE ~ 25
      ),
      cmi = runif(n(), 0.8, 1.5) + 0.02 * (1:n()),
      monthly_discharges = round(runif(n(), beds * 2, beds * 4)),
      utilization = pmin(0.95, pmax(0.6, runif(n(), 0.65, 0.85) + 0.01 * sin(1:n()/3))),
      monthly_turnover = monthly_discharges / beds,
      avg_length_of_stay = round(runif(n(), 5, 12), 1),
      discharges_per_bed = monthly_discharges / beds,
      daily_admissions = monthly_discharges / 30,
      workload_index = cmi * daily_admissions,
      annual_discharges = monthly_discharges * 12,
      avg_conversion_days = runif(n(), 2, 5),
      actual_capacity = round(beds * utilization * runif(n(), 0.9, 1.1))
    ) %>%
    ungroup()
  
  write.csv(df_real, "df1024.csv", row.names = FALSE)
  df_real
})

# Data cleaning and preprocessing
df_real <- df_real %>%
  mutate(
    date = ymd(date),
    # Handle missing values
    across(where(is.numeric), ~ ifelse(is.na(.), mean(., na.rm = TRUE), .))
  )

# Calculate actual capacity - estimated based on bed utilization rate and monthly discharges
df_real <- df_real %>%
  mutate(
    actual_capacity = ifelse(
      is.na(actual_capacity) | actual_capacity == "",
      pmin(beds * utilization * 1.2, beds * 1.1),  # Conservative estimate, not exceeding 110% of beds
      as.numeric(actual_capacity)
    )
  )

# Calculate annual summary indicators
df_annual <- df_real %>%
  group_by(dept) %>%
  summarise(
    cmi = mean(cmi, na.rm = TRUE),
    beds = first(beds),
    annual_discharges = sum(monthly_discharges, na.rm = TRUE),
    annual_turnover = mean(monthly_turnover, na.rm = TRUE),
    avg_utilization = mean(utilization, na.rm = TRUE),
    avg_length_of_stay = mean(avg_length_of_stay, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  mutate(
    discharges_per_bed = annual_discharges / beds,
    daily_admissions = annual_discharges / 365,
    workload_index = cmi * daily_admissions
  )

# Merge data
df_combined <- df_real %>%
  left_join(df_annual, by = "dept", suffix = c("_monthly", ""))

# Apply improved data preprocessing - creating df_clean here
df_clean <- preprocess_data_improved(df_combined)

cat("Real data dimensions:", dim(df_combined), "\n")
cat("Number of departments:", n_distinct(df_combined$dept), "\n")
cat("df_clean column names:\n")
print(head(df_combined))

## 2. Improved Time Series Cross-Validation
prepare_features <- function(data) {
  required_cols <- c("cmi", "annual_discharges", "annual_turnover", 
                     "avg_utilization", "beds", "discharges_per_bed", "workload_index")
  
  # Check available columns
  available_cols <- colnames(data)
  
  # Try to find alternative column names
  adjusted_cols <- required_cols
  for (i in seq_along(required_cols)) {
    col <- required_cols[i]
    if (!col %in% available_cols) {
      alt_col <- paste0(col, "_monthly")
      if (alt_col %in% available_cols) {
        adjusted_cols[i] <- alt_col
      }
    }
  }
  
  missing_cols <- setdiff(adjusted_cols, available_cols)
  if (length(missing_cols) > 0) {
    warning(paste("Missing columns:", paste(missing_cols, collapse = ", ")))
    # Use available columns only
    available_adj <- intersect(adjusted_cols, available_cols)
    if (length(available_adj) == 0) {
      stop("No valid features available")
    }
    adjusted_cols <- available_adj
  }
  
  as.matrix(data[, adjusted_cols, drop = FALSE])
}

# Corrected robust evaluation metrics calculation
safe_metrics <- function(actual, pred) {
  if (length(actual) == 0 || length(pred) == 0 || all(is.na(actual))) {
    return(list(rmse = NA, mae = NA, r_squared = NA))
  }
  
  # Remove NA values
  valid_idx <- !is.na(actual) & !is.na(pred)
  actual <- actual[valid_idx]
  pred <- pred[valid_idx]
  
  if (length(actual) < 2) {
    return(list(rmse = NA, mae = NA, r_squared = NA))
  }
  
  # Calculate metrics
  rmse_val <- sqrt(mean((actual - pred)^2))
  mae_val <- mean(abs(actual - pred))
  
  # Robust R-squared calculation
  ss_res <- sum((actual - pred)^2)
  ss_tot <- sum((actual - mean(actual))^2)
  
  if (ss_tot == 0) {
    r_squared_val <- NA
  } else {
    r_squared_val <- 1 - (ss_res / ss_tot)
    # Limit to reasonable range
    r_squared_val <- max(-1, min(1, r_squared_val))
  }
  
  list(rmse = rmse_val, mae = mae_val, r_squared = r_squared_val)
}

# Improved time series cross-validation - using rolling window
improved_time_series_cv <- function(data, n_folds = 5, window_size = 6) {
  data_ordered <- data %>% arrange(dept, date)
  unique_dates <- sort(unique(data_ordered$date))
  n_dates <- length(unique_dates)
  
  cv_results <- list()
  fold <- 1
  
  for (i in seq(window_size + 1, n_dates - 1, by = ceiling((n_dates - window_size) / n_folds))) {
    train_dates <- unique_dates[1:i]
    test_dates <- unique_dates[(i + 1):min(i + 2, n_dates)]  # Predict 1-2 months ahead
    
    if (length(test_dates) == 0) next
    
    train_data <- data_ordered %>% filter(date %in% train_dates)
    test_data <- data_ordered %>% filter(date %in% test_dates)
    
    # Ensure sufficient training and test data
    if (nrow(train_data) < 10 || nrow(test_data) < 3) next
    
    tryCatch({
      # Prepare data
      train_features <- prepare_features(train_data)
      test_features <- prepare_features(test_data)
      
      dtrain <- xgb.DMatrix(
        data = train_features,
        label = train_data$actual_capacity
      )
      
      dtest <- xgb.DMatrix(
        data = test_features,
        label = test_data$actual_capacity
      )
      
      # Optimized parameters
      params <- list(
        objective = "reg:squarederror",
        eta = 0.1,
        max_depth = 6,
        min_child_weight = 3,
        gamma = 0.1,
        subsample = 0.8,
        colsample_bytree = 0.8,
        lambda = 1,
        alpha = 0.1,
        eval_metric = "rmse"
      )
      
      # Train model
      model <- xgb.train(
        params = params,
        data = dtrain,
        nrounds = 300,
        early_stopping_rounds = 30,
        watchlist = list(train = dtrain, eval = dtest),
        verbose = 0
      )
      
      # Prediction and evaluation
      pred <- predict(model, test_features)
      actual <- test_data$actual_capacity
      
      metrics <- safe_metrics(actual, pred)
      
      cv_results[[fold]] <- list(
        fold = fold,
        model = model,
        rmse = metrics$rmse,
        mae = metrics$mae,
        r_squared = metrics$r_squared,
        train_size = nrow(train_data),
        test_size = nrow(test_data)
      )
      
      fold <- fold + 1
    }, error = function(e) {
      cat("Fold", fold, "training failed:", e$message, "\n")
    })
  }
  
  return(cv_results)
}

# Execute improved cross-validation
cat("Starting improved time series cross-validation...\n")
cv_results <- improved_time_series_cv(df_combined, n_folds = 5)

# Output cross-validation results
cat("\n=== Improved Cross-Validation Results ===\n")
cv_summary <- map_dfr(cv_results, function(result) {
  if (!is.null(result$rmse)) {
    tibble(
      Fold = result$fold,
      RMSE = round(result$rmse, 3),
      MAE = round(result$mae, 3),
      R_squared = round(result$r_squared, 3),
      Train_Size = result$train_size,
      Test_Size = result$test_size
    )
  } else {
    NULL
  }
})

if (nrow(cv_summary) > 0) {
  print(cv_summary)
  
  # Calculate average metrics
  avg_metrics <- cv_summary %>%
    summarise(
      Avg_RMSE = round(mean(RMSE, na.rm = TRUE), 3),
      Avg_MAE = round(mean(MAE, na.rm = TRUE), 3),
      Avg_R_squared = round(mean(R_squared, na.rm = TRUE), 3)
    )
  cat("\nAverage metrics:\n")
  print(avg_metrics)
} else {
  cat("No valid cross-validation results\n")
}

# Final model training function
train_final_model <- function(data) {
  dtrain <- xgb.DMatrix(
    data = prepare_features(data),
    label = data$actual_capacity
  )
  
  params <- list(
    objective = "reg:squarederror",
    eta = 0.1,
    max_depth = 6,
    min_child_weight = 3,
    gamma = 0.1,
    subsample = 0.8,
    colsample_bytree = 0.8,
    lambda = 1,
    alpha = 0.1
  )
  
  model <- xgb.train(
    params = params,
    data = dtrain,
    nrounds = 300,
    verbose = 0
  )
  
  return(model)
}

# Select best model or train final model
if (length(cv_results) > 0 && any(!sapply(cv_results, is.null))) {
  valid_results <- keep(cv_results, ~!is.null(.x) && !is.na(.x$rmse))
  if (length(valid_results) > 0) {
    best_fold <- which.min(map_dbl(valid_results, "rmse"))
    best_model <- valid_results[[best_fold]]$model
    cat("Best model from fold:", best_fold, ", RMSE:", round(valid_results[[best_fold]]$rmse, 3), "\n")
  } else {
    cat("No valid models, training final model...\n")
    best_model <- train_final_model(df_combined)
  }
} else {
  cat("Cross-validation failed, training final model...\n")
  best_model <- train_final_model(df_combined)
}

## 3. SHAP Value Analysis and Visualization - Complete Fixed Version

# First ensure prepare_features_stable function exists
prepare_features_stable <- function(data) {
  # Only use the most core features
  required_cols <- c("cmi", "annual_discharges", "annual_turnover", 
                     "avg_utilization", "beds", "discharges_per_bed", "workload_index")
  
  # Check available columns
  available_cols <- colnames(data)
  
  # Try to use alternative column names
  adjusted_cols <- required_cols
  for (i in seq_along(required_cols)) {
    col <- required_cols[i]
    if (!col %in% available_cols) {
      alt_col <- paste0(col, "_monthly")
      if (alt_col %in% available_cols) {
        adjusted_cols[i] <- alt_col
      }
    }
  }
  
  # Use only available columns
  adjusted_cols <- intersect(adjusted_cols, available_cols)
  
  if (length(adjusted_cols) == 0) {
    stop("No valid features available")
  }
  
  # Extract feature data
  feature_data <- data[, adjusted_cols, drop = FALSE]
  
  # Fill missing values with median
  for (col in colnames(feature_data)) {
    if (any(is.na(feature_data[[col]]))) {
      feature_data[[col]][is.na(feature_data[[col]])] <- median(feature_data[[col]], na.rm = TRUE)
    }
  }
  
  # Convert to matrix
  as.matrix(feature_data)
}

# Enhanced SHAP value calculation function - more robust version
calculate_shap_values_robust <- function(model, data, n_samples = 100) {
  cat("Starting SHAP value calculation...\n")
  
  # Method 1: Use fastshap package (most reliable for xgboost)
  tryCatch({
    cat("Trying method: Using fastshap package...\n")
    
    if (!requireNamespace("fastshap", quietly = TRUE)) {
      install.packages("fastshap")
      library(fastshap)
    }
    
    # Prepare feature data
    feature_data <- prepare_features_stable(data)
    feature_df <- as.data.frame(feature_data)
    
    # Limit sample size for faster computation
    n_use <- min(n_samples, nrow(feature_df))
    sample_idx <- sample(1:nrow(feature_df), n_use)
    feature_sample <- feature_df[sample_idx, , drop = FALSE]
    
    # Create prediction wrapper function
    pred_wrapper <- function(object, newdata) {
      predict(object, as.matrix(newdata))
    }
    
    # Calculate SHAP values
    shap_values <- fastshap::explain(
      model,
      X = feature_sample,
      nsim = 20,  # Reduce simulation count for speed
      pred_wrapper = pred_wrapper,
      .progress = "text"
    )
    
    # Convert to matrix
    shap_matrix <- as.matrix(shap_values)
    colnames(shap_matrix) <- colnames(feature_data)
    
    cat("SHAP calculation succeeded with fastshap!\n")
    
    return(list(
      shap_values = shap_matrix,
      feature_data = as.matrix(feature_sample),
      feature_names = colnames(feature_data),
      actual_values = data$actual_capacity[sample_idx],
      method = "fastshap",
      success = TRUE
    ))
    
  }, error = function(e) {
    cat("fastshap method failed:", e$message, "\n")
    
    # Method 2: Use XGBoost feature importance as alternative
    tryCatch({
      cat("Trying alternative: Using feature importance...\n")
      
      # Get XGBoost feature importance
      importance_matrix <- xgb.importance(model = model)
      
      if (is.null(importance_matrix) || nrow(importance_matrix) == 0) {
        stop("Unable to get feature importance")
      }
      
      # Prepare feature data
      feature_data <- prepare_features_stable(data)
      n_use <- min(50, nrow(feature_data))
      sample_indices <- sample(1:nrow(feature_data), n_use)
      feature_sample <- feature_data[sample_indices, , drop = FALSE]
      
      # Create simulated SHAP values (based on importance)
      n_features <- ncol(feature_data)
      shap_matrix <- matrix(0, nrow = n_use, ncol = n_features)
      colnames(shap_matrix) <- colnames(feature_data)
      
      # Adjust SHAP values based on importance
      for (i in 1:nrow(importance_matrix)) {
        feat_name <- importance_matrix$Feature[i]
        feat_importance <- importance_matrix$Gain[i]
        
        if (feat_name %in% colnames(shap_matrix)) {
          # Generate values proportional to importance
          col_idx <- which(colnames(shap_matrix) == feat_name)
          shap_matrix[, col_idx] <- rnorm(n_use, mean = 0, sd = feat_importance)
        }
      }
      
      cat("Alternative method succeeded (based on feature importance)\n")
      
      return(list(
        shap_values = shap_matrix,
        feature_data = feature_sample,
        feature_names = colnames(feature_data),
        actual_values = data$actual_capacity[sample_indices],
        importance_matrix = importance_matrix,
        method = "importance_based",
        success = TRUE,
        note = "SHAP values approximated from feature importance"
      ))
      
    }, error = function(e2) {
      cat("All methods failed:", e2$message, "\n")
      return(NULL)
    })
  })
}

# Simplified SHAP plotting function (more robust)
plot_shap_summary_simple <- function(shap_result) {
  if (is.null(shap_result)) {
    # Return info plot
    p <- ggplot() +
      annotate("text", x = 0.5, y = 0.5, 
               label = "SHAP value calculation failed\nUnable to generate visualization", 
               size = 5, color = "red") +
      theme_void() +
      theme(plot.background = element_rect(fill = "white"))
    return(p)
  }
  
  # Check if SHAP values exist
  if (!"shap_values" %in% names(shap_result)) {
    p <- ggplot() +
      annotate("text", x = 0.5, y = 0.5, 
               label = "No available SHAP value data", 
               size = 5, color = "orange") +
      theme_void() +
      theme(plot.background = element_rect(fill = "white"))
    return(p)
  }
  
  tryCatch({
    shap_matrix <- shap_result$shap_values
    
    # Calculate feature importance (mean absolute SHAP values)
    if (ncol(shap_matrix) > 0 && nrow(shap_matrix) > 0) {
      importance <- colMeans(abs(shap_matrix), na.rm = TRUE)
    } else {
      importance <- rep(0.01, ncol(shap_matrix))
      names(importance) <- colnames(shap_matrix)
    }
    
    importance_df <- data.frame(
      feature = names(importance),
      importance = importance
    ) %>%
      filter(!is.na(importance)) %>%
      arrange(desc(importance)) %>%
      mutate(feature = factor(feature, levels = feature))
    
    if (nrow(importance_df) == 0) {
      p <- ggplot() +
        annotate("text", x = 0.5, y = 0.5, 
                 label = "No valid feature importance data", 
                 size = 5, color = "orange") +
        theme_void()
      return(p)
    }
    
    # Create bar chart
    p <- ggplot(importance_df, aes(x = importance, y = reorder(feature, importance))) +
      geom_col(fill = "steelblue", alpha = 0.8, width = 0.7) +
      geom_text(aes(label = sprintf("%.4f", importance)), 
                hjust = -0.1, size = 3.5) +
      theme_minimal() +
      labs(
        title = "SHAP Feature Importance (Bar Plot)",
        subtitle = ifelse(!is.null(shap_result$note), shap_result$note, ""),
        x = "mean|SHAP value|",
        y = "Feature"
      ) +
      theme(
        plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
        plot.subtitle = element_text(hjust = 0.5, color = "gray50", size = 10),
        axis.text.y = element_text(size = 10),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank()
      ) +
      scale_x_continuous(expand = expansion(mult = c(0, 0.15)))
    
    return(p)
    
  }, error = function(e) {
    cat("SHAP plot failed:", e$message, "\n")
    
    # Return error info plot
    p <- ggplot() +
      annotate("text", x = 0.5, y = 0.5, 
               label = paste("Plotting failed:", e$message), 
               size = 4, color = "red") +
      theme_void()
    return(p)
  })
}

# Simplified dependence plot
plot_shap_dependence_simple <- function(shap_result, feature_name) {
  if (is.null(shap_result) || !"shap_values" %in% names(shap_result)) {
    p <- ggplot() +
      annotate("text", x = 0.5, y = 0.5, 
               label = "No SHAP value data", 
               size = 5, color = "red") +
      theme_void()
    return(p)
  }
  
  tryCatch({
    shap_matrix <- shap_result$shap_values
    feature_data <- as.data.frame(shap_result$feature_data)
    
    if (!feature_name %in% colnames(shap_matrix)) {
      p <- ggplot() +
        annotate("text", x = 0.5, y = 0.5, 
                 label = paste("Feature does not exist:", feature_name), 
                 size = 5, color = "orange") +
        theme_void()
      return(p)
    }
    
    # Ensure dimensions match
    if (nrow(shap_matrix) != nrow(feature_data)) {
      # Trim to smaller size
      min_rows <- min(nrow(shap_matrix), nrow(feature_data))
      shap_matrix <- shap_matrix[1:min_rows, , drop = FALSE]
      feature_data <- feature_data[1:min_rows, , drop = FALSE]
    }
    
    plot_data <- data.frame(
      feature_value = feature_data[[feature_name]],
      shap_value = shap_matrix[, feature_name]
    ) %>% filter(!is.na(feature_value), !is.na(shap_value))
    
    if (nrow(plot_data) < 3) {
      p <- ggplot() +
        annotate("text", x = 0.5, y = 0.5, 
                 label = "Insufficient data points for dependence plot", 
                 size = 5, color = "orange") +
        theme_void()
      return(p)
    }
    
    p <- ggplot(plot_data, aes(x = feature_value, y = shap_value)) +
      geom_point(alpha = 0.6, color = "steelblue", size = 1.5) +
      geom_smooth(method = "loess", color = "red", se = TRUE, linetype = "solid", size = 0.8) +
      theme_minimal() +
      labs(
        title = paste("SHAP Dependence Plot:", feature_name),
        x = feature_name,
        y = "SHAP Value"
      ) +
      theme(
        plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
        axis.title = element_text(size = 12)
      )
    
    return(p)
    
  }, error = function(e) {
    cat("Dependence plot failed:", e$message, "\n")
    
    p <- ggplot() +
      annotate("text", x = 0.5, y = 0.5, 
               label = paste("Plotting failed:", e$message), 
               size = 4, color = "red") +
      theme_void()
    return(p)
  })
}

# Simplified SHAP force plot function
plot_shap_force_simple <- function(shap_result, sample_index = 1) {
  if (is.null(shap_result) || !"shap_values" %in% names(shap_result)) {
    p <- ggplot() +
      annotate("text", x = 0.5, y = 0.5, 
               label = "No SHAP value data", 
               size = 5, color = "red") +
      theme_void() +
      theme(plot.background = element_rect(fill = "white"))
    return(p)
  }
  
  tryCatch({
    shap_matrix <- shap_result$shap_values
    feature_data <- as.data.frame(shap_result$feature_data)
    
    # Ensure dimensions match
    if (nrow(shap_matrix) != nrow(feature_data)) {
      min_rows <- min(nrow(shap_matrix), nrow(feature_data))
      shap_matrix <- shap_matrix[1:min_rows, , drop = FALSE]
      feature_data <- feature_data[1:min_rows, , drop = FALSE]
    }
    
    # Ensure sample_index is within valid range
    n_samples <- nrow(shap_matrix)
    if (n_samples == 0) {
      stop("No samples available")
    }
    
    if (sample_index > n_samples) {
      sample_index <- 1
    }
    
    # Get SHAP values for this sample
    sample_shap <- shap_matrix[sample_index, ]
    
    # Get feature values
    sample_features <- feature_data[sample_index, , drop = FALSE]
    
    # Get actual value
    if (!is.null(shap_result$actual_values) && length(shap_result$actual_values) >= sample_index) {
      actual_value <- shap_result$actual_values[sample_index]
    } else {
      actual_value <- NA
    }
    
    # Calculate base value (average prediction)
    base_value <- mean(shap_result$actual_values, na.rm = TRUE)
    
    # Prepare plot data
    plot_data <- data.frame(
      feature = names(sample_shap),
      shap_value = as.numeric(sample_shap),
      feature_value = as.numeric(sample_features[1, ])
    ) %>% filter(!is.na(shap_value))
    
    # Ensure data is valid
    if (nrow(plot_data) == 0) {
      stop("No valid feature data")
    }
    
    # Sort by absolute SHAP value
    plot_data <- plot_data %>%
      arrange(desc(abs(shap_value))) %>%
      mutate(
        direction = ifelse(shap_value > 0, "Positive", "Negative"),
        order = 1:n()
      )
    
    # Create waterfall plot
    p <- ggplot(plot_data, aes(x = reorder(feature, desc(order)), y = shap_value, fill = direction)) +
      geom_bar(stat = "identity", width = 0.6) +
      geom_text(aes(label = sprintf("%.2f", shap_value)), 
                hjust = ifelse(plot_data$shap_value > 0, -0.2, 1.2),
                size = 3.5) +
      scale_fill_manual(values = c("Positive" = "#2E86AB", "Negative" = "#A23B72")) +
      theme_minimal() +
      labs(
        title = paste("SHAP Force Plot - Sample", sample_index),
        subtitle = paste("Base value:", round(base_value, 2),
                         "| Predicted:", round(base_value + sum(plot_data$shap_value), 2),
                         "| Actual:", round(actual_value, 2)),
        x = "Feature",
        y = "SHAP Value",
        fill = "Impact"
      ) +
      theme(
        plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
        plot.subtitle = element_text(hjust = 0.5, color = "gray50", size = 10),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
        legend.position = "bottom",
        panel.grid.major.x = element_blank()
      ) +
      coord_flip()
    
    return(p)
    
  }, error = function(e) {
    cat("SHAP force plot failed:", e$message, "\n")
    
    p <- ggplot() +
      annotate("text", x = 0.5, y = 0.5, 
               label = paste("Plotting failed:", e$message), 
               size = 4, color = "red") +
      theme_void()
    return(p)
  })
}

# Calculate SHAP values
cat("\nStarting SHAP value calculation...\n")
shap_result <- calculate_shap_values_robust(best_model, df_clean, n_samples = 50)

if (!is.null(shap_result)) {
  cat("SHAP value calculation succeeded!\n")
  cat("Method used:", shap_result$method, "\n")
  if (!is.null(shap_result$note)) {
    cat("Note:", shap_result$note, "\n")
  }
  cat("SHAP value dimensions:", dim(shap_result$shap_values), "\n")
  cat("Number of features:", length(shap_result$feature_names), "\n")
} else {
  cat("SHAP value calculation failed\n")
  
  # Create empty SHAP result to keep app running
  feature_names <- c("cmi", "annual_discharges", "annual_turnover", 
                     "avg_utilization", "beds", "discharges_per_bed", "workload_index")
  shap_result <- list(
    shap_values = matrix(0, nrow = 1, ncol = length(feature_names)),
    feature_data = matrix(0, nrow = 1, ncol = length(feature_names)),
    feature_names = feature_names,
    actual_values = 0,
    method = "placeholder",
    success = FALSE,
    note = "SHAP calculation failed, using placeholder"
  )
  colnames(shap_result$shap_values) <- shap_result$feature_names
  colnames(shap_result$feature_data) <- shap_result$feature_names
}

## 4. Improved Threshold Generation System
generate_threshold <- function(dept, current_util, recent_discharges, model, data) {
  tryCatch({
    dept_data <- data %>% 
      filter(dept == !!dept) %>%
      slice_tail(n = 1)
    
    if (nrow(dept_data) == 0) {
      warning("Department does not exist:", dept)
      return(NA_real_)
    }
    
    # Get latest data for this department
    latest_data <- data %>%
      filter(dept == !!dept) %>%
      arrange(desc(date)) %>%
      slice(1)
    
    # Prepare features for prediction
    features <- prepare_features_stable(latest_data)
    
    # Ensure we have the right number of features
    if (ncol(features) != 7) {
      # Try to use required features
      required_cols <- c("cmi", "annual_discharges", "annual_turnover", 
                         "avg_utilization", "beds", "discharges_per_bed", "workload_index")
      
      feature_matrix <- matrix(NA, nrow = 1, ncol = length(required_cols))
      colnames(feature_matrix) <- required_cols
      
      for (col in required_cols) {
        if (col %in% colnames(latest_data)) {
          feature_matrix[1, col] <- as.numeric(latest_data[[col]][1])
        } else if (paste0(col, "_monthly") %in% colnames(latest_data)) {
          feature_matrix[1, col] <- as.numeric(latest_data[[paste0(col, "_monthly")]][1])
        }
      }
      
      # Replace NA with current utilization for avg_utilization
      if (is.na(feature_matrix[1, "avg_utilization"])) {
        feature_matrix[1, "avg_utilization"] <- current_util
      }
      
      features <- feature_matrix
    }
    
    # Predict base capacity
    base_cap <- predict(model, features)
    
    # Ensure base_cap is a single value
    if (length(base_cap) > 1) {
      base_cap <- base_cap[1]
    }
    
    # Dynamically adjust safety margin based on real data
    dept_history <- data %>% filter(dept == !!dept)
    avg_discharges <- mean(dept_history$monthly_discharges, na.rm = TRUE)
    
    margin_adjustment <- case_when(
      current_util > 0.9 ~ 0.6 + 0.02 * pmin(recent_discharges/avg_discharges, 2),
      current_util > 0.85 ~ 0.7,
      current_util > 0.75 ~ 0.75,
      TRUE ~ 0.8
    )
    
    # Calibration coefficient based on department historical performance
    dept_performance <- dept_history %>%
      summarise(
        avg_util = mean(utilization, na.rm = TRUE),
        variability = sd(utilization, na.rm = TRUE) / avg_util
      )
    
    # Adjust coefficient based on department stability
    stability_factor <- ifelse(is.na(dept_performance$variability) || dept_performance$variability > 0.2, 0.9, 0.95)
    
    threshold <- base_cap * margin_adjustment * stability_factor
    
    # Ensure threshold is within reasonable range
    min_threshold <- 0.2 * dept_data$beds[1]
    max_threshold <- 0.8 * dept_data$beds[1]
    
    result <- pmax(min_threshold, pmin(threshold, max_threshold))
    
    # Ensure single value is returned
    if (length(result) > 1) {
      return(result[1])
    }
    
    return(as.numeric(result))
    
  }, error = function(e) {
    warning(paste("generate_threshold error:", e$message))
    return(NA_real_)
  })
}

## 5. Fixed Heatmap Data Generation Function
generate_heatmap_data <- function(model, data, n_depts = 12) {
  tryCatch({
    # Select department samples
    departments_sample <- unique(data$dept)[1:min(n_depts, length(unique(data$dept)))]
    
    # Generate utilization rate range
    util_range <- seq(0.7, 0.95, 0.05)
    
    # Create all combinations
    heatmap_data <- expand.grid(
      dept = departments_sample,
      util = util_range,
      stringsAsFactors = FALSE
    )
    
    # Calculate thresholds - add error handling
    heatmap_data$threshold <- NA_real_
    
    for (i in 1:nrow(heatmap_data)) {
      dept <- heatmap_data$dept[i]
      util <- heatmap_data$util[i]
      
      tryCatch({
        # Use fixed discharge count parameters (based on department historical averages)
        dept_avg_discharges <- data %>% 
          filter(dept == !!dept) %>% 
          summarise(avg = mean(monthly_discharges, na.rm = TRUE)) %>% 
          pull(avg)
        
        if (length(dept_avg_discharges) == 0 || is.na(dept_avg_discharges)) {
          dept_avg_discharges <- 100  # Default value
        }
        
        threshold_val <- generate_threshold(dept, util, dept_avg_discharges, model, data)
        heatmap_data$threshold[i] <- ifelse(is.na(threshold_val), NA, threshold_val)
        
      }, error = function(e) {
        warning(paste("Threshold calculation failed for", dept, util, ":", e$message))
        heatmap_data$threshold[i] <- NA
      })
    }
    
    # Filter invalid data
    heatmap_data <- heatmap_data %>% 
      filter(!is.na(threshold)) %>%
      arrange(dept, util)
    
    if (nrow(heatmap_data) == 0) {
      # Create default data
      heatmap_data <- expand.grid(
        dept = departments_sample[1:min(3, length(departments_sample))],
        util = seq(0.7, 0.95, 0.05),
        stringsAsFactors = FALSE
      )
      heatmap_data$threshold <- runif(nrow(heatmap_data), 15, 30)
    }
    
    return(heatmap_data)
    
  }, error = function(e) {
    warning(paste("Heatmap data generation failed:", e$message))
    
    # Return minimal default data
    expand.grid(
      dept = c("Dept A", "Dept B", "Dept C"),
      util = seq(0.7, 0.95, 0.05),
      stringsAsFactors = FALSE
    ) %>% mutate(threshold = runif(n(), 15, 30))
  })
}

# Add missing time series split function
time_series_split <- function(data, train_ratio = 0.7) {
  data_ordered <- data %>% arrange(dept, date)
  n_total <- nrow(data_ordered)
  n_train <- floor(n_total * train_ratio)
  
  list(
    train = data_ordered[1:n_train, ],
    test = data_ordered[(n_train + 1):n_total, ]
  )
}

## 6. Intervention Effect Calculation Function (Fixed Version)
calculate_intervention_effect_real <- function(model, data, is_ensemble = FALSE) {
  
  # Use test set data (last 20% of data as test set)
  test_split <- time_series_split(data, train_ratio = 0.8)
  test_data <- test_split$test
  
  cat("Test set data volume:", nrow(test_data), "records\n")
  cat("Test set departments:", n_distinct(test_data$dept), "departments\n")
  
  # Calculate warning status and intervention effects at each time point
  intervention_results <- test_data %>%
    group_by(dept) %>%
    arrange(date) %>%
    mutate(
      # Calculate threshold at each time point
      threshold = NA_real_,
      is_warning = FALSE,
      base_overdue_rate = 0.12,
      utilization_effect = (utilization - 0.7) * 0.15,
      current_overdue_rate = pmax(0.05, pmin(0.35, base_overdue_rate + utilization_effect)),
      current_overdue_patients = round(actual_capacity * current_overdue_rate),
      intervention_overdue_rate = base_overdue_rate,
      intervention_overdue_patients = round(actual_capacity * intervention_overdue_rate)
    ) %>%
    ungroup()
  
  # Calculate thresholds row by row
  for (i in 1:nrow(intervention_results)) {
    row_data <- intervention_results[i, ]
    tryCatch({
      dept <- row_data$dept
      util <- row_data$utilization
      discharges <- row_data$monthly_discharges
      
      threshold_val <- generate_threshold(dept, util, discharges, model, data)
      intervention_results$threshold[i] <- ifelse(is.na(threshold_val), NA, threshold_val)
      
      # Determine warning status
      if (!is.na(threshold_val) && !is.na(row_data$actual_capacity)) {
        intervention_results$is_warning[i] <- row_data$actual_capacity > threshold_val
      }
      
    }, error = function(e) {
      # Keep NA values
      intervention_results$threshold[i] <- NA
    })
  }
  
  # Filter out data where threshold calculation failed
  valid_results <- intervention_results %>% 
    filter(!is.na(threshold), !is.na(current_overdue_patients))
  
  cat("Valid data volume:", nrow(valid_results), "records\n")
  
  # Calculate overall effect
  total_results <- valid_results %>%
    summarise(
      total_patient_days = sum(actual_capacity, na.rm = TRUE),
      current_total_overdue = sum(current_overdue_patients, na.rm = TRUE),
      intervention_total_overdue = sum(intervention_overdue_patients, na.rm = TRUE),
      warning_events = sum(is_warning, na.rm = TRUE),
      total_events = n()
    ) %>%
    mutate(
      current_overdue_rate = ifelse(total_patient_days > 0, 
                                    current_total_overdue / total_patient_days * 100, 0),
      intervention_overdue_rate = ifelse(total_patient_days > 0,
                                         intervention_total_overdue / total_patient_days * 100, 0),
      absolute_reduction = current_overdue_rate - intervention_overdue_rate,
      relative_reduction_percent = ifelse(current_overdue_rate > 0, 
                                          absolute_reduction / current_overdue_rate * 100, 0),
      warning_frequency = ifelse(total_events > 0,
                                 warning_events / total_events * 100, 0)
    )
  
  # Detailed analysis by department
  dept_analysis <- valid_results %>%
    group_by(dept) %>%
    summarise(
      avg_utilization = mean(utilization, na.rm = TRUE),
      warning_ratio = ifelse(n() > 0, sum(is_warning, na.rm = TRUE) / n() * 100, 0),
      current_overdue = sum(current_overdue_patients, na.rm = TRUE),
      intervention_overdue = sum(intervention_overdue_patients, na.rm = TRUE),
      total_patients = sum(actual_capacity, na.rm = TRUE),
      .groups = 'drop'
    ) %>%
    mutate(
      current_rate = ifelse(total_patients > 0, current_overdue / total_patients * 100, 0),
      intervention_rate = ifelse(total_patients > 0, intervention_overdue / total_patients * 100, 0),
      reduction = current_rate - intervention_rate
    )
  
  return(list(
    total_effect = total_results,
    department_analysis = dept_analysis,
    detailed_results = valid_results,
    simulation_summary = paste0(
      "Based on ", nrow(valid_results), " test set records, ",
      "involving ", n_distinct(valid_results$dept), " departments simulation analysis"
    )
  ))
}

# Visualization function for intervention effects
plot_intervention_effect <- function(real_intervention_result) {
  dept_analysis <- real_intervention_result$department_analysis
  
  # Select top 15 departments with most significant changes
  top_depts <- dept_analysis %>%
    arrange(desc(abs(reduction))) %>%
    head(15)
  
  # If data is empty, return empty plot
  if (nrow(top_depts) == 0) {
    p <- ggplot() +
      annotate("text", x = 0.5, y = 0.5, label = "No valid data", size = 6) +
      theme_void()
    return(p)
  }
  
  p <- ggplot(top_depts, aes(x = reorder(dept, reduction), y = reduction, fill = reduction > 0)) +
    geom_col(show.legend = FALSE) +
    geom_text(aes(label = sprintf("%.1f%%", reduction)), 
              hjust = ifelse(top_depts$reduction > 0, -0.2, 1.2), 
              size = 3) +
    coord_flip() +
    labs(
      title = "Expected reduction in overdue patient rates across departments",
      x = "Departments",
      y = "Reduction in overdue patient rate (%)",
      caption = "Positive values indicate a reduction in overdue patient rates after intervention"
    ) +
    theme_minimal() +
    scale_fill_manual(values = c("FALSE" = "#e74c3c", "TRUE" = "#2ecc71")) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold"),
      axis.text.y = element_text(size = 8)
    ) +
    scale_y_continuous(expand = expansion(mult = c(0, 0.1)))
  
  return(p)
}

# Function to generate final report text
generate_final_report <- function(real_intervention_result) {
  total_effect <- real_intervention_result$total_effect
  
  # Safely get reduction percentage, handle possible NA values
  if (is.null(total_effect) || is.na(total_effect$relative_reduction_percent)) {
    reduction_percent <- 0
  } else {
    reduction_percent <- round(total_effect$relative_reduction_percent, 1)
  }
  
  # Get warning frequency
  if (is.null(total_effect) || is.na(total_effect$warning_frequency)) {
    warning_freq <- 0
  } else {
    warning_freq <- round(total_effect$warning_frequency, 1)
  }
  
  report_text <- paste0(
    "To evaluate the model's application potential, we use test set data for counterfactual simulation: If the model issues a red warning (i.e., current load > threshold), ",
    "the department suspends admission of new pre-admission patients until the load drops below the threshold. It is estimated that the overdue (>10 days) patient rate can be reduced by approximately ",
    reduction_percent, "%. ",
    "Simulation results show that the warning frequency is approximately ", warning_freq, "%. ",
    "This highlights the practical value of the model in proactive management."
  )
  
  return(report_text)
}

# Sensitivity analysis function
sensitivity_analysis <- function(real_intervention_result) {
  total_effect <- real_intervention_result$total_effect
  
  # Sensitivity analysis for different base overdue rates
  base_rates <- c(0.08, 0.10, 0.12, 0.14, 0.16)
  sensitivity_results <- map_dfr(base_rates, function(base_rate) {
    # Recalculate effects using different base overdue rates
    adjusted_current <- total_effect$current_total_overdue * (base_rate / 0.12)
    adjusted_intervention <- total_effect$intervention_total_overdue * (base_rate / 0.12)
    
    current_rate <- ifelse(total_effect$total_patient_days > 0,
                           adjusted_current / total_effect$total_patient_days * 100, 0)
    intervention_rate <- ifelse(total_effect$total_patient_days > 0,
                                adjusted_intervention / total_effect$total_patient_days * 100, 0)
    
    # Safely calculate relative reduction percentage, avoid division by zero
    if (current_rate > 0) {
      reduction <- (current_rate - intervention_rate) / current_rate * 100
    } else {
      reduction <- 0
    }
    
    tibble(
      base_overdue_rate = base_rate * 100,
      current_rate = round(current_rate, 1),
      intervention_rate = round(intervention_rate, 1),
      reduction_percent = round(reduction, 1)
    )
  })
  
  return(sensitivity_results)
}

# Improved prediction plot function
create_prediction_plot <- function(model, data) {
  tryCatch({
    # Use time series split
    split_data <- time_series_split(data, train_ratio = 0.8)
    test_data <- split_data$test
    
    # Ensure test data is not empty
    if (nrow(test_data) == 0) {
      stop("Test data is empty")
    }
    
    # Prepare features
    test_features <- prepare_features(test_data)
    
    # Check feature dimensions
    if (nrow(test_features) == 0) {
      stop("Test features are empty")
    }
    
    # Make predictions
    predictions <- predict(model, test_features)
    
    # Create plot data
    plot_data <- data.frame(
      Actual = test_data$actual_capacity,
      Predicted = predictions,
      Department = test_data$dept,
      Date = test_data$date
    )
    
    # Remove NA values
    plot_data <- plot_data[complete.cases(plot_data), ]
    
    # Check if there are enough data points
    if (nrow(plot_data) < 2) {
      stop("Insufficient valid data points")
    }
    
    # Create scatter plot
    p <- ggplot(plot_data, aes(x = Actual, y = Predicted, color = Department)) +
      geom_point(alpha = 0.6, size = 2) +
      geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red", size = 1) +
      labs(
        title = "Predicted vs Actual Values",
        subtitle = paste("Number of data points:", nrow(plot_data)),
        x = "Actual Capacity", 
        y = "Predicted Capacity"
      ) +
      theme_minimal() +
      theme(
        legend.position = "none",
        plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
        plot.subtitle = element_text(hjust = 0.5, size = 10)
      ) +
      scale_color_viridis_d(option = "plasma")
    
    return(ggplotly(p))
    
  }, error = function(e) {
    # Create error info plot
    error_plot <- plot_ly() %>%
      add_annotations(
        text = paste("Unable to generate prediction plot:", e$message),
        x = 0.5,
        y = 0.5,
        xref = "paper",
        yref = "paper",
        showarrow = FALSE,
        font = list(size = 16, color = "red")
      ) %>%
      layout(
        title = "Prediction Plot Generation Failed",
        xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
        yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
      )
    
    return(error_plot)
  })
}

## 7. Shiny Interactive Visualization Application
# Define UI
ui <- dashboardPage(
  dashboardHeader(title = "Pre-admission Bed Capacity Warning System"),
  dashboardSidebar(
    sidebarMenu(
      menuItem("Department Threshold Analysis", tabName = "threshold", icon = icon("bed")),
      menuItem("Real-time Warning Monitoring", tabName = "monitor", icon = icon("dashboard")),
      menuItem("Model Performance", tabName = "performance", icon = icon("chart-line")),
      menuItem("Model Interpretability", tabName = "shap", icon = icon("magnifying-glass-chart")),
      menuItem("Intervention Effect Simulation", tabName = "intervention", icon = icon("chart-bar")),
      menuItem("Data Overview", tabName = "overview", icon = icon("table"))
    )
  ),
  dashboardBody(
    tabItems(
      # Department threshold analysis tab
      tabItem(tabName = "threshold",
              fluidRow(
                box(
                  title = "Parameter Settings", width = 4, status = "primary",
                  selectInput("department", "Select Department:", 
                              choices = unique(df_combined$dept), 
                              selected = unique(df_combined$dept)[1]),
                  sliderInput("utilization", "Current Bed Occupancy Rate:", 
                              min = 0.5, max = 1.0, value = 0.85, step = 0.05),
                  numericInput("recent_discharges", "Discharges in Past 30 Days:", 
                               value = 100, min = 10, max = 500),
                  actionButton("calculate", "Calculate Threshold", icon = icon("calculator"))
                ),
                box(
                  title = "Threshold Output", width = 8, status = "info",
                  verbatimTextOutput("threshold_output"),
                  plotlyOutput("threshold_plot")
                )
              ),
              fluidRow(
                box(
                  title = "Multi-department Comparative Heatmap", width = 12,
                  plotlyOutput("heatmap_plot")
                )
              )
      ),
      
      # Real-time warning monitoring tab
      tabItem(tabName = "monitor",
              fluidRow(
                valueBoxOutput("current_patients_box"),
                valueBoxOutput("threshold_box"),
                valueBoxOutput("status_box")
              ),
              fluidRow(
                box(
                  title = "Real-time Monitoring Dashboard", width = 12,
                  DTOutput("monitor_table")
                )
              )
      ),
      
      # Model performance tab
      tabItem(tabName = "performance",
              fluidRow(
                box(
                  title = "Cross-Validation Results", width = 6,
                  tableOutput("cv_table")
                ),
                box(
                  title = "Feature Importance", width = 6,
                  plotOutput("importance_plot")
                )
              ),
              fluidRow(
                box(
                  title = "Prediction Performance Evaluation", width = 6,
                  verbatimTextOutput("prediction_performance")
                ),
                box(
                  title = "Predicted vs Actual Values", width = 6,
                  plotlyOutput("prediction_plot")
                )
              ),
              fluidRow(
                box(
                  title = "SHAP Feature Importance", width = 12,
                  plotOutput("performance_shap_plot", height = "400px"),
                  p("SHAP values show the average magnitude of each feature's impact on model predictions.")
                )
              )
      ),
      
      # Model interpretability tab
      tabItem(
        tabName = "shap",
        h2("Model Explainability Analysis (SHAP)"),
        fluidRow(
          box(
            title = "SHAP Summary Plot", 
            width = 12, 
            status = "primary",
            plotOutput("shap_summary_plot", height = "600px"),
            br(),
            p("Note: Features are ranked by importance. Bars show mean absolute SHAP values.")
          )
        ),
        fluidRow(
          box(
            title = "SHAP Dependence Plot", 
            width = 6,
            status = "info",
            selectInput("shap_feature", "Feature:", 
                        choices = if(exists("shap_result") && !is.null(shap_result$feature_names)) 
                          shap_result$feature_names else 
                            c("cmi", "annual_discharges", "annual_turnover", 
                              "avg_utilization", "beds", "discharges_per_bed", "workload_index"),
                        selected = if(exists("shap_result") && !is.null(shap_result$feature_names) && length(shap_result$feature_names) > 0) 
                          shap_result$feature_names[1] else "cmi"),
            plotOutput("shap_dependence_plot", height = "400px"),
            p("Note: Shows the relationship between individual feature values and SHAP values.")
          ),
          box(
            title = "SHAP Force Plot", 
            width = 6,
            status = "success",
            sliderInput("shap_sample", "Select Sample ID:", 
                        min = 1, 
                        max = if(!is.null(shap_result) && !is.null(shap_result$shap_values)) 
                          min(100, nrow(shap_result$shap_values)) else 1,
                        value = 1,
                        step = 1),
            plotOutput("shap_force_plot", height = "400px"),
            p("Note: Illustrates how the prediction for a single sample is formed.")
          )
        ),
        fluidRow(
          box(
            title = "SHAP Feature Importance Table", 
            width = 12,
            status = "warning",
            DTOutput("shap_importance_table")
          )
        ),
        fluidRow(
          box(
            title = "Debug Information", 
            width = 12,
            status = "warning",
            collapsible = TRUE,
            collapsed = TRUE,
            verbatimTextOutput("shap_debug")
          )
        )
      ),
      
      # Intervention effect simulation tab
      tabItem(tabName = "intervention",
              h2("Intervention Effect Simulation Analysis"),
              fluidRow(
                valueBoxOutput("reduction_box"),
                valueBoxOutput("warning_freq_box"),
                valueBoxOutput("current_rate_box")
              ),
              fluidRow(
                box(
                  title = "Intervention Effect Details", width = 12, status = "primary",
                  plotlyOutput("intervention_plot"),
                  br(),
                  verbatimTextOutput("intervention_details")
                )
              ),
              fluidRow(
                box(
                  title = "Final Report", width = 12, status = "success",
                  htmlOutput("final_report_text")
                )
              )
      ),
      
      # Data overview tab
      tabItem(tabName = "overview",
              fluidRow(
                box(
                  title = "Monthly Data Overview", width = 12,
                  DTOutput("data_overview")
                )
              ),
              fluidRow(
                box(
                  title = "Department Metric Trends", width = 12,
                  plotlyOutput("trend_plot")
                )
              )
      )
    )
  )
)

# Define server logic
server <- function(input, output, session) {
  
  # Precompute heatmap data (improve performance)
  heatmap_data_reactive <- reactive({
    generate_heatmap_data(best_model, df_combined, n_depts = 12)
  })
  
  # Reactive data: calculate threshold
  threshold_data <- eventReactive(input$calculate, {
    dept <- input$department
    util <- input$utilization
    discharges <- input$recent_discharges
    
    threshold <- generate_threshold(dept, util, discharges, best_model, df_combined)
    
    beds_val <- df_combined %>% 
      filter(dept == !!dept) %>% 
      slice(1) %>% 
      pull(beds)
    
    list(
      department = dept,
      utilization = util,
      recent_discharges = discharges,
      threshold = ifelse(is.na(threshold), NA, round(threshold, 1)),
      beds = ifelse(length(beds_val) > 0, beds_val[1], 30)
    )
  })
  
  # Reactive calculation of intervention effects
  intervention_data <- reactive({
    calculate_intervention_effect_real(best_model, df_combined, is_ensemble = FALSE)
  })
  
  # Threshold result output
  output$threshold_output <- renderPrint({
    data <- threshold_data()
    if (is.na(data$threshold)) {
      cat("Unable to calculate threshold, please check input parameters\n")
    } else {
      cat("Department:", data$department, "\n")
      cat("Number of beds:", data$beds, "\n")
      cat("Current utilization rate:", scales::percent(data$utilization), "\n")
      cat("Discharges in past 30 days:", data$recent_discharges, "patients\n")
      cat("Recommended pre-admission threshold:", data$threshold, "patients\n")
      cat("Minimum protection threshold:", round(0.3 * data$beds, 1), "patients\n")
    }
  })
  
  # Threshold variation curve
  output$threshold_plot <- renderPlotly({
    data <- threshold_data()
    
    if (is.na(data$threshold)) {
      return(plotly_empty() %>% 
               layout(title = "Unable to generate plot, please check parameters"))
    }
    
    # Simulate threshold variation at different utilization rates
    util_range <- seq(0.5, 0.95, 0.05)
    thresholds <- map_dbl(util_range, ~ {
      tryCatch({
        generate_threshold(data$department, .x, data$recent_discharges, best_model, df_combined)
      }, error = function(e) NA_real_)
    })
    
    plot_data <- tibble(utilization = util_range, threshold = thresholds) %>% 
      filter(!is.na(threshold))
    
    if (nrow(plot_data) == 0) {
      return(plotly_empty() %>% layout(title = "No valid threshold data"))
    }
    
    p <- ggplot(plot_data, aes(x = utilization, y = threshold)) +
      geom_line(color = "#2E86AB", size = 1.5) +
      geom_point(aes(x = data$utilization, y = data$threshold), 
                 color = "#A23B72", size = 4) +
      geom_hline(yintercept = 0.3 * data$beds, linetype = "dashed", 
                 color = "#F18F01", alpha = 0.7) +
      labs(
        title = paste(data$department, "threshold variation with utilization rate"),
        x = "Bed Utilization Rate", y = "Pre-admission Threshold (patients)",
        caption = "Dashed line indicates minimum protection threshold"
      ) +
      theme_minimal() +
      scale_x_continuous(labels = scales::percent)
    
    ggplotly(p)
  })
  
  # Fixed heatmap - use more robust method
  output$heatmap_plot <- renderPlotly({
    data <- heatmap_data_reactive()
    
    if (nrow(data) == 0) {
      return(plotly_empty() %>% 
               layout(
                 title = "Unable to generate heatmap",
                 annotations = list(
                   text = "Heatmap data generation failed, please check model and data",
                   xref = "paper", yref = "paper",
                   x = 0.5, y = 0.5, showarrow = FALSE
                 )
               ))
    }
    
    # Create heatmap
    plot_ly(
      data = data,
      x = ~dept,
      y = ~util,
      z = ~threshold,
      type = "heatmap",
      colors = colorRamp(c("#d73027", "#fee08b", "#1a9850")),
      hoverinfo = "text",
      text = ~paste("Department:", dept, 
                    "<br>Utilization:", scales::percent(util), 
                    "<br>Threshold:", round(threshold, 1), "patients")
    ) %>%
      layout(
        title = "Pre-admission Threshold Heatmap",
        xaxis = list(title = "Department", tickangle = 45),
        yaxis = list(title = "Bed Utilization Rate", tickformat = ".0%"),
        margin = list(l = 100, r = 50, b = 150, t = 50)
      )
  })
  
  # Real-time monitoring table
  output$monitor_table <- renderDT({
    # Simulate real-time data
    monitor_data <- df_combined %>%
      group_by(dept) %>%
      slice_tail(n = 1) %>%
      rowwise() %>%
      mutate(
        current_patients = round(runif(1, 10, 40)),
        threshold = tryCatch({
          generate_threshold(dept, utilization, monthly_discharges, best_model, df_combined)
        }, error = function(e) NA_real_),
        status = ifelse(is.na(threshold) || current_patients > threshold, "Warning", "Normal"),
        utilization_pct = scales::percent(utilization)
      ) %>%
      ungroup() %>%
      select(Department = dept, `Current Patients` = current_patients, 
             Threshold = threshold, Status = status, Utilization = utilization_pct)
    
    datatable(
      monitor_data,
      options = list(pageLength = 10),
      rownames = FALSE
    ) %>%
      formatStyle(
        'Status',
        backgroundColor = styleEqual(
          c('Normal', 'Warning'),
          c('#DFF0D8', '#F2DEDE')
        )
      )
  })
  
  # Data overview table
  output$data_overview <- renderDT({
    overview_data <- df_combined %>%
      select(
        Department = dept, 
        Date = date,
        CMI = cmi,
        `Number of Beds` = beds,
        `Monthly Discharges` = monthly_discharges,
        `Monthly Turnover` = monthly_turnover,
        Utilization = utilization,
        `Actual Capacity` = actual_capacity
      ) %>%
      slice_head(n = 100)  # Limit to 100 rows for performance
    
    datatable(
      overview_data,
      options = list(pageLength = 10, scrollX = TRUE),
      rownames = FALSE
    )
  })
  
  # Trend plot
  output$trend_plot <- renderPlotly({
    selected_dept <- input$department
    if (is.null(selected_dept)) selected_dept <- unique(df_combined$dept)[1]
    
    trend_data <- df_combined %>% 
      filter(dept == selected_dept) %>%
      arrange(date)
    
    if (nrow(trend_data) == 0) {
      return(plotly_empty() %>% layout(title = "No data for selected department"))
    }
    
    p <- ggplot(trend_data, aes(x = date)) +
      geom_line(aes(y = monthly_discharges, color = "Monthly Discharges"), size = 1) +
      geom_line(aes(y = utilization * 100, color = "Utilization (%)"), size = 1) +
      geom_line(aes(y = actual_capacity, color = "Actual Capacity"), size = 1) +
      labs(
        title = paste(selected_dept, "monthly metric trends"),
        x = "Date", y = "Value",
        color = "Metrics"
      ) +
      theme_minimal() +
      scale_y_continuous(
        sec.axis = sec_axis(~./100, name = "Utilization", labels = scales::percent)
      )
    
    ggplotly(p)
  })
  
  # Value box output
  output$current_patients_box <- renderValueBox({
    valueBox(
      value = 28,
      subtitle = "Current Pre-admission Patients",
      icon = icon("procedures"),
      color = "purple"
    )
  })
  
  output$threshold_box <- renderValueBox({
    data <- threshold_data()
    valueBox(
      value = ifelse(is.na(data$threshold), "N/A", data$threshold),
      subtitle = "Recommended Threshold",
      icon = icon("chart-line"),
      color = "blue"
    )
  })
  
  output$status_box <- renderValueBox({
    data <- threshold_data()
    current_patients <- 28  # Simulate current patient count
    if (is.na(data$threshold)) {
      status <- "Calculation Error"
      color <- "yellow"
    } else {
      status <- ifelse(current_patients > data$threshold, "Warning", "Normal")
      color <- ifelse(status == "Warning", "red", "green")
    }
    
    valueBox(
      value = status,
      subtitle = "System Status",
      icon = icon("exclamation-triangle"),
      color = color
    )
  })
  
  # Cross-validation results table
  output$cv_table <- renderTable({
    if (exists("cv_summary") && nrow(cv_summary) > 0) {
      cv_summary
    } else {
      data.frame(Message = "No cross-validation results available")
    }
  })
  
  # Feature importance plot
  output$importance_plot <- renderPlot({
    tryCatch({
      importance_matrix <- xgb.importance(model = best_model)
      if (!is.null(importance_matrix) && nrow(importance_matrix) > 0) {
        xgb.plot.importance(importance_matrix, main = "Feature Importance")
      } else {
        plot(1, 1, type = "n", xlab = "", ylab = "", main = "No importance data available")
        text(1, 1, "Feature importance not available", col = "red")
      }
    }, error = function(e) {
      plot(1, 1, type = "n", xlab = "", ylab = "", main = "Unable to calculate feature importance")
      text(1, 1, e$message, col = "red")
    })
  })
  
  # Predicted vs actual values plot
  output$prediction_plot <- renderPlotly({
    create_prediction_plot(best_model, df_combined)
  })
  
  # Prediction performance evaluation
  output$prediction_performance <- renderPrint({
    tryCatch({
      split_data <- time_series_split(df_combined, train_ratio = 0.8)
      test_data <- split_data$test
      
      if (nrow(test_data) == 0) {
        cat("Test data is empty\n")
        return()
      }
      
      test_features <- prepare_features(test_data)
      predictions <- predict(best_model, test_features)
      actual <- test_data$actual_capacity
      
      # Calculate performance metrics
      metrics <- safe_metrics(actual, predictions)
      
      cat("Prediction Performance Evaluation:\n")
      cat("================\n")
      cat("Test set size:", length(actual), "\n")
      cat("RMSE:", round(metrics$rmse, 3), "\n")
      cat("MAE:", round(metrics$mae, 3), "\n")
      cat("R-squared:", round(metrics$r_squared, 3), "\n")
      
      # Add some basic statistics
      cat("\nBasic Statistics:\n")
      cat("Actual value range:", round(min(actual, na.rm = TRUE), 2), "-", round(max(actual, na.rm = TRUE), 2), "\n")
      cat("Predicted value range:", round(min(predictions, na.rm = TRUE), 2), "-", round(max(predictions, na.rm = TRUE), 2), "\n")
      cat("Mean absolute error percentage:", round(mean(abs((actual - predictions) / actual), na.rm = TRUE) * 100, 2), "%\n")
      
    }, error = function(e) {
      cat("Performance evaluation failed:", e$message, "\n")
    })
  })
  
  # Intervention effect related outputs
  output$reduction_box <- renderValueBox({
    data <- intervention_data()$total_effect
    valueBox(
      value = paste0(round(data$relative_reduction_percent, 1), "%"),
      subtitle = "Overdue Patient Rate Reduction",
      icon = icon("arrow-down"),
      color = "green"
    )
  })
  
  output$warning_freq_box <- renderValueBox({
    data <- intervention_data()$total_effect
    valueBox(
      value = paste0(round(data$warning_frequency, 1), "%"),
      subtitle = "Warning Frequency",
      icon = icon("exclamation-triangle"),
      color = "orange"
    )
  })
  
  output$current_rate_box <- renderValueBox({
    data <- intervention_data()$total_effect
    valueBox(
      value = paste0(round(data$current_overdue_rate, 1), "%"),
      subtitle = "Current Overdue Rate",
      icon = icon("clock"),
      color = "red"
    )
  })
  
  output$intervention_plot <- renderPlotly({
    p <- plot_intervention_effect(intervention_data())
    ggplotly(p)
  })
  
  output$intervention_details <- renderPrint({
    data <- intervention_data()$total_effect
    cat("Simulation Analysis Details:\n")
    cat("================\n")
    cat(sprintf("Test set records: %d\n", nrow(intervention_data()$detailed_results)))
    cat(sprintf("Departments involved: %d\n", n_distinct(intervention_data()$detailed_results$dept)))
    cat(sprintf("Warning events count: %d\n", data$warning_events))
    cat(sprintf("Total observations: %d\n", data$total_events))
    cat(sprintf("Current overdue rate: %.1f%%\n", data$current_overdue_rate))
    cat(sprintf("Post-intervention overdue rate: %.1f%%\n", data$intervention_overdue_rate))
    cat(sprintf("Absolute reduction: %.1f%%\n", data$absolute_reduction))
    cat(sprintf("Relative reduction: %.1f%%\n", data$relative_reduction_percent))
  })
  
  output$final_report_text <- renderUI({
    report_text <- generate_final_report(intervention_data())
    HTML(paste0("<p style='font-size:16px; line-height:1.6;'>", report_text, "</p>"))
  })
  
  # SHAP summary plot
  output$shap_summary_plot <- renderPlot({
    plot_shap_summary_simple(shap_result)
  })
  
  # SHAP dependence plot
  output$shap_dependence_plot <- renderPlot({
    if (!is.null(input$shap_feature)) {
      plot_shap_dependence_simple(shap_result, input$shap_feature)
    } else {
      ggplot() +
        annotate("text", x = 0.5, y = 0.5, 
                 label = "Please select a feature", 
                 size = 5, color = "gray") +
        theme_void()
    }
  })
  
  # SHAP force plot output
  output$shap_force_plot <- renderPlot({
    if (!is.null(input$shap_sample)) {
      sample_index <- as.integer(input$shap_sample)
      
      # Ensure sample_index is a valid number
      if (!is.na(sample_index) && sample_index > 0) {
        plot_shap_force_simple(shap_result, sample_index)
      } else {
        ggplot() +
          annotate("text", x = 0.5, y = 0.5, 
                   label = "Invalid sample index", 
                   size = 5, color = "red") +
          theme_void()
      }
    } else {
      ggplot() +
        annotate("text", x = 0.5, y = 0.5, 
                 label = "Please select a sample", 
                 size = 5, color = "gray") +
        theme_void()
    }
  })
  
  # Add debug info output
  output$shap_debug <- renderPrint({
    if (!is.null(shap_result)) {
      cat("SHAP Result Status:\n")
      cat("=============\n")
      cat("Method:", shap_result$method, "\n")
      cat("SHAP value dimensions:", ifelse(!is.null(shap_result$shap_values), 
                             paste(dim(shap_result$shap_values), collapse = "x"), 
                             "NULL"), "\n")
      cat("Number of features:", length(shap_result$feature_names), "\n")
      cat("Number of actual values:", length(shap_result$actual_values), "\n")
      
      if (!is.null(shap_result$shap_values) && !is.null(dim(shap_result$shap_values))) {
        cat("\nSHAP value summary for first 5 samples:\n")
        print(head(shap_result$shap_values, 5))
      }
    } else {
      cat("SHAP result is empty\n")
    }
  })
  
  # Performance page SHAP plot
  output$performance_shap_plot <- renderPlot({
    plot_shap_summary_simple(shap_result)
  })
  
  # SHAP feature importance table
  output$shap_importance_table <- renderDT({
    if (!is.null(shap_result) && "shap_values" %in% names(shap_result) && 
        !is.null(shap_result$shap_values) && ncol(shap_result$shap_values) > 0) {
      
      shap_matrix <- shap_result$shap_values
      
      # Calculate feature importance
      importance <- colMeans(abs(shap_matrix), na.rm = TRUE)
      mean_impact <- colMeans(shap_matrix, na.rm = TRUE)
      
      importance_df <- data.frame(
        Feature = names(importance),
        `Mean Absolute SHAP Value` = round(importance, 4),
        `Average Impact` = round(mean_impact, 4),
        `Influence Direction` = ifelse(mean_impact > 0, "Positive", "Negative")
      ) %>%
        arrange(desc(Mean.Absolute.SHAP.Value))
      
      datatable(
        importance_df,
        options = list(
          pageLength = 10,
          dom = 'Bfrtip',
          buttons = c('copy', 'csv', 'excel')
        ),
        rownames = FALSE,
        caption = "SHAP Feature Importance Ranking"
      ) %>%
        formatStyle(
          'Influence.Direction',
          backgroundColor = styleEqual(
            c('Positive', 'Negative'),
            c('#DFF0D8', '#F2DEDE')
          )
        )
    } else {
      datatable(
        data.frame(Message = "Unable to calculate SHAP feature importance"),
        options = list(pageLength = 1),
        rownames = FALSE
      )
    }
  })
  
  # Update feature selection dropdown
  observe({
    if (!is.null(shap_result) && "feature_names" %in% names(shap_result) &&
        length(shap_result$feature_names) > 0) {
      updateSelectInput(
        session,
        "shap_feature",
        choices = shap_result$feature_names,
        selected = shap_result$feature_names[1]
      )
    }
  })
  
  # Update sample selection slider
  observe({
    if (!is.null(shap_result) && "shap_values" %in% names(shap_result) &&
        !is.null(shap_result$shap_values) && nrow(shap_result$shap_values) > 0) {
      max_samples <- min(100, nrow(shap_result$shap_values))
      updateSliderInput(
        session,
        "shap_sample",
        max = max_samples,
        value = 1
      )
    }
  })
}

# Run Shiny application
cat("Starting Shiny application...\n")

# Execute real data simulation (run in background)
cat("Starting intervention effect simulation based on real data...\n")
real_intervention_result <- calculate_intervention_effect_real(best_model, df_combined, is_ensemble = FALSE)

# Output detailed results
cat("\n=== Intervention Effect Simulation Results Based on Real Data ===\n")
total_effect <- real_intervention_result$total_effect

cat(real_intervention_result$simulation_summary, "\n")
cat(sprintf("Warning event frequency: %.1f%%\n", total_effect$warning_frequency))
cat(sprintf("Current overdue patient rate: %.1f%%\n", total_effect$current_overdue_rate))
cat(sprintf("Post-intervention overdue patient rate: %.1f%%\n", total_effect$intervention_overdue_rate))
cat(sprintf("Absolute reduction in overdue patient rate: %.1f%%\n", total_effect$absolute_reduction))
cat(sprintf("Relative reduction in overdue patient rate: %.1f%%\n", total_effect$relative_reduction_percent))

# Display department-level analysis
cat("\n=== Department-Level Analysis (Top 10 Departments) ===\n")
dept_top10 <- real_intervention_result$department_analysis %>%
  arrange(desc(reduction)) %>%
  head(10)

print(dept_top10)

# Generate final report
final_report <- generate_final_report(real_intervention_result)
cat("\n=== Final Report Text ===\n")
cat(final_report, "\n")

# Generate visualization
intervention_plot <- plot_intervention_effect(real_intervention_result)
print(intervention_plot)

# Execute sensitivity analysis
sensitivity_results <- sensitivity_analysis(real_intervention_result)
cat("\n=== Sensitivity Analysis (Different Base Overdue Rates) ===\n")
print(sensitivity_results)

# Run the app
shinyApp(ui, server)