#=============================================================================
# Solar PV simulation pipeline
# 
# This script runs the simulation and projection pipeline for solar power 
# using PROLONG.
#
# The simulation generation and training data preparation steps are
# computationally intensive and take several hours to run with the default
# settings. The full pipeline takes roughly 6-7 hours on a MacBook 16" with an
# M1 Pro chip and 16GB RAM.
# It is recommended to first test the code with a reduced number of runs.
#=============================================================================

#--------------------
# 1. Setup and Source Required Files
#--------------------
# Load libraries
library(tidyverse)

# Source files
source("PROLONG.R")  # Main simulation framework
source("fitting_functions.R")         # Required for parameter fitting

# Load market data for simulations
#' Read the global and national CSVs for one year and derive market data
#'
#' Both files are read with `read_csv()` and reduced to the rows whose `Year`
#' column equals `filter_year`; the two filtered tables are then handed to
#' `process_market_data()`. Progress is reported through `message()`.
#'
#' @param global_data_file Path to the global-level CSV (needs a `Year` column).
#' @param national_data_file Path to the national-level CSV (needs a `Year`
#'   column).
#' @param filter_year Year to keep from both files. Defaults to 2019.
#' @return The object produced by `process_market_data()`. Its `market_sizes`
#'   and `global_total` elements are used for the summary message.
load_market_data <- function(global_data_file, national_data_file, filter_year = 2019) {
  # Global input, restricted to the requested year
  message(paste("Loading global data from:", global_data_file))
  global_raw <- read_csv(global_data_file, show_col_types = FALSE)
  global_data <- filter(global_raw, Year == filter_year)

  # National input, restricted to the same year
  message(paste("Loading national data from:", national_data_file))
  national_raw <- read_csv(national_data_file, show_col_types = FALSE)
  national_data <- filter(national_raw, Year == filter_year)

  # Share calculation is delegated to process_market_data()
  market_data <- process_market_data(global_data, national_data)

  n_markets <- length(market_data$market_sizes)
  message(paste("Processed market data with", n_markets,
                "countries, total value:", market_data$global_total))

  market_data
}

#--------------------
# 2. Generate Parameter Configurations
#--------------------
#
# Base simulation configuration for solar PV.
base_config_solar <- list(
  n_years = 50,
  available_models = c("logistic", "logistic-linear", "bilogistic"),
  # One weight per entry of `available_models`, in the same order:
  #   logistic-only run:    model_weights = c(1, 0, 0)
  #   bi-logistic-only run: model_weights = c(0, 0, 1)
  #   mixed run:            model_weights = c(0.5, 0, 0.5)
  model_weights = c(0.5, 0, 0.5),
  # Sampling settings per growth parameter; every entry carries hard
  # min/max bounds.
  # NOTE(review): shape/rate look like gamma-distribution settings and
  # mean/sd like normal-distribution settings -- confirm against PROLONG.R.
  parameter_ranges = list(
    L = list(shape = 3, rate = 3 / 0.0405, min = 0.01, max = 0.95),
    k = list(shape = 4, rate = 4 / 0.772, min = 0.1, max = 1.2),
    takeoff = list(mean = 5.4, sd = 1.98, min = 1, max = 51)
  )
)

# NOTE(review): `balanced_configs_solar` is never created in this script, and
# `base_config_solar` is never passed to anything. The step that generates the
# balanced parameter configurations from the base configuration appears to be
# missing. Until it is restored, fail with an actionable message rather than a
# bare "object not found" error.
if (!exists("balanced_configs_solar")) {
  stop(
    "`balanced_configs_solar` is not defined. Generate the balanced ",
    "parameter configurations from `base_config_solar` before running ",
    "this step.",
    call. = FALSE
  )
}

# Visualize parameter coverage (uses the `summary` element of the
# generated configurations)
coverage_plots_solar <- visualize_parameter_coverage(balanced_configs_solar$summary)

# Save parameter coverage plot
# ggsave("data/output/solar/parameter_coverage.pdf", 
#        plot = coverage_plots_solar$parameter_plot,
#        width = 8, height = 8, units = "in", dpi = 300)

# Get all configurations for solar PV
all_configs_solar <- balanced_configs_solar$configurations

# Save configurations for future use
# saveRDS(all_configs_solar, 'data/output/solar/training_configs.Rds')

#--------------------
# 3. Load Market Data for Solar PV
#--------------------
# Market data for solar PV, taken from the electricity inputs.
# 2016 is the solar-specific base year and overrides the function default.
market_data_solar <- load_market_data(
  global_data_file = "data/input/global_electricity.csv",
  national_data_file = "data/input/national_electricity.csv",
  filter_year = 2016
)

#--------------------
# 4. Run Multi-Configuration Simulations
#--------------------
# Simulate every parameter configuration against the solar market data.
# `runs_per_config` is the number of Monte Carlo runs per configuration;
# lower it when you only want to test that the code works.
multi_config_results_solar <- run_multi_config_simulations(
  market_data = market_data_solar,
  configs = all_configs_solar,
  runs_per_config = 1000
)

# Spaghetti plot of the simulated global trajectories: one thin,
# semi-transparent line per run.
global_trajs_solar <- ggplot(data = multi_config_results_solar$global_results) +
  geom_line(
    aes(x = year, y = global_deployment, group = run),
    colour = "steelblue",
    alpha = 0.3,
    linewidth = 0.05
  ) +
  # Solar-specific y-axis limit (zooms without dropping data)
  coord_cartesian(ylim = c(0, 0.8)) +
  theme_minimal()

# Save global trajectories visualization
# ggsave("data/output/solar/global_trajectories.pdf", 
#        plot = global_trajs_solar,
#        width = 8, height = 8, units = "in", dpi = 300)

#--------------------
# 5. Prepare Training Data
#--------------------
# Cutoff years used for the curtailment analysis
cutoff_years <- c(12, 15, 18, 24, 30, 35)

# Pick diverse runs from the simulation output and turn them into training
# data. `runs_per_config` is the number of diverse runs to select (lower it
# when testing the code); `debug = TRUE` switches on detailed logging.
efficient_results_solar <- prepare_efficient_training_data_simplified(
  multi_config_results = multi_config_results_solar,
  cutoff_years = cutoff_years,
  runs_per_config = 200,
  debug = TRUE
)

# Keep only training rows backed by at least five countries
training_data_solar <- filter(
  efficient_results_solar$training_data,
  n_countries >= 5
)

# Save training data
# write_csv(training_data_solar, 'data/output/solar/training_data.csv')

#--------------------
# 6. Build Projection Model
#--------------------
# Fit the projection model (a random forest, per the original note) on the
# filtered training data.
#   use_config_features = FALSE : use only empirical features
#   keep_forest = TRUE          : retain the forest for prediction intervals
unified_model_solar <- build_unified_projection_model(
  training_data = training_data_solar,
  use_config_features = FALSE,
  keep_forest = TRUE
)

# Save model for future use
# saveRDS(unified_model_solar, 'data/output/solar/model.Rds')

#--------------------
# 7. Make Empirical Projections
#--------------------
# Historical global solar deployment as a share (Value / Total), 2000 onwards
global_dep <- read_csv(
  "data/input/global_solarpv.csv",
  show_col_types = FALSE
) %>%
  mutate(Share = Value / Total) %>%
  filter(Year >= 2000) %>%
  select(Year, global_deployment = Share)

# Country market sizes: each country's 2015 `Total` relative to the global
# electricity value of the same year.
# NOTE(review): market sizes use 2015 while the simulation base year is 2016,
# and the factor 1000 presumably reconciles units between the two files --
# confirm both.
national_timeseries <- read_csv(
  "data/input/national_solarpv.csv",
  show_col_types = FALSE
) %>%
  filter(Year == 2015) %>%
  select(Country, Year, National = Total) %>%
  left_join(
    read_csv("data/input/global_electricity.csv", show_col_types = FALSE) %>%
      rename(Global = Value),
    # Explicit key: the global table is matched to each country by year
    by = "Year"
  ) %>%
  mutate(market_size = (National * 1000) / Global) %>%
  select(Country, market_size) %>%
  # A zero global denominator gives +/-Inf; treat those shares as 0.
  # NA and NaN values are left untouched.
  mutate(market_size = if_else(is.infinite(market_size), 0, market_size))

# Fitted national growth parameters (2015 onwards) with market size attached.
# `national_timeseries` holds only Country and market_size, so Country is the
# join key.
fit_params <- read_csv(
  "data/input/national_solar_parameters_share.csv",
  show_col_types = FALSE
) %>%
  filter(Year >= 2015) %>%
  left_join(national_timeseries, by = "Country")

# Empirical inputs for the projections: S-curve fits with Maturity >= 0.5,
# combined with the global series; keep only years with at least five mature
# countries.
empirical_data <- prepare_empirical_data(
  fit_params %>%
    filter(Fit == "S", Maturity >= 0.5) %>%
    select(Year, Country, K, L, G, market_size),
  global_dep
) %>%
  filter(n_mature_countries >= 5)

# Generate one projection per year that has sufficient empirical data, then
# stack the results. Building a list and binding once avoids re-copying the
# accumulated data frame on every iteration, and keeps the per-year
# temporaries out of the workspace.
empirical_projections <- lapply(
  unique(empirical_data$Year),
  function(proj_year) {
    # Empirical inputs available for this year
    year_data <- empirical_data %>% filter(Year == proj_year)

    # Project the trajectory out to 2050 with the fitted model
    projection <- project_trajectory(
      year_data,
      unified_model_solar,
      last_year = 2050
    )

    # Tag the summary with the year it was produced from and the model label
    projection$summary %>%
      mutate(Max.Year = proj_year, Model = "Logistic")
  }
) %>%
  bind_rows()

# Save projection results
# write_csv(empirical_projections, 'data/output/solar/empirical_projections.csv')

#--------------------
# 8. Visualize Projections
#--------------------
# Projection visualization: one facet per data year (Max.Year, up to 2023).
# Ribbons span the p05-p95 and p25-p75 columns of the projection summary,
# the line is the median, and the points are the historical global series.
projection_plot <- ggplot(
  empirical_projections %>% filter(Max.Year <= 2023),
  aes(x = year)
) +
  geom_ribbon(aes(ymin = p05, ymax = p95), fill = "lightblue", alpha = 0.3) +
  geom_ribbon(aes(ymin = p25, ymax = p75), fill = "lightblue", alpha = 0.5) +
  # `linewidth` replaces the deprecated `size` for lines (ggplot2 >= 3.4.0),
  # matching the trajectory plot earlier in this script
  geom_line(aes(y = median), color = "darkblue", linewidth = 1) +
  geom_point(data = global_dep, aes(x = Year, y = global_deployment), size = 0.5) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "Year",
    y = "Global Deployment (share)"
  ) +
  facet_wrap(~Max.Year) +
  coord_cartesian(ylim = c(0, 0.6), xlim = c(2010, 2050)) +  # Solar-specific axis limits
  theme_minimal()

# Save projection visualization
# ggsave("data/output/solar/projection_plot.pdf", 
#        plot = projection_plot,
#        width = 8, height = 8, units = "in", dpi = 300)

# Clean up: remove only the objects this script creates, instead of wiping the
# whole environment with rm(list = ls()), which would also delete sourced
# functions and anything else the user had loaded.
# NOTE: all results are discarded here -- enable the commented-out save calls
# above if the outputs are needed after the run.
pipeline_objects <- c(
  "load_market_data", "base_config_solar", "coverage_plots_solar",
  "all_configs_solar", "market_data_solar", "multi_config_results_solar",
  "global_trajs_solar", "cutoff_years", "efficient_results_solar",
  "training_data_solar", "unified_model_solar", "global_dep",
  "national_timeseries", "fit_params", "empirical_data",
  "empirical_projections", "yz", "emp_data", "projected_trajectory",
  "projected_trajectory_export", "projection_plot"
)
# intersect() skips names that do not exist, so rm() never warns
rm(list = c(intersect(pipeline_objects, ls()), "pipeline_objects"))
gc(full = TRUE)