# ======================================================================
#
# Script: Simulate a breeding program that selects on true genetic values for the mean performance across a TPE
# Each generation, we undertake different strategies for crossing genotypes to create products for release
# and compare their performance in terms of the TPE mean and stability across the TPE
#
# Authors: D.L. Waters and D.J. Tolhurst
#
# ======================================================================

# ---- Load necessary libraries ---- #
rm(list = ls())  # Clean up the environment

library(AlphaSimR)
library(FieldSimR)
library(data.table)
library(dplyr)
library(ggplot2)
source("0dCross_fn.R")

#============================================# Prepare simulation parameters

# Load global parameters
source("2GlobalParams.R")

# Prepare lists to collect results
rmsd_percentiles <- c(0.1, 1, 2.5, 5, 10, 20, 50, 100)   # Stability percentiles used to assess performance 

# Set directory for simulation output
output_dir <- "simOutput"
if (!dir.exists(output_dir)) dir.create(output_dir)

# List scenario(s) to be simulated
sim_scen <- c("mod50_het") 


#=====================================================#
#Begin Simulation
#=====================================================#

for (ge_scen in sim_scen){

   cat(" Ge scenario:",ge_scen,"\n")
  
  
  # Create dataframes to record stats
  release_full <- data.frame()                        # Record the best genotypes per strategy per stability percentile
  release_top_full <- data.frame()                    # Record the average of the top 5 genotypes per strategy per stability percentile
  release_full_var <- data.frame()                    # Record  the best genotypes per strategy per stability percentile based on raw variance
  winners_full <- data.frame()                        # Record which strategy had the best genotype per stability percentile
  winners_full_var <- data.frame()                    # Record which strategy had the best genotype per stability percentile based on raw variance 
  pop_stats <- data.frame()                           # Store summaries of the population development populations
  cor_stats <- data.frame()                           # Store average genetic correlation among progeny in PD population
  pd_stats <- data.frame()                            # Store summaries in the PD population 
  
  
  # Load GEI matrix and define relevant variables 
  Ge <- readRDS(paste0("9simGe/",ge_scen,"_GEI.RDS"))
  Ce <- cov2cor(Ge)                                   # Correlation matrix
  De <- diag(diag(Ge))                                # Genetic variances within each environment

  k <- ge_params[[ge_scen]]$k                         # Rank of the GEI matrix
  k_maxfit <- ge_params[[ge_scen]]$k                  # Number of covariates used in crossing calculations
  p <- ge_params[[ge_scen]]$p                         # Number of environments sampled 
  nEnvTPE <- ge_params[[ge_scen]]$nEnvTPE             # Number of environments in the TPE (this must equal p for this simulation)
  
 
  for (Rep in 1:nReps){
   
  #---- Initialise Sim ----
  
  source("3CreateFounders.R")
  
  
  #----Burn-in Phase ----
  
  for (cycle in 1:nCyclesBurnIn){
    
     cat("Working on burnin cycle:", cycle, "\n")
     source("4BurnIn.R")                            # Run burn-in cycles
    } 
     
     burnDH <- DH
     
     print("burn in complete")
     
     # ---- Future phase ----
    
      for(cycle in (nCyclesBurnIn+1):(nCyclesBurnIn+nCyclesFuture)) {
       
        cat("Working on future cycle:", cycle, "\n")
     
      # Get rotation for the breeding population in each cycle
        source("5RotatePopulation.R")
        
        
      # Make product development crosses using different strategies on the same population,
         for(strat in ProdDevStrat){
         cat("  Working on prodDev:",strat, "cycle",cycle,"\n", "Rep", Rep, 
             " Ge scenario:",ge_scen,"\n")
          source(paste0("6ProdDev/6ProdDev_", strat, ".R"))
         }

        
      # Record the performance of product development lines with reference to the parental population
         source("7RecordPerformance.R")

        
      # Perform a cycle of population improvement 
        source("8PopImp.R")  
        
      }
  }
  
  # output the results from each ge_scen
  
  fwrite(release_top_full, file = file.path(output_dir, paste0(ge_scen, "_release_top5_sum.csv")))
  fwrite(cor_stats, file = file.path(output_dir, paste0(ge_scen, "_cor_stats.csv")))
  fwrite(pop_stats, file = file.path(output_dir, paste0(ge_scen, "_pop_stats.csv")))
  fwrite(pd_stats, file = file.path(output_dir, paste0(ge_scen, "_pd_stats.csv")))
   }


# some example plots

# Mean genetic correlation between environments
cor_df <- read.csv("10SimOutput/mod50_het_cor_stats.csv")
head(cor_df)
tt <- with(cor_df, tapply(mean_cor, list(strat, cycle), mean))
cor_df <- data.frame(select = rownames(tt),
                     cycle = rep(colnames(tt), each = 6),
                     mean.cor = c(tt))
head(cor_df)
ggplot(cor_df, aes(x = cycle, y = mean.cor, group = select, color = select)) + geom_line()

# population improvement
pop_df <- read.csv("10SimOutput/mod50_het_pop_stats.csv")
head(pop_df)
ggplot(pop_df, aes(x = cycle, y = mean.cor, group = select, color = select)) + geom_line()

# product development
pd_df <- read.csv("10SimOutput/mod50_het_pd_stats.csv")
head(pd_df)
ggplot(pd_df, aes(x = cycle, y = mean.cor, group = select, color = select)) + geom_line()

# release varieties
release_df <- read.csv("10SimOutput/mod50_het_release_top5_sum.csv")
head(release_df)
ggplot(release_df, aes(x = cycle, y = mean.cor, group = select, color = select)) + geom_line()


# script end

