# ---- Load necessary libraries ---- #
rm(list = ls())  # Clean up the environment
set.seed(12)
library(AlphaSimR)
library(FieldSimR)
library(data.table)
library(dplyr)
source(paste0("seltools/drotate.R"))
source(paste0("seltools/dcross.R"))
#============================================# Prepare simulation parameters
# Load global parameters
source("GlobalParam.R")
# Load GEI
# Bayer situation
ge_params <- list(
# 20 environments
low20_het = list(k = 7, k_maxfit = 5, p = 20, nEnvTPE = 20),
mod20_het = list(k = 7, k_maxfit = 5, p = 20, nEnvTPE = 20),
high20_het = list(k = 7, k_maxfit = 5, p = 20, nEnvTPE = 20),
# 50 environments
low50_het = list(k = 7, k_maxfit = 5, p = 50, nEnvTPE = 50),
mod50_het = list(k = 7, k_maxfit = 5, p = 50, nEnvTPE = 50),
high50_het = list(k = 7, k_maxfit = 5, p = 50, nEnvTPE = 50))
# Prepare lists to collect results
rmsd_percentiles <- c(0.1,1,2.5,5,10, 20, 50, 100)   # rmsd levels to calculate main effects
output_dir <- "simOutput"
if (!dir.exists(output_dir)) dir.create(output_dir)
#=====================================================#
#Begin Simulation
#=====================================================#
for(ge_scen in names(ge_params)){
cat(" Ge scenario:",ge_scen,"\n")
# Create dataframes to record stats
release_full <- data.frame()
release_top_full <- data.frame()
release_full_var <- data.frame()
winners_full <- data.frame()
winners_full_var <- data.frame()
pop_stats <- data.frame()
cor_stats <- data.frame()
pd_stats <- data.frame()
Ge <- readRDS(paste0("simGe/",ge_scen,"_GEI.RDS"))
Ce <- cov2cor(Ge)
De <- diag(diag(Ge))
k <- ge_params[[ge_scen]]$k
k_maxfit <- ge_params[[ge_scen]]$k
p <- ge_params[[ge_scen]]$p
nEnvTPE <- ge_params[[ge_scen]]$nEnvTPE
for (Rep in 1:nReps){
#----Create Initial Parents ----
source("CreateFounders.R")
covs_tpe    <- input_asr$cov.mat              # store covariates for base population
#----Fill the breeding pipeline ----
source("FillPipeline.R")
#----Burn-in Phase ----
for (cycle in 1:nCyclesBurnIn){
source("BurnIn.R")                            # Run burn-in cycles
}
burnDH <- DH
burnF1 <- F1
print("burn in complete")
# ---- Future phase ----
for(cycle in (nCyclesBurnIn+1):(nCyclesBurnIn+nCyclesFuture)) {
cat("Working on future cycle:",cycle,"\n")
# Get rotation for the population in each cycle
source("RotatePopulation.R")
# Make product development crosses using different strategies on the same population,
for(strat in ProdDevStrat){
cat("  Working on prodDev:",strat, "cycle",cycle,"\n", "Rep", Rep,
" Ge scenario:",ge_scen,"\n")
source(paste0(strat, "ProdDev.R"))
}
# Summarise results
source("RecordBestLinesRotPiPd.R")
source("RecordGain.R")
# Perform a cycle of population improvement
source("PopImp.R")
for (p in rmsd_percentiles) {
cat("\nPercentile:", p, "\n")
print(summary(as.factor(winners_full$strat[winners_full$rmsd_percentile == p])))
}
}
}
fwrite(release_full, file = file.path(output_dir, paste0(ge_scen, "_release_sum.csv")))
fwrite(release_top_full, file = file.path(output_dir, paste0(ge_scen, "_release_top5_sum.csv")))
fwrite(release_full_var, file = file.path(output_dir, paste0(ge_scen, "_release_var_sum.csv")))
fwrite(winners_full, file = file.path(output_dir, paste0(ge_scen, "_winners_sum.csv")))
fwrite(winners_full_var, file = file.path(output_dir, paste0(ge_scen, "_winners_var_sum.csv")))
fwrite(cor_stats, file = file.path(output_dir, paste0(ge_scen, "_cor_stats.csv")))
fwrite(pop_stats, file = file.path(output_dir, paste0(ge_scen, "_pop_stats.csv")))
fwrite(pd_stats, file = file.path(output_dir, paste0(ge_scen, "_pd_stats.csv")))
}
# ======================================================================
#
#
# Script: Simulate selection for stability using TBVs to examine patterns over time
#
#
# ======================================================================
#
# Description:
# This script is used to compare selection strategies, with the ultimate aim of releasing
# a variety that performs the best across the TPE
# We do not include MET sampling - just using BVs from the TPE
#
# This script repeats a 10 year rapid cylcing scheme across a number of different scenarios
# and records the results across them. We are aiming to get a handle on how it works
#
# ======================================================================
# ---- Load necessary libraries ---- #
rm(list = ls())  # Clean up the environment
set.seed(12)
library(AlphaSimR)
library(FieldSimR)
library(data.table)
library(dplyr)
source(paste0("seltools/drotate.R"))
source(paste0("seltools/dcross.R"))
#============================================# Prepare simulation parameters
# Load global parameters
source("GlobalParam.R")
# Load GEI scenarios and define parameters
# k = rank,  k_maxfit = number of factors considered for selection
# nEnvTPE = number of environments in the TPE
# p = number of environments sampled = nEnvTPE
ge_params <- list(
high50_het = list(k = 7, k_maxfit = 5, nEnvTPE = 50, p = nEnvTPE))
ge_params <- list(
high50_het = list(k = 7, k_maxfit = 5, nEnvTPE = 50, p = nEnvTPE))
ge_params <- list(
high50_het = list(k = 7, k_maxfit = 5, nEnvTPE = 50, p = 50))
# Prepare lists to collect results
rmsd_percentiles <- c(0.1,1,2.5,5,10, 20, 50, 100)   # rmsd levels to calculate main effects
output_dir <- "simOutput"
if (!dir.exists(output_dir)) dir.create(output_dir)
for(ge_scen in names(ge_params)){
cat(" Ge scenario:",ge_scen,"\n")
# Create dataframes to record stats
release_full <- data.frame()
release_top_full <- data.frame()
release_full_var <- data.frame()
winners_full <- data.frame()
winners_full_var <- data.frame()
pop_stats <- data.frame()
cor_stats <- data.frame()
pd_stats <- data.frame()
Ge <- readRDS(paste0("simGe/",ge_scen,"_GEI.RDS"))
Ce <- cov2cor(Ge)
De <- diag(diag(Ge))
k <- ge_params[[ge_scen]]$k
k_maxfit <- ge_params[[ge_scen]]$k
p <- ge_params[[ge_scen]]$p
nEnvTPE <- ge_params[[ge_scen]]$nEnvTPE
for (Rep in 1:nReps){
#----Create Initial Parents ----
source("CreateFounders.R")
covs_tpe    <- input_asr$cov.mat              # store covariates for base population
#----Fill the breeding pipeline ----
source("FillPipeline.R")
#----Burn-in Phase ----
for (cycle in 1:nCyclesBurnIn){
source("BurnIn.R")                            # Run burn-in cycles
}
burnDH <- DH
burnF1 <- F1
print("burn in complete")
# ---- Future phase ----
for(cycle in (nCyclesBurnIn+1):(nCyclesBurnIn+nCyclesFuture)) {
cat("Working on future cycle:",cycle,"\n")
# Get rotation for the population in each cycle
source("RotatePopulation.R")
# Make product development crosses using different strategies on the same population,
for(strat in ProdDevStrat){
cat("  Working on prodDev:",strat, "cycle",cycle,"\n", "Rep", Rep,
" Ge scenario:",ge_scen,"\n")
source(paste0(strat, "ProdDev.R"))
}
# Summarise results
source("RecordBestLinesRotPiPd.R")
source("RecordGain.R")
# Perform a cycle of population improvement
source("PopImp.R")
for (p in rmsd_percentiles) {
cat("\nPercentile:", p, "\n")
print(summary(as.factor(winners_full$strat[winners_full$rmsd_percentile == p])))
}
}
}
fwrite(release_full, file = file.path(output_dir, paste0(ge_scen, "_release_sum.csv")))
fwrite(release_top_full, file = file.path(output_dir, paste0(ge_scen, "_release_top5_sum.csv")))
fwrite(release_full_var, file = file.path(output_dir, paste0(ge_scen, "_release_var_sum.csv")))
fwrite(winners_full, file = file.path(output_dir, paste0(ge_scen, "_winners_sum.csv")))
fwrite(winners_full_var, file = file.path(output_dir, paste0(ge_scen, "_winners_var_sum.csv")))
fwrite(cor_stats, file = file.path(output_dir, paste0(ge_scen, "_cor_stats.csv")))
fwrite(pop_stats, file = file.path(output_dir, paste0(ge_scen, "_pop_stats.csv")))
fwrite(pd_stats, file = file.path(output_dir, paste0(ge_scen, "_pd_stats.csv")))
}
warnings()
# ---- Load necessary libraries ---- #
rm(list = ls())  # Clean up the environment
set.seed(12)
library(AlphaSimR)
library(FieldSimR)
library(data.table)
library(dplyr)
source(paste0("seltools/drotate.R"))
source(paste0("seltools/dcross.R"))
#============================================# Prepare simulation parameters
# Load global parameters
source("GlobalParam.R")
# Load GEI scenarios and define parameters
# k = rank,  k_maxfit = number of factors considered for selection
# nEnvTPE = number of environments in the TPE
# p = number of environments sampled = nEnvTPE
ge_params <- list(
high50_het = list(k = 7, k_maxfit = 5, nEnvTPE = 50, p = 50))
# Prepare lists to collect results
rmsd_percentiles <- c(0.1,1,2.5,5,10, 20, 50, 100)   # rmsd levels to calculate main effects
output_dir <- "simOutput"
if (!dir.exists(output_dir)) dir.create(output_dir)
#=====================================================#
#Begin Simulation
#=====================================================#
for(ge_scen in names(ge_params)){
cat(" Ge scenario:",ge_scen,"\n")
# Create dataframes to record stats
release_full <- data.frame()
release_top_full <- data.frame()
release_full_var <- data.frame()
winners_full <- data.frame()
winners_full_var <- data.frame()
pop_stats <- data.frame()
cor_stats <- data.frame()
pd_stats <- data.frame()
# Load GEI
Ge <- readRDS(paste0("simGe/",ge_scen,"_GEI.RDS"))
Ce <- cov2cor(Ge)
De <- diag(diag(Ge))
k <- ge_params[[ge_scen]]$k
k_maxfit <- ge_params[[ge_scen]]$k
p <- ge_params[[ge_scen]]$p
nEnvTPE <- ge_params[[ge_scen]]$nEnvTPE
for (Rep in 1:nReps){
#----Create Initial Parents ----
source("CreateFounders.R")
covs_tpe    <- input_asr$cov.mat              # store covariates for base population
#----Fill the breeding pipeline ----
source("FillPipeline.R")
#----Burn-in Phase ----
for (cycle in 1:nCyclesBurnIn){
source("BurnIn.R")                            # Run burn-in cycles
}
burnDH <- DH
burnF1 <- F1
print("burn in complete")
# ---- Future phase ----
for(cycle in (nCyclesBurnIn+1):(nCyclesBurnIn+nCyclesFuture)) {
cat("Working on future cycle:",cycle,"\n")
# Get rotation for the population in each cycle
source("RotatePopulation.R")
# Make product development crosses using different strategies on the same population,
for(strat in ProdDevStrat){
cat("  Working on prodDev:",strat, "cycle",cycle,"\n", "Rep", Rep,
" Ge scenario:",ge_scen,"\n")
source(paste0(strat, "ProdDev.R"))
}
# Record the best product development lines with reference to the parental population
source("RecordBestLinesRotPiPd.R")
source("RecordGain.R")
# Perform a cycle of population improvement
source("PopImp.R")
for (p in rmsd_percentiles) {
cat("\nPercentile:", p, "\n")
print(summary(as.factor(winners_full$strat[winners_full$rmsd_percentile == p])))
}
}
}
# output the results from each ge_scen
fwrite(release_full, file = file.path(output_dir, paste0(ge_scen, "_release_sum.csv")))
fwrite(release_top_full, file = file.path(output_dir, paste0(ge_scen, "_release_top5_sum.csv")))
fwrite(release_full_var, file = file.path(output_dir, paste0(ge_scen, "_release_var_sum.csv")))
fwrite(winners_full, file = file.path(output_dir, paste0(ge_scen, "_winners_sum.csv")))
fwrite(winners_full_var, file = file.path(output_dir, paste0(ge_scen, "_winners_var_sum.csv")))
fwrite(cor_stats, file = file.path(output_dir, paste0(ge_scen, "_cor_stats.csv")))
fwrite(pop_stats, file = file.path(output_dir, paste0(ge_scen, "_pop_stats.csv")))
fwrite(pd_stats, file = file.path(output_dir, paste0(ge_scen, "_pd_stats.csv")))
}
setwd("~/Documents/bayer_project/properSim/paper_sim/final_sim/for_sven/simGe")
# We only accept matrices with mean correlations of 0.2, 0.5, 0.8 +/- 0.005
# With heterogenous genetic variance we constrain it to be mean variance of 1
#
#===============================================#
rm(list = ls())
set.seed(97)
library(AlphaSimR)
library(FieldSimR)
library(data.table)
#-----------------------------------------------------------#
# Function for sampling genetic variances with guarented mean variance
rvar_diag_gamma <- function(n_env, alpha, mean = 1) {
# Step 1: Draw n_env independent Gamma(alpha, 1) values
x <- rgamma(n_env, shape = alpha, scale = 1)
# Step 2: Normalize so their sample mean = 1
x_norm <- n_env * x / sum(x)
# Step 3: Rescale so the target mean variance = `mean`
vars <- x_norm * mean
# Step 4: Put variances on the diagonal of a matrix
diag(vars)
}
# We will use genetic variance = 1 across all environments
# high = 0.2, mod = 0.5, low = 0.2, tolerance = +/- 0.005
# ------------------------------ Create small Ge matrices (<5 environments)
# 2 environments
low2 <- matrix(data = c(1,0.8,0.8,1), ncol = 2)
mod2 <- matrix(data = c(1,0.5,0.5,1), ncol = 2)
high2 <- matrix(data = c(1,0.2,0.2,1), ncol = 2)
de2 <- matrix(c(0.8,0,0,1.2), ncol = 2)
low2_het <- sqrt(de2) %*% low2 %*% sqrt(de2)
mod2_het <- sqrt(de2) %*% mod2 %*% sqrt(de2)
high2_het <- sqrt(de2) %*% high2 %*% sqrt(de2)
# 3 environments
low3 <- matrix(data = c(1,0.8,0.65,
0.8, 1, 0.95,
0.65, 0.95, 1), ncol = 3)
mod3 <- matrix(data = c(1, 0.65, 0.35,
0.65, 1, 0.50,
0.35, 0.50, 1), ncol = 3)
high3 <- matrix(data = c(1, 0.20, 0,
0.20, 1, 0.4,
0, 0.4, 1), ncol = 3)
de3 <- matrix(c(0.7,0,0,
0,1,0,
0,0,1.3), ncol = 3)
summary(cov2cor(high3)[upper.tri(high3)])
summary(cov2cor(mod3)[upper.tri(mod3)])
summary(cov2cor(low3)[upper.tri(low3)])
low3_het <- sqrt(de3) %*% low3 %*% sqrt(de3)
mod3_het <- sqrt(de3) %*% mod3 %*% sqrt(de3)
high3_het <- sqrt(de3) %*% high3 %*% sqrt(de3)
# 4 environments
low4 <- matrix(data = c(1, 0.6, 0.80, 0.9,
0.6, 1, 0.7, 0.85,
0.80, 0.7, 1, 0.95,
0.9, 0.85, 0.95, 1), ncol = 4)
mod4 <- matrix(data = c(1, 0.51, 0.46, 0.23,
0.51, 1, 0.80, 0.4,
0.46, 0.80, 1, 0.6,
0.23, 0.4, 0.6, 1), ncol = 4)
high4 <- matrix(c(
1.00,  0.50, 0,  0.23,
0.50,  1.00, 0.18,  0.44,
0, 0.18,  1.00, -0.15,
0.23,  0.44, -0.15,  1.00), nrow = 4, byrow = TRUE)
summary(low4[upper.tri(low4)])
summary(mod4[upper.tri(mod4)])
summary(high4[upper.tri(high4)])
de4 <- matrix(c(0.5,0,0,0,
0,0.8,0,0,
0,0,1.2,0,
0,0,0,1.5), ncol = 4)
low4_het <- sqrt(de4) %*% low4 %*% sqrt(de4)
mod4_het <- sqrt(de4) %*% mod4 %*% sqrt(de4)
high4_het <- sqrt(de4) %*% high4 %*% sqrt(de4)
# Write Ge matrices
saveRDS(low2, file = "low2_GEI.RDS")
saveRDS(mod2, file = "mod2_GEI.RDS")
saveRDS(high2, file = "high2_GEI.RDS")
saveRDS(low2_het, file = "low2_het_GEI.RDS")
saveRDS(mod2_het, file = "mod2_het_GEI.RDS")
saveRDS(high2_het, file = "high2_het_GEI.RDS")
saveRDS(low3, file = "low3_GEI.RDS")
saveRDS(mod3, file = "mod3_GEI.RDS")
saveRDS(high3, file = "high3_GEI.RDS")
saveRDS(low3_het, file = "low3_het_GEI.RDS")
saveRDS(mod3_het, file = "mod3_het_GEI.RDS")
saveRDS(high3_het, file = "high3_het_GEI.RDS")
saveRDS(low4, file = "low4_GEI.RDS")
saveRDS(mod4, file = "mod4_GEI.RDS")
saveRDS(high4, file = "high4_GEI.RDS")
saveRDS(low4_het, file = "low4_het_GEI.RDS")
saveRDS(mod4_het, file = "mod4_het_GEI.RDS")
saveRDS(high4_het, file = "high4_het_GEI.RDS")
#-----------------------------------------------------------#
#-----------------------------------------------------------#
#-----------------------------------------------------------#
# Create a list of parameters for the different  scenarios
# Baseline correlation parameters
rho_low <- 0.79
rho_mod <- 0.43
rho_high <- 0.075
tolerance <- 0.001 # only accept matrices with mean correlation within target +/- tolerance
gam_val <- -0.4    # gamma value
cov_params <- list(
low5_het = list(rho = rho_low, k = 5, gamma = gam_val, scale = 1, p = 5, target = 0.8),
mod5_het = list(rho = rho_mod, k = 5, gamma = gam_val, scale = 1, p = 5, target = 0.5),
high5_het = list(rho = rho_high, k = 5, gamma = gam_val, scale = 1, p = 5, target = 0.5),
low10_het = list(rho = rho_low, k = 5, gamma = gam_val, scale = 1, p = 10, target = 0.8),
mod10_het = list(rho = rho_mod, k = 5, gamma = gam_val, scale = 1, p = 10, target = 0.5),
high10_het = list(rho = rho_high, k = 5, gamma = gam_val, scale = 1, p = 10, target = 0.2),
low20_het = list(rho = rho_low, k = 7, gamma = gam_val, scale = 1, p = 20, target = 0.8),
mod20_het= list(rho = rho_mod, k = 7, gamma = gam_val, scale = 1, p = 20, target = 0.5),
high20_het = list(alpha = 1.5, beta = 1, rho = rho_high, k = 7, gamma = gam_val, scale = 1, p = 20, target = 0.2),
low50_het = list(rho = rho_low, k = 7, gamma = gam_val, scale = 1, p = 50, target = 0.8),
mod50_het = list(rho = rho_mod, k = 7, gamma = gam_val, scale = 1, p = 50, target = 0.5),
high50_het = list(rho = rho_high, k = 7, gamma = gam_val, scale = 1, p = 50, target = 0.2),
low100_het = list(rho = rho_low, k = 7, gamma = gam_val, scale = 1, p = 100, target = 0.8),
mod100_het = list(rho = rho_mod, k = 7, gamma = gam_val, scale = 1, p = 100, target = 0.5),
high100_het= list(rho = rho_high, k = 7, gamma = gam_val, scale = 1, p = 100, target = 0.2)
)
#--- Simulate the GEI matrices
GeMats <- list()
for (i in names(cov_params)) {
cat("Running scenario:", i, "\n")
par <- cov_params[[i]]
# Generate De
if (par$scale == 0) {
De <- diag(1, par$p)
} else {
De <- rvar_diag_gamma(n_env = par$p, alpha = 10, mean = 1)
}
# Generate Ce
ok <- FALSE
while (!ok) {
# --- simulate your Ce here ---
Ce <- struc_cor_mat(n = par$p, base.cor = par$rho, rank = par$k, skew = par$gamma)
# compute mean off-diagonal
m <- mean(Ce[upper.tri(Ce)])
# check if tolerance for mean correlation is met
if (abs(m - par$target) <= tolerance) {
ok <- TRUE
}
}
# Heterogeneous matrix
Ge_het <- sqrt(De) %*% Ce %*% sqrt(De)
GeMats[[i]] <- Ge_het
saveRDS(Ge_het, file = paste0(i, "_GEI.RDS"))
# Homogeneous matrix
base_name <- gsub("_het", "", i)
GeMats[[base_name]] <- Ce
saveRDS(Ce, file = paste0(base_name, "_GEI.RDS"))
}
#-----------------------------------------------------------#
#-----------------------------------------------------------#
#-----------------------------------------------------------#
# Summarise the different Ge matrices
# Summary of covariance matrices
for (i in names(GeMats)){
print(cbind((summary(GeMats[[i]][upper.tri(GeMats[[i]])])),
i ))
}
# Summary of correlation matrices
for (i in names(GeMats)){
print(cbind( summary(cov2cor(GeMats[[i]])[upper.tri(cov2cor(GeMats[[i]]))]),
i ))
}
# Summary of variances within environments
for (i in names(GeMats)){
print(mean(diag(GeMats[[i]]), main = i, breaks = 50))
}
#---------------# make histogram plots
par(mfrow = c(3,3))
for (i in names(cov_params)){
hist(cov2cor(GeMats[[i]])[upper.tri(cov2cor(GeMats[[i]]))], main = as.character(i))
abline(v = mean(cov2cor(GeMats[[i]])[upper.tri(cov2cor(GeMats[[i]]))]), lwd = 4)
}
#---------------# Investigate eigenvalues
get_eig_prop <- function(mat) {
eig_vals <- eigen(mat, symmetric = TRUE)$values
eig_vals <- eig_vals[eig_vals > 1e-07]  # remove zero or negative eigenvalues
eig_vals / sum(eig_vals)
}
