R-4.5.2 
SOFTWARE REQUIREMENTS
These scripts were developed and tested in R version 4.2.0+. 
The following libraries are required:
- terra (Spatial data handling)
- biomod2 (Species Distribution Modeling)
- humboldt (Niche overlap and divergence tests)
- dplyr, tidyr (Data manipulation)

# ==============================================================================
# STRATEGIC ENSEMBLE SPECIES DISTRIBUTION MODELING (SDM)
# ==============================================================================
# Purpose: This script implements a Strategic Pseudo-Absence (PA) selection 
# protocol. PAs are restricted to extralimital regions (Mexico and Arctic) to 
# better define the species' climatic boundaries within a biomod2 framework.
# ==============================================================================

library(biomod2)
library(terra)
library(dplyr)
library(ggplot2)

# --- 1. GLOBAL DIRECTORY & PATH SETUP ---
base_path <- "C:/DATA"
path_vars <- file.path(base_path, "Variables_Lc")
path_occ  <- file.path(base_path, "Occurrences/Lc.csv")
path_out  <- file.path(base_path, "OUTPUTS")

if(!dir.exists(path_out)) dir.create(path_out, recursive = TRUE)
setwd(path_out)

myRespName <- "Lynxcanadensis"

# --- 2. DATA LOADING & PRESENCE FILTERING ---
message(">>> Loading environmental layers and presence records...")
my_expl <- rast(list.files(path_vars, pattern = '.tif$', full.names = TRUE))
occ_data <- read.csv(path_occ)

# Extract presence-only coordinates
pres_data <- occ_data %>% filter(get(myRespName) == 1)
pres_xy   <- pres_data[, c("X_WGS84", "Y_WGS84")]
pres_resp <- rep(1, nrow(pres_xy))

# --- 3. STRATEGIC PSEUDO-ABSENCE (PA) GENERATION ---
# Rationale: We intentionally select background points from regions where the 
# species is bioclimatically excluded (Mexico < 32°N and High Arctic > 68°N) 
# to force the model to identify hard climatic thresholds.

message(">>> Generating Strategic User-Defined PA points...")
# Extract non-NA cell coordinates from the first raster layer
all_cells <- as.data.frame(crds(my_expl[[1]], df = TRUE, na.rm = TRUE))

# Filter candidates based on the specified Lat/Long thresholds (Strategic Logic)
pa_candidates <- all_cells[all_cells$y < 32 | 
                             all_cells$y > 68 | 
                             (all_cells$y < 35 & all_cells$x > -95), ]

# Randomly sample 10,000 points from the strategic candidate pool
set.seed(42)
pa_sample <- pa_candidates[sample(nrow(pa_candidates), min(10000, nrow(pa_candidates))), ]
pa_xy <- setNames(pa_sample, c("X_WGS84", "Y_WGS84"))

# Combine Presence and Pseudo-Absence into a unified dataset
all_coords <- rbind(pres_xy, pa_xy)
all_resp   <- c(pres_resp, rep(NA, nrow(pa_xy))) # PAs marked as NA for user-defined selection

# Create the PA selection table for biomod2
PA_table <- data.frame(PA1 = rep(TRUE, length(all_resp)))

# --- 4. DATA FORMATTING & MODEL CALIBRATION ---
message(">>> Initializing BIOMOD2 Modeling...")

myBiomodData <- BIOMOD_FormatingData(
  resp.var      = all_resp,
  expl.var      = my_expl,
  resp.xy       = all_coords,
  resp.name     = myRespName,
  PA.strategy   = 'user.defined', # Using our strategic PA points
  PA.user.table = PA_table,
  filter.raster = TRUE
)

# Individual Model Calibration (RF, GLM, MAXNET, GAM, FDA)
myBiomodModelOut <- BIOMOD_Modeling(
  bm.format   = myBiomodData,
  modeling.id = 'Lc_Current_SDM',
  models      = c('RF', 'GLM', 'MAXNET', 'GAM', 'FDA'),
  CV.strategy = 'random',
  CV.nb.rep   = 2,    
  CV.perc     = 0.8,
  metric.eval = c('TSS', 'ROC'),
  var.import  = 2,   
  nb.cpu      = 6
)

# --- 5. ENSEMBLE MODELING ---
# Rationale: Ensemble produced using EMmean and EMmedian for robust consensus.
myBiomodEM <- BIOMOD_EnsembleModeling(
  bm.mod               = myBiomodModelOut,
  models.chosen        = 'all',
  em.by                = 'all',
  metric.select        = 'TSS',
  metric.select.thresh = 0.8, 
  metric.eval          = c('TSS', 'ROC'),
  em.algo              = c('EMwmean')
)

# --- 6. EXPORTING EVALUATION METRICS ---
message(">>> Exporting evaluation metrics and variable importance...")
write.csv(get_evaluations(myBiomodModelOut), "Lc_Ind_Eval.csv", row.names = FALSE)
write.csv(get_variables_importance(myBiomodModelOut), "Lc_Ind_VarImp.csv", row.names = FALSE)
write.csv(get_evaluations(myBiomodEM), "Lc_Ens_Eval.csv", row.names = FALSE)

# --- 7. SPATIAL OUTPUTS (ENSEMBLE FORECASTING) ---
message(">>> Generating final suitability TIFs...")

myBiomodEF_Current <- BIOMOD_EnsembleForecasting(
  bm.em               = myBiomodEM,
  proj.name           = 'Current',
  new.env             = my_expl,
  models.chosen       = 'all',
  metric.binary       = 'TSS',      # Automated BIN (Presence/Absence) TIF
  metric.filter       = 'TSS',      # Threshold-filtered TIF
  build.clamping.mask = FALSE       # Excluding clamping masks to keep directory clean
)

# Extract and save the final Ensemble Mean (EMwmean) raster
current_ensemble_raster <- get_predictions(myBiomodEF_Current)
writeRaster(current_ensemble_raster, 
            filename = file.path(path_out, "Lc_Current_Ensemble_Final.tif"), 
            overwrite = TRUE, 
            datatype = 'INT2S')

message(">>> SDM PROCESS COMPLETE. All outputs stored in: ", path_out)


















# ==============================================================================
# STRATEGIC ENSEMBLE SPECIES DISTRIBUTION MODELING (SDM) - BOBCAT (Lr)
# ==============================================================================
# Purpose: This script implements the "Deep North" Strategic PA selection 
# protocol for L. rufus. PAs are restricted to high-latitude regions (55N-72N) 
# to define the hard northern climatic boundaries of the species.
# ==============================================================================

library(biomod2)
library(terra)
library(dplyr)
library(ggplot2)

# --- 1. GLOBAL DIRECTORY & PATH SETUP ---
base_path <- "C:/DATA"
path_vars <- file.path(base_path, "Variables_Lr")
path_occ  <- file.path(base_path, "Occurrences/Lr.csv")
path_out  <- file.path(base_path, "OUTPUTS")

if(!dir.exists(path_out)) dir.create(path_out, recursive = TRUE)
setwd(path_out)

myRespName <- "Lynxrufus"

# --- 2. DATA LOADING & PRESENCE FILTERING ---
message(">>> Loading environmental layers and presence records for L. rufus...")
my_expl  <- rast(list.files(path_vars, pattern = '.tif$', full.names = TRUE))
occ_data <- read.csv(path_occ)

# Extract presence-only coordinates
pres_data <- occ_data %>% filter(get(myRespName) == 1)
pres_xy   <- pres_data[, c("X_WGS84", "Y_WGS84")]
pres_resp <- rep(1, nrow(pres_xy))

# --- 3. STRATEGIC "DEEP NORTH" PA GENERATION ---
# Rationale: Bobcat distributions typically terminate near 50°N. We select 
# background points from 55°N to 72°N (Deep North/Boreal-Arctic transition) 
# to ensure the model captures the physiological limits of the species.

message(">>> Generating Strategic 'Deep North' PA points (55N - 72N)...")
# Extract non-NA cell coordinates from the first raster layer
all_cells <- as.data.frame(crds(my_expl[[1]], df = TRUE, na.rm = TRUE))

# Filter candidates: Restricted to the Deep North latitudinal band
pa_candidates <- all_cells[all_cells$y > 55 & all_cells$y < 72, ]

# Randomly sample 10,000 points from the strategic candidate pool
set.seed(42)
pa_sample <- pa_candidates[sample(nrow(pa_candidates), min(10000, nrow(pa_candidates))), ]
pa_xy <- setNames(pa_sample, c("X_WGS84", "Y_WGS84"))

# Combine Presence and Strategic Pseudo-Absence
all_coords <- rbind(pres_xy, pa_xy)
all_resp   <- c(pres_resp, rep(NA, nrow(pa_xy))) # PAs marked as NA for user-defined selection

# Create the PA selection table for biomod2
PA_table <- data.frame(PA1 = rep(TRUE, length(all_resp)))

# --- 4. DATA FORMATTING & MODEL CALIBRATION ---
message(">>> Initializing BIOMOD2 Modeling for L. rufus...")

myBiomodData <- BIOMOD_FormatingData(
  resp.var      = all_resp,
  expl.var      = my_expl,
  resp.xy       = all_coords,
  resp.name     = myRespName,
  PA.strategy   = 'user.defined', 
  PA.user.table = PA_table,
  filter.raster = TRUE
)

# Individual Model Calibration
myBiomodModelOut <- BIOMOD_Modeling(
  bm.format   = myBiomodData,
  modeling.id = 'Lr_Current_SDM',
  models      = c('RF', 'GLM', 'MAXNET', 'GAM', 'FDA'),
  CV.strategy = 'random',
  CV.nb.rep   = 2,    
  CV.perc     = 0.8,
  metric.eval = c('TSS', 'ROC'),
  var.import  = 2,   
  nb.cpu      = 6
)

# --- 5. ENSEMBLE MODELING ---
# Rationale: Ensemble produced using Weighted Mean (EMwmean) based on TSS scores.
myBiomodEM <- BIOMOD_EnsembleModeling(
  bm.mod               = myBiomodModelOut,
  models.chosen        = 'all',
  em.by                = 'all',
  metric.select        = 'TSS',
  metric.select.thresh = 0.8, 
  metric.eval          = c('TSS', 'ROC'),
  em.algo              = c('EMwmean')
)

# --- 6. EXPORTING EVALUATION METRICS ---
message(">>> Exporting metrics for L. rufus...")
write.csv(get_evaluations(myBiomodModelOut), "Lr_Ind_Eval.csv", row.names = FALSE)
write.csv(get_variables_importance(myBiomodModelOut), "Lr_Ind_VarImp.csv", row.names = FALSE)
write.csv(get_evaluations(myBiomodEM), "Lr_Ens_Eval.csv", row.names = FALSE)

# --- 7. SPATIAL OUTPUTS (ENSEMBLE FORECASTING) ---
message(">>> Generating L. rufus suitability maps...")

myBiomodEF_Current <- BIOMOD_EnsembleForecasting(
  bm.em               = myBiomodEM,
  proj.name           = 'Current',
  new.env             = my_expl,
  models.chosen       = 'all',
  metric.binary       = 'TSS',      
  metric.filter       = 'TSS',      
  build.clamping.mask = FALSE       
)

# Extract and save the final Ensemble Weighted Mean (EMwmean) raster
current_ensemble_raster <- get_predictions(myBiomodEF_Current)
writeRaster(current_ensemble_raster, 
            filename = file.path(path_out, "Lr_Current_Ensemble_Final.tif"), 
            overwrite = TRUE, 
            datatype = 'INT2S')

message(">>> SDM PROCESS COMPLETE for L. rufus. Results saved in: ", path_out)


