library(ggplot2)
library(readxl)  # read_excel() is called immediately below, before the later library() block
library(dplyr)   # %>% pipe, mutate(), summarise()
library(tidyr)   # pivot_longer()
# ===============================
# Load data
# ===============================
# Sampling-count workbook (supplementary information S12).
counts_xlsx <- "~/Dropbox/Doctorate project - VMMA/Thesis-associated_papers/P001_Sampling_design/Online_Resoures-SUPPLEMENTARY_INFORMATION/Supplementary_information_S12_Associated_Databases/Sampling_count_database.xlsx"
df <- read_excel(counts_xlsx)
# ===============================
# Wide → Long
# ===============================
# Reshape wide -> long: one row per (site, strategy), point count in Num_Points.
strategy_columns <- c(
  "basins", "halfbasins", "Landcover", "SLIC",
  "ssample20", "ssample40", "ssample60", "ssample80"
)
df_long <- df %>%
  pivot_longer(
    cols = all_of(strategy_columns),
    names_to = "Strategy",
    values_to = "Num_Points"
  )
# ===============================
# Normalize to percentage
# ===============================
# Express each strategy's point count as a percentage of the full grid.
df_long <- df_long %>%
  mutate(Percent_Used = (Num_Points / Num_Points_Total) * 100)
# ===============================
# Strategy order
# ===============================
# Fix the display/facet order of the sampling strategies.
strategy_order <- c(
  "basins", "halfbasins", "Landcover", "SLIC",
  "ssample20", "ssample40", "ssample60", "ssample80"
)
df_long$Strategy <- factor(df_long$Strategy, levels = strategy_order)
# ===============================
# Plot (FACETED)
# ===============================
# Faceted summary of the percentage of recorder locations each strategy uses,
# per locality: a thin min-max range, a narrow box, and the median as a point.
p <- ggplot(df_long, aes(x = Locality, y = Percent_Used)) +
  # Subtle min-max range drawn behind the box
  stat_summary(
    fun.min = min, fun.max = max,
    geom = "linerange", linewidth = 0.3, color = "grey40"
  ) +
  # Narrow boxplot; outliers hidden because the linerange already shows extremes
  geom_boxplot(
    width = 0.18, color = "black", fill = "white",
    linewidth = 0.35, outlier.shape = NA
  ) +
  # Median emphasized as a point
  stat_summary(fun = median, geom = "point", size = 1.6, color = "black") +
  facet_wrap(~ Strategy, ncol = 2) +
  labs(
    x = "Locality",
    y = "Recorder locations used (% of full grid)"
  ) +
  # NOTE(review): "Times New Roman" must be available to the graphics device;
  # otherwise ggplot2 falls back to the default family with a warning.
  theme_classic(base_family = "Times New Roman") +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 13),
    panel.spacing = unit(0.8, "lines")
  )
print(p)
ggsave(
  "Subsampling_efficiency_box_range.png",
  p,
  width = 8, height = 10, dpi = 300
)
# ===============================
# Libraries
# ===============================
library(readxl)    # read_excel()
library(dplyr)     # %>% pipe, group_by()/summarise()
library(tidyr)     # pivot_longer()
library(openxlsx)  # write.xlsx()
# ===============================
# Load data
# ===============================
# Re-read the sampling-count workbook for the summary-table section.
counts_file <- "~/Dropbox/Doctorate project - VMMA/Thesis-associated_papers/P001_Sampling_design/Online_Resoures-SUPPLEMENTARY_INFORMATION/Supplementary_information_S12_Associated_Databases/Sampling_count_database.xlsx"
df <- read_excel(counts_file)
# ===============================
# Wide → Long
# ===============================
# Wide -> long: one row per (site, strategy) with the count in Num_Points.
subsample_columns <- c(
  "basins", "halfbasins", "Landcover", "SLIC",
  "ssample20", "ssample40", "ssample60", "ssample80"
)
df_long <- df %>%
  pivot_longer(
    cols = all_of(subsample_columns),
    names_to = "Strategy",
    values_to = "Num_Points"
  )
# ===============================
# Normalize to percentage
# ===============================
# Percentage of the full grid represented by each strategy's points.
df_long <- df_long %>%
  mutate(Percent_Used = (Num_Points / Num_Points_Total) * 100)
# ===============================
# Summary table: mean & SD
# ===============================
# Mean and SD of Percent_Used for every locality x strategy combination,
# ordered by strategy and then locality.
summary_table <- df_long %>%
  group_by(Locality, Strategy) %>%
  summarise(
    across(
      Percent_Used,
      list(
        Mean_Percent = ~ mean(.x, na.rm = TRUE),
        SD_Percent   = ~ sd(.x, na.rm = TRUE)
      ),
      .names = "{.fn}"
    ),
    .groups = "drop"
  ) %>%
  arrange(Strategy, Locality)
# ===============================
# Export to Excel
# ===============================
# Write the summary table next to the working directory and confirm on stdout.
output_path <- "Sampling_efficiency_summary_mean_SD.xlsx"
write.xlsx(
  x = summary_table,
  file = output_path,
  overwrite = TRUE
)
cat("✅ Summary table exported to:", output_path, "\n")
# ===============================
# Export to Excel
# ===============================
# NOTE(review): this reassignment of output_path is never followed by another
# write.xlsx() call in this file, so it currently has no effect — confirm
# whether an export to this Dropbox location was intended.
output_path <- "/home/vmartinezarias/Dropbox/Doctorate project - VMMA/Thesis-associated_papers/P001_Sampling_design/Online_Resoures-SUPPLEMENTARY_INFORMATION/Supplementary_information_S12_Associated_Databases/Sampling_summary_mean_SD.xlsx"
# Load required libraries.
# Install ggstatsplot only when it is missing, then attach it.
# (The original attached the package first and unconditionally re-installed
# it afterwards on every run, which fails outright if the package is absent.)
if (!requireNamespace("ggstatsplot", quietly = TRUE)) {
  install.packages("ggstatsplot")
}
library(ggstatsplot)
library(dplyr)
library(readxl)
library(openxlsx)
library(future.apply)
# Set up parallel plan: leave one core free, but never request fewer than one
# worker — parallel::detectCores() can return 1 (giving 0 workers) or NA on
# some platforms, either of which would break plan().
n_workers <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
plan(multisession, workers = n_workers)
# Load the interpolation-performance database; the key filter columns are
# coerced to character so subsequent equality filters behave consistently.
perf_db_path <- "~/Dropbox/Doctorate project - VMMA/Thesis-associated_papers/P001_Sampling_design/Online_Resoures-SUPPLEMENTARY_INFORMATION/Supplementary_information_S12_Associated_Databases/DF1-1_Interpolations_variograms_error_and_performance_of_subsamples.xlsx"
data <- read_excel(perf_db_path) %>%
  mutate(across(
    c(Locality, Type, Subsample, Frequency_Range, Time_Period, Statistic, Index),
    as.character
  ))
# Duplicate every row with Locality = "ALL" so each combination is also
# evaluated pooled across localities.
data_all <- data %>% mutate(Locality = "ALL")
expanded_data <- bind_rows(data, data_all)
# Every observed combination of the filtering variables.
combinations <- expanded_data %>%
  distinct(
    Locality, Type, Frequency_Range, Time_Period,
    Statistic, Index, Subsample
  )
# Run ggstatsplot's correlation analysis (Sample_percent vs
# Subsample_performance) on the rows of expanded_data matching one filter
# combination, and return the frequentist + Bayesian statistics stacked into
# one data frame. Returns NULL when the subset is too small, degenerate, or
# when ggscatterstats()/extract_stats() errors.
process_combination <- function(filter_row) {
filtered_data <- expanded_data %>%
filter(
Locality == filter_row$Locality,
Type == filter_row$Type,
Frequency_Range == filter_row$Frequency_Range,
Time_Period == filter_row$Time_Period,
Statistic == filter_row$Statistic,
Index == filter_row$Index,
Subsample == filter_row$Subsample
)
# Check for sufficient data and variation: need >= 3 rows and at least two
# distinct values on both axes for a correlation to be computable.
if (nrow(filtered_data) < 3 ||
length(unique(filtered_data$Sample_percent)) < 2 ||
length(unique(filtered_data$Subsample_performance)) < 2) return(NULL)
# Extract correlation statistics; the plot itself is only a vehicle for
# extract_stats() below, so errors simply yield NULL.
temp_plot <- tryCatch({
ggscatterstats(
data = filtered_data,
x = Sample_percent,
y = Subsample_performance,
messages = FALSE
)
}, error = function(e) return(NULL))
if (is.null(temp_plot)) return(NULL)
stats <- tryCatch(extract_stats(temp_plot), error = function(e) return(NULL))
if (is.null(stats)) return(NULL)
# subtitle_data holds the frequentist test, caption_data the Bayesian one.
subtitle_data <- stats$subtitle_data
caption_data <- stats$caption_data
# Add filtering context to results: copy every filter column onto both
# result frames so the combination is identifiable after row-binding.
for (col in names(filter_row)) {
subtitle_data[[col]] <- filter_row[[col]]
caption_data[[col]] <- filter_row[[col]]
}
# NOTE(review): these two assignments overwrite the Type value just copied
# from filter_row, so the original Type filter context is lost in the
# output — confirm whether a separately named column (e.g. Stats_Type)
# was intended.
subtitle_data$Type <- "Frequentist"
caption_data$Type <- "Bayesian"
bind_rows(subtitle_data, caption_data)
}
# Apply processing to all combinations in parallel; future.seed = TRUE gives
# reproducible, parallel-safe RNG streams.
# seq_len() replaces 1:nrow() so an empty combinations table produces an
# empty result list instead of iterating over c(1, 0).
results_list <- future_lapply(seq_len(nrow(combinations)), function(i) {
  process_combination(combinations[i, ])
}, future.seed = TRUE)
library(shiny)
library(ggplot2)
library(readxl)
library(dplyr)

# UI: an Excel upload, the categorical axis selector, font-size controls,
# dynamically generated filters, and the violin plot plus the filtered table.
ui <- fluidPage(
  titlePanel("Distribution of Pearson Coefficient (Frequentist)"),
  sidebarLayout(
    sidebarPanel(
      fileInput("archivo", "Upload Excel file", accept = c(".xlsx")),
      selectInput(
        "variable_categorica",
        "Categorical variable to compare:",
        choices = c("Locality", "Frequency_Range", "Time_Period",
                    "Statistic", "Index", "Subsample", "Type")
      ),
      numericInput("axis_title_size", "Axis title font size:",
                   value = 14, min = 8, max = 30),
      numericInput("axis_text_size", "Axis text font size:",
                   value = 12, min = 6, max = 30),
      downloadButton("descargar_pdf", "Download plot as PDF"),
      uiOutput("filtros_adicionales")
    ),
    mainPanel(
      plotOutput("grafico_violin", height = "600px"),
      tableOutput("tabla_filtrada")
    )
  )
)
# Server: loads the uploaded results workbook, restricts it to frequentist
# Pearson estimates, applies the user's dynamic filters, and renders the
# violin plot, the filtered table, and the PDF download.
server <- function(input, output, session) {
  # Uploaded data, restricted to frequentist rows with a non-missing estimate.
  datos <- reactive({
    req(input$archivo)
    read_excel(input$archivo$datapath) %>%
      # BUGFIX: the results exported earlier in this pipeline — and the UI
      # selector choices — use the column name "Type"; the original filtered
      # on "Tipo", a column that does not exist, so filter() errored on
      # every upload.
      filter(Type == "Frequentist") %>%
      filter(!is.na(estimate))
  })
  # Dynamic additional filters: one select input per categorical column other
  # than the plotted one, each defaulting to "All" (no filtering).
  output$filtros_adicionales <- renderUI({
    req(datos())
    other_columns <- setdiff(
      c("Locality", "Frequency_Range", "Time_Period", "Statistic",
        "Index", "Subsample", "Type"),
      input$variable_categorica
    )
    lapply(other_columns, function(col) {
      selectInput(
        inputId = paste0("filtro_", col),
        label = col,
        choices = c("All", unique(datos()[[col]])),
        selected = "All"
      )
    })
  })
  # Apply every dynamic filter whose current value is not "All".
  datos_filtrados <- reactive({
    req(datos())
    df <- datos()
    other_columns <- setdiff(
      c("Locality", "Frequency_Range", "Time_Period", "Statistic",
        "Index", "Subsample", "Type"),
      input$variable_categorica
    )
    for (col in other_columns) {
      val <- input[[paste0("filtro_", col)]]
      if (!is.null(val) && val != "All") {
        df <- df %>% filter(.data[[col]] == val)
      }
    }
    df
  })
  # Violin plot of the Pearson estimate against the chosen categorical axis.
  plot_violines <- reactive({
    df <- datos_filtrados()
    req(nrow(df) > 0)
    ggplot(df, aes(x = .data[[input$variable_categorica]], y = estimate)) +
      geom_violin(fill = "skyblue", alpha = 0.7, color = NA) +
      # geom_jitter(width = 0.15, alpha = 0.5, size = 1.5, color = "black") +
      labs(x = input$variable_categorica, y = "Pearson Estimate") +
      theme_minimal() +
      theme(
        axis.title = element_text(size = input$axis_title_size),
        axis.text = element_text(size = input$axis_text_size),
        plot.title = element_blank()
      )
  })
  output$grafico_violin <- renderPlot({
    plot_violines()
  })
  # Show the filtered rows under the plot.
  output$tabla_filtrada <- renderTable({
    datos_filtrados()
  })
  # PDF export of the current plot.
  output$descargar_pdf <- downloadHandler(
    filename = function() {
      paste0("violin_plot_pearson_", Sys.Date(), ".pdf")
    },
    content = function(file) {
      ggsave(file, plot = plot_violines(), device = "pdf",
             width = 10, height = 6)
    }
  )
}
shinyApp(ui = ui, server = server)
# Load required libraries, installing the possibly-missing ones first.
# (The original attached ggpubr/rstatix/FSA and then unconditionally
# re-installed them afterwards on every run.)
for (pkg in c("ggpubr", "rstatix", "FSA")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(dplyr)
library(ggplot2)
library(readxl)
library(ggpubr)
library(rstatix)
library(FSA)
library(writexl)
# Load required libraries
# NOTE(review): this block duplicates the library() calls a few lines above —
# the file reads like concatenated console history; deduplicate when tidying.
library(dplyr)
library(ggplot2)
library(readxl)
library(ggpubr)
library(rstatix)
library(FSA)
library(writexl)
# Load data (replace path accordingly)
# NOTE(review): this read is immediately superseded by the next one, which
# recomputes Pearson_rel from Subsample_performance instead of
# Pearson_Correlation — this block has no lasting effect and can likely be
# removed.
df <- read_excel("~/Dropbox/Doctorate project - VMMA/Thesis-associated_papers/P001_Sampling_design/Online_Resoures-SUPPLEMENTARY_INFORMATION/Supplementary_information_S12_Associated_Databases/DF1-1_Interpolations_variograms_error_and_performance_of_subsamples.xlsx") %>%
mutate(across(c(Subsample, Type, Index, Statistic), as.character)) %>%
mutate(Pearson_rel = Pearson_Correlation / Sample_percent)
# Load the performance database. Pearson_rel is performance per unit of
# sampling effort (Subsample_performance divided by Sample_percent).
kw_db_path <- "~/Dropbox/Doctorate project - VMMA/Thesis-associated_papers/P001_Sampling_design/Online_Resoures-SUPPLEMENTARY_INFORMATION/Supplementary_information_S12_Associated_Databases/DF1-1_Interpolations_variograms_error_and_performance_of_subsamples.xlsx"
df <- read_excel(kw_db_path) %>%
  mutate(
    across(c(Subsample, Type, Index, Statistic), as.character),
    Pearson_rel = Subsample_performance / Sample_percent
  )
# Loop through each summary statistic type ("mean" and "median"): for the
# tessellation strategies, compute precision/efficiency summaries, test
# normality, draw violin+box plots, run Kruskal-Wallis with Dunn post-hoc
# comparisons, and export everything to one Excel workbook per statistic.
for (stat in c("mean", "median")) {
df_tess <- df %>%
filter(Type == "Tessells", Statistic == stat, !is.na(Subsample_performance))
# Summary statistics of precision (raw performance) and efficiency
# (performance per unit effort, Pearson_rel) for each subsample strategy.
# NOTE(review): cv_eff divides by abs(mean_eff) and becomes Inf/NaN when a
# group's mean efficiency is 0 — confirm that cannot occur in this data.
summary_precision <- df_tess %>%
group_by(Subsample) %>%
summarise(
n = n(),
mean_Pearson = mean(Subsample_performance, na.rm = TRUE),
median_Pearson = median(Subsample_performance, na.rm = TRUE),
sd_Pearson = sd(Subsample_performance, na.rm = TRUE),
mean_eff = mean(Pearson_rel, na.rm = TRUE),
median_eff = median(Pearson_rel, na.rm = TRUE),
sd_eff = sd(Pearson_rel, na.rm = TRUE),
cv_eff = sd_eff / abs(mean_eff)
) %>%
arrange(desc(median_eff))
# Shapiro-Wilk normality test (only for groups with n >= 3, the test's
# minimum sample size).
valid <- df_tess %>%
group_by(Subsample) %>%
summarise(n = n()) %>%
filter(n >= 3) %>%
pull(Subsample)
normality_df <- df_tess %>%
filter(Subsample %in% valid) %>%
group_by(Subsample) %>%
shapiro_test(Subsample_performance)
# Precision plot: violin + box of raw performance per strategy, with the
# y-axis clamped to the valid Pearson range via squish.
precision_plot <- ggplot(df_tess, aes(x = Subsample, y = Subsample_performance, fill = Subsample)) +
geom_violin(trim = FALSE) +
geom_boxplot(width = 0.2, outlier.shape = NA) +
theme_minimal() +
labs(title = paste("Strategy precision (Pearson) -", stat), y = "Pearson Correlation") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_y_continuous(limits = c(-1, 1), oob = scales::squish)
# NOTE(review): the "#/" prefix in the output paths here and below looks
# like an unfilled output-directory placeholder — confirm the intended
# destination before running.
ggsave(
paste0("#/Tessellation_KW_Precision_", stat, ".pdf"),
plot = precision_plot,
width = 8, height = 6, units = "in"
)
# Efficiency plot: same layout for performance per unit of sampling effort.
efficiency_plot <- ggplot(df_tess, aes(x = Subsample, y = Pearson_rel, fill = Subsample)) +
geom_violin(trim = FALSE) +
geom_boxplot(width = 0.2, outlier.shape = NA) +
theme_minimal() +
labs(title = paste("Strategy efficiency (Pearson / Sample%) -", stat), y = "Relative Pearson") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave(
paste0("#/Tessellation_KW_Efficiency_", stat, ".pdf"),
plot = efficiency_plot,
width = 8, height = 6, units = "in"
)
# Kruskal-Wallis tests across strategies, with Benjamini-Hochberg-adjusted
# Dunn post-hoc pairwise comparisons.
kw_pearson <- kruskal_test(Subsample_performance ~ Subsample, data = df_tess)
posthoc_pearson <- dunn_test(Subsample_performance ~ Subsample, data = df_tess, p.adjust.method = "BH")
kw_eff <- kruskal_test(Pearson_rel ~ Subsample, data = df_tess)
posthoc_eff <- dunn_test(Pearson_rel ~ Subsample, data = df_tess, p.adjust.method = "BH")
# Export all results for this statistic to a multi-sheet Excel workbook.
write_xlsx(
list(
Summary_Statistics = summary_precision,
Normality_Test = normality_df,
KW_Pearson = as.data.frame(kw_pearson),
PostHoc_Pearson = as.data.frame(posthoc_pearson),
KW_Efficiency = as.data.frame(kw_eff),
PostHoc_Efficiency = as.data.frame(posthoc_eff)
),
path = paste0("#/KW_Tessellation_Strategies_", stat, ".xlsx")
)
}
# Load data (replace path accordingly)
# BUGFIX: the path previously started at the filesystem root ("/Dropbox/...");
# every other read of this workbook in the file uses the home-relative
# "~/Dropbox/..." form, so the leading "~" was restored.
df <- read_excel("~/Dropbox/Doctorate project - VMMA/Thesis-associated_papers/P001_Sampling_design/Online_Resoures-SUPPLEMENTARY_INFORMATION/Supplementary_information_S12_Associated_Databases/DF1-1_Interpolations_variograms_error_and_performance_of_subsamples.xlsx") %>%
  mutate(across(c(Subsample, Type, Index, Statistic), as.character)) %>%
  mutate(Pearson_rel = Subsample_performance / Sample_percent)
# NOTE(review): these two ggsave() calls sit outside the for-loop above and
# rely on its leftover `stat` and `precision_plot` values; the "#/" and "/"
# path prefixes also look like unfilled output-directory placeholders. They
# appear to be console leftovers — confirm before keeping either of them.
ggsave(
paste0("#/Tessellation_KW_Precision_", stat, ".pdf"),
plot = precision_plot,
width = 8, height = 6, units = "in"
)
ggsave(
paste0("/Tessellation_KW_Precision_", stat, ".pdf"),
plot = precision_plot,
width = 8, height = 6, units = "in"
)
# Rebuild the precision plot (violin + box of performance per strategy,
# y clamped to the Pearson range) using the df_tess/stat values left over
# from the loop above, then print it.
precision_plot <- ggplot(
  df_tess,
  aes(x = Subsample, y = Subsample_performance, fill = Subsample)
) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.2, outlier.shape = NA) +
  theme_minimal() +
  labs(
    title = paste("Strategy precision (Pearson) -", stat),
    y = "Pearson Correlation"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(limits = c(-1, 1), oob = scales::squish)
precision_plot
Statistic <- "mean"  # Set to "mean" or "median"
# Load data (replace path with your own)
data <- read_excel("~/Dropbox/Doctorate project - VMMA/Thesis-associated_papers/P001_Sampling_design/Online_Resoures-SUPPLEMENTARY_INFORMATION/Supplementary_information_S12_Associated_Databases/DF1-1_Interpolations_variograms_error_and_performance_of_subsamples.xlsx")
# Filter based on combined conditions.
# BUGFIX: inside filter(), `Statistic == Statistic` compared the data column
# with itself (TRUE for every non-NA row), so the "mean"/"median" switch set
# above was silently ignored for BI/NDSI. The `.env$Statistic` pronoun forces
# the comparison against the script-level variable instead of the column.
filtered_data <- data %>%
  filter(
    (Index %in% c("BI", "NDSI") & Statistic == .env$Statistic) |
      (Index %in% c("ACItf", "ACIft", "NP") &
         Frequency_Range == "R0-24" & Statistic == "mean")
  ) %>%
  mutate(
    # Collapse the named strategies into display categories; everything else
    # is treated as a random subsample.
    Subsample_Color = case_when(
      Subsample == "SLIC" ~ "SLIC",
      Subsample == "halfbasins" ~ "HalfBasins",
      Subsample == "basins" ~ "Basins",
      Subsample == "Landcover" ~ "Landcover",
      TRUE ~ "Random"
    )
  )
# Add a synthetic "ALL" locality so the facets include a pooled panel,
# then stack it under the original rows.
pooled_rows <- filtered_data %>% mutate(Locality = "ALL")
complete_data <- bind_rows(filtered_data, pooled_rows)
# Create plot: Pearson correlation vs sampling effort, faceted by
# locality x index, with the named strategies highlighted on top of the
# random-subsample cloud.
# NOTE(review): this plot appears to be superseded by the near-identical one
# below that maps y to Subsample_performance.
plot <- ggplot(complete_data, aes(x = Sample_percent, y = Pearson_Correlation)) +
  # Random subsamples as a faint background cloud
  geom_point(
    data = filter(complete_data, Subsample_Color == "Random"),
    aes(color = Subsample_Color),
    alpha = 0.25, size = 1
  ) +
  geom_smooth(method = "lm", color = "blue", se = FALSE, linewidth = 0.8) +
  # Named strategies drawn on top at full opacity
  geom_point(
    data = filter(complete_data, Subsample_Color != "Random"),
    aes(color = Subsample_Color), size = 2
  ) +
  scale_color_manual(
    values = c(
      "SLIC" = "purple",
      "HalfBasins" = "skyblue",
      "Basins" = "navy",
      "Random" = "gray50",
      "Landcover" = "green"
    )
  ) +
  facet_grid(Locality ~ Index) +
  theme_minimal(base_size = 12) +
  labs(
    x = "Sample percent",
    # BUGFIX: the y aesthetic is Pearson_Correlation, but the axis was
    # labelled "Subsample_performance"; the label now matches the variable.
    y = "Pearson correlation",
    color = "Sampling method"
  ) +
  theme(
    legend.position = "right",
    axis.text = element_text(size = 10),
    strip.text = element_text(size = 12, face = "bold")
  )
plot
# Final plot: subsample performance vs sampling effort, faceted by
# locality x acoustic index; named strategies are overlaid on the random
# subsample cloud, with a single linear trend across all points.
strategy_palette <- c(
  "SLIC" = "purple",
  "HalfBasins" = "skyblue",
  "Basins" = "navy",
  "Random" = "gray50",
  "Landcover" = "green"
)
random_points <- filter(complete_data, Subsample_Color == "Random")
named_points <- filter(complete_data, Subsample_Color != "Random")
plot <- ggplot(complete_data,
               aes(x = Sample_percent, y = Subsample_performance)) +
  geom_point(data = random_points, aes(color = Subsample_Color),
             alpha = 0.25, size = 1) +
  geom_smooth(method = "lm", color = "blue", se = FALSE, linewidth = 0.8) +
  geom_point(data = named_points, aes(color = Subsample_Color), size = 2) +
  scale_color_manual(values = strategy_palette) +
  facet_grid(Locality ~ Index) +
  theme_minimal(base_size = 12) +
  labs(
    x = "Sample percent",
    y = "Subsample_performance",
    color = "Sampling method"
  ) +
  theme(
    legend.position = "right",
    axis.text = element_text(size = 10),
    strip.text = element_text(size = 12, face = "bold")
  )
plot
