### Topic: ### Supplementary material for the article "Estimating snakebite risk in ### the Terai, Nepal: a high-resolution geospatial and One Health analysis" ### Corresponding author: ### Carlos R. Ochoa, carlos.ochoa@unige.ch, co.unige@pm.me ### Description: ### Simplified version and descriptions of the functions used in the geospatial ### risk analysis for humans (equivalent to animals) described in the main text ### of the manuscript. The following code uses geospatial layers in TIF format ### for gridded rasters and in (.shp) format for vector layers. Survey covariates ### were preprocessed and imported from a (.RData) file. # Packages needed: ----- library(pacman) p_load(INLA, tidyverse, ggplot2, geoR, raster, sf, rgdal, leaflet, spdep, ggmap, SpatialEpi, sp, maptools, rgeos, readxl, corrplot, RColorBrewer, plotKML) # Sourcing of dataset from the survey ----- load(file = 'SurveyDataIfAvailable.RData') # Extracting relevant variables (optional) ----- DF <- SurveData %>% # select() is masked so the package is required dplyr::select(Var01, Var02, etc) %>% # comma-separated list of variables to keep data.frame() # Creating a prediction data frame with geographic coordinates: ----- ## Import a raster without 'No Data' pixels at the resolution desired for prediction (e.g. 1 km^2) r <- raster("./BasePredictionRaster.tif") PredPoints <- rasterToPoints(r) Pred <- data.frame(PredPoints) Pred$Lat <- Pred$y Pred$Long <- Pred$x rm(PredPoints, r) # Creation of the Poverty Provability Index (PPI) for Nepal: ----- ## Removal of rows affected by specific NAs: DF <- DF[-which(is.na(DF$Main_job)), ] # Repeating structure for the 10 variables # Preparation of the PPI: DF$HH_size <- ifelse(DF$HH_size == 0, 34, ifelse(DF$HH_size == 1, 34, ifelse(DF$HH_size == 2, 34, ifelse(DF$HH_size == 3, 30, ifelse(DF$HH_size == 4, 19, ifelse(DF$HH_size == 5, 12, ifelse(DF$HH_size == 6, 8, ifelse(DF$HH_size == 7, 6, 0)))))))) DF$Main_job <- dplyr::case_when(DF$Main_job %in% 1 ~ 0, DF$Main_job %in% 2 ~ 0, DF$Main_job %in% 3 ~ 4, DF$Main_job %in% 4 ~ 5, DF$Main_job %in% 5 ~ 7, DF$Main_job %in% 6 ~ 8, TRUE ~ as.numeric(DF$Main_job)) DF$Bedrooms <- dplyr::case_when(DF$Bedrooms %in% 1 ~ 0, DF$Bedrooms %in% 3 ~ 7, DF$Bedrooms %in% 4 ~ 11, TRUE ~ as.numeric(DF$Bedrooms)) # Answer 2 gives 2 points, so no change DF$Walls <- dplyr::case_when(DF$Walls %in% 1 ~ 0, DF$Walls %in% 2 ~ 6, TRUE ~ as.numeric(DF$Walls)) DF$Roof <- dplyr::case_when(DF$Roof %in% 1 ~ 0, DF$Roof %in% 3 ~ 6, DF$Roof %in% 4 ~ 7, TRUE ~ as.numeric(DF$Roof)) # Answer 2 gives 2 points, so no change DF$Kitchen <- dplyr::case_when(DF$Kitchen %in% 1 ~ 0, DF$Kitchen %in% 2 ~ 5, TRUE ~ as.numeric(DF$Kitchen)) DF$Stove <- dplyr::case_when(DF$Stove %in% 1 ~ 0, DF$Stove %in% 2 ~ 3, TRUE ~ as.numeric(DF$Stove)) DF$Toilet <- dplyr::case_when(DF$Toilet %in% 1 ~ 0, DF$Toilet %in% 2 ~ 6, TRUE ~ as.numeric(DF$Toilet)) DF$Telephone <- dplyr::case_when(DF$Telephone %in% 1 ~ 0, DF$Telephone %in% 2 ~ 8, DF$Telephone %in% 3 ~ 14, TRUE ~ as.numeric(DF$Telephone)) DF$Own_land <- dplyr::case_when(DF$Own_land %in% 1 ~ 0, DF$Own_land %in% 2 ~ 3, DF$Own_land %in% 3 ~ 6, TRUE ~ as.numeric(DF$Own_land)) # Translation of the PPI Score into 100% National poverty likelihood levels according # to the 2010 Nepal PPI scorecard. DF <- DF %>% mutate(Sum = HH_size + Main_job + Bedrooms + Walls + Roof + Kitchen + Toilet + Telephone + Own_land, PPI = case_when(Sum >= 0 & Sum <= 4 ~ 1, Sum >= 5 & Sum <= 9 ~ 1, Sum >= 10 & Sum <= 14 ~ 0.778, Sum >= 15 & Sum <= 19 ~ 0.646, Sum >= 20 & Sum <= 24 ~ 0.593, Sum >= 25 & Sum <= 29 ~ 0.498, Sum >= 30 & Sum <= 34 ~ 0.389, Sum >= 35 & Sum <= 39 ~ 0.259, Sum >= 40 & Sum <= 44 ~ 0.177, Sum >= 45 & Sum <= 49 ~ 0.096, Sum >= 50 & Sum <= 54 ~ 0.053, Sum >= 55 & Sum <= 59 ~ 0.035, Sum >= 60 & Sum <= 64 ~ 0.018, Sum >= 65 & Sum <= 69 ~ 0.004, Sum >= 70 & Sum <= 74 ~ 0.002, Sum >= 75 & Sum <= 79 ~ 0, Sum >= 80 & Sum <= 84 ~ 0, Sum >= 85 & Sum <= 89 ~ 0, Sum >= 90 & Sum <= 94 ~ 0, Sum >= 95 & Sum <= 100 ~ 0)) %>% data.frame() # Addition of geospatial layers: ----- # Repeating structure for any other individual geospatial gridded layer covering # the study area. ## Elevation: DEM <- raster("./Layers/DEM.tif") DF$Alt <- round(raster::extract(DEM, DF[ , c("Long", "Lat")]), 0) Pred$Alt <- round(raster::extract(DEM, Pred[ , c("Long", "Lat")]), 0) rm(DEM) # Addition of multiple layers with similar names as a 'brick': # BIO variables from WorldClim 1-19: ListAV <- list.files("./Layers/BIO_var/", pattern = "AnyCommonNameElement") rlist <- lapply(paste0("./Layers/BIO_var/", ListAV), raster) BIO <- brick(rlist) rm(ListAV, rlist) A <- data.frame(raster::extract(BIO, DF[ , c("Long", "Lat")])) DF <- cbind(DF, A) A <- data.frame(raster::extract(BIO, Pred[ , c("Long", "Lat")])) Pred <- cbind(Pred, A) rm(A, BIO) # INLA modelling: --------------------------------------------------------- # Estimation of parameters (for prediction see line 258) # Human or animal risk estimation based on binary response variables: ----- ## Data preparation: ## - Verify and transform categorical covariates into factors, excepting the ## response variables ## - Verify, complete and/or remove observations with NA in explanatory variables ## - For numerical stability it is advisable to scale all numeric explanatory ## variables so they fall in a range between -10 and 10. In this way their ## effects will be fully preserved # Transformation of coordinates from geographic to projected: ----- ## - Additional transformation of distance units into km to avoid possible ## numerical instability of very large numbers in meters ## - Based on the assumption that most open source layers are in WGS84 geographic ## coordinates, so importing their values directly in the previous step is ## better practice d <- data.frame(X = DF$Long, Y = DF$Lat) coordinates(d) <- c("X", "Y") proj4string(d) <- CRS("+init=epsg:4326") CRS.new <- CRS("+init=epsg:32632 +units=km") # CRS for Nepal Proj.coo <- spTransform(d, CRS.new) # Projected coordinates rm(d) # Delimiting area: ----- ## Admin area, or as in this article's case, considering excluded areas Area <- readOGR("./Layers/DelimitingArea.shp") Area_Projected <- spTransform(Area, crs(CRS.new)) # Creation of the mesh grid: ----- ## For information on the mesh elements and how to define them, please see: ## - https://www.paulamoraga.com/book-geospatial/ ## - https://becarioprecario.bitbucket.io/inla-gitbook/ Max.edge <- 1 # to be given in the distance units in the Area_Projected layer (i.e. km) mesh <- inla.mesh.2d(loc = Proj.coo, boundary = Area_Projected, max.edge = c(1, 10) * Max.edge, min.angle = c(30, 21), cutoff = Max.edge/5, offset = c(5, 30)) mesh$n # Number of vertices in the final mesh. rm(Max.edge, Area, Area_Projected) # Definition of the spatial random effect (spde): ----- ## Default option: ## spde <- inla.spde2.matern(mesh = mesh, alpha = 2) # Improved option, using Penalized Complexity (PC) priors: ## For information on PC priors, please see: ## Fuglstad et al. 2019: https://doi.org/10.1080/01621459.2017.1415907 ## https://becarioprecario.bitbucket.io/inla-gitbook/ch-priors.html prior.median.sd <- 0.25 prior.median.range <- 30 spde <- inla.spde2.pcmatern(mesh, prior.range = c(prior.median.range, 0.5), prior.sigma = c(prior.median.sd, 0.5), constr = T) rm(prior.median.range, prior.median.sd) # Creation of the Index: ----- index <- inla.spde.make.index(name = 'Weights', n.spde = spde$n.spde, n.group = 1, n.repl = 1) # Creation of the Projector matrix A: ----- P.M_A <- inla.spde.make.A(mesh = mesh, loc = Proj.coo) # Creation of the Stack: ----- Stack <- inla.stack(tag = "Fit", data = list(y = DF$BinaryResponseVar), A = list(1, P.M_A), effects = list(X = DF, Weights = index)) # Definition of the formula: ----- ## This follows the general grammar of lm and glm regression models ## For information, please see the multiple INLA vignette files formula <- y ~ Var1 + Var2 + f(Weights, model = spde) # Running of the specified model: ----- ## For information on the multiple options and settings, please see the multiple ## INLA vignette files and the on-line support page: https://www.r-inla.org/ Model <- inla(formula = formula, Ntrials = 1, family = "binomial", control.family = list(link = "logit"), data = inla.stack.data(Stack), control.compute = list(dic = TRUE, waic = TRUE, cpo = TRUE), control.predictor = list(A = inla.stack.A(Stack), compute = TRUE, quantiles = c(0.025, 0.05, 0.95, 0.975), link = 1), control.inla = list(int.strategy = "eb", strategy = "adaptive"), control.fixed = list(prec = 0.1), verbose = TRUE) # ------------------------------------------------------------------------------ # Prediction: ----- ## The same comments and clarifications apply (omitted) # Transformation of coordinates from geographic to projected: ----- d <- data.frame(X = DF$Long, Y = DF$Lat) coordinates(d) <- c("X", "Y") proj4string(d) <- CRS("+init=epsg:4326") CRS.new <- CRS("+init=epsg:32632 +units=km") Proj.coo <- spTransform(d, CRS.new) d <- data.frame(X = Pred$Long, Y = Pred$Lat) coordinates(d) <- c("X", "Y") proj4string(d) <- CRS("+init=epsg:4326") CRS.new <- CRS("+init=epsg:32632 +units=km") Pred.coo <- spTransform(d, CRS.new) rm(d) # Delimiting area: ----- Area <- readOGR("./Layers/DelimitingArea.shp") Area_Projected <- spTransform(Area, crs(CRS.new)) # Creation of the mesh grid: ----- Max.edge <- 1 mesh <- inla.mesh.2d(loc = Proj.coo, boundary = Area_Projected, max.edge = c(1, 10) * Max.edge, min.angle = c(30, 21), cutoff = Max.edge/5, offset = c(5, 30)) mesh$n rm(Max.edge, Area, Area_Projected) # Definition of the spatial random effect (spde): ----- ## Default option: ## spde <- inla.spde2.matern(mesh = mesh, alpha = 2) # Improved option, using Penalized Complexity (PC) priors: prior.median.sd <- 0.5 prior.median.range <- 40 spde <- inla.spde2.pcmatern(mesh, prior.range = c(prior.median.range, 0.5), prior.sigma = c(prior.median.sd, 0.5), constr = T) rm(prior.median.range, prior.median.sd) # Creation of the Index: ----- index <- inla.spde.make.index(name = 'Weights', n.spde = spde$n.spde, n.group = 1, n.repl = 1) # Creation of the Projector matrix A: ----- P.M_A <- inla.spde.make.A(mesh = mesh, loc = Proj.coo) # Creation of the Stack: ----- ## Estimation data stack: Stack <- inla.stack(tag = "Fit", data = list(y = DF$BinaryResponseVar), A = list(1, P.M_A), effects = list(X = DF, Weights = index)) ## Prediction data stack: Stack_Pred <- inla.stack(tag = "Pred", data = list(y = NA), A = list(1, Pred_P.M_A), effects = list(Pred_X = Pred, Weights = index)) ## General stack: Stack <- inla.stack(Stack_Fit, Stack_Pred) rm(Stack_Fit, Stack_Pred) # Definition of the formula: ----- formula <- y ~ Var1 + Var2 + f(Weights, model = spde) # Running of the specified model: ----- Model <- inla(formula = formula, Ntrials = 1, family = "binomial", control.family = list(link = "logit"), data = inla.stack.data(Stack), control.compute = list(dic = TRUE, waic = TRUE, cpo = TRUE), control.predictor = list(A = inla.stack.A(Stack), compute = TRUE, quantiles = c(0.025, 0.05, 0.95, 0.975), link = 1), control.inla = list(int.strategy = "eb", strategy = "adaptive"), control.fixed = list(prec = 0.1), verbose = TRUE) # ------------------------------------------------------------------------------ # Extraction of predicted values for each point of the 'Pred' data set: ----- ## Extraction prediction indexes in the stack: IndexPred <- inla.stack.index(stack = Stack, tag = "Pred")$data # Extraction of main values: ----- ## Creation of a data frame in probability scale with mean, DF, mode, and different ## quantiles for the predicted posterior distribution of each point: Risk <- data.frame(Mean = Model$summary.fitted.values[IndexPred, "mean"], SD = Model$summary.fitted.values[IndexPred, "sd"], Low95 = Model$summary.fitted.values[IndexPred, "0.025quant"], High95 = Model$summary.fitted.values[IndexPred, "0.975quant"], Low90 = Model$summary.fitted.values[IndexPred, "0.05quant"], High90 = Model$summary.fitted.values[IndexPred, "0.95quant"], Mode = Model$summary.fitted.values[IndexPred, "mode"]) # Creation of a spatialpoints data frame: ----- SpatialLayer <- data.frame(Risk, Pred.coo@coords) coordinates(SpatialLayer) <- c("X", "Y") proj4string(SpatialLayer) <- CRS("+init=epsg:32645 +units=km") # Rasterization and export of points attributes into individual rasters:---- DataFrame <- SpatialLayer model <- "YourTag" filenames <- names(DataFrame) lapply(filenames, function(x) { r1 <- vect2rast(DataFrame, fname = x, cell.size = 1, method = "raster", background = NA) r2 <- raster(r1) writeRaster(r2, paste0("./Layers/", model, "_", x), format = 'GTiff') })