### Topic:
### Supplementary material for the article "Estimating snakebite risk in
### the Terai, Nepal: a high-resolution geospatial and One Health analysis"

### Corresponding author:
### Carlos R. Ochoa, carlos.ochoa@unige.ch, co.unige@pm.me

### Description:
### Simplified version and descriptions of the functions used in the geospatial
### risk analysis for humans (equivalent to animals) described in the main text
### of the manuscript. The following code uses geospatial layers in TIF format
### for gridded rasters and in (.shp) format for vector layers. Survey covariates
### were preprocessed and imported from a (.RData) file. 


# Packages needed: -----
library(pacman)
p_load(INLA, tidyverse, ggplot2, geoR, raster, sf, rgdal, leaflet, spdep, ggmap,
       SpatialEpi, sp, maptools, rgeos, readxl, corrplot, RColorBrewer, plotKML)


# Sourcing of dataset from the survey -----
load(file = 'SurveyDataIfAvailable.RData')


# Extracting relevant variables (optional) -----  
DF <- SurveData %>%                    # select() is masked so the package is required
  dplyr::select(Var01, Var02, etc) %>% # comma-separated list of variables to keep
  data.frame()


# Creating a prediction data frame with geographic coordinates: -----
## Import a raster without 'No Data' pixels at the resolution desired for prediction (e.g. 1 km^2)
r <- raster("./BasePredictionRaster.tif")
PredPoints <- rasterToPoints(r)
Pred <- data.frame(PredPoints) 
Pred$Lat <- Pred$y
Pred$Long <- Pred$x
rm(PredPoints, r)


# Creation of the Poverty Provability Index (PPI) for Nepal: -----
## Removal of rows affected by specific NAs:
DF <- DF[-which(is.na(DF$Main_job)), ] # Repeating structure for the 10 variables

# Preparation of the PPI:
DF$HH_size <- ifelse(DF$HH_size == 0, 34,
                     ifelse(DF$HH_size == 1, 34,
                            ifelse(DF$HH_size == 2, 34,
                                   ifelse(DF$HH_size == 3, 30,
                                          ifelse(DF$HH_size == 4, 19,
                                                 ifelse(DF$HH_size == 5, 12,
                                                        ifelse(DF$HH_size == 6, 8,
                                                               ifelse(DF$HH_size == 7, 6, 0))))))))

DF$Main_job <- dplyr::case_when(DF$Main_job %in% 1 ~ 0,
                                DF$Main_job %in% 2 ~ 0,
                                DF$Main_job %in% 3 ~ 4,
                                DF$Main_job %in% 4 ~ 5,
                                DF$Main_job %in% 5 ~ 7,
                                DF$Main_job %in% 6 ~ 8,
                                TRUE ~ as.numeric(DF$Main_job))

DF$Bedrooms <- dplyr::case_when(DF$Bedrooms %in% 1 ~ 0,
                                DF$Bedrooms %in% 3 ~ 7,
                                DF$Bedrooms %in% 4 ~ 11,
                                TRUE ~ as.numeric(DF$Bedrooms)) # Answer 2 gives 2 points, so no change

DF$Walls <- dplyr::case_when(DF$Walls %in% 1 ~ 0,
                             DF$Walls %in% 2 ~ 6,
                             TRUE ~ as.numeric(DF$Walls))

DF$Roof <- dplyr::case_when(DF$Roof %in% 1 ~ 0,
                            DF$Roof %in% 3 ~ 6,
                            DF$Roof %in% 4 ~ 7,
                            TRUE ~ as.numeric(DF$Roof)) # Answer 2 gives 2 points, so no change

DF$Kitchen <- dplyr::case_when(DF$Kitchen %in% 1 ~ 0,
                               DF$Kitchen %in% 2 ~ 5,
                               TRUE ~ as.numeric(DF$Kitchen))

DF$Stove <- dplyr::case_when(DF$Stove %in% 1 ~ 0,
                             DF$Stove %in% 2 ~ 3,
                             TRUE ~ as.numeric(DF$Stove))

DF$Toilet <- dplyr::case_when(DF$Toilet %in% 1 ~ 0,
                              DF$Toilet %in% 2 ~ 6,
                              TRUE ~ as.numeric(DF$Toilet))

DF$Telephone <- dplyr::case_when(DF$Telephone %in% 1 ~ 0,
                                 DF$Telephone %in% 2 ~ 8,
                                 DF$Telephone %in% 3 ~ 14,
                                 TRUE ~ as.numeric(DF$Telephone))

DF$Own_land <- dplyr::case_when(DF$Own_land %in% 1 ~ 0,
                                DF$Own_land %in% 2 ~ 3,
                                DF$Own_land %in% 3 ~ 6,
                                TRUE ~ as.numeric(DF$Own_land))

# Translation of the PPI Score into 100% National poverty likelihood levels according
# to the 2010 Nepal PPI scorecard.
DF <- DF %>%
  mutate(Sum = HH_size + Main_job + Bedrooms + Walls + Roof + Kitchen + Toilet +
           Telephone + Own_land,
         PPI = case_when(Sum >= 0 & Sum <= 4 ~ 1, Sum >= 5 & Sum <= 9 ~ 1,
                         Sum >= 10 & Sum <= 14 ~ 0.778, Sum >= 15 & Sum <= 19 ~ 0.646,
                         Sum >= 20 & Sum <= 24 ~ 0.593, Sum >= 25 & Sum <= 29 ~ 0.498,
                         Sum >= 30 & Sum <= 34 ~ 0.389, Sum >= 35 & Sum <= 39 ~ 0.259,
                         Sum >= 40 & Sum <= 44 ~ 0.177, Sum >= 45 & Sum <= 49 ~ 0.096,
                         Sum >= 50 & Sum <= 54 ~ 0.053, Sum >= 55 & Sum <= 59 ~ 0.035,
                         Sum >= 60 & Sum <= 64 ~ 0.018, Sum >= 65 & Sum <= 69 ~ 0.004,
                         Sum >= 70 & Sum <= 74 ~ 0.002, Sum >= 75 & Sum <= 79 ~ 0,
                         Sum >= 80 & Sum <= 84 ~ 0, Sum >= 85 & Sum <= 89 ~ 0,
                         Sum >= 90 & Sum <= 94 ~ 0, Sum >= 95 & Sum <= 100 ~ 0)) %>%
  data.frame()


# Addition of geospatial layers: -----

# Repeating structure for any other individual geospatial gridded layer covering
# the study area.
## Elevation:
DEM <- raster("./Layers/DEM.tif")
DF$Alt <- round(raster::extract(DEM, DF[ , c("Long", "Lat")]), 0)
Pred$Alt <- round(raster::extract(DEM, Pred[ , c("Long", "Lat")]), 0)
rm(DEM)

# Addition of multiple layers with similar names as a 'brick':
# BIO variables from WorldClim 1-19:
ListAV <- list.files("./Layers/BIO_var/", pattern = "AnyCommonNameElement")
rlist <- lapply(paste0("./Layers/BIO_var/", ListAV), raster)
BIO <- brick(rlist)
rm(ListAV, rlist)

A <- data.frame(raster::extract(BIO, DF[ , c("Long", "Lat")]))
DF <- cbind(DF, A)

A <- data.frame(raster::extract(BIO, Pred[ , c("Long", "Lat")]))
Pred <- cbind(Pred, A)
rm(A, BIO)



# INLA modelling: ---------------------------------------------------------
# Estimation of parameters (for prediction see line 258)

# Human or animal risk estimation based on binary response variables: -----
## Data preparation:
## - Verify and transform categorical covariates into factors, excepting the 
##   response variables
## - Verify, complete and/or remove observations with NA in explanatory variables
## - For numerical stability it is advisable to scale all numeric explanatory
##   variables so they fall in a range between -10 and 10. In this way their 
##   effects will be fully preserved 


# Transformation of coordinates from geographic to projected: -----
## - Additional transformation of distance units into km to avoid possible 
##   numerical instability of very large numbers in meters
## - Based on the assumption that most open source layers are in WGS84 geographic 
##   coordinates, so importing their values directly in the previous step is
##   better practice
d <- data.frame(X = DF$Long, Y = DF$Lat)
coordinates(d) <- c("X", "Y")
proj4string(d) <- CRS("+init=epsg:4326")
CRS.new <- CRS("+init=epsg:32632 +units=km") # CRS for Nepal
Proj.coo <- spTransform(d, CRS.new)          # Projected coordinates
rm(d)


# Delimiting area: -----
## Admin area, or as in this article's case, considering excluded areas
Area <- readOGR("./Layers/DelimitingArea.shp")
Area_Projected <- spTransform(Area, crs(CRS.new))


# Creation of the mesh grid: -----
## For information on the mesh elements and how to define them, please see: 
## - https://www.paulamoraga.com/book-geospatial/
## - https://becarioprecario.bitbucket.io/inla-gitbook/

Max.edge <- 1 # to be given in the distance units in the Area_Projected layer (i.e. km)
mesh <- inla.mesh.2d(loc = Proj.coo,
                     boundary = Area_Projected,
                     max.edge = c(1, 10) * Max.edge,
                     min.angle = c(30, 21),
                     cutoff = Max.edge/5,
                     offset = c(5, 30))

mesh$n        # Number of vertices in the final mesh.
rm(Max.edge, Area, Area_Projected)


# Definition of the spatial random effect (spde): -----
## Default option:
## spde <- inla.spde2.matern(mesh = mesh, alpha = 2)

# Improved option, using Penalized Complexity (PC) priors:
## For information on PC priors, please see:
## Fuglstad et al. 2019: https://doi.org/10.1080/01621459.2017.1415907 
## https://becarioprecario.bitbucket.io/inla-gitbook/ch-priors.html

prior.median.sd <- 0.25  
prior.median.range <- 30 

spde <- inla.spde2.pcmatern(mesh,
                            prior.range = c(prior.median.range, 0.5),
                            prior.sigma = c(prior.median.sd, 0.5),
                            constr = T) 

rm(prior.median.range, prior.median.sd)


# Creation of the Index: -----
index <- inla.spde.make.index(name = 'Weights',
                              n.spde = spde$n.spde,
                              n.group = 1,
                              n.repl = 1)


# Creation of the Projector matrix A: -----
P.M_A <- inla.spde.make.A(mesh = mesh, loc = Proj.coo)


# Creation of the Stack: -----
Stack <- inla.stack(tag = "Fit",
                    data = list(y = DF$BinaryResponseVar),
                    A = list(1, P.M_A),
                    effects = list(X = DF, Weights = index))


# Definition of the formula: -----
## This follows the general grammar of lm and glm regression models
## For information, please see the multiple INLA vignette files
formula <- y ~ Var1 + Var2 + f(Weights, model = spde)


# Running of the specified model: -----
## For information on the multiple options and settings, please see the multiple
## INLA vignette files and the on-line support page: https://www.r-inla.org/
Model <- inla(formula = formula,
              Ntrials = 1,                           
              family = "binomial",
              control.family = list(link = "logit"),
              data = inla.stack.data(Stack),
              control.compute = list(dic = TRUE,
                                     waic = TRUE,
                                     cpo = TRUE), 
              control.predictor = list(A = inla.stack.A(Stack),
                                       compute = TRUE, 
                                       quantiles = c(0.025, 0.05, 0.95, 0.975),
                                       link = 1),
              control.inla = list(int.strategy = "eb", strategy = "adaptive"),
              control.fixed = list(prec = 0.1),
              verbose = TRUE)


# ------------------------------------------------------------------------------
# Prediction: -----
## The same comments and clarifications apply (omitted)


# Transformation of coordinates from geographic to projected: -----
d <- data.frame(X = DF$Long, Y = DF$Lat)
coordinates(d) <- c("X", "Y")
proj4string(d) <- CRS("+init=epsg:4326")
CRS.new <- CRS("+init=epsg:32632 +units=km") 
Proj.coo <- spTransform(d, CRS.new)          

d <- data.frame(X = Pred$Long, Y = Pred$Lat)
coordinates(d) <- c("X", "Y")
proj4string(d) <- CRS("+init=epsg:4326") 
CRS.new <- CRS("+init=epsg:32632 +units=km")
Pred.coo <- spTransform(d, CRS.new) 
rm(d)


# Delimiting area: -----
Area <- readOGR("./Layers/DelimitingArea.shp")
Area_Projected <- spTransform(Area, crs(CRS.new))


# Creation of the mesh grid: -----
Max.edge <- 1 
mesh <- inla.mesh.2d(loc = Proj.coo,
                     boundary = Area_Projected,
                     max.edge = c(1, 10) * Max.edge,
                     min.angle = c(30, 21),
                     cutoff = Max.edge/5,
                     offset = c(5, 30))

mesh$n        
rm(Max.edge, Area, Area_Projected)


# Definition of the spatial random effect (spde): -----
## Default option:
## spde <- inla.spde2.matern(mesh = mesh, alpha = 2)

# Improved option, using Penalized Complexity (PC) priors:
prior.median.sd <- 0.5  
prior.median.range <- 40 

spde <- inla.spde2.pcmatern(mesh,
                            prior.range = c(prior.median.range, 0.5),
                            prior.sigma = c(prior.median.sd, 0.5),
                            constr = T) 

rm(prior.median.range, prior.median.sd)


# Creation of the Index: -----
index <- inla.spde.make.index(name = 'Weights',
                              n.spde = spde$n.spde,
                              n.group = 1,
                              n.repl = 1)


# Creation of the Projector matrix A: -----
P.M_A <- inla.spde.make.A(mesh = mesh, loc = Proj.coo)


# Creation of the Stack: -----
## Estimation data stack:
Stack <- inla.stack(tag = "Fit",
                    data = list(y = DF$BinaryResponseVar),
                    A = list(1, P.M_A),
                    effects = list(X = DF, Weights = index))

## Prediction data stack:
Stack_Pred <- inla.stack(tag = "Pred",
                         data = list(y = NA),
                         A = list(1, Pred_P.M_A),
                         effects = list(Pred_X = Pred, Weights = index))

## General stack:
Stack <- inla.stack(Stack_Fit, Stack_Pred)
rm(Stack_Fit, Stack_Pred)


# Definition of the formula: -----
formula <- y ~ Var1 + Var2 + f(Weights, model = spde)


# Running of the specified model: -----
Model <- inla(formula = formula,
              Ntrials = 1,                           
              family = "binomial",
              control.family = list(link = "logit"),
              data = inla.stack.data(Stack),
              control.compute = list(dic = TRUE,
                                     waic = TRUE,
                                     cpo = TRUE), 
              control.predictor = list(A = inla.stack.A(Stack),
                                       compute = TRUE, 
                                       quantiles = c(0.025, 0.05, 0.95, 0.975),
                                       link = 1),
              control.inla = list(int.strategy = "eb", strategy = "adaptive"),
              control.fixed = list(prec = 0.1),
              verbose = TRUE)


# ------------------------------------------------------------------------------

# Extraction of predicted values for each point of the 'Pred' data set: -----
## Extraction prediction indexes in the stack: 
IndexPred <- inla.stack.index(stack = Stack, tag = "Pred")$data


# Extraction of main values: -----
## Creation of a data frame in probability scale with mean, DF, mode, and different
## quantiles for the predicted posterior distribution of each point: 
Risk <- data.frame(Mean = Model$summary.fitted.values[IndexPred, "mean"],
                   SD = Model$summary.fitted.values[IndexPred, "sd"], 
                   Low95 = Model$summary.fitted.values[IndexPred, "0.025quant"],
                   High95 = Model$summary.fitted.values[IndexPred, "0.975quant"],
                   Low90 = Model$summary.fitted.values[IndexPred, "0.05quant"],
                   High90 = Model$summary.fitted.values[IndexPred, "0.95quant"],
                   Mode = Model$summary.fitted.values[IndexPred, "mode"])


# Creation of a spatialpoints data frame: -----
SpatialLayer <- data.frame(Risk, Pred.coo@coords)
coordinates(SpatialLayer) <- c("X", "Y") 
proj4string(SpatialLayer) <- CRS("+init=epsg:32645 +units=km")


# Rasterization and export of points attributes into individual rasters:----
DataFrame <- SpatialLayer 
model <- "YourTag" 

filenames <- names(DataFrame)

lapply(filenames, function(x) {
  r1 <- vect2rast(DataFrame, fname = x, cell.size = 1, method = "raster", background = NA)
  r2 <- raster(r1)
  writeRaster(r2, paste0("./Layers/", model, "_", x), format = 'GTiff')
})