library(sf)
library(dplyr)
library(readr)

# ---------------- Inputs (read from current working directory) ----------------
# States: keep CONUS + DC
States <- st_read("tl_2020_us_state.shp", quiet = TRUE) |>
  dplyr::select(STUSPS, STATEFP) |>
  st_drop_geometry() |>
  dplyr::filter(!STUSPS %in% c("PR","AK","GU","VI","HI","AS","MP"))

State_list <- States$STUSPS

# Sampled footprints
Selection <- st_read("Footprints_county_sample.shp", quiet = TRUE)

# Ensure an 'index' exists (used for grouping after buffer join)
if (!"index" %in% names(Selection)) {
  Selection$index <- seq_len(nrow(Selection))
}

# ---------------- Single 500 m buffer around footprint centroids --------------
Footprints_c <- st_centroid(Selection)
Footprints_500m <- st_buffer(Footprints_c, 500)
Footprints_500m$buffer <- 500  # retained for clarity in the output

# ---------------- Per-state processing: LODES -> blocks -> 500m buffers -------
block_path_tpl <- "STATE_block_2010.shp"          # e.g., "RI_block_2010.shp"
wac_path_tpl   <- "STATE_wac_S000_JT00_2010.csv"  # e.g., "ri_wac_S000_JT00_2010.csv"

Results_list <- vector("list", length(State_list))
k <- 1L

for (abbr in State_list) {
  message("Processing: ", abbr)
  
  # Paths for this state (WAC csv uses lowercase state code)
  blk_path <- sub("STATE", abbr,   block_path_tpl)
  wac_path <- sub("STATE", tolower(abbr), wac_path_tpl)
  
  # ---- Read LODES WAC and compute job fields ----
  # Expect columns: C000, CE03, CNS01..CNS06, CNS08, w_geocode
  LODES_data <- readr::read_csv(wac_path, show_col_types = FALSE) |>
    mutate(
      Tot_jobs = C000,
      HI_jobs  = CE03,
      Ind_jobs = CNS01 + CNS02 + CNS03 + CNS04 + CNS05 + CNS06 + CNS08,
      GEOID10  = w_geocode
    ) |>
    dplyr::select(GEOID10, Tot_jobs, HI_jobs, Ind_jobs)
  
  # ---- Read state blocks, join to LODES by GEOID10, convert to centroids ----
  state_blocks <- st_read(blk_path, quiet = TRUE) |>
    dplyr::select(GEOID10)
  
  LODES_clean <- state_blocks |>
    left_join(LODES_data, by = "GEOID10") |>
    st_centroid() |>
    st_transform(st_crs(Footprints_500m))
  
  # ---- Spatially join to 500 m buffers; aggregate counts ---------------------
  joined <- st_join(LODES_clean, Footprints_500m) |>
    st_drop_geometry() |>
    filter(!is.na(GEOID10)) |>
    dplyr::select(index, buffer, Tot_jobs, HI_jobs, Ind_jobs) |>
    group_by(index, buffer) |>
    summarise(
      Tot_jobs = sum(Tot_jobs, na.rm = TRUE),
      HI_jobs  = sum(HI_jobs,  na.rm = TRUE),
      Ind_jobs = sum(Ind_jobs, na.rm = TRUE),
      .groups  = "drop"
    )
  
  Results_list[[k]] <- joined
  k <- k + 1L
}

Job_counts <- dplyr::bind_rows(Results_list)

save(Job_counts, file = "Job_counts.RData")