#' Apply names to data
#'
#' @name generate_renamed_data
#' @description A function to take the relevant name correcting keys and apply
#'   them to the LEMIS dataset. Starts with binomial name conversions, then
#'   moves on to correcting using genus, then the final species code checks.
#'
#'   The LEMIS records are read from
#'   `data/lemis/lemis2014_2022/lemisDataComplete_2023-04-19.csv.gz` (resolved
#'   with `here()`); that path is hard-coded.
#' @param nameConveringKey A data frame joined onto the LEMIS records by its
#'   `lemisName` column. It is presumably the source of the `corrected`,
#'   `correctedGenus`, `orderCorrected`, `rank` and `group_` columns that the
#'   rest of the function updates (confirm against the caller).
#' @param speciesCodeFixes A data frame with `species_code` and `corrected`
#'   columns. Every record with a listed species code has `corrected`
#'   overwritten with the matching value.
#' @param genericFixes A data frame with `combinedGeneric`, `correctedGenus`,
#'   `orderCorrected`, `rank`, `corrected` and `group_` columns. A fix is only
#'   applied to records still marked "REMAINS UNKNOWN" whose pasted
#'   `specific_name generic_name` equals `combinedGeneric`.
#' @return A dataframe of the LEMIS data with names corrected with additional
#'   columns (including a zero-padded `entryID`). Also saves corresponding
#'   files under `data/`: `nameReview/speciesCodes_unknowns.csv`,
#'   `lemisDataCorrected` (via `save()`),
#'   `lemisDataCorrected_2023-11-11.csv.gz`, and one
#'   `lemisDataCorrected_<group>_2023-11-11.csv.gz` per value of `group_`.
#'
#' @export
generate_renamed_data <- function(nameConveringKey, speciesCodeFixes, genericFixes){

  # Cut every name matching `pattern` down to its first `nWords` words.
  keepLeadingWords <- function(x, pattern, nWords) {
    hits <- which(str_detect(x, pattern))
    x[hits] <- word(x[hits], 1, nWords)
    x
  }

  # Load in lemis data -------------------------------------------------

  lemisData <- read_csv(here("data", "lemis", "lemis2014_2022",
                             "lemisDataComplete_2023-04-19.csv.gz"))

  # build the lookup name ---------------------------------------------------

  cleanedName <- str_to_sentence(paste(lemisData$genus, lemisData$species))

  # NOTE(review): str_to_sentence() has already lower-cased everything after
  # the first letter, so a pasted missing species reads " na" and this
  # upper-case pattern probably never matches. Left untouched because the name
  # key appears to be matched on names exactly as produced here (the lists
  # below contain entries such as "Crustaceans na") - confirm before changing.
  cleanedName <- sub(" NA$", "", cleanedName)

  # drop "sp." / "spp" / trailing "sp" markers (first occurrence only)
  cleanedName <- sub(" sp\\.| spp| sp$| sp sp$", "", cleanedName)

  # names flagged with a trailing asterisk are lowered to their first word
  cleanedName <- keepLeadingWords(cleanedName, "\\*$", 1)

  # "Shell shell" "Molluscs shells molluscs" "Nacre from mollus mollusk shell"
  cleanedName <- case_when(
    cleanedName == "Cribrarula shell" ~ "Cribrarula",
    cleanedName == "Erronea shell" ~ "Erronea",
    cleanedName == "Fimbiata shell" ~ "Fimbiata",
    cleanedName == "Fossils shells" ~ "Fossils",
    cleanedName == "Hammer shell" ~ "Hammer",
    cleanedName == "Hypripsis cumingii triangle shell mus" ~ "Hypripsis cumingii",
    cleanedName == "Lyncina shell" ~ "Lyncina",
    cleanedName == "Malleus hammer shell" ~ "Malleus hammer",
    cleanedName == "Naria shell" ~ "Naria",
    cleanedName == "Nylon shell" ~ "Nylon",
    cleanedName == "Patella gastropoda limpit shell" ~ "Patella gastropoda",
    cleanedName == "Pinctada margareti blacklip shell" ~ "Pinctada margareti",
    cleanedName == "Schilderia shell" ~ "Schilderia",
    cleanedName == "Staphylaea shell" ~ "Staphylaea",
    cleanedName == "Tectus pyramis troca shell" ~ "Tectus pyramis",
    TRUE ~ cleanedName
  )

  # remove egg from end of species name
  cleanedName <- keepLeadingWords(cleanedName, " egg", 2)

  # remove cf. to help search
  cleanedName <- sub(" cf\\.", "", cleanedName)

  # remove s.
  cleanedName <- sub(" s\\.", "", cleanedName)

  # lower to only genus level
  cleanedName <- keepLeadingWords(cleanedName, "n/a", 1)
  cleanedName <- keepLeadingWords(cleanedName, "unknown", 1)

  # just to genus(?)
  cleanedName <- keepLeadingWords(cleanedName, "Zoohantus", 1)

  lemisData$lemisName <- cleanedName

  # join data ---------------------------------------------------------------

  lemisDataCorrected <- lemisData %>%
    left_join(nameConveringKey, by = "lemisName", copy = TRUE)

  # flag names that can never be resolved -----------------------------------

  unresolvableNames <- c(
    "NA",
    "Other live inverts in trop fish &",
    "Non-cites entry",
    "Tropical fish (marine)",
    "Tropical fish (freshwater)",
    "All live tropical fish (including goldfish)",
    "Tropical fish (marine.)",
    "Noncites entry",
    "Tropical fish (freshwater.)",
    "Tropical_fish freshwater",
    "Tropical_fish marine",
    "Tropical fish ex 4",
    "Mollusc shell",
    "Noncites crustaceans",
    "Noncites insects",
    "Noncites invertebrates",
    "Noncites entry spanish fly",
    "Crustaceans crustacea",
    "Noncites butterflies",
    "Non-cites butterflies",
    "Noncites butterfly",
    "Noncites fish",
    "Noncites arachnids",
    "Noncites molluscs",
    "Crustaceans na",
    "Noncites mammals",
    "Noncites amphibians",
    "Platens omnodum",
    "All plants na",
    "Noncites entry na",
    "*******",
    "******",
    "****",
    "********",
    "Xxxxxxx xxxxx",
    "Xxxxxx xxxxxx",
    "Na na",
    "Ex 4 ex 4",
    "Noncites reptiles",
    "Noncites birds",
    "Animals museum specimens",
    "Noncites coral",
    "Non-cites bird",
    "All plants",
    "Molluscs",
    "All live trop. Fsh (incl. goldfish)",
    "Tropical fish",
    "Gastropods rochia conus",
    "Captive breed clownfish",
    "Noncites entry octocorals",
    "Butterflies",
    "Non-cites shells",
    "Noncites entry echinoderms",
    "Plants herbariums, live",
    "Molluscs moll",
    "Molluscs na",
    "Crustaceans",
    "Anuran",
    "Na ex 4",
    "Unknown",
    "Unknown cites mammal",
    "Unknown cites bird",
    "Unknown parasites species",
    "Unknown animal",
    "Re-export re-export",
    "Nonr reptiles",
    "Noncites entry snake",
    "Noncites entry xx",
    "Exemption 4 exemption 4",
    "All live trop. Fsh (incl. Goldfish)",
    "Other live_inverts",
    "Spider species",
    "Live tropical fish freshwater",
    "Migratory bird gen. authorization",
    "Tropical fish tropical fish"
  )

  unresolvableRows <- which(is.na(lemisDataCorrected$lemisName) |
                              lemisDataCorrected$lemisName %in% unresolvableNames)
  lemisDataCorrected$corrected[unresolvableRows] <- "REMAINS UNKNOWN"

  # tadpoles stay unresolved by name but can be placed in a group and order
  tadpoleRows <- which(str_detect(lemisDataCorrected$lemisName, "[Tt]adpole"))
  lemisDataCorrected$corrected[tadpoleRows] <- "REMAINS UNKNOWN"
  lemisDataCorrected$group_[tadpoleRows] <- "Amphibians"
  lemisDataCorrected$orderCorrected[tadpoleRows] <- "Anura"

  # assign a broad group to the catch-all names -----------------------------

  lemisDataCorrected <- lemisDataCorrected %>%
    mutate(group_ = case_when(
      lemisName == "NA" ~ "Miscellaneous",
      lemisName == "Other live_inverts" ~ "Other Invertebrates",
      lemisName == "Noncites entry xx" ~ "Miscellaneous",
      lemisName == "Other live inverts in trop fish &" ~ "Fish",
      lemisName == "Tropical fish ex 4" ~ "Fish",
      lemisName == "All live trop. Fsh (incl. Goldfish)" ~ "Fish",
      lemisName == "Non-cites entry" ~ "Miscellaneous",
      lemisName == "Tropical fish (marine)" ~ "Fish",
      lemisName == "Tropical fish (freshwater)" ~ "Fish",
      lemisName == "All live tropical fish (including goldfish)" ~ "Fish",
      lemisName == "Tropical fish (marine.)" ~ "Fish",
      lemisName == "Noncites entry" ~ "Miscellaneous",
      lemisName == "Non-cites bird" ~ "Birds",
      lemisName == "Unknown cites mammal" ~ "Terrestrial Mammals",
      lemisName == "Unknown mammalian" ~ "Terrestrial Mammals",
      lemisName == "Unknown cites bird" ~ "Birds",
      lemisName == "Noncites entry snake" ~ "Reptiles",
      lemisName == "Nonr reptiles" ~ "Reptiles",
      lemisName == "Tropical fish (freshwater.)" ~ "Fish",
      lemisName == "Tropical_fish freshwater" ~ "Fish",
      lemisName == "Tropical_fish marine" ~ "Fish",
      lemisName == "Spider species" ~ "Arachnids",
      lemisName == "Non-cites shells" ~ "Crustaceans and Molluscs",
      lemisName == "Mollusc shell" ~ "Crustaceans and Molluscs",
      lemisName == "Noncites crustaceans" ~ "Crustaceans and Molluscs",
      lemisName == "Noncites insects" ~ "Insecta and Myriapoda",
      lemisName == "Anuran" ~ "Amphibians",
      lemisName == "Noncites invertebrates" ~ "Insecta and Myriapoda",
      lemisName == "Noncites entry spanish fly" ~ "Insecta and Myriapoda",
      lemisName == "Crustaceans na" ~ "Crustaceans and Molluscs",
      lemisName == "Crustaceans crustacea" ~ "Crustaceans and Molluscs",
      lemisName == "Noncites butterflies" ~ "Lepidoptera",
      lemisName == "Non-cites butterflies" ~ "Lepidoptera",
      lemisName == "Noncites butterfly" ~ "Lepidoptera",
      lemisName == "Captive breed clownfish" ~ "Fish",
      lemisName == "Noncites fish" ~ "Fish",
      lemisName == "Noncites arachnids" ~ "Arachnids",
      lemisName == "Gastropods rochia conus" ~ "Crustaceans and Molluscs",
      lemisName == "Noncites molluscs" ~ "Crustaceans and Molluscs",
      lemisName == "Molluscs na" ~ "Crustaceans and Molluscs",
      lemisName == "Noncites mammals" ~ "Terrestrial Mammals",
      lemisName == "Noncites amphibians" ~ "Amphibians",
      str_detect(lemisName, "Tadpole") ~ "Amphibians",
      lemisName == "Platens omnodum" ~ "Plants",
      lemisName == "Noncites reptiles" ~ "Reptiles",
      lemisName == "Noncites birds" ~ "Birds",
      lemisName == "Animals museum specimens" ~ "Miscellaneous",
      lemisName == "Noncites coral" ~ "Echinoderms and Cnidaria",
      lemisName == "All plants" ~ "Plants",
      lemisName == "All plants na" ~ "Plants",
      lemisName == "Molluscs" ~ "Crustaceans and Molluscs",
      lemisName == "All live trop. Fsh (incl. goldfish)" ~ "Fish",
      lemisName == "Tropical fish" ~ "Fish",
      lemisName == "Noncites entry octocorals" ~ "Echinoderms and Cnidaria",
      lemisName == "Butterflies" ~ "Lepidoptera",
      lemisName == "Butterflies na" ~ "Lepidoptera",
      lemisName == "Noncites entry echinoderms" ~ "Echinoderms and Cnidaria",
      lemisName == "Plants herbariums, live" ~ "Plants",
      lemisName == "Molluscs moll" ~ "Crustaceans and Molluscs",
      lemisName == "Crustaceans" ~ "Crustaceans and Molluscs",
      lemisName == "Live tropical fish freshwater" ~ "Fish",
      lemisName == "Migratory bird gen. authorization" ~ "Birds",
      lemisName == "Tropical fish tropical fish" ~ "Fish",
      lemisName == "Live tropical fish live tropical fish" ~ "Fish",
      TRUE ~ group_
    ))

  lemisDataCorrected <- lemisDataCorrected %>%
    mutate(orderCorrected = case_when(
      lemisName == "Anuran" ~ "Anura",
      lemisName == "Noncites entry snake" ~ "Squamata",
      lemisName == "Lizard lizard" ~ "Squamata",
      lemisName == "Butterflies na" ~ "Lepidoptera",
      lemisName == "Noncites butterflies" ~ "Lepidoptera",
      TRUE ~ orderCorrected
    ))

  # anything the name key did not resolve is marked as unknown
  lemisDataCorrected$corrected[is.na(lemisDataCorrected$corrected)] <-
    "REMAINS UNKNOWN"

  lemisDataCorrected$rank <- str_to_sentence(lemisDataCorrected$rank)

  # species code fixes ------------------------------------------------------

  # these overwrite `corrected` for every record carrying the species code,
  # whether or not the name had already been resolved
  for(spCode in speciesCodeFixes$species_code){
    # spCode <- "TUPR"
    print(spCode)
    lemisDataCorrected[which(lemisDataCorrected$species_code == spCode),]$corrected <-
      unique(speciesCodeFixes[which(speciesCodeFixes$species_code == spCode),]$corrected)
  }

  # generic name fixes ------------------------------------------------------

  lemisDataCorrected$combinedGeneric <- paste(lemisDataCorrected$specific_name,
                                              lemisDataCorrected$generic_name)

  genericFixColumns <- c("correctedGenus", "orderCorrected", "rank",
                         "corrected", "group_")

  for(fixRow in seq_len(nrow(genericFixes))){
    genName <- genericFixes$combinedGeneric[fixRow]
    # genName <- "NA SCORPION"
    print(genName)

    # The target rows must be located once, before any column is written:
    # overwriting `corrected` removes the "REMAINS UNKNOWN" marker, so
    # re-evaluating this mask afterwards would select nothing and a later
    # column (previously `group_`) would silently never be updated.
    targetRows <- which(lemisDataCorrected$corrected %in% c("REMAINS UNKNOWN") &
                          lemisDataCorrected$combinedGeneric == genName)
    if(length(targetRows) == 0L){
      next
    }

    for(fixColumn in genericFixColumns){
      lemisDataCorrected[[fixColumn]][targetRows] <-
        as.character(genericFixes[[fixColumn]][fixRow])
    }
  }

  # records that are still unknown and have no group fall into a catch-all
  ungroupedUnknowns <- which(lemisDataCorrected$corrected %in% c("REMAINS UNKNOWN") &
                               is.na(lemisDataCorrected$group_))
  lemisDataCorrected$group_[ungroupedUnknowns] <- "Miscellaneous"

  # entry IDs: "eID" + row number, zero-padded to the width of the row count
  nEntries <- nrow(lemisDataCorrected)
  lemisDataCorrected$entryID <- paste0("eID", str_pad(seq_len(nEntries),
                                                      width = nchar(nEntries),
                                                      side = "left", pad = "0"))

  # species codes still needing review --------------------------------------

  speciesCodes_unknowns <- lemisDataCorrected %>%
    filter(!rank %in% c("Species", "Genus")) %>%
    filter(corrected == "REMAINS UNKNOWN") %>%
    distinct(species_code)

  write_csv(speciesCodes_unknowns, here("data", "nameReview", "speciesCodes_unknowns.csv"))

  # write full corrected data -----------------------------------------------

  lemisDataCorrected <- lemisDataCorrected %>%
    select(-combinedGeneric)

  # save() stores the object under its variable name, `lemisDataCorrected`
  save(lemisDataCorrected, file = here("data", "lemisDataCorrected"))

  write_csv(lemisDataCorrected, here("data", "lemisDataCorrected_2023-11-11.csv.gz"))

  # write group-specific files ----------------------------------------------

  for(g in unique(lemisDataCorrected$group_)){
    # g <- unique(lemisDataCorrected$group_)[1]
    subsetLemisData <- lemisDataCorrected %>%
      filter(group_ == g)

    write_csv(subsetLemisData, here("data", paste0("lemisDataCorrected_", g,
                                                   "_2023-11-11.csv.gz")))

  }

  lemisDataCorrected

}
