#' Filter outliers
#'
#' @name filter_outliers
#' @description Restricts the data to count-based records and removes outlying
#'   quantities. Rows are first kept only when `unit` is `"NO"`, `description`
#'   is one of `"BOD"`, `"EGL"`, `"DEA"`, `"LIV"`, `"SPE"`, `"SKI"` or `"TRO"`,
#'   and `quantity` is greater than zero. Entries whose `entryID` appears in
#'   `knownOutliers` are then dropped. Further filters are switched on by
#'   substrings of `filterOpt`:
#'
#'   `"tenKonly"` keeps rows with `quantity` below 10,000.
#'
#'   `"genPlus"` keeps rows whose `rank` is `"Species"` or `"Genus"`.
#'
#'   `"mag2"` drops, within every `correctedGenus` holding five or more
#'   entries, any entry that is the only one at its order of magnitude of
#'   `quantity`. Rows with a missing or `"Miscellaneous"` `group_` are also
#'   dropped.
#'
#'   `"mag2_tenK"` behaves like `"mag2"` except that a lone entry is retained
#'   when its `quantity` is below 10,000, and rows with `quantity` of
#'   40,000,000 or more are dropped. It is applied instead of, not on top of,
#'   `"mag2"`.
#'
#'   Rows with a missing `group_` are always removed from the returned data.
#'
#'   As a side effect three files are written to the project `data` directory
#'   (resolved with `here::here()`): `exclusionsWhole_<date>.csv.gz` (only if
#'   it does not exist yet), `exclusionsOutliers_<date>_<filterOpt>.csv` and
#'   `lemisDataOutliersRemoved_<date>_<filterOpt>.csv.gz`.
#'
#' @param lemisDataFiltered A data frame with (at least) the columns `entryID`,
#'   `unit`, `description`, `quantity`, `rank`, `correctedGenus` and `group_`.
#' @param knownOutliers A data frame with an `entryID` column listing entries
#'   to remove outright.
#' @param filterOpt A single string naming the filter option(s) to apply; see
#'   the description for the recognised substrings.
#' @return A data frame, a filtered version of the input. Under the `"mag2"`
#'   and `"mag2_tenK"` options it additionally carries the helper columns
#'   `orderMagnitudeQty`, `entriesInGenus` and `orderMagCountperGenus`. The
#'   result is always ungrouped.
#'
#' @export
filter_outliers <- function(lemisDataFiltered, knownOutliers, filterOpt){

  # targets::tar_load("lemisDataFiltered")
  # targets::tar_load("knownOutliers")
  # filterOpt <- optionsFiltering$filter[1]
  # filterOpt <- "mag2_tenK"

  # Every option check below feeds a scalar `if ()`, so a vector or missing
  # option must be rejected up front rather than fail midway through.
  stopifnot(length(filterOpt) == 1L, !is.na(filterOpt))

  dataDir <- here::here("data")
  # Resolved once so all files written by a single call share the same date.
  runDate <- Sys.Date()

  # Adds, per row, the order of magnitude of `quantity`, the number of entries
  # in its genus, and the number of entries sharing its genus and magnitude.
  # `quantity` is strictly positive at this point, so `log10()` is finite.
  add_magnitude_counts <- function(dat){
    dat %>%
      group_by(correctedGenus) %>%
      mutate(orderMagnitudeQty = 10^(floor(log10(quantity))),
             entriesInGenus = n()) %>%
      group_by(correctedGenus, orderMagnitudeQty) %>%
      add_count(name = "orderMagCountperGenus") %>%
      ungroup()
  }

  lemisDataFiltered_whole <- lemisDataFiltered %>%
    filter(unit %in% c("NO")) %>%
    filter(description %in% c("BOD", "EGL", "DEA", "LIV", "SPE", "SKI", "TRO")) %>%
    filter(quantity > 0)

  # Everything rejected by the unit / description / quantity screen.
  exclusionsWhole <- lemisDataFiltered[!lemisDataFiltered$entryID %in%
                                         lemisDataFiltered_whole$entryID, ]

  lemisDataFiltered_outliersRemoved <- lemisDataFiltered_whole %>%
    filter(!entryID %in% knownOutliers$entryID)

  # This file does not depend on `filterOpt`, so it is written at most once
  # per day even when the function is run for several options.
  exclusionsWholePath <- file.path(dataDir,
                                   paste0("exclusionsWhole_", runDate, ".csv.gz"))
  if (!file.exists(exclusionsWholePath)) {
    write_csv(exclusionsWhole, exclusionsWholePath)
  }

  if (str_detect(filterOpt, "tenKonly")) {
    lemisDataFiltered_outliersRemoved <- lemisDataFiltered_outliersRemoved %>%
      filter(quantity < 10000)
  }

  if (str_detect(filterOpt, "genPlus")) {
    lemisDataFiltered_outliersRemoved <- lemisDataFiltered_outliersRemoved %>%
      filter(rank == "Species" | rank == "Genus")
  }

  # "mag2" is a substring of "mag2_tenK", so the more specific option has to be
  # tested first and the two must be mutually exclusive. Running the strict
  # "mag2" filter beforehand would already have removed every lone entry and
  # made the `quantity < 10000` exemption below impossible to reach.
  if (str_detect(filterOpt, "mag2_tenK")) {
    lemisDataFiltered_outliersRemoved <- lemisDataFiltered_outliersRemoved %>%
      add_magnitude_counts() %>%
      filter(entriesInGenus < 5 | orderMagCountperGenus > 1 |
               (orderMagCountperGenus == 1 & quantity < 10000)) %>%
      filter(!is.na(group_)) %>%
      filter(!group_ == "Miscellaneous") %>%
      filter(quantity < 40000000)
  } else if (str_detect(filterOpt, "mag2")) {
    lemisDataFiltered_outliersRemoved <- lemisDataFiltered_outliersRemoved %>%
      add_magnitude_counts() %>%
      filter(entriesInGenus < 5 | orderMagCountperGenus > 1) %>%
      filter(!is.na(group_)) %>%
      filter(!group_ == "Miscellaneous")
  }

  # NOTE(review): the exclusions are recorded before the final `group_` screen
  # below, so rows dropped only by that screen are not listed in this file.
  # Kept as in the original - confirm whether that is intended.
  exclusionsOutliers <- lemisDataFiltered_whole[!lemisDataFiltered_whole$entryID %in%
                                                  lemisDataFiltered_outliersRemoved$entryID, ]

  lemisDataFiltered_outliersRemoved <- lemisDataFiltered_outliersRemoved %>%
    filter(!is.na(group_))

  write_csv(exclusionsOutliers,
            file.path(dataDir,
                      paste0("exclusionsOutliers_", runDate, "_", filterOpt, ".csv")))

  write_csv(lemisDataFiltered_outliersRemoved,
            file.path(dataDir,
                      paste0("lemisDataOutliersRemoved_",
                             runDate, "_", filterOpt, ".csv.gz")))

  lemisDataFiltered_outliersRemoved

}
