#!/usr/local/bin/Rscript

## LIBRARY
library(optparse)
library(ggplot2)
library(gridExtra)

# Split a "step:path" positional argument at its first colon.
#
# Only the first colon is treated as the separator, so the file path itself
# may contain further colons.
#
# Args:
#   spec: A single character string of the form "<step>:<fidiid file>".
#
# Returns:
#   A list with elements `step` (output column name) and `path` (file to read).
#   Stops with an error if either part is missing.
ParseStepSpec <- function(spec) {
  colon_pos <- regexpr(":", spec, fixed = TRUE)
  # regexpr() gives -1 when there is no colon; 1 means the step name is empty.
  if (is.na(colon_pos) || colon_pos < 2 || colon_pos == nchar(spec)) {
    stop("step argument must look like 'step:file', got '", spec, "'",
         call. = FALSE)
  }
  list(step = substr(spec, 1, colon_pos - 1),
       path = substr(spec, colon_pos + 1, nchar(spec)))
}

# Read the individual IDs listed in a headerless FID/IID file.
#
# Args:
#   path: File readable by read.table(). With two or more columns the second
#         column is taken as the IID; with a single column that column is.
#
# Returns:
#   A vector of IIDs; character(0) when the file is zero bytes long.
ReadStepIids <- function(path) {
  # read.table() errors on an empty file; treat it as "no samples remain".
  if (isTRUE(file.info(path)$size == 0)) {
    return(character(0))
  }
  fidiid <- read.table(path, stringsAsFactors = FALSE)
  iid_col <- if (ncol(fidiid) == 1) 1 else 2
  fidiid[, iid_col]
}

# Count, for each dataset, how many step rows carry an IID of that dataset.
#
# Args:
#   step_iids:       Vector of IIDs present at a given step (one per row).
#   iids_by_dataset: List with one vector of IIDs per dataset.
#
# Returns:
#   An integer vector of counts, in the same order as `iids_by_dataset`.
CountSamplesByDataset <- function(step_iids, iids_by_dataset) {
  vapply(iids_by_dataset,
         function(dataset_iids) sum(step_iids %in% dataset_iids),
         integer(1),
         USE.NAMES = FALSE)
}

# Script entry point.
#
# Parses the command line, reads the dataset/IID table given as the first
# positional argument, and for every following "step:file" argument counts
# the samples of each dataset listed in that file. The result (one row per
# dataset, one column per step) is written to the path given by --out-csv.
Main <- function() {

  # Get command-line arguments & options.
  # NOTE: --first-n-pcs, --fam-file and --dataset-iid-tsv are accepted and
  # echoed below but are not otherwise read by this function; the dataset/IID
  # table is taken from the first positional argument.
  option_list <- list(
    make_option(c("--first-n-pcs"), type="numeric", default=8,
                help="first n pcs to plot [default %default]", metavar="numeric"),
    make_option(c("--fam-file"), type="character", default=NA,
                help="PLINK fam file with raw case/control defs [default %default]",
                metavar="character"),
    make_option(c("--dataset-iid-tsv"), type="character", default=NA,
                help="file with dataset as col1 and iid as col2 [default %default]",
                metavar="character"),
    make_option(c("--out-csv"), type="character", default="out.csv",
                help="output CSV with samples remaining after each step [default %default]",
                metavar="character")

  )
  args <- parse_args(OptionParser(usage="%prog dataset_iid.tsv step1:step1.fidiid.tsv .. stepN:stepN.fidiid.tsv",
                                  option_list=option_list),
                     positional_arguments=TRUE)
  opts <- args$options

  # Print parameters to stdout
  cat("PARAMETERS : \n")
  for (opt_i in names(opts)) {
    if (opt_i != "help") {
      cat(opt_i, ":", opts[[opt_i]],"\n")
    }
  }

  # Write args & opts to vars
  if (length(args$args) < 1) {
    stop("missing positional argument: dataset_iid.tsv", call. = FALSE)
  }
  dataset_iid_tsv <- args$args[1]
  steps_fidiids <- args$args[-1]
  out_csv <- opts[["out-csv"]]

  # Read dataset/iid file (col1 = dataset, col2 = iid, with a header line)
  ds_iid <- read.table(dataset_iid_tsv, header = TRUE)
  if (ncol(ds_iid) < 2) {
    stop("'", dataset_iid_tsv, "' must have at least two columns ",
         "(dataset, iid)", call. = FALSE)
  }
  datasets <- unique(sort(ds_iid[, 1]))

  # Group the IIDs once per dataset. Keys are compared as character so that
  # numeric dataset labels are matched by value, never by row position.
  # split() drops rows whose dataset is NA, matching sort() above.
  iids_by_dataset <- split(ds_iid[, 2],
                           as.character(ds_iid[, 1]))[as.character(datasets)]

  # Init output df: one row per dataset, in sorted order
  out_df <- data.frame(dataset = datasets)

  # For each step/fidiid, add a column of per-dataset sample counts.
  # Iterating over the vector itself is a no-op when no steps were given.
  for (step_spec in steps_fidiids) {
    parsed <- ParseStepSpec(step_spec)
    step_iids <- ReadStepIids(parsed$path)
    out_df[[parsed$step]] <- CountSamplesByDataset(step_iids, iids_by_dataset)
  }

  # Write df to output csv file
  write.csv(out_df,
            file = out_csv,
            row.names = FALSE,
            quote = FALSE)

}

# Run Main() only when R is not running interactively, so that sourcing this
# file from an interactive session defines the functions without executing
# the script.
if (!interactive()) {
  Main()
}

