
## PARAM
# Names of the covariate columns placed on the right-hand side of every model
# formula built in Main(): PC1..PC5, FEMALE and LRR_SD.
COVARS <- c(sprintf("PC%d", 1:5), "FEMALE", "LRR_SD")

# Read a plink .profile file and attach its SCORE column to `df` as a PRS.
#
# Args:
#   df: data frame with an IID column.
#   profile_path: path to a plink profile file; it is read with a header and
#     must provide IID and SCORE columns.
#   prs_col: name given to the raw score column (e.g. "OCD_PRS").
#
# Returns:
#   `df` merged with the profile on IID (rows without a match on either side
#   are dropped by merge()), with two columns set: `prs_col` (raw SCORE) and
#   `<prs_col>_Z` (SCORE standardized using the mean / sd of the merged rows).
AddPrsFromProfile <- function(df, profile_path, prs_col) {
  profile <- read.table(profile_path, header = TRUE, stringsAsFactors = FALSE)
  profile <- profile[, c("IID", "SCORE")]
  colnames(profile)[2] <- prs_col

  # Drop any stale copy of the score before merging; otherwise merge() would
  # produce suffixed <prs_col>.x / <prs_col>.y columns and the exact-name
  # lookup below would find nothing.
  df[[prs_col]] <- NULL
  df <- merge(df, profile, by = "IID")

  score <- df[[prs_col]]
  df[[paste0(prs_col, "_Z")]] <- (score - mean(score)) / sd(score)
  df
}

# Regress a standardized PRS on covariates plus deleterious-CNV carrier status.
#
# Args:
#   df: data frame holding `z_col`, every column in `cnv_cols` and `covars`.
#   z_col: name of the response column (a standardized PRS).
#   cnv_cols: names of the per-category CNV indicator/count columns; a sample
#     is a carrier when their row sum is >= 1.
#   covars: character vector of covariate column names.
#
# Returns:
#   Named numeric vector (est, ci_l, ci_u, p) for the carrier term: the lm
#   estimate, its confint() bounds (95% by default) and its p-value. All four
#   are NA, with a warning, when the carrier term could not be estimated.
FitCarrierAssociation <- function(df, z_col, cnv_cols, covars) {
  absent <- setdiff(cnv_cols, colnames(df))
  if (length(absent) > 0) {
    stop("input table is missing CNV column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # binary indicator for whether or not the sample has at least one
  # deleterious cnv of this type
  outcome <- "deleterious_cnv_carrier"
  df$deleterious_cnv <- rowSums(df[, cnv_cols, drop = FALSE])
  df$deleterious_cnv_carrier <- ifelse(df$deleterious_cnv >= 1, 1, 0)

  # The PRS z-score is the response; carrier status is the term of interest
  # and is appended after the covariates.
  mdl_str <- paste0(z_col, " ~ ",
                    paste(covars, collapse = "+"),
                    "+", outcome)
  mdl <- lm(as.formula(mdl_str), data = df)
  res <- summary(mdl)$coefficients

  # summary.lm() omits aliased coefficients, e.g. when no sample (or every
  # sample) is a carrier, so the carrier row may be absent from `res`.
  if (!(outcome %in% rownames(res))) {
    warning("carrier term not estimable for model: ", mdl_str, call. = FALSE)
    return(c(est = NA_real_, ci_l = NA_real_, ci_u = NA_real_, p = NA_real_))
  }

  lm_ci <- confint(mdl)
  c(est = res[outcome, 1],
    ci_l = lm_ci[outcome, 1],
    ci_u = lm_ci[outcome, 2],
    p = res[outcome, 4])
}

# Script entry point: tests, for each PRS (height, OCD, CDG) and each CNV type
# (del, dup), whether the standardized PRS is associated with carrying at
# least one deleterious CNV, and writes one result row per combination.
#
# Takes exactly seven trailing command-line arguments, in this order:
#   1. carrier_prs.tsv       tab-separated table with a header; must provide
#                            Analysis_ID, DATASET and the has_* CNV columns
#   2. metrics.tsv           table with a header providing IID, FEMALE, LRR_SD
#   3. plink.eigenvec        headerless table; its 22 columns are labelled
#                            FID, IID, PC1..PC20
#   4. height.plink.profile  plink profile with IID and SCORE columns
#   5. ocd.plink.profile     plink profile with IID and SCORE columns
#   6. cdg.plink.profile     plink profile with IID and SCORE columns
#   7. results.tsv           output path (tab-separated, with header)
#
# With any other number of arguments the usage line is printed and the script
# exits with status 1.
Main <- function() {
  ARGS <- commandArgs(trailingOnly = TRUE)
  if (length(ARGS) != 7) {
    cat("prs_deleterious_cnv_absense.R <carrier_prs.tsv> <metrics.tsv> ",
        "<plink.eigenvec> <height.plink.profile> <ocd.plink.profile> <cdg.plink.profile> <results.tsv>\n")
    # non-zero status so calling pipelines can detect the usage error
    quit(status = 1)
  }

  # collect user-defined input
  carrier_prs.tsv <- ARGS[1]
  metrics.tsv <- ARGS[2]
  plink.evec <- ARGS[3]
  height.plink.profile <- ARGS[4]
  ocd.plink.profile <- ARGS[5]
  cdg.plink.profile <- ARGS[6]
  results.tsv <- ARGS[7]

  # read input table; existing PRS columns are stripped when each PRS is
  # attached below
  df <- read.table(carrier_prs.tsv,
                   header = TRUE,
                   stringsAsFactors = FALSE,
                   sep = "\t")
  df$IID <- df$Analysis_ID

  # read plink eigenvec file and merge PCs into df
  evec <- read.table(plink.evec, stringsAsFactors = FALSE)
  colnames(evec) <- c("FID", "IID", paste0("PC", 1:20))
  evec$FID <- NULL
  df <- merge(df, evec, by = "IID")

  # read metrics table and merge key values into df; FEMALE is taken from the
  # metrics table, so any copy already in df is dropped first
  metrics <- read.table(metrics.tsv, header = TRUE, stringsAsFactors = FALSE)
  metrics <- metrics[, c("IID", "FEMALE", "LRR_SD")]
  df$FEMALE <- NULL
  df <- merge(metrics, df, by = "IID")

  # define generic deleterious cnv columns ("cnv" is replaced by del / dup)
  deleterious_cnv_rows <- c("has_neurodev_cnv",
                            "has_cnv_pli995",
                            "has_cnv_nddgene")

  # add a column for presence in norway dataset
  # NOTE(review): `norway` is not listed in COVARS, so it does not enter the
  # models below -- confirm whether a dataset adjustment was intended.
  df$norway <- ifelse(grepl("NORDiC-NOR", df$DATASET), 1, 0)

  # add OCD, cross disorder and standing height PRS (raw and z-scored)
  df <- AddPrsFromProfile(df, ocd.plink.profile, "OCD_PRS")
  df <- AddPrsFromProfile(df, cdg.plink.profile, "CDG_PRS")
  df <- AddPrsFromProfile(df, height.plink.profile, "height_PRS")

  # one result row per (PRS, cnv type) combination, collected in a
  # preallocated list and bound once at the end
  prs_assess <- c("height", "OCD", "CDG")
  cnv_types <- c("del", "dup")
  rows <- vector("list", length(prs_assess) * length(cnv_types))
  i <- 0
  for (prs in prs_assess) {
    z_col <- paste0(prs, "_PRS_Z")

    for (x in cnv_types) {
      deleterious_cnv_rows_x <- gsub("cnv", x, deleterious_cnv_rows)

      # test for association between deleterious cnv carrier status and PRS,
      # controlling for the covariates in COVARS
      fit <- FitCarrierAssociation(df, z_col, deleterious_cnv_rows_x, COVARS)

      i <- i + 1
      rows[[i]] <- data.frame(prs = prs,
                              cnv_type = x,
                              lm_est = fit[["est"]],
                              lm_95ci_l = fit[["ci_l"]],
                              lm_95ci_u = fit[["ci_u"]],
                              lm_p = fit[["p"]],
                              stringsAsFactors = FALSE)
    }
  }
  out_df <- do.call(rbind, rows)

  # write output df to file
  write.table(out_df,
              file = results.tsv,
              row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)
}

# Run the analysis only when the file is executed non-interactively (e.g. via
# Rscript); sourcing it from an interactive session just defines the functions.
if (!interactive()) {
  Main()
}
