

library(ggplot2)

# Draw one point-range per predictor (estimate with lower/upper bounds), add a
# vertical reference line at `ref_x`, and save the figure to `out_pdf`.
# `est_col`, `lo_col` and `hi_col` name the columns of `res_df` to plot.
.SaveForestPlot <- function(res_df, est_col, lo_col, hi_col, ref_x, out_pdf) {
  # Show the bin labels with a literal minus sign, and freeze the row order
  # as the axis order by using the labels themselves as factor levels.
  labels <- gsub("neg", "-", res_df$predictor)
  plot_df <- data.frame(predictor = factor(labels, levels = labels),
                        est = res_df[[est_col]],
                        lo = res_df[[lo_col]],
                        hi = res_df[[hi_col]])
  gg <- ggplot(plot_df, aes(y = predictor, x = est))
  gg <- gg + geom_pointrange(aes(xmin = lo, xmax = hi))
  gg <- gg + theme(axis.title.x = element_blank(),
                   axis.title.y = element_blank(),
                   axis.text.y = element_text(angle = 45, hjust = 1))
  gg <- gg + geom_vline(xintercept = ref_x)
  # Name the arguments explicitly rather than relying on `file` partially
  # matching ggsave()'s `filename` parameter.
  ggsave(filename = out_pdf, plot = gg)
}

# Command-line entry point.
#
# Usage: cons_analysis.R <in.tbl.tsv> <in.bed> <in.zoonomia.annot.tsv> <outroot>
#
# Steps:
#   1. Read the sample table (needs `IID` and `dataset` columns), a
#      six-column CNV BED file (chrom, start0, end, locus, cnvtype, iid) and a
#      per-locus annotation table with one column per conservation bin.
#   2. For every sample, sum the per-bin values over the loci of its CNVs.
#   3. Keep samples whose `dataset` contains "ANGI" and flag the ones whose
#      dataset equals the `ctrl.1` string as CTRL1 = 1.
#   4. Per bin, fit lm(kbp ~ CTRL1) and glm(CTRL1 ~ kbp, binomial) and write
#      <outroot>.cons_bins.kbp.ANGI_cmp.pdf and
#      <outroot>.cons_bins.kbp_ORs.ANGI_cmp.pdf.
Main <- function() {
  ARGS <- commandArgs(trailingOnly = TRUE)
  if (length(ARGS) != 4) {
    cat("cons_analysis.R <in.tbl.tsv> <in.bed> <in.zoonomia.annot.tsv> ",
        "<outroot>\n")
    # Wrong usage is an error: exit non-zero so calling pipelines notice.
    quit(save = "no", status = 1)
  }
  in.tbl.tsv <- ARGS[1]
  in.bed <- ARGS[2]
  in.zoonomia.annot.tsv <- ARGS[3]
  outroot <- ARGS[4]

  # read table
  tbl <- read.table(in.tbl.tsv, header = TRUE, sep = "\t",
                    stringsAsFactors = FALSE)
  rownames(tbl) <- tbl$IID
  cnv <- read.table(in.bed, stringsAsFactors = FALSE, header = FALSE)
  colnames(cnv) <- c("chrom", "start0", "end", "locus", "cnvtype", "iid")
  zoonomia <- read.table(in.zoonomia.annot.tsv,
                         check.names = FALSE,
                         header = TRUE, stringsAsFactors = FALSE)
  # Bin headers contain "-" (e.g. "-2to-1"); spell it as "neg" so the derived
  # column names can be used inside model formulas.
  colnames(zoonomia) <- gsub("-", "neg", colnames(zoonomia))

  # annotate with cons scores
  cons_cols <- c("neg20toneg2", "neg2toneg1", "neg1to0", "0to1", "1to2", "2to9")
  nbp_cols <- paste0("nbp_", cons_cols)

  # A missing column would otherwise be summed as sum(NULL) == 0 and silently
  # produce all-zero counts, so fail loudly instead.
  missing_cols <- setdiff(c("locus", cons_cols), colnames(zoonomia))
  if (length(missing_cols) > 0) {
    stop("annotation table ", in.zoonomia.annot.tsv,
         " lacks expected column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  for (nbp_col in nbp_cols) {
    tbl[[nbp_col]] <- numeric(nrow(tbl))
  }
  cat("reading cons nbp ..")
  # Group CNV loci by sample once instead of rescanning the CNV table for
  # every sample.
  loci_by_iid <- split(cnv$locus, cnv$iid)
  for (iid.i in rownames(tbl)) {
    loci.i <- loci_by_iid[[iid.i]]
    if (length(loci.i) > 0) {
      zoonomia.i <- zoonomia[zoonomia$locus %in% loci.i, , drop = FALSE]
      for (cons_col in cons_cols) {
        tbl[iid.i, paste0("nbp_", cons_col)] <- sum(zoonomia.i[[cons_col]])
      }
    }
  }
  # Total across bins (in bp). Computed after the per-bin counts are filled;
  # computing it before the loop would always give 0.
  tbl$nbp_cons <- rowSums(tbl[, nbp_cols, drop = FALSE])
  cat("done.\n")

  ctrl.1 <- c("ANGI_PhaseII_Pedersen_Controls_GSA-MD_wave1")
  tbl <- subset(tbl, grepl("ANGI", tbl$dataset))
  tbl$CTRL1 <- ifelse(tbl$dataset == ctrl.1, 1, 0)

  # Both sets of models work in kbp rather than bp.
  tbl[nbp_cols] <- tbl[nbp_cols] / 1000

  # Linear models: per-bin kbp regressed on the CTRL1 indicator.
  lm_rows <- lapply(nbp_cols, function(nbp_col) {
    mdl <- lm(as.formula(paste0(nbp_col, "~CTRL1")), data = tbl)
    coefs <- summary(mdl)$coefficients
    ci <- confint(mdl)
    # confint() returns a matrix with one row per coefficient; index the
    # CTRL1 row by name (linear indices 1 and 2 would pick the lower bounds
    # of the intercept and of CTRL1 instead).
    data.frame(predictor = nbp_col,
               lm_est = coefs["CTRL1", 1],
               lm_ci_95l = ci["CTRL1", 1],
               lm_ci_95u = ci["CTRL1", 2],
               lm_p = coefs["CTRL1", 4],
               stringsAsFactors = FALSE)
  })
  res_df <- do.call(rbind, lm_rows)
  .SaveForestPlot(res_df, "lm_est", "lm_ci_95l", "lm_ci_95u", 0,
                  paste0(outroot, ".cons_bins.kbp.ANGI_cmp.pdf"))

  # Logistic models: CTRL1 regressed on per-bin kbp; estimates and interval
  # bounds are exponentiated, hence the reference line at 1 in the plot.
  lg_rows <- lapply(nbp_cols, function(nbp_col) {
    mdl <- glm(as.formula(paste0("CTRL1~", nbp_col)), family = binomial,
               data = tbl)
    coefs <- summary(mdl)$coefficients
    ci <- confint(mdl)
    data.frame(predictor = nbp_col,
               lg_est = exp(coefs[nbp_col, 1]),
               lg_ci_95l = exp(ci[nbp_col, 1]),
               lg_ci_95u = exp(ci[nbp_col, 2]),
               lg_p = coefs[nbp_col, 4],
               stringsAsFactors = FALSE)
  })
  res_df <- do.call(rbind, lg_rows)
  print(res_df)
  .SaveForestPlot(res_df, "lg_est", "lg_ci_95l", "lg_ci_95u", 1,
                  paste0(outroot, ".cons_bins.kbp_ORs.ANGI_cmp.pdf"))
}


# Run the analysis only when executed as a script (e.g. via Rscript), not
# when the file is sourced from an interactive session.
if (!interactive()) {
  Main()
}
