#!/usr/bin/env Rscript

## LIBRARY
library(ggplot2)
library(gridExtra)

## PARAM
# File-level constants: input paths, dataset groupings, and plot colors.

# Input file paths (relative paths, resolved against the working directory).
METRICS_TSV <- "results/intensity_qc/NORDiC_2021.intensity_callset_metrics.biolqc_intensity_cnv_qc.tsv"
FAM_FILE <- "../NORDiC_GWAS_CNV_202103/results/sample_biological_qc/NORDiC_2021.MERGED.pruned_miss_sex_rel_EUR_nonoutlier_het.fam"
EIGENVEC_FILE <- "results/intensity_qc/NORDiC_2021.intensity_callset_metrics.biolqc_intensity_cnv_qc.PCA.eigenvec"
CNV_COUNTS_TSV <- "results/annot_cnv/NORDiC_2021.qual_cnv.outlier_pruned.30kb_rare_filtered.annot.CDS.counts_global.tsv"
# Alternative counts file, kept for reference:
# CNV_COUNTS_TSV <- "results/annot_cnv/NORDiC_2021.qual_cnv.outlier_pruned.annot.CDS.counts_global.tsv"

# Dataset names grouped by array version / source.
GSA_V1_DATASETS <- c("NORDiC-NOR_cases_2019_1",
                     "ANGI_PhaseII_Pedersen_Controls_GSA-MD_wave1",
                     "ANGI_PhaseII_Pedersen_Controls_GSA-MD_wave2")
GSA_V2_DATASETS <- c("NORDiC-NOR_cases_2019_2",
                     "NORDiC-SWE_cases_2018",
                     "LG500_ctrls_2019")
GSA_V3_DATASETS <- c("NORDiC-NOR_cases_2020",
                     "NORDiC-SWE_cases_2020")
GSA_DECODE_DATASETS <- c("norway_ctrls_2019",
                         "norway_ctrls_2020")
# The liftover group is currently the same set as the deCODE group.
GSA_LIFTOVER_DATASETS <- GSA_DECODE_DATASETS

# Control-only datasets (a subset of DATASETS below).
CTRL_DATASETS <- c("ANGI_PhaseII_Pedersen_Controls_GSA-MD_wave1",
                   "ANGI_PhaseII_Pedersen_Controls_GSA-MD_wave2",
                   "LG500_ctrls_2019",
                   "norway_ctrls_2019",
                   "norway_ctrls_2020")

# All datasets, in array-group order (V1, V2, V3, deCODE).
DATASETS <- c(GSA_V1_DATASETS, GSA_V2_DATASETS, GSA_V3_DATASETS,
              GSA_DECODE_DATASETS)

# Plot colors for deletions and duplications. Each constant is assigned
# exactly once; previously tried values are kept here for reference only:
#   DEL_COLOR: "#D55E00", "#56B4E9"
#   DUP_COLOR: "#0072B2", "#56B4E9", "#009E73"
DEL_COLOR <- "#EE6677"
DUP_COLOR <- "#4477AA"

# Order is (deletion, duplication). It must match the c("del", "dup") factor
# levels given to cnv_type wherever this vector is passed to
# scale_color_manual(values = ...).
DELDUP_COLORS <- c(DEL_COLOR, DUP_COLOR)

Main <- function() {

  # get args
  ARGS<- commandArgs(trailingOnly=T)
  if (length(ARGS) != 2) {
    cat("plot_global_cnv_burden_analysis_results.R <in.tsv> <outroot>\n")
    q()
  }
  in_tsv <- ARGS[1]
  outroot <- ARGS[2]

  # read results df
  out_df <- read.table(in_tsv, header=T, stringsAsFactors=F, sep="\t")

  # adjustments to input df
  out_df$predictor <- gsub("_pli_","_pLI_",out_df$predictor)

  # plot the nongenic/genic results
  predictors <- c("n_del_nongenic","n_dup_nongenic",
                  "n_del_genic","n_dup_genic")
  pd <- position_dodge(0.1)
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group == "ALL")
  out_df.i$predictor <- factor(out_df.i$predictor, 
                               levels=predictors)
  gg1 <- ggplot(out_df.i, aes(x=predictor, y=lg_est)) + 
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) + 
                geom_hline(yintercept=1) + 
                theme(axis.text=element_text(size=14),
                      axis.title.x=element_blank())
  ggsave(paste0(outroot, ".cnv_burden.genic_nongenic.pdf"))

  # plot by size bins
  pd <- position_dodge(0.1)
  predictors <- c("n_cnv_100kb_to_500kb",
                  "n_cnv_500kb_to_1mb",
                  "n_cnv_gt_1mb")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group == "ALL")
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=predictors)
  gg1 <- ggplot(out_df.i, aes(x=predictor, y=lg_est)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                theme(axis.text=element_text(size=14),
                      axis.title.x=element_blank())
  ggsave(paste0(outroot, ".cnv_burden.size_bins.pdf"))

  # plot by zoonomia constraint bins  (global) 
  PLOT_CONS = TRUE
  if (PLOT_CONS == TRUE) {

  cons_bins <- c("neg20toneg2","neg2toneg1","neg1to0",
                 "0to1","1to2","2to12")
  cons_bins <- c("__","0to3","3to6","6to9")
  cons_bins_del <- paste0("nkbp_del_cons", cons_bins)
  cons_bins_dup <- paste0("nkbp_dup_cons", cons_bins)
  cons_bins_del <- gsub("__","",cons_bins_del)
  cons_bins_dup <- gsub("__","",cons_bins_dup)
  cons_bins_formatted <- c("0 - 9", "0 to 3", "3 to 6", "6 to 9")
  
  # del
  out_df.i <- subset(out_df, predictor %in% cons_bins_del)
  gg <- ggplot(out_df.i, aes(y=predictor, x=lg_est))
  gg <- gg + geom_pointrange(aes(xmin=lg_est_95ci_l, xmax=lg_est_95ci_u))
  gg <- gg + theme(axis.title.x = element_blank(),
                   axis.title.y = element_blank(),
                   axis.text.y = element_text(angle = 45, hjust = 1))
  gg <- gg + geom_vline(xintercept=1)
  ggsave(gg, file=paste0(outroot, ".nkbp_del_burden.lg.zoonomia.pdf")) 
  gg <- ggplot(out_df.i, aes(y=predictor, x=lm_est))
  gg <- gg + geom_pointrange(aes(xmin=lm_est_95ci_l, xmax=lm_est_95ci_u))
  gg <- gg + theme(axis.title.x = element_blank(),
                   axis.title.y = element_blank(),
                   axis.text.y = element_text(angle = 45, hjust = 1))
  gg <- gg + geom_vline(xintercept=0)
  ggsave(gg, file=paste0(outroot, ".nkbp_del_burden.lm.zoonomia.pdf"))

  # dup
  out_df.i <- subset(out_df, predictor %in% cons_bins_dup)
  out_df.i$predictor <- factor(out_df.i$predictor, 
                             levels=cons_bins_formatted)
  gg <- ggplot(out_df.i, aes(y=predictor, x=lg_est))
  gg <- gg + geom_pointrange(aes(xmin=lg_est_95ci_l, xmax=lg_est_95ci_u))
  gg <- gg + xlab("case/control odds ratio")
  gg <- gg + theme(axis.title.x = element_blank(),
                   axis.text.y = element_text(angle = 45, hjust = 1))
  gg <- gg + geom_vline(xintercept=1)
  ggsave(gg, file=paste0(outroot, ".nkbp_dup_burden.lg.zoonomia.pdf"))
  gg <- ggplot(out_df.i, aes(y=predictor, x=lm_est))
  gg <- gg + geom_pointrange(aes(xmin=lm_est_95ci_l, xmax=lm_est_95ci_u))
  gg <- gg + xlab("case/control rate difference")
  gg <- gg + theme(axis.title.x = element_blank(),
                   axis.title.y = element_blank(),
                   axis.text.y = element_text(angle = 45, hjust = 1))
  gg <- gg + geom_vline(xintercept=0)
  ggsave(gg, file=paste0(outroot, ".nkbp_dup_burden.lm.zoonomia.pdf"))

  }

  # plot n cnv by group (ALL, leave-one-out)
  titles <- list("del"="deletions", "dup"="duplications")
  for (x in c("del","dup")){
    pd <- position_dodge(0.3)
    out_df.i <- subset(out_df, predictor == paste0("n_",x))
    out_df.i$group <- gsub("leaveoneout_",
                           "leave one out-\n",
                           out_df.i$group)
    out_df.i$group <- factor(out_df.i$group,
                             levels=rev(sort(out_df.i$group)))
    out_df.i$pval <- PvalFix(out_df.i$lg_p)
    gg1 <- ggplot(out_df.i, aes(x=group, 
                                y=lg_est, 
                                label=pval)) +
                  geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                                width=0.1, position=pd) +
                  geom_point(position=pd) +
                  geom_hline(yintercept=1) +
                  theme(axis.text=element_text(size=14),
                        axis.title.y=element_blank(),
                        legend.position=c(0.8,0.8)) +
                  ylab("case/control odds ratio") + 
                  coord_flip() + 
                  geom_text(vjust=-1, angle=0, size=3) +
                  ggtitle(titles[[x]]) +
    ggsave(paste0(outroot, ".",x,"_burden.all_loo.pdf"),
           width=5, height=4)
  }

  # plot dels, dups global burden (all, loo)
  pd <- position_dodge(0.3)
  predictors <- c("n_del","n_dup")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, grepl("leaveoneout_", group) | (group=="ALL"))
  out_df.i$group <- gsub("leaveoneout_",
                         "leave one out-\n",
                         out_df.i$group)
  groups <- rev(sort(subset(out_df.i, predictor == "n_del")$group))
  out_df.i$cnv_type <- ifelse(grepl("n_del", out_df.i$predictor),"del","dup")
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$group <- factor(out_df.i$group,
                           levels=groups)
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=group, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                ylab("case/control odds ratio") + 
                coord_flip() + 
                geom_text(position=position_dodge(1), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.all_loo.pdf"),
         width=6, height=4)

  # plot dels, dups global burden (all, covariate loo)
  pd <- position_dodge(0.3)
  predictors <- c("n_del","n_dup")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, grepl("covarloo", group))
  out_df.i$group <- gsub("covarloo_",
                         "leave one out-\n",
                         out_df.i$group)
  groups <- rev(sort(subset(out_df.i, predictor == "n_del")$group))
  out_df.i$cnv_type <- ifelse(grepl("n_del", out_df.i$predictor),"del","dup")
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$group <- factor(out_df.i$group,
                           levels=groups)
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=group, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                ylab("case/control odds ratio") + 
                coord_flip() + 
                geom_text(position=position_dodge(1), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.loo_covar.pdf"),
         width=6, height=4)

  # plot by pLI bins (dels)
  pd <- position_dodge(0.1)
  predictors <- c("n_del_pLI_0_to_50",
                  "n_del_pLI_50_to_995",
                  "n_del_pLI_995_to_1")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=predictors)
  gg1 <- ggplot(out_df.i, aes(x=predictor, y=lg_est)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                theme(axis.text=element_text(size=14),
                      axis.title.x=element_blank())
  ggsave(paste0(outroot, ".del_burden.pLI_bins.pdf"))

  # plot by pLI bins (dups)
  pd <- position_dodge(0.1)
  predictors <- c("n_dup_pLI_0_to_50",
                  "n_dup_pLI_50_to_995",
                  "n_dup_pLI_995_to_1")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=predictors)
  gg1 <- ggplot(out_df.i, aes(x=predictor, y=lg_est)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                theme(axis.text=element_text(size=14),
                      axis.title.x=element_blank())
  ggsave(paste0(outroot, ".dup_burden.pLI_bins.pdf"))
 
  # plot by pLI bins (small dels)
  predictors <- c("n_del_g1_pLI_0_to_50",
                  "n_del_g1_pLI_50_to_995",
                  "n_del_g1_pLI_995_to_1")
  pd <- position_dodge(0.1)
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=predictors)
  gg1 <- ggplot(out_df.i, aes(x=predictor, y=lg_est)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                theme(axis.text=element_text(size=14),
                      axis.title.x=element_blank())
  ggsave(paste0(outroot, ".del_g1_burden.pLI_bins.pdf"))
  
  # plot by pLI bins (small dups)
  predictors <- c("n_dup_g1_pLI_0_to_50",
                  "n_dup_g1_pLI_50_to_995",
                  "n_dup_g1_pLI_995_to_1")
  pd <- position_dodge(0.1)
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=predictors)
  gg1 <- ggplot(out_df.i, aes(x=predictor, y=lg_est)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                theme(axis.text=element_text(size=14),
                      axis.title.x=element_blank())
  ggsave(paste0(outroot, ".dup_g1_burden.pLI_bins.pdf"))
  
  # plot dels, dups by genic/nongenic
  pd <- position_dodge(0.3)
  predictors <- c("n_del_genic","n_dup_genic",
                  "n_del_nongenic", "n_dup_nongenic")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$cnv_type <- ifelse(grepl("n_del", out_df.i$predictor),"del","dup")
  out_df.i$predictor <- gsub("n_del_","",out_df.i$predictor)
  out_df.i$predictor <- gsub("n_dup_","",out_df.i$predictor)
  print(out_df.i)
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=c("genic","nongenic")
                               )
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=predictor, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                scale_x_discrete(labels=c(
                                          "genic","nongenic")) +
                ylab("case/control odds ratio") + 
                coord_flip() + 
                geom_text(position=position_dodge(0.8), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.nongenicgenic.pdf"),
         width=5, height=4)
  
  # plot dels, dups by genic/nongenic (single gene CNVs only)
  pd <- position_dodge(0.3)
  predictors <- c("n_del_g1_genic","n_dup_g1_genic",
                  "n_del_g1_nongenic", "n_dup_g1_nongenic")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$predictor <- gsub("_g1","",out_df.i$predictor)
  out_df.i$cnv_type <- ifelse(grepl("n_del", out_df.i$predictor),"del","dup")
  out_df.i$predictor <- gsub("n_del_","",out_df.i$predictor)
  out_df.i$predictor <- gsub("n_dup_","",out_df.i$predictor)
  print(out_df.i)
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=c("genic","nongenic")
                               )
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=predictor, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                scale_x_discrete(labels=c(
                                          "genic","nongenic")) +
                ylab("case/control odds ratio") + 
                coord_flip() + 
                geom_text(position=position_dodge(0.8), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.nongenicgenic.g1.pdf"),
         width=5, height=4)

  # plot dels, dups by genic/nongenic, pLI bins
  pd <- position_dodge(0.3)
  predictors <- c("n_del_genic","n_del_nongenic",
                  "n_del_pLI_995_to_1","n_del_pLI_50_to_995","n_del_pLI_0_to_50",
                  "n_dup_genic","n_dup_nongenic",
                  "n_dup_pLI_995_to_1","n_dup_pLI_50_to_995","n_dup_pLI_0_to_50")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$cnv_type <- ifelse(grepl("n_del", out_df.i$predictor),
                              "del", "dup")
  out_df.i$predictor <- gsub("n_del_","",out_df.i$predictor)
  out_df.i$predictor <- gsub("n_dup_","",out_df.i$predictor)
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=c("pLI_995_to_1", 
                                        "pLI_50_to_995",
                                        "pLI_0_to_50",
                                        "genic","nongenic")
                               )
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=predictor, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=13),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                scale_x_discrete(labels=c("pLI\n0.995 - 1",
                                          "pLI\n0.5 - 0.995",
                                          "pLI\n0 - 0.5",
                                          "genic","nongenic")) +
                ylab("case/control odds ratio") + 
                coord_flip() + 
                geom_text(position=position_dodge(0.8), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.nongenicgenic_pLI_bins.pdf"),
         width=5, height=4)

  # plot dels, dups by genic/nongenic, pLI bins (small cnvs)
  pd <- position_dodge(0.3)
  predictors <- c("n_del_g1_genic",
                  "n_del_g1_nongenic",
                  "n_del_g1_pLI_995_to_1",
                  "n_del_g1_pLI_50_to_995",
                  "n_del_g1_pLI_0_to_50",
                  "n_dup_g1_genic",
                  "n_dup_g1_nongenic",
                  "n_dup_g1_pLI_995_to_1",
                  "n_dup_g1_pLI_50_to_995",
                  "n_dup_g1_pLI_0_to_50"
                 )
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$predictor <- gsub("_g1","",out_df.i$predictor)
  out_df.i$cnv_type <- ifelse(grepl("n_del", out_df.i$predictor),
                              "del", "dup")
  out_df.i$predictor <- gsub("n_del_","",out_df.i$predictor)
  out_df.i$predictor <- gsub("n_dup_","",out_df.i$predictor)
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=c("pLI_995_to_1", 
                                        "pLI_50_to_995",
                                        "pLI_0_to_50",
                                        "genic","nongenic")
                               )
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=predictor, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                scale_x_discrete(labels=c("pLI 0.995 - 1",
                                          "pLI 0.5 - 0.995",
                                          "pLI 0 - 0.5",
                                          "genic","nongenic")) +
                ylab("case/control odds ratio") + 
                coord_flip() + 
                geom_text(position=position_dodge(0.8), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.nongenicgenic_pLI_bins.g1.pdf"),
         width=5, height=4)

  # plot genic CNV res by pHaplo/pTriplo
  pd <- position_dodge(0.3)
  predictors <- c('n_del_phaplo_ge_86',
                  'n_del_phaplo_lt_86',
                  'n_del_ptriplo_ge_94',
                  'n_del_ptriplo_lt_94',
                  'n_dup_phaplo_ge_86',
                  'n_dup_phaplo_lt_86',
                  'n_dup_ptriplo_ge_94',
                  'n_dup_ptriplo_lt_94')
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$cnv_type <- ifelse(grepl("n_del", out_df.i$predictor),
                              "del", "dup")
  out_df.i$predictor <- gsub("n_del_","",out_df.i$predictor)
  out_df.i$predictor <- gsub("n_dup_","",out_df.i$predictor)
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=c("phaplo_ge_86", 
                                        "phaplo_lt_86",
                                        "ptriplo_ge_94",
                                        "ptriplo_lt_94")
                               )
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=predictor, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                scale_x_discrete(labels=c("pHaplo >= 0.86",
                                          "pHaplo < 0.86",
                                          "pTriplo >= 0.94",
                                          "pTriplo < 0.94")) +
                ylab("case/control odds ratio") + 
                coord_flip() + 
                geom_text(position=position_dodge(0.8), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.pHaplo_pTriplo.pdf"),
         width=5, height=4)
  
  # plot del, dup by size bins
  pd <- position_dodge(0.3)
  predictors <- c("n_del_30kb_to_100kb",
                  "n_del_100kb_to_500kb","n_del_500kb_to_1mb","n_del_gt_1mb",
                  "n_dup_30kb_to_100kb",
                  "n_dup_100kb_to_500kb","n_dup_500kb_to_1mb","n_dup_gt_1mb")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$cnv_type <- ifelse(grepl("n_del", out_df.i$predictor),
                              "del", "dup")
  out_df.i$predictor <- gsub("n_del_","",out_df.i$predictor)
  out_df.i$predictor <- gsub("n_dup_","",out_df.i$predictor)
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=c("gt_1mb", "500kb_to_1mb",
                                        "100kb_to_500kb"),
                               )
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=predictor, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                scale_x_discrete(labels=c(">1MB", "500KB-1MB", 
                                          "100KB-500KB", "30KB-100KB")) +
                ylab("case/control odds ratio") + 
                coord_flip() + 
                geom_text(position=position_dodge(0.8), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.size_bins.pdf"),
         width=5, height=4)

  # plot genic CNV res by pHaplo/pTriplo
  # plot case/control results centered on neurodevelopmental genes + loci
  pd <- position_dodge(0.3)
  predictors <- c('n_del_neurodev',
                  'n_del_NDD',
                  'n_del_NDD0',
                  'n_dup_neurodev',
                  'n_dup_NDD',
                  'n_dup_NDD0')
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$cnv_type <- ifelse(grepl("n_del", out_df.i$predictor),
                              "del", "dup")
  out_df.i$predictor <- gsub("n_del_","",out_df.i$predictor)
  out_df.i$predictor <- gsub("n_dup_","",out_df.i$predictor)
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=c("NDD0", 
                                        "NDD",
                                        "neurodev")
                               )
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=predictor, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                scale_x_discrete(labels=c("non-NDD genes\n(Fu et al.)",
                                          "NDD genes\n(Fu et al., n=664)",
                                          "NDD CNVs\n(Kendall et al. 2019)")) +
                ylab("case/control odds ratio") + 
                coord_flip() + 
                geom_text(position=position_dodge(0.8), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.neurodevelopmental.pdf"),
         width=5, height=4)
 

  # plot by freq bins
  pd <- position_dodge(0.3)
  predictors <- c("n_del_freq0to1", "n_del_freq2to5", "n_del_freq6to10",
                  "n_del_freq11to38",
                  "n_dup_freq0to1", "n_dup_freq2to5", "n_dup_freq6to10",
                  "n_dup_freq11to38")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$cnv_type <- ifelse(grepl("n_del", out_df.i$predictor),
                              "del", "dup")
  out_df.i$predictor <- gsub("n_del_","",out_df.i$predictor)
  out_df.i$predictor <- gsub("n_dup_","",out_df.i$predictor)
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=c("freq0to1",  "freq2to5",
                                        "freq6to10", "freq11to38"),
                               )
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=predictor, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                scale_x_discrete(labels=c("1", "2-5", 
                                          "6-10", ">10")) +
                ylab("case/control odds ratio") + 
                coord_flip() + 
                geom_text(position=position_dodge(0.8), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.freq_bins.pdf"),
         width=5, height=4)

  # plot by constraint bins
  pd <- position_dodge(0.3)
  predictors <- c("nkbp_del_cons","nkbp_del_cons0to3",
                  "nkbp_del_cons3to6","nkbp_del_cons6to9",
                  "nkbp_dup_cons","nkbp_dup_cons0to3",
                  "nkbp_dup_cons3to6","nkbp_dup_cons6to9")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$cnv_type <- ifelse(grepl("_del_", out_df.i$predictor),
                              "del", "dup")
  out_df.i$predictor <- gsub("nkbp_del_cons","",out_df.i$predictor)
  out_df.i$predictor <- gsub("nkbp_dup_cons","",out_df.i$predictor)
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=c("6to9",  "3to6",
                                        "0to3", ""),
                               )
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=predictor, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                scale_x_discrete(labels=c(">6", "3-6", 
                                          "0-3", ">0")) +
                ylab("case/control odds ratio per additional kilobase") + 
                coord_flip() + 
                geom_text(position=position_dodge(0.8), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.cons_bins.pdf"),
         width=5, height=4)

  # plot by constraint bins (noncoding cnvs only)
  pd <- position_dodge(0.3)
  predictors <- c("nkbp_del_cons_nc","nkbp_del_cons0to3_nc",
                  "nkbp_del_cons3to6_nc","nkbp_del_cons6to9_nc",
                  "nkbp_dup_cons_nc","nkbp_dup_cons0to3_nc",
                  "nkbp_dup_cons3to6_nc","nkbp_dup_cons6to9_nc")
  out_df.i <- subset(out_df, predictor %in% predictors)
  out_df.i <- subset(out_df.i, group =="ALL")  
  out_df.i$cnv_type <- ifelse(grepl("_del_", out_df.i$predictor),
                              "del", "dup")
  out_df.i$predictor <- gsub("nkbp_del_cons","",out_df.i$predictor)
  out_df.i$predictor <- gsub("nkbp_dup_cons","",out_df.i$predictor)
  out_df.i$predictor <- gsub("_nc","",out_df.i$predictor)
  out_df.i$cnv_type <- factor(out_df.i$cnv_type, levels=c("del","dup"))
  out_df.i$predictor <- factor(out_df.i$predictor,
                               levels=c("6to9",  "3to6",
                                        "0to3", ""),
                               )
  out_df.i$pval <- PvalFix(out_df.i$lg_p)
  gg1 <- ggplot(out_df.i, aes(x=predictor, 
                              y=lg_est, 
                              group=cnv_type,
                              fill=cnv_type,
                              label=pval,
                              color=cnv_type)) +
                geom_errorbar(aes(ymin=lg_est_95ci_l, ymax=lg_est_95ci_u),
                              width=0.1, position=pd) +
                geom_point(position=pd) +
                geom_hline(yintercept=1) +
                labs(color="CNV type", fill="CNV type") +
                theme(axis.text=element_text(size=14),
                      axis.title.y=element_blank(),
                      legend.position=c(0.8,0.8)) +
                scale_x_discrete(labels=c(">6", "3-6", 
                                          "0-3", ">0")) +
                ylab("case/control odds ratio per additional kilobase") + 
                coord_flip() + 
                geom_text(position=position_dodge(0.8), angle=0, size=3) +
                scale_color_manual(values=DELDUP_COLORS)
  ggsave(paste0(outroot, ".del_dup_burden.cons_bins.noncoding_cnvs_only.pdf"),
         width=5, height=4)



  q()

}

# Format p-values as text labels of the form "p = 1.23e-04".
#
# Args:
#   pvals:        numeric vector of p-values; may contain NA or be empty.
#   pvals_digits: number of digits after the decimal point in the
#                 scientific-notation output (passed to formatC as `digits`).
#
# Returns:
#   A character vector with one element per element of `pvals`.
#   Entries that are NA or greater than 1 become the empty string "",
#   so they produce no visible text when used as plot labels.
PvalFix <- function(pvals, pvals_digits=2) {
  # start from all-blank labels so the result is always character,
  # including for zero-length or all-NA input
  labels <- rep("", length(pvals))
  names(labels) <- names(pvals)

  # only values that are present and <= 1 get a formatted label;
  # is.na() is checked explicitly because `NA <= 1` is NA, not FALSE
  ok <- !is.na(pvals) & pvals <= 1
  if (any(ok)) {
    labels[ok] <- paste0("p = ",
                         formatC(pvals[ok], format="e",
                                 digits=pvals_digits))
  }
  return(labels)
}

# Script entry point: run Main() only when executed non-interactively
# (e.g. via Rscript), so the file can be source()'d in an interactive
# session without triggering the analysis.
if (!interactive()) {
  Main()
}
