
library(ggplot2)

## PARAM: plot layout and colour constants used by Main()
# dodge width applied to the violins and the observed-value crossbars
PD <- 1.0
# extra dodge width added to PD when placing the p-value text labels
PD_P <- 1.4
# fill colours; DELDUP_COLORS is passed to scale_fill_manual(), so its order
# follows the cnv_type factor levels ("del" first, then "dup")
DEL_COLOR <- "#EE6677"
DUP_COLOR <- "#4477AA"
DELDUP_COLORS <- c(DEL_COLOR, DUP_COLOR)

# Entry point: reads permutation and observed-result tables for deletions and
# duplications, then writes a violin plot of the permutation distributions
# with the observed values (crossbars) and permutation p-values (text) overlaid.
#
# Command-line arguments (exactly five, in this order):
#   <permutations.del.tsv> <results.del.tsv>
#   <permutations.dup.tsv> <results.dup.tsv> <out.pdf>
# The permutation tables must provide the columns n_loci_ca1_co0 and
# n_loci_cafrq_gt_cofrq. The result tables must provide n_obs and perm_p, and
# their first column is taken as the test name.
# Exits with a non-zero status when the argument count is wrong.
Main <- function(){
  ARGS <- commandArgs(trailingOnly=TRUE)
  if (length(ARGS) != 5) {
    cat("plot_cnv_recurrence_results.R ",
        "<permutations.del.tsv> <results.del.tsv> ",
        "<permutations.dup.tsv> <results.dup.tsv> ",
        "<out.pdf>\n")
    # wrong usage is an error: exit non-zero so calling pipelines notice
    quit(save="no", status=1)
  }
  permutations.del.tsv <- ARGS[1]
  results.del.tsv <- ARGS[2]
  permutations.dup.tsv <- ARGS[3]
  results.dup.tsv <- ARGS[4]
  out.pdf <- ARGS[5]

  # read files
  del.perm.df <- read.table(permutations.del.tsv, header=TRUE)
  dup.perm.df <- read.table(permutations.dup.tsv, header=TRUE)
  del.res.df <- read.table(results.del.tsv, header=TRUE)
  dup.res.df <- read.table(results.dup.tsv, header=TRUE)

  # combine result dfs; label rows by their source table instead of assuming
  # that each result table has exactly two rows
  res.df <- rbind(del.res.df,
                  dup.res.df)
  res.df$cnv_type <- rep(c("del","dup"),
                         times=c(nrow(del.res.df), nrow(dup.res.df)))
  colnames(res.df)[1] <- "test_type"
  res.df$value <- res.df$n_obs

  # combine permutations into one long df (one row per permutation per
  # cnv_type / test_type combination)
  test_types <- c("n_loci_ca1_co0", "n_loci_cafrq_gt_cofrq")
  LongPerm <- function(cnv, test, df) {
    data.frame(cnv_type=rep(cnv, nrow(df)),
               test_type=rep(test, nrow(df)),
               value=df[[test]])
  }
  perm.df <- rbind(LongPerm("del", test_types[1], del.perm.df),
                   LongPerm("del", test_types[2], del.perm.df),
                   LongPerm("dup", test_types[1], dup.perm.df),
                   LongPerm("dup", test_types[2], dup.perm.df))

  # make sure factors are properly assigned
  perm.df$cnv_type <- factor(perm.df$cnv_type,
                             levels=c("del","dup"))
  perm.df$test_type <- factor(perm.df$test_type,
                              levels=unique(sort(perm.df$test_type)))
  res.df$cnv_type <- factor(res.df$cnv_type,
                            levels=c("del","dup"))
  res.df$test_type <- factor(res.df$test_type,
                             levels=unique(sort(res.df$test_type)))

  # add mean value column to res df: the mean of the permuted values for the
  # matching cnv_type / test_type. Compare as character so that differing
  # factor level sets between the two data frames cannot raise an error.
  perm.cnv <- as.character(perm.df$cnv_type)
  perm.test <- as.character(perm.df$test_type)
  res.cnv <- as.character(res.df$cnv_type)
  res.test <- as.character(res.df$test_type)
  res.df$value_mean <- vapply(
    seq_len(nrow(res.df)),
    function(i) {
      # which() drops NA comparisons, mirroring subset() semantics
      hits <- which((perm.cnv == res.cnv[i]) & (perm.test == res.test[i]))
      as.numeric(mean(perm.df$value[hits]))
    },
    numeric(1))

  # add a column with formatted pval string
  res.df$perm_p_fixed <- PvalFix(res.df$perm_p)

  # make violin plot with distributions derived from permutation
  dodge <- position_dodge(PD)
  dodge_p <- position_dodge(PD + PD_P)
  plt <- ggplot(perm.df, aes(x=value, y=test_type)) +
    geom_violin(aes(fill=cnv_type), position=dodge) +
    # observed value for each cnv_type / test_type drawn as a crossbar
    stat_summary(data=res.df,
                 fun='mean',
                 geom='crossbar',
                 width=0.75,
                 colour='black',
                 position=dodge,
                 mapping=aes(x=value, y=test_type, group=cnv_type)
                ) +
    # p-value label placed at the permutation mean, dodged further out
    geom_text(data=res.df,
              aes(x=value_mean, group=cnv_type, label=perm_p_fixed),
              position=dodge_p, angle=0, size=3) +
    scale_fill_manual(values=DELDUP_COLORS) +
    xlab("n test loci") +
    theme(axis.title.y=element_blank()
         ) +
    scale_y_discrete(labels=c("1 carrier case,\n0 carrier controls",
                              expression(">=2 carriers,\ncase freq > ctrl freq"))
                    )

  # pass the plot explicitly rather than relying on last_plot()
  ggsave(out.pdf, plot=plt, width=6.5, height=4)
  # width=13, height=3
}

# Format p-values as plot labels.
#
# Args:
#   pvals: numeric vector of p-values.
#   pvals_digits: number of digits after the decimal point in the
#     scientific-notation output (passed to formatC as `digits`).
#
# Returns:
#   A character vector the same length as `pvals`. Entries with a p-value
#   <= 1 become "p = <value in scientific notation>"; entries that are NA or
#   greater than 1 become the empty string. Empty input yields character(0).
PvalFix <- function(pvals, pvals_digits=2) {
  labels <- rep("", length(pvals))
  # NA p-values are treated like out-of-range values: no label
  show <- !is.na(pvals) & pvals <= 1
  if (any(show)) {
    labels[show] <- paste0("p = ",
                           formatC(pvals[show], format='e',
                                   digits=pvals_digits))
  }
  return(labels)
}


# Run Main() only when executed non-interactively (e.g. via Rscript), so that
# sourcing this file in an interactive session just defines the functions.
if (!interactive()) {
  Main()
}
