#!/usr/local/bin/Rscript

## LIBRARY
library(optparse)
library(ggplot2)
library(gridExtra)

# Plot pairs of principal components from a PLINK eigenvec file.
#
# Usage: plot_PCA.R [options] <plink.eigenvec> <outroot>
#
# Reads the eigenvec table (FID, IID, PC1..PCk, no header row), optionally
# merges case/control status from a PLINK fam file (column 2 = IID,
# column 6 = phenotype; 2 -> "OCD", 1 -> "CTRL", anything else ->
# "undefined"), and writes:
#   <outroot>.eigenvec_phe.tsv         merged table (only with --fam-file)
#   <outroot>.PC1-<first-n-pcs>.pdf    scatter plots PC1/PC2, PC3/PC4, ...
#
# Takes no arguments (everything comes from the command line) and terminates
# the R session when done: status 0 on success, status 1 on a usage error.
Main <- function() {

  # Get command-line arguments & options
  option_list <- list(
    make_option(c("--first-n-pcs"), type = "numeric", default = 8,
                help = "first n pcs to plot [default %default]",
                metavar = "numeric"),
    make_option(c("--fam-file"), type = "character", default = NA,
                help = "PLINK fam file with case/control defs [default %default]",
                metavar = "character"),
    make_option(c("--ca-color"), type = "character", default = "#D55E00",
                metavar = "character",
                help = "color for case [default %default]"),
    make_option(c("--co-color"), type = "character", default = "#56B4E9",
                metavar = "character",
                help = "color for ctrl [default %default]")
  )
  args <- parse_args(OptionParser(usage = "%prog plink.eigenvec outroot",
                                  option_list = option_list),
                     positional_arguments = TRUE)
  opts <- args$options

  # print param to stdout
  cat("PARAMETERS : \n")
  for (opt_i in names(opts)) {
    if (opt_i != "help") {
      cat(opt_i, ":", opts[[opt_i]], "\n")
    }
  }

  # cancels if nargs != 2; non-zero status so calling pipelines see the failure
  if (length(args$args) != 2) {
    cat("plot_PCA.R <plink.eigenvec>",
        "<outroot>\n")
    q(save = "no", status = 1)
  }

  # Writes args & opts to vars
  eigenvec_path <- args$args[1]
  outroot <- args$args[2]
  n_requested <- opts$`first-n-pcs`
  fam_path <- opts$`fam-file`

  # PCs are plotted in pairs, so at least two are needed
  if (length(n_requested) != 1 || is.na(n_requested) ||
      n_requested < 2 || n_requested != floor(n_requested)) {
    stop("--first-n-pcs must be a whole number >= 2", call. = FALSE)
  }

  # read input data (1000 genomes present)
  df <- read.table(eigenvec_path, stringsAsFactors = FALSE, header = FALSE)
  cat("Total samples in eigenvec file :",
      nrow(df), "\n")
  n_available <- ncol(df) - 2
  if (n_available < 2) {
    stop("eigenvec file must contain FID, IID and at least two PC columns",
         call. = FALSE)
  }
  colnames(df) <- c("FID", "IID", paste0("PC", seq_len(n_available)))

  # load case/control classifs if fam file provided
  phe_levels <- c("OCD", "CTRL", "undefined")
  if (!is.na(fam_path)) {
    fam <- read.table(fam_path, stringsAsFactors = FALSE)
    fam <- fam[, c(2, 6)]
    colnames(fam) <- c("IID", "PHE")
    # NOTE(review): the join keys on IID only (FID is ignored); confirm that
    # IIDs are unique across families in the inputs.
    df <- merge(fam, df, by = "IID")
    if (nrow(df) == 0) {
      message("WARNING: no IID is shared between the fam and eigenvec files")
    }
    # Map phenotype codes to labels. %in% never yields NA, so missing or
    # unexpected codes fall through to "undefined".
    phe_label <- rep("undefined", nrow(df))
    phe_label[df$PHE %in% 2] <- "OCD"
    phe_label[df$PHE %in% 1] <- "CTRL"
    # "undefined" must be a factor level, otherwise those rows become NA
    df$PHE <- factor(phe_label, levels = phe_levels)
    write.table(df,
                file = paste0(outroot, ".eigenvec_phe.tsv"),
                row.names = FALSE, col.names = TRUE,
                sep = "\t", quote = FALSE)
  } else {
    df$PHE <- factor(rep("undefined", nrow(df)), levels = phe_levels)
  }

  # define PCs to use (never more than the file provides)
  n_pcs <- min(n_requested, n_available)
  if (n_pcs < n_requested) {
    message("WARNING: only ", n_available, " PCs in eigenvec file; ",
            "plotting PC1-", n_pcs)
  }
  pcs <- paste0("PC", seq_len(n_pcs))
  n_pairs <- n_pcs %/% 2
  if (n_pcs %% 2 == 1) {
    message("WARNING: odd number of PCs; ", pcs[n_pcs], " is not plotted")
  }

  # "undefined" gets an explicit grey (the colour ggplot2 uses by default for
  # values missing from a manual scale) so the plot does not rely on that
  # default.
  colors <- c("OCD" = opts$`ca-color`, "CTRL" = opts$`co-color`,
              "undefined" = "grey50")

  # Scatter plot of one PC against another, coloured by phenotype.
  plot_pc_pair <- function(x_pc, y_pc) {
    ggplot(df, aes(x = .data[[x_pc]], y = .data[[y_pc]],
                   fill = PHE, color = PHE)) +
      geom_point(shape = ".", alpha = 0.5, show.legend = FALSE) +
      scale_colour_manual(values = colors) +
      labs(x = x_pc, y = y_pc)
  }

  # plot consecutive PC pairs (PC1/PC2, PC3/PC4, ...) with ggplot2
  pair_plots <- lapply(seq_len(n_pairs), function(i) {
    plot_pc_pair(pcs[2 * i - 1], pcs[2 * i])
  })

  # Two panels per row, 3.5in per panel: the default 8 PCs give the same
  # 2x2 grid on a 7x7in page as before.
  n_col <- min(2, n_pairs)
  n_row <- ceiling(n_pairs / n_col)
  # The file name keeps the requested value so callers can predict the path.
  pdf(paste0(outroot, ".PC1-", n_requested, ".pdf"),
      width = 3.5 * n_col, height = 3.5 * n_row)
  # always close the device, even if drawing fails
  tryCatch(
    grid.arrange(grobs = pair_plots, ncol = n_col, nrow = n_row),
    finally = dev.off()
  )

  q(save = "no", status = 0)

}

# Run the command-line entry point only when executed as a script
# (e.g. via Rscript), not when sourced in an interactive session.
if (!interactive()) {
  Main()
}

