

# Read a headerless, whitespace-delimited 6-column CNV BED file
# (chrom, start0, end, locus, copynumber, IID) and keep only the CNVs whose
# span (end - start0) is strictly greater than `min_len` bases.
# A zero-byte file is treated as "no CNVs" instead of aborting in read.table().
read_large_cnvs <- function(path, min_len = 1000000) {
  cols <- c("chrom", "start0", "end", "locus", "copynumber", "IID")
  if (isTRUE(file.size(path) == 0)) {
    return(data.frame(chrom = character(), start0 = numeric(),
                      end = numeric(), locus = character(),
                      copynumber = numeric(), IID = character(),
                      stringsAsFactors = FALSE))
  }
  bed <- read.table(path, header = FALSE, stringsAsFactors = FALSE)
  colnames(bed) <- cols
  # which() drops rows whose length is NA, like subset() would
  bed[which((bed$end - bed$start0) > min_len), , drop = FALSE]
}

# Select the samples of one sex (FEMALE == female_flag) from the metrics table
# and replace any PC1..PC<n_pcs> columns with the sex-specific eigenvectors
# read from `eigenvec_path` (headerless: FID, IID, PC1..PC<n_pcs>).
# Samples without an eigenvector row are dropped by the inner merge on IID.
load_sex_samples <- function(metrics, female_flag, eigenvec_path, n_pcs = 20) {
  pc_cols <- paste0("PC", seq_len(n_pcs))
  samples <- metrics[which(metrics$FEMALE == female_flag), , drop = FALSE]
  samples <- samples[, setdiff(colnames(samples), pc_cols), drop = FALSE]

  evec <- read.table(eigenvec_path, header = FALSE, stringsAsFactors = FALSE)
  colnames(evec) <- c("FID", "IID", pc_cols)
  evec$FID <- NULL

  samples <- merge(samples, evec, by = "IID")
  if (anyDuplicated(samples$IID) > 0) {
    stop("duplicated IID after merging metrics with ", eigenvec_path,
         call. = FALSE)
  }
  samples
}

# Number of rows in `bed` carrying each IID in `iids` (0 when a sample has
# none). IIDs that appear only in `bed` are ignored, so no phantom samples are
# introduced.
count_cnvs_per_sample <- function(iids, bed) {
  per_iid <- table(bed$IID)
  hit <- match(as.character(iids), names(per_iid))
  n_cnv <- as.numeric(per_iid)[hit]
  n_cnv[is.na(n_cnv)] <- 0
  n_cnv
}

# Fit CASE ~ covars + term by logistic regression and return a one-row
# data.frame holding exp(coefficient) of `term` (i.e. the odds ratio per
# additional CNV) in `lg_est` and its Wald p-value in `lg_p`.
# When `term` cannot be estimated (e.g. it is constant in this group, so glm
# aliases it and summary() omits its row) both values are NA.
fit_cnv_burden <- function(samples, group, covars,
                           term = "n_cnv_gt_1mb_chrX") {
  mdl_str <- paste0("CASE~", paste(c(covars, term), collapse = "+"))
  fit <- glm(as.formula(mdl_str), data = samples, family = binomial)
  coefs <- summary(fit)$coefficients

  est <- NA_real_
  p_val <- NA_real_
  if (term %in% rownames(coefs)) {
    est <- exp(coefs[term, 1])
    p_val <- coefs[term, 4]
  }
  data.frame(group = group, category = term, lg_est = est, lg_p = p_val,
             stringsAsFactors = FALSE)
}

# Entry point: sex-stratified association between case status and the number
# of chrX CNVs longer than 1 Mb carried by each sample.
#
# args: character vector of exactly six paths (defaults to the script's
#       command-line arguments):
#         1. case/control metrics table (header; needs IID, FEMALE, CASE,
#            n_cnv_0)
#         2. male-only PCA eigenvec file   (no header: FID, IID, PC1..PC20)
#         3. female-only PCA eigenvec file (same layout)
#         4. male chrX CNV BED file   (no header, 6 columns, last is IID)
#         5. female chrX CNV BED file (same layout)
#         6. output TSV path
#
# Writes a tab-separated table with columns group, category, lg_est, lg_p
# (female_only row first, then male_only) and returns it invisibly.
# Stops with a usage error when the number of arguments is not six.
Main <- function(args = commandArgs(trailingOnly = TRUE)) {
  if (length(args) != 6) {
    stop("usage: chrX_analysis.R <caco.metrics.tsv> ",
         "<caco.male.PCA.eigenvec> <caco.female.PCA.eigenvec> ",
         "<chrX.male.bed> <chrX.female.bed> <out.tsv>",
         call. = FALSE)
  }
  caco.metrics.tsv <- args[1]
  caco.male.PCA.eigenvec <- args[2]
  caco.female.PCA.eigenvec <- args[3]
  chrX.male.bed <- args[4]
  chrX.female.bed <- args[5]
  out.tsv <- args[6]

  # read metrics and fail early if a column the model relies on is absent
  df <- read.table(caco.metrics.tsv, header = TRUE, stringsAsFactors = FALSE)
  required <- c("IID", "FEMALE", "CASE", "n_cnv_0")
  absent <- setdiff(required, colnames(df))
  if (length(absent) > 0) {
    stop("metrics file is missing column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # split by sex and swap in the sex-specific principal components
  df.m <- load_sex_samples(df, 0, caco.male.PCA.eigenvec)
  df.f <- load_sex_samples(df, 1, caco.female.PCA.eigenvec)

  # per-sample count of chrX CNVs > 1 Mb (all copy-number states are counted)
  df.m$n_cnv_gt_1mb_chrX <-
    count_cnvs_per_sample(df.m$IID, read_large_cnvs(chrX.male.bed))
  df.f$n_cnv_gt_1mb_chrX <-
    count_cnvs_per_sample(df.f$IID, read_large_cnvs(chrX.female.bed))

  # same covariates for both sexes: PCs 1-4 and n_cnv_0
  covars <- c("PC1", "PC2", "PC3", "PC4", "n_cnv_0")
  out_df <- rbind(fit_cnv_burden(df.f, "female_only", covars),
                  fit_cnv_burden(df.m, "male_only", covars))

  # write results to file
  write.table(out_df,
              file = out.tsv,
              row.names = FALSE, col.names = TRUE, sep = "\t", quote = FALSE)
  invisible(out_df)
}

# Run the analysis only when R is not in an interactive session
# (e.g. when the file is executed with Rscript).
if (!interactive()) {
  Main()
}
