#setwd('/pub1/data/mg_projects/projects/web_script/R/')
library(optparse)
# Command-line options understood by this script. Both are optional and fall
# back to the hard-coded default paths below.
#   -i / --infile  : path of the input file; it is parsed as JSON further down.
#   -o / --outfile : output folder that receives normalize.txt and run.log.
option_list <- list(
  make_option(
    opt_str = c("-i", "--infile"),
    action = "store",
    type = "character",
    default = "/pub1/data/mg_projects/projects/web_script/tool_runing/8bd5d9dd2ec97a191034afa8256e06d2/input.json",
    help = "Input a exp file path!"
  ),
  make_option(
    opt_str = c("-o", "--outfile"),
    action = "store",
    type = "character",
    default = "/pub1/data/mg_projects/projects/web_script/web_file_catche/runing/dae42f7681eca28355c6ba9105a3b5ff",
    help = "Input a outfolder path!"
  )
)
# -----------------------------------------------------------------------------
# Main pipeline
#
# 1. Parse the command line and read the JSON parameter file (--infile).
# 2. Load the tab-separated expression table named by `exp_path`; the first
#    column holds the row names, the remaining columns hold the values.
# 3. Drop columns, then rows, whose fraction of NA (cut*_Z == 1) or zero
#    (otherwise) cells is not below cut*_P percent, or whose sd is not > 0.
# 4. Optionally impute missing values with impute::impute.knn (isKNN = k).
# 5. Optionally normalize (normMethod: Q, M, X, L, ZC, ZR, Z; else none).
# 6. Collapse rows sharing a name (mean / med / max / min) and write
#    <outfile>/normalize.txt.
# 7. Always write the collected log lines to <outfile>/run.log.
# -----------------------------------------------------------------------------

# Fraction of cells in each row (margin = 1) or column (margin = 2) of `x`
# that are NA (count_na = TRUE) or exactly zero (count_na = FALSE).
flagged_fraction <- function(x, margin, count_na) {
  m <- as.matrix(x)
  # Missing cells are never counted as zeros. A bare sum(x == 0) would turn
  # the whole fraction into NA as soon as one value is missing, and the
  # row/column would then be dropped regardless of the threshold.
  flagged <- if (count_na) is.na(m) else (!is.na(m) & m == 0)
  if (margin == 1) {
    rowSums(flagged) / ncol(m)
  } else {
    colSums(flagged) / nrow(m)
  }
}

logs <- c()
# Defined up front so the log-writing step below can tell whether option
# parsing succeeded.
opt <- NULL

# The block returns the accumulated log lines; the error handler returns the
# lines collected so far plus the error message. (Assigning to `logs` inside
# the handler function would only modify a local copy.)
logs <- tryCatch({
  opt <- parse_args(OptionParser(option_list = option_list, usage = "GEO Data press"))
  logs <- c(logs, paste0('run knn_norm.R-', basename(opt$outfile)))

  library(jsonlite)
  # stream_in() opens and closes the connection but does not destroy it, so
  # release it explicitly.
  json_con <- file(opt$infile)
  data <- tryCatch(
    jsonlite::stream_in(json_con, pagesize = 100),
    finally = close(json_con)
  )

  exp_path <- unlist(data$exp_path)
  # How rows sharing a name are merged: '', mean, max, min, med.
  # An empty value means median.
  mutiRowName <- unlist(data$mutiRowName)
  if (mutiRowName == '') {
    mutiRowName <- 'med'
  }
  isKNN <- as.numeric(unlist(data$isKNN))        # 0 = no imputation, >0 = k for KNN
  cutRow_P <- as.numeric(unlist(data$cutRow_P))  # row threshold, 0-100 (percent)
  cutRow_Z <- as.numeric(unlist(data$cutRow_Z))  # 1 = count NA, otherwise count zeros
  cutCol_P <- as.numeric(unlist(data$cutCol_P))  # column threshold, 0-100 (percent)
  cutCol_Z <- as.numeric(unlist(data$cutCol_Z))  # 1 = count NA, otherwise count zeros
  # Q = quantile, M = median absolute values, L = log2, X = log2(X+1),
  # ZC = column z-score, ZR = row z-score, Z = column then row z-score,
  # anything else = no normalization.
  normMethod <- unlist(data$normMethod)

  dat <- data.table::fread(exp_path, sep = "\t", header = TRUE,
                           stringsAsFactors = FALSE, check.names = FALSE,
                           na.strings = "NA", data.table = FALSE)
  rNames <- dat[, 1]
  # drop = FALSE keeps a data.frame even when only one value column exists.
  dat <- dat[, -1, drop = FALSE]
  logs <- c(logs, paste0('read data row:', nrow(dat), ',col:', ncol(dat)))

  # ---- column filter ----
  c_cnt <- flagged_fraction(dat, margin = 2, count_na = (cutCol_Z == 1))
  c_cnt_sd <- apply(dat, 2, sd, na.rm = TRUE)
  c_inds <- which(c_cnt < cutCol_P / 100 & c_cnt_sd > 0)
  logs <- c(logs, paste0('Delete column:', (ncol(dat) - length(c_inds)), '/', ncol(dat)))

  if (length(c_inds) > 1) {
    dat <- dat[, c_inds]

    # ---- row filter ----
    r_cnt <- flagged_fraction(dat, margin = 1, count_na = (cutRow_Z == 1))
    r_cnt_sd <- apply(dat, 1, sd, na.rm = TRUE)
    r_inds <- which(r_cnt < cutRow_P / 100 & r_cnt_sd > 0)
    logs <- c(logs, paste0('Delete row:', (nrow(dat) - length(r_inds)), '/', nrow(dat)))

    if (length(r_inds) > 1) {
      dat1 <- dat[r_inds, ]
      rNames1 <- rNames[r_inds]
      # Temporary unique row ids so rows can be matched back to their names
      # after imputation.
      row.names(dat1) <- paste0('R', seq_len(nrow(dat1)))

      # ---- optional KNN imputation ----
      if (isKNN > 0) {
        logs <- c(logs, paste0('start KNN K=', isKNN))
        knn <- impute::impute.knn(as.matrix(dat1), k = isKNN,
                                  rowmax = cutRow_P / 100, colmax = cutCol_P / 100,
                                  rng.seed = 362436069)
        ndat <- knn$data
        logs <- c(logs, paste0('filter KNN column:', (ncol(dat1) - ncol(ndat)),
                               ',row:', (nrow(dat1) - nrow(ndat))))
        rNames1 <- rNames1[match(row.names(ndat), row.names(dat1))]
        dat1 <- ndat
      }

      # ---- optional normalization ----
      if (normMethod == 'Q') {
        dat1 <- limma::normalizeQuantiles(dat1)
        logs <- c(logs, paste0('normalize by normalizeQuantiles'))
      } else if (normMethod == 'M') {
        dat1 <- limma::normalizeMedianAbsValues(dat1)
        logs <- c(logs, paste0('normalize by normalizeMedianAbsValues'))
      } else if (normMethod == 'X') {
        dat1 <- log2(dat1 + 1)
        logs <- c(logs, paste0('normalize by log2(X+1)'))
      } else if (normMethod == 'L') {
        dat1 <- log2(dat1)
        logs <- c(logs, paste0('normalize by log2'))
      } else if (normMethod == 'ZC') {
        dat1 <- scale(dat1)
        logs <- c(logs, paste0('normalize by column z-score'))
      } else if (normMethod == 'ZR') {
        dat1 <- t(scale(t(dat1)))
        logs <- c(logs, paste0('normalize by row z-score'))
      } else if (normMethod == 'Z') {
        dat1 <- t(scale(t(scale(dat1))))
        logs <- c(logs, paste0('normalize by column z-score and row z-score'))
      }

      # ---- merge rows that share a name ----
      # mutiRowName is never '' at this point (it was mapped to 'med' above),
      # so duplicated names are always collapsed.
      name_counts <- table(rNames1)
      dup_names <- names(name_counts)[name_counts > 1]
      uniq_names <- names(name_counts)[name_counts == 1]
      logs <- c(logs, paste0('merge muti row name:', length(dup_names), ' by ', mutiRowName))

      # Any value other than mean / med / max falls back to min.
      merge_fun <- switch(mutiRowName, mean = mean, med = median, max = max, min)

      # drop = FALSE keeps the table shape when exactly one unique name is left.
      uniq_rows <- dat1[match(uniq_names, rNames1), , drop = FALSE]
      if (length(dup_names) > 0) {
        # Build all merged rows first and bind once instead of growing a
        # matrix inside a loop.
        merged_rows <- do.call(rbind, lapply(dup_names, function(g) {
          apply(dat1[rNames1 %in% g, , drop = FALSE], 2, merge_fun, na.rm = TRUE)
        }))
        colnames(merged_rows) <- colnames(dat1)
        dat3 <- rbind(merged_rows, uniq_rows)
      } else {
        dat3 <- uniq_rows
      }

      write.table(cbind(Tag = c(dup_names, uniq_names), dat3),
                  file = paste0(opt$outfile, '/normalize.txt'),
                  row.names = FALSE, col.names = TRUE, quote = FALSE, sep = '\t')
      logs <- c(logs, paste0('output row:', nrow(dat3), ',col:', ncol(dat3)))
    } else {
      logs <- c(logs, paste0('Too few rows:<2!'))
    }
  } else {
    logs <- c(logs, paste0('Too few columns:<2!'))
  }

  logs
}, error = function(e) {
  print(conditionMessage(e))
  c(logs, paste0('error:', conditionMessage(e)))
})

# Write the run log whenever the output folder is known. If option parsing
# itself failed there is no destination, and the message was already printed.
if (!is.null(opt$outfile)) {
  write.table(logs, file = paste0(opt$outfile, '/run.log'),
              quote = FALSE, row.names = TRUE, col.names = TRUE, sep = '\t')
}


