# Clear the global environment and attach the packages used below.
rm(list = ls())
library(tidyverse)
library(maftools)

# Set the working directory; every file path below is relative to it.
setwd("D:/R_do2/SingleB/8_mutation/")

# Mutation table (tab-separated); lines starting with "#" are skipped.
maf <- read_tsv("data_mutations.txt", comment = "#")

# Second input table, read as a plain data.frame
# (presumably expression + clinical data, judging by the file name).
dat <- data.table::fread("TCGA_tpm_mRNA_clini.txt", data.table = FALSE)

# Keep the first row for each patient_ID, use patient_ID as row names,
# and retain only the EPAS1 column.
rt <- dat %>%
  distinct(patient_ID, .keep_all = TRUE) %>%
  column_to_rownames(var = "patient_ID") %>%
  select("EPAS1")

# Append "-01" to every patient ID so the row names can be matched against
# the Tumor_Sample_Barcode values of the mutation table.
tumsam <- paste0(rownames(rt), "-01")
rownames(rt) <- tumsam

# Samples present in both the EPAS1 table and the mutation table.
comsam <- intersect(tumsam, unique(maf$Tumor_Sample_Barcode))

# Columns carried over into the reduced mutation table.
label <- c(
  "Tumor_Sample_Barcode", "Hugo_Symbol",
  "Chromosome", "Start_Position", "End_Position",
  "Variant_Classification", "Variant_Type",
  "Reference_Allele", "Tumor_Seq_Allele1", "Tumor_Seq_Allele2"
)

# Keep only the rows belonging to the shared samples and only the columns
# listed in `label`, then write the result as a tab-separated file.
maf <- maf[maf$Tumor_Sample_Barcode %in% comsam, label]
write.table(
  maf,
  file = "data_mutations1.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

# Restrict the EPAS1 table to the shared samples, in `comsam` order.
rt <- rt[comsam, , drop = FALSE]

# Split samples at the median EPAS1 value: strictly above the median is
# "high", everything else is "low". `na.rm = TRUE` keeps a single missing
# EPAS1 value from turning the median (and therefore every group label)
# into NA; samples with a missing value get an NA group and fall into
# neither subset below.
rt$group <- ifelse(rt$EPAS1 > median(rt$EPAS1, na.rm = TRUE), "high", "low")

# Sample IDs of the high and low groups. which() drops NA group labels.
hsam <- rownames(rt)[which(rt$group == "high")]
lsam <- rownames(rt)[which(rt$group == "low")]

# Mutation records of each group. `rt` is already restricted to `comsam`,
# so no further intersection with `comsam` is needed.
maf.high <- maf[maf$Tumor_Sample_Barcode %in% hsam, ] # high-EPAS1 MAF data
maf.low <- maf[maf$Tumor_Sample_Barcode %in% lsam, ]  # low-EPAS1 MAF data

# Write one tab-separated MAF file per group.
write.table(maf.high, "data_mutations_extended_H_GENE1_modified.txt",
            sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)
write.table(maf.low, "data_mutations_extended_L_GENE1_modified.txt",
            sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)

library(ggsci)

# Load the high-group mutation file written earlier in this script.
maf1 <- read.maf("data_mutations_extended_H_GENE1_modified.txt",
                 removeDuplicatedVariants = FALSE)

# One colour per variant class. The palette length is derived from the class
# vector so that every colour has a name; requesting more colours than names
# would leave trailing entries with NA names.
variant_classes <- c(
  "Frame_Shift_Del", "Missense_Mutation", "Nonsense_Mutation",
  "Frame_Shift_Ins", "In_Frame_Ins", "Splice_Site", "In_Frame_Del",
  "Nonstop_Mutation", "Translation_Start_Site", "Multi_Hit"
)
col1 <- pal_d3("category20")(length(variant_classes))
names(col1) <- variant_classes

# Show the class-to-colour mapping for inspection.
print(col1)

# Oncostrip of the high group (top = 15).
oncostrip(maf = maf1, colors = col1, top = 15)

# Same plot for the low group, using the same colour mapping.
maf2 <- read.maf("data_mutations_extended_L_GENE1_modified.txt",
                 removeDuplicatedVariants = FALSE)

oncostrip(maf = maf2, colors = col1, top = 15)


# Re-read the reduced mutation table (all shared samples) as a MAF object.
# Note: this replaces the data frame previously stored in `maf`.
maf <- read.maf("data_mutations1.txt",
                removeDuplicatedVariants = FALSE)

# Per-sample tumour mutation burden. logScale = TRUE requests a
# log-transformed result; the plot further down uses the
# `total_perMB_log` column of this table.
TMB <- tmb(maf = maf,
           captureSize = 50,
           logScale = TRUE)
write.csv(TMB, file = "TMB_STAD.csv", row.names = FALSE)
colnames(TMB)

# Index the TMB table by sample and reorder the EPAS1 table to match.
TMB <- TMB %>% column_to_rownames("Tumor_Sample_Barcode")
rt <- rt[rownames(TMB), , drop = FALSE]

# Stop if the two tables are not row-aligned: cbind() pairs rows purely by
# position, so a mismatch would silently attach the wrong group to a sample.
stopifnot(identical(rownames(TMB), rownames(rt)))

TMB2 <- cbind(rt, TMB)

library(ggplot2)
library(cowplot)
library(ggpubr)

# Violin + box plot of total_perMB_log per EPAS1 group, with the result of
# ggpubr::stat_compare_means() added on top.
ggplot(TMB2, aes(x = group, y = total_perMB_log, fill = group)) +
  geom_violin(
    position = position_dodge(width = 0.8),
    width = 0.9,
    alpha = 0.5,
    size = 0.2
  ) +
  geom_boxplot(
    position = position_dodge(width = 0.8),
    width = 0.3,
    alpha = 1,
    size = 0.5,
    outlier.colour = NA # hide outlier points
  ) +
  scale_fill_manual(values = c("#6387BB", "#DA635C")) +
  theme_cowplot() +
  theme(legend.position = "none") + # must follow theme_cowplot() to take effect
  stat_compare_means()

#吉克学长
