# Author: Jike Xuezhang (Bilibili / WeChat official account)
# Start from a clean workspace: remove every object in the current environment.
rm(list = ls())
setwd("D:/R_do2/SingleB/1_Single_data/")   # set the working directory; relative file paths below resolve against it
# For package installation problems, see the basics course
library(Seurat) # v4
library(tidyverse)

# GSE167297: one count-matrix file per sample (normal, superficial and deep
# samples for patients 1, 3 and 4, as encoded in the file names).
# The vector names are the variable names each table is bound to.
count_files <- c(
  dat1 = "GSM5101013_Pt1_Normal_CountMatrix.txt.gz",
  dat2 = "GSM5101014_Pt1_Superficial_CountMatrix.txt.gz",
  dat3 = "GSM5101015_Pt1_Deep_CountMatrix.txt.gz",
  dat4 = "GSM5101018_Pt3_Normal_CountMatrix.txt.gz",
  dat5 = "GSM5101019_Pt3_Superficial_CountMatrix.txt.gz",
  dat6 = "GSM5101020_Pt3_Deep_CountMatrix.txt.gz",
  dat7 = "GSM5101021_Pt4_Normal_CountMatrix.txt.gz",
  dat8 = "GSM5101022_Pt4_Superficial_CountMatrix.txt.gz",
  dat9 = "GSM5101023_Pt4_Deep_CountMatrix.txt.gz"
)

# Read every file with read.table() defaults and create dat1 ... dat9 in the
# current environment, because the following steps refer to them by name.
invisible(list2env(lapply(count_files, read.table), envir = environment()))


# Wrap each count table in its own Seurat object; the project label is the
# GEO sample accession, which is what orig.ident is compared against below.
scRNA1 <- CreateSeuratObject(counts = dat1, project = "GSM5101013")
scRNA2 <- CreateSeuratObject(counts = dat2, project = "GSM5101014")
scRNA3 <- CreateSeuratObject(counts = dat3, project = "GSM5101015")
scRNA4 <- CreateSeuratObject(counts = dat4, project = "GSM5101018")
scRNA5 <- CreateSeuratObject(counts = dat5, project = "GSM5101019")
scRNA6 <- CreateSeuratObject(counts = dat6, project = "GSM5101020")
scRNA7 <- CreateSeuratObject(counts = dat7, project = "GSM5101021")
scRNA8 <- CreateSeuratObject(counts = dat8, project = "GSM5101022")
scRNA9 <- CreateSeuratObject(counts = dat9, project = "GSM5101023")

# Merge all nine samples into a single object.
other_samples <- list(
  scRNA2, scRNA3, scRNA4, scRNA5,
  scRNA6, scRNA7, scRNA8, scRNA9
)
scRNA <- merge(x = scRNA1, y = other_samples)

# Cells from the three "Normal" samples are labelled Control, all others Tumor.
control_samples <- c("GSM5101013", "GSM5101018", "GSM5101021")
is_control <- scRNA@meta.data$orig.ident %in% control_samples
scRNA@meta.data$group <- ifelse(is_control, "Control", "Tumor")


# Step 1: data filtering
# Add two per-cell metadata columns: pMT, the percentage of counts from
# features matching "^MT-" (mitochondrial genes), and pHB, the percentage from
# features matching "^HBA|^HBB" (haemoglobin genes).
scRNA <- scRNA %>%
  PercentageFeatureSet(pattern = "^MT-", col.name = "pMT") %>%
  PercentageFeatureSet(pattern = "^HBA|^HBB", col.name = "pHB")

# Plot the QC metrics before filtering (log scale, individual points hidden).
qc_metrics <- c("nCount_RNA", "nFeature_RNA", "pMT", "pHB")
VlnPlot(scRNA, features = qc_metrics, log = TRUE, pt.size = 0)

# Filtering thresholds.
# Tune these for your own data; there is no fixed standard.
nFeature_lower <- 200
nFeature_upper <- 5000
nCount_lower <- 200
nCount_upper <- 30000
pMT1 <- 30 # important parameter: upper bound on the mitochondrial percentage
pHB1 <- 5  # upper bound on the haemoglobin percentage

# Filter: keep only cells that lie strictly inside every threshold.
scRNA <- subset(
  scRNA,
  subset = nFeature_RNA > nFeature_lower & nFeature_RNA < nFeature_upper &
    nCount_RNA > nCount_lower & nCount_RNA < nCount_upper &
    pMT < pMT1 & pHB < pHB1
)

# Re-draw the same QC plot on the filtered cells.
qc_metrics_filtered <- c("nCount_RNA", "nFeature_RNA", "pMT", "pHB")
VlnPlot(scRNA, features = qc_metrics_filtered, log = TRUE, pt.size = 0)

# Step 2: normalization
# Step 3: dimensionality reduction and clustering
# Log-normalize, then select 2000 highly variable genes with the "vst" method.
scRNA <- scRNA %>%
  NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000)

# Scale only the highly variable genes and run PCA on that same gene set.
scale.genes <- VariableFeatures(scRNA)
scRNA <- scRNA %>%
  ScaleData(features = scale.genes) %>%
  RunPCA(features = scale.genes)

# PCA embedding coloured by sample, to inspect sample-to-sample differences.
DimPlot(scRNA, reduction = "pca", group.by = "orig.ident")

# Step 4: batch-effect removal
# For the workflow without batch correction, see
# "Single-cell series 3: dimensionality reduction, clustering and cell annotation"
# https://www.bilibili.com/video/BV1ew41127x2/?share_source=copy_web&vd_source=8965da06b77f43cc5fe07b89415b1cbc

library(harmony)
# Run Harmony with the sample label (orig.ident) as the grouping variable.
scRNA <- scRNA %>% RunHarmony(group.by.vars = "orig.ident")
DimPlot(scRNA, reduction = "harmony", group.by = "orig.ident")

# Dimensions of the harmony reduction used for neighbours, clusters and UMAP.
pc.num <- 1:15
scRNA <- scRNA %>%
  FindNeighbors(dims = pc.num, reduction = "harmony") %>%
  FindClusters(resolution = 1.2)

# UMAP visualization, coloured first by cluster and then by sample
scRNA <- RunUMAP(scRNA, dims = pc.num, reduction = "harmony")
DimPlot(scRNA, reduction = "umap", label = TRUE, group.by = "seurat_clusters")
DimPlot(scRNA, reduction = "umap", label = TRUE, group.by = "orig.ident")

# Checkpoint: write the processed object to disk, clear the workspace, then
# reload the object so the annotation step starts from the saved file.
save(scRNA,file = "sc_pre.rda")
rm(list = ls())
load("sc_pre.rda")

# Step 5: cell annotation (manual annotation)

# Marker genes grouped by the cell type they are used to recognise.
marker_sets <- list(
  T_cells           = c("CD3D", "CD3E"),
  NK_cells          = c("NKG7"),
  B_cells           = c("BANK1", "CD79A"),
  Plasma_cells      = c("MZB1", "IGHG1"),
  Myeloid_cells     = c("LYZ", "MS4A6A"),
  Mast_cells        = c("GATA2", "KIT"),
  Smooth_muscle     = c("MYLK", "ACTA2"),
  Endothelial_cells = c("VWF", "CDH5"),
  Fibroblasts       = c("PDGFRB", "LUM"),
  Epithelial_cells  = c("EPCAM", "KRT19"),
  Cycling_cells     = c("MKI67", "TOP2A")
)
# Flatten to a plain character vector, keeping the order above.
cell_mark <- unlist(marker_sets, use.names = FALSE)

# Dot plot of every marker across the clusters, markers on the vertical axis.
plot2 <- DotPlot(
  scRNA,
  features = cell_mark,
  group.by = "seurat_clusters"
) +
  coord_flip()
plot2

# Manual cluster -> cell-type assignment, keyed by cluster id.
cluster_celltype <- c(
  "0"  = "B cells",
  "1"  = "B cells",
  "2"  = "T/NK cells",
  "3"  = "T/NK cells",
  "4"  = "Plasma cells",
  "5"  = "T/NK cells",
  "6"  = "T/NK cells",
  "7"  = "T/NK cells",
  "8"  = "Endothelial cells",
  "9"  = "Myeloid cells",
  "10" = "Epithelial cells",
  "11" = "Epithelial cells",
  "12" = "Myeloid cells",
  "13" = "Fibroblasts",
  "14" = "Myeloid cells",
  "15" = "Mast cells",
  "16" = "Plasma cells",
  "17" = "Plasma cells",
  "18" = "Cycling cells",
  "19" = "Epithelial cells",
  "20" = "Smooth muscle cells"
)

# Look the cell type up by cluster *name*. If seurat_clusters is a factor (as
# Seurat normally stores it), indexing with it directly would use the integer
# level codes instead of the labels, which only gives the right answer while
# the level order happens to match the order of the vector above.
cluster_ids <- as.character(scRNA@meta.data$seurat_clusters)

# Clustering output can change between runs/versions; report clusters that
# have no entry above instead of silently annotating them as NA.
unmapped <- setdiff(unique(cluster_ids), names(cluster_celltype))
if (length(unmapped) > 0) {
  warning(
    "No cell type assigned for cluster(s): ",
    paste(unmapped, collapse = ", "),
    call. = FALSE
  )
}

scRNA@meta.data$cell_type <- unname(cluster_celltype[cluster_ids])

DimPlot(scRNA, reduction = "umap", label = TRUE, group.by = "cell_type")
# Make the annotated cell type the active identity of every cell.
Idents(scRNA) <- scRNA@meta.data$cell_type

save(scRNA, file = "sc_anno.rda")

# Save the results first; the visualizations are polished together below
# Author: Jike Xuezhang (Bilibili / WeChat official account)
# Second part: start again from a clean workspace and the saved annotated object.
rm(list = ls())
setwd("D:/R_do2/SingleB/1_Single_data/")   # set the working directory
load("sc_anno.rda")
library(tidyverse)
library(Seurat)
library(ggsci)
library(ggplot2)

# First 15 colours of the ggsci D3 "category20" palette; the plots in this
# part of the script all draw their colours from col1.
col1 <- pal_d3("category20")(15)
col1

# One violin plot per QC metric (log scale, individual points hidden).
VlnPlot(scRNA, features = "nCount_RNA", cols = col1, log = TRUE, pt.size = 0)

VlnPlot(scRNA, features = "nFeature_RNA", cols = col1, log = TRUE, pt.size = 0)

VlnPlot(scRNA, features = "pMT", cols = col1, log = TRUE, pt.size = 0)

# Marker genes grouped by the cell type they are used to recognise.
marker_sets <- list(
  T_cells           = c("CD3D", "CD3E"),
  NK_cells          = c("NKG7"),
  B_cells           = c("BANK1", "CD79A"),
  Plasma_cells      = c("MZB1", "IGHG1"),
  Myeloid_cells     = c("LYZ", "MS4A6A"),
  Mast_cells        = c("GATA2", "KIT"),
  Smooth_muscle     = c("MYLK", "ACTA2"), # smooth muscle (SM)
  Endothelial_cells = c("VWF", "CDH5"),
  Fibroblasts       = c("PDGFRB", "LUM"),
  Epithelial_cells  = c("EPCAM", "KRT19"),
  Cycling_cells     = c("MKI67", "TOP2A")
)
# Flatten to a plain character vector, keeping the order above.
cell_mark <- unlist(marker_sets, use.names = FALSE)

# Marker display: dot plot across clusters on a grey-to-red colour scale
dot_colours <- c("grey90", "#D62728FF")
DotPlot(
  scRNA,
  features = cell_mark,
  group.by = "seurat_clusters",
  cols = dot_colours
) +
  coord_flip()

# Expression of a single marker (CD3E) drawn on the UMAP embedding
FeaturePlot(
  scRNA,
  features = "CD3E",
  reduction = "umap",
  cols = c("lightgray", "#D62728FF"),
  label = FALSE
)

# Proportion display
# Cross-tabulate sample (orig.ident) against cell type. margin = 2 normalises
# each cell-type column, so every value is the fraction of that cell type's
# cells that come from a given sample.
Cellratio <- table(scRNA$orig.ident, scRNA$cell_type) %>%
  prop.table(margin = 2)
Cellratio

# Long format: Var1 = sample, Var2 = cell type, Freq = proportion
Cellratio <- as.data.frame(Cellratio)

# Horizontal stacked bars: one bar per cell type, filled by sample.
ratio_text <- element_text(size = 12, colour = "black")
ggplot(Cellratio, aes(x = Var2, y = Freq, fill = Var1)) +
  geom_col(width = 0.8) +
  scale_fill_manual(values = col1) +
  theme_bw() +
  labs(x = "Cell type", y = "Ratio") +
  coord_flip() +
  theme(
    legend.title = element_blank(),
    axis.text.y = ratio_text,
    axis.text.x = ratio_text
  )


# Pie chart of the overall cell-type composition.
# Count the cells of each type once and derive both names and sizes from it.
type_counts <- table(scRNA$cell_type)
type_names <- names(type_counts)
ratio <- as.numeric(type_counts)

# Slice labels: "<cell type> (<percentage rounded to 2 decimals>%)"
percent <- round(ratio / sum(ratio) * 100, 2)
pielabel <- paste0(type_names, " (", percent, "%)")

pie(
  ratio,
  labels = pielabel,
  radius = 1.0,
  clockwise = TRUE,
  main = "cell_type",
  border = "white",
  col = col1
)



library(ggthemes)
library(tidydr)

# Cell coordinates: UMAP embedding of every cell plus its annotated cell type.
umap <- scRNA@reductions$umap@cell.embeddings %>%
  as.data.frame() %>%
  cbind(cell_type = scRNA@meta.data$cell_type)

# Label coordinates: the median UMAP position of each cell type.
df.m <- umap[, c(1:3)] %>%
  group_by(cell_type) %>%
  summarise(UMAP_1 = median(UMAP_1), UMAP_2 = median(UMAP_2))

ggplot(umap, aes(x = UMAP_1, y = UMAP_2, col = cell_type)) +
  geom_point(size = 1, shape = 16) +
  scale_color_manual(values = col1) +
  labs(x = "UMAP1", y = "UMAP2") +
  ggrepel::geom_label_repel(
    aes(label = cell_type),
    data = df.m,
    size = 4,
    label.size = 1,
    segment.color = NA
  ) +
  # theme_dr() is a complete theme: adding it replaces whatever theme settings
  # were added before it. It therefore has to come first, with the individual
  # tweaks after it; otherwise the grid/border/axis blanking is discarded.
  theme_dr() +
  theme(
    panel.grid = element_blank(),
    panel.border = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    legend.position = "none"
  )

#b站 吉克学长
