前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >单细胞——从降维聚类分群、细胞命名、到批量富集分析,一文打通GSE104154博来霉素小鼠模型单细胞数据

单细胞——从降维聚类分群、细胞命名、到批量富集分析,一文打通GSE104154博来霉素小鼠模型单细胞数据

作者头像
生信菜鸟团
发布2023-09-09 16:52:58
2.1K0
发布2023-09-09 16:52:58
举报
文章被收录于专栏:生信菜鸟团生信菜鸟团

1.降维聚类分群

第一部分的正文内容从这里开始。


首先从geo下载数据,并解压拿到rawdata

代码语言:javascript
复制
#设置工作路径
path="/home/data/t040413/ipf/GSE104154_scRNA-seq_fibrotic MC_bleomycin"
dir.create(path)
setwd(path)
getwd()
list.files()
代码语言:javascript
复制

#read表达矩阵
raw_counts=read.csv("/home/data/t040413/ipf/GSE104154_scRNA-seq_fibrotic MC_bleomycin/GSE104154_d0_d21_sma_tm_Expr_raw/GSE104154_d0_d21_sma_tm_Expr_raw.csv"
                      )
head(raw_counts)[1:4,1:4]
counts=raw_counts[,-1]
head(counts)[1:4,1:4]
rownames(counts)=counts$symbol

head(raw_counts)[1:4,1:4]

注意:这个数据使用了ensemble id作为基因名,后续需要换为gene symbol

代码语言:javascript
复制
head(raw_counts)[1:4,1:4]
counts=raw_counts[,-2]
head(counts)[1:4,1:4]
rownames(counts)=counts$id
head(counts)[,1:5]
counts=counts[,-1]
head(counts)[,1:5]

创建seurat对象

代码语言:javascript
复制
library(Seurat)
#https://zhuanlan.zhihu.com/p/385206713
rawdata=CreateSeuratObject(counts = counts,project = "blem",assay = "RNA")

这里的seurat对象行名是ensemble id,改为gene symbol 比较容易看,如何改呢?

代码语言:javascript
复制
#去重  取得唯一基因
ids=raw_counts[,1:2]
head(ids)
colnames(ids)= c('ENSEMBL','SYMBOL')
head(ids)
dim(ids) # [1] 16428 
ids=na.omit(ids)
dim(ids) # [1] 15504 
length(unique(ids$SYMBOL)) # [1] 15494 
# 这里的关系超级乱,互相之间都不是一对一 
# 凡是混乱的ID一律删除即???
ids=ids[!duplicated(ids$SYMBOL),]
ids=ids[!duplicated(ids$ENSEMBL),]
dim(ids)
pos=match(ids$ENSEMBL,rownames(rawdata) )
hp_sce=rawdata[pos,]
hp_sce

把seurat对象中的ensemble id 改为gene symble

代码语言:javascript
复制
#创建函数 改名
RenameGenesSeurat <- function(obj , 
                              newnames ) { 
  # Replace gene names in different slots of a Seurat object. Run this before integration. Run this before integration. 
  # It only changes obj@assays$RNA@counts, @data and @scale.data.
  print("Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.")
  RNA <- obj@assays$RNA
 
  if (nrow(RNA) == length(newnames)) {
    if (length(RNA@counts)) RNA@counts@Dimnames[[1]]            <- newnames
    if (length(RNA@data)) RNA@data@Dimnames[[1]]                <- newnames
    if (length(RNA@scale.data)) RNA@scale.data@Dimnames[[1]]    <- newnames
  } else {"Unequal gene sets: nrow(RNA) != nrow(newnames)"}
  obj@assays$RNA <- RNA
  return(obj)
}

hp_sce=RenameGenesSeurat(obj = hp_sce, 
                      newnames = ids$SYMBOL)
getwd()
#save(hp_sce,file = 'first_sce.Rdata')
hp_sce

改名成功

代码语言:javascript
复制
rownames(hp_sce)[grepl('^mt-',rownames(hp_sce))]
代码语言:javascript
复制
rownames(hp_sce)[grepl('^Rp[sl]',rownames(hp_sce))]
hp_sce[["percent.mt"]] <- PercentageFeatureSet(hp_sce, pattern = "^mt-")
fivenum(hp_sce[["percent.mt"]][,1])
rb.genes <- rownames(hp_sce)[grep("^Rp[sl]",rownames(hp_sce))]
C<-GetAssayData(object = hp_sce, slot = "counts")
percent.ribo <- Matrix::colSums(C[rb.genes,])/Matrix::colSums(C)*100
hp_sce <- AddMetaData(hp_sce, percent.ribo, col.name = "percent.ribo")

getwd()
plot1 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))
代码语言:javascript
复制
查看质控
rownames(hp_sce)[grepl('^mt-',rownames(hp_sce))]
rownames(hp_sce)[grepl('^Rp[sl]',rownames(hp_sce))]
hp_sce[["percent.mt"]] <- PercentageFeatureSet(hp_sce, pattern = "^mt-")
fivenum(hp_sce[["percent.mt"]][,1])
rb.genes <- rownames(hp_sce)[grep("^Rp[sl]",rownames(hp_sce))]
C<-GetAssayData(object = hp_sce, slot = "counts")
percent.ribo <- Matrix::colSums(C[rb.genes,])/Matrix::colSums(C)*100
hp_sce <- AddMetaData(hp_sce, percent.ribo, col.name = "percent.ribo")

getwd()
plot1 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(hp_sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
CombinePlots(plots = list(plot1, plot2))

VlnPlot(hp_sce, features = c("percent.ribo", "percent.mt"), ncol = 2)
VlnPlot(hp_sce, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)
VlnPlot(hp_sce, features = c("percent.ribo", "nCount_RNA"), ncol = 2)
hp_sce
#为了演示celltypeist,把物种改为human的gene symbol
hp_sce=RenameGenesSeurat(obj = hp_sce, 
                            newnames = toupper(rownames(hp_sce)))
hp_sce1 <- subset(hp_sce, subset = nFeature_RNA > 200 & nCount_RNA > 1000 & percent.mt < 20)
hp_sce1

设置组别

代码语言:javascript
复制
sce=hp_sce1
sce
colnames(sce)
grep(colnames(sce),pattern = ".1")
grep(colnames(sce),pattern = ".2")
sce@meta.data$stim <-c(rep("PBS", length(grep("1$", sce@assays$RNA@counts@Dimnames[[2]]))), 
                       rep("PBS", length(grep("2$", sce@assays$RNA@counts@Dimnames[[2]]))),
                       rep("PBS", length(grep("3$", sce@assays$RNA@counts@Dimnames[[2]]))),
                       rep("Bleomycin", length(grep("4$", sce@assays$RNA@counts@Dimnames[[2]]))),
                       rep("Bleomycin", length(grep("5$", sce@assays$RNA@counts@Dimnames[[2]]))),
                       rep("Bleomycin", length(grep("6$", sce@assays$RNA@counts@Dimnames[[2]])))
                        ) ## 8186,7947;

table(sce$stim)

library(dplyr)
sce[["RNA"]]@meta.features <- data.frame(row.names = rownames(sce[["RNA"]]))

seurat标准流程 :

代码语言:javascript
复制
All = sce%>%Seurat::NormalizeData(verbose = FALSE) %>%  
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
  ScaleData(verbose = FALSE)
All = RunPCA(All, npcs = 50, verbose = FALSE)

pdf("2_ElbowPlot.pdf")
ElbowPlot(All, ndims = 50)
dev.off()

使用harmony降维聚类分群

代码语言:javascript
复制
library(cowplot)
#All@meta.data$stim <- c(rep("case", length(grep("1$", All@assays$RNA@counts@Dimnames[[2]]))), rep("ctrl", length(grep("2$", All@assays$RNA@counts@Dimnames[[2]])))) ## 8186,7947;
pdf("2_pre_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p1 <- DimPlot(object = All, reduction = "pca", pt.size = .1, group.by = "stim")
p2 <- VlnPlot(object = All, features = "PC_1", group.by = "stim", pt.size = .1)
plot_grid(p1, p2)
dev.off()


##########################run harmony
library(harmony)
All <- All %>% RunHarmony("stim", plot_convergence = TRUE)
harmony_embeddings <- Embeddings(All, 'harmony') 

pdf("2_after_harmony_harmony_plot.pdf")
options(repr.plot.height = 5, repr.plot.width = 12)
p3 <- DimPlot(object = All, reduction = "harmony", pt.size = .1, group.by = "stim")
p4 <- VlnPlot(object = All, features = "harmony_1", group.by = "stim", pt.size = .1)
plot_grid(p3, p4)
dev.off()

#############cluster
library(harmony)
All <- All %>% 
  RunUMAP(reduction = "harmony", dims = 1:30) %>% 
  RunTSNE(reduction = "harmony", dims = 1:30) %>% 
  FindNeighbors(reduction = "harmony", dims = 1:30)
All<-All%>% FindClusters(resolution = 3) %>% identity()

保存 重新加载

代码语言:javascript
复制
#save(All,file ="/home/data/t040413/ipf/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds" )
load("/home/data/t040413/ipf/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds")

计算marker基因

代码语言:javascript
复制
library(Seurat)
DimPlot(All,label = T,reduction = 'tsne')
DimPlot(All,label = T,reduction = 'umap',group.by = 'stim')
getwd()
Disease.markers <- FindAllMarkers(All, min.pct = 0.35, logfc.threshold = 0.35, only.pos = T)
openxlsx::write.xlsx(Disease.markers,file ="G:/silicosis/geo/GSE104154_scRNA-seq_fibrotic MC_bleomycin/markers_for_all.xlsx" )

“ 分群之后,如何给细胞命名呢,可以使用celltypist自动命名,省去人工烦恼。”

02

2.分群之后,如何给细胞命名呢,可以使用celltypist自动命名,省去人工烦恼

此部分的正文内容从这里开始。

代码语言:javascript
复制
#https://mp.weixin.qq.com/s/CdhAp7lO5nRJW4cnLWGwLQ
.libPaths(c("/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2","/home/data/t040413/R/yll/usr/local/lib/R/site-library",  "/usr/local/lib/R/library"))
library(Seurat)
load("/home/data/t040413/ipf/GSE104154_scRNA-seq_fibrotic MC_bleomycin/All_for_clustering.rds")
toupper()

#为了演示celltypeist,把物种改为human的gene symbol
All.merge=All

DimPlot(All.merge,label = T)

#在r中创建如下环境========================================
#conda create -n celltypist python=3.8
#conda activate celltypist

## 两种方法安装 pip or conda
#pip install celltypist  -i  https://pypi.douban.com/simple/  --use-pep517
#mamba install -c bioconda -c conda-forge celltypist
#使用use_condaenv()函数仅在当前R会话中激活指定的Conda环境。如果您希望在多个会话中使用相同的Conda环境,需要在每个会话中都执行相应的use_condaenv()函数调用。
#如果您使用的是conda而不是mamba,请将上述的mamba命令替换为conda命令进行安装。
#use_condaenv("celltypist")  #如果您的Conda环境在默认路径中,您可以省略路径参数,仅提供环境的名称

library(reticulate)
use_condaenv("/home/data/t040413/anaconda3/envs/celltypist")#celltypist应该是环境的名称,而不是环境文件夹的名称。如果您的Conda环境文件夹名为celltypist,则上述代码应该可以正确激活环境。

## 载入python模块
scanpy = import("scanpy")
celltypist = import("celltypist")
pandas <- import("pandas")
numpy = import("numpy")

#### 下载参考数据集
celltypist$models$download_models(force_update = T) 

library(Seurat)
#library(SeuratData)
library(ggplot2)
library(patchwork)
library(dplyr)
library(stringr)
library(readr)
library(cowplot)
library(reticulate)
1#Step1. 加载上述下载好的参考数据集

# 首先确认用户存放.pkl文件的位置,默认在
celltypist$models$celltypist_path #   [1] "/home/data/t040413/.celltypist"

# 加载所有的参考数据集
model_type = list.files("~/.celltypist/data/models/") 
names(model_type) = str_split(string = model_type,pattern = "\\.", simplify = T)[,1]
model_type
model_type[grep(pattern = "Mouse",model_type)]
names(model_type) 

1.1#根据需要选择,,,,,这里我只选2个模型
model_type=model_type[c("Human_Lung_Atlas","Human_IPF_Lung")]
model_type

1.2#这个model_list非常重要,用于最后不同模型的可视化
model_list = lapply(model_type, function(x){
  celltypist$models$Model$load(model = x)})
model_list
2.#加载数据

#load("/home/data/t040413/ipf/gse132771_colgen_fibroblasts/step1_get_matrix_ipf/All.merge.rds")
ifnb.data=All.merge
seurat.data=All.merge


2.1 #Seurat对象转为celltypist所需要的对象:
adata = scanpy$AnnData(X = numpy$array(as.matrix(t(as.matrix(ifnb.data[['RNA']]@counts)))),
                       obs = pandas$DataFrame(ifnb.data@meta.data),
                       var = pandas$DataFrame(data.frame(gene = rownames(ifnb.data[['RNA']]@counts),
                                                         row.names = rownames(ifnb.data[['RNA']]@counts)))
)


adata_copy <- adata$copy() #in order  to edit array or otherwise Error: ValueError: output array is read-only
scanpy$pp$normalize_total(adata_copy, target_sum = 1e4)
scanpy$pp$log1p(adata_copy)
adata=adata_copy
3.# 细胞亚群预测和可视化
# Add cluster information to adata
adata$obs$seurat_clusters <- ifnb.data$seurat_clusters


# Update the neighborhood graph using cluster information
#scanpy::tl.louvain(adata, key_added = "seurat_clusters")

3.1#根据上面对各个参考数据集的介绍,我这里先用Human_IPF_Lung参考数据集预测看看:
### 1. Human_IPF_Lung
predictions = celltypist$annotate(adata, model = model_list[["Human_IPF_Lung"]], majority_voting = T)
## 把这些信息加入到seurat对象中去
seurat.data  = AddMetaData(seurat.data, predictions$predicted_labels$majority_voting, col.name ="Human_IPF_Lung") 
### 2. Human_Lung_Atlas
predictions = celltypist$annotate(adata, model = model_list[["Human_Lung_Atlas"]], majority_voting = T)
## 把这些信息加入到seurat对象中去
seurat.data  = AddMetaData(seurat.data, predictions$predicted_labels$majority_voting, col.name ="Human_Lung_Atlas") 

head(ifnb.data@meta.data)
head(predictions$predicted_labels)
table(predictions$predicted_labels$majority_voting)
table(predictions$predicted_labels$predicted_labels)

3.2### 3. 可视化
p0 = DimPlot(seurat.data , reduction = "umap",label = T,label.box = T,group.by = "seurat_clusters")+ NoLegend();p0
p1 = DimPlot(seurat.data ,group.by = "Human_IPF_Lung", reduction = "umap", label = TRUE) 
p2 = DimPlot(seurat.data ,group.by = "Human_Lung_Atlas", reduction = "umap", label = TRUE,repel = T) 
p0 + p2 

pdf("celltypist.pdf",height=10,width=15)
print(p0 + p2 )
dev.off()

head(seurat.data@meta.data)

4.# 写函数批量运行
celltypist_vis = function(adata,
                          seurat_Data,
                          model = Immune_All_Low.pkl,
                          title = "Immune_All_Low"){
  ### 4.开始预测
  predictions = celltypist$annotate(adata, model = model, majority_voting = T)
  # predictions$predicted_labels %>% head()
 
  #model=model_type[1]
  #### 5.把这些信息加入到seurat对象中去
  print(paste("运行到这里了__1:",names(model)))
  seurat_Data  = AddMetaData(seurat_Data , predictions$predicted_labels) 
 
  # model=model_list[[1]]
  # model$load(model = model)
  #names(model) 
  print(paste("运行到这里了__2:",names(model), paste(names(model),"_celltype_labels")))
  head(seurat_Data )
  
  seurat_Data  = SetIdent(seurat_Data , value = "predicted_labels") #"majority_voting"
  p.umap = DimPlot(seurat_Data, reduction = "umap", label = TRUE, pt.size = 0.5,label.box = T,repel = T) + 
    NoLegend() + ggtitle(title)
  return(p.umap)
}

### 批量预测
plot.list = list()
for (i in 1:length(model_list)) {
  plot.list[[i]] = celltypist_vis(adata = adata,
                                  seurat_Data = ifnb.data,  ###输入自己的
                                  model = model_list[[i]],
                                  title = names(model_list[i])  )  
  print(paste0("done==plot==",names(model_list)))
 
}  

p.ct = wrap_plots(plot.list,ncol = 4) + p0
p.ct
ggsave(p.ct, filename = "./annotation_celltypist---.jpeg",limitsize = FALSE,
       width = 60,height = 20)

这样就可以依据上面的细胞类型来命名了

代码语言:javascript
复制
#细胞类型命名,这里只是举例子,不是实际的数据
All.merge=RenameIdents(All.merge,
                       "1"="SMC","2"="SMC",
                       "7"="Pericyte",                     
                       "13"="Myofibroblast","14"="Myofibroblast",                  
                       "4"="Myofibroblast","10"="Myofibroblast","0"="Myofibroblast",   
                       "5"="Fibroblast",
 
                       "12"="AT2",
                       "15"="AT1",
                       "21"="Mesothelial",
                       "20"="Goblet",
                       "19"="Ciliated",
 
                       "16"="B Plasma",
                       "22"="Mast",
                       "9"="T/NK",
                       "17"="B cells",
                       "24"="Plasmacytoid DCs",
 
                       "23"="Macrophage",
                       "3"="Macrophage",
                       "11"="DCs","18"="DCs",
                       "8"="Monocyte",
 
                       "6"="Endothelial",
                       "25"="Endothelial"
)

下面部分着重一些常规的画图。

03

批量差异分析 画图 groupgo批量富集分析

此部分的正文内容从这里开始。

1.首先来画曼哈顿图

数据准备:

代码语言:javascript
复制
pbmc$celltype.group=pbmc$celltype.stim
pbmc$celltype=pbmc$seurat_clusters
Idents(pbmc)=pbmc$celltype.group

cellfordeg<-levels(pbmc$celltype)
data_forplot=data.frame()

for(i in 1:length(cellfordeg)){ #
  CELLDEG <- FindMarkers(pbmc, ident.1 = paste0(cellfordeg[i],"_PT"), ident.2 = paste0(cellfordeg[i],"_sham"), verbose = FALSE)
  CELLDEG$gene=rownames(CELLDEG)
  CELLDEG$cluster=cellfordeg[i]
  write.csv(CELLDEG,paste0(cellfordeg[i],"PT_VS_Sham.CSV"))
  data_forplot=rbind(CELLDEG,data_forplot)
 
}
head(data_forplot)

对上述数据进行可视化

代码语言:javascript
复制
.libPaths(c( "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2","/home/data/t040413/R/yll/usr/local/lib/R/site-library",  "/home/data/refdir/Rlib/", "/usr/local/lib/R/library"))
library(Seurat)
library(scRNAtoolVis)
library(colourpicker)
markers_for_all_3groups=data_forplot
head(markers_for_all_3groups)
getwd()
# plot
jjVolcano(diffData = markers_for_all_3groups, 
          legend.position = c(0.93, 0.99), 
          topGeneN=2,#top genes to be labeled in plot, default 5.
          cluster.order=seq(0,23,1),
          pSize=0.4,
          tile.col = c("#EE3B3B", "#FF7F00", "#CD6600", "#8B2323", "#DEB887", "#76EEC6", 
                     "#F0FFFF", "#008B8B", "#FFB90F", "#F5F5DC", "#1F1F1F", "#66CD00", 
                     "#0000FF", "#97FFFF", "#528B8B", "#9400D3", "#EE1289", "#00BFFF", 
                     "#00FF00", "#191970", "#FFFF00", "#4A708B", "#00FF7F", "#8B8B00", 
                     "#FF1493", "#FFA500", "#8B4513"))

2.画各种基础的图 featureplot dimplot

数据处理

代码语言:javascript
复制
#request 2
.libPaths(c( "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2","/home/data/t040413/R/yll/usr/local/lib/R/site-library",  "/home/data/refdir/Rlib/", "/usr/local/lib/R/library"))
library(Seurat)
getwd()
dir.create("/home/data/t040413/ipf/GSE104154_scRNA-seq_fibrotic MC_bleomycin/enrichments")
setwd("/home/data/t040413/ipf/GSE104154_scRNA-seq_fibrotic MC_bleomycin/enrichments")
getwd()
library(dplyr)

load("~/silicosis/silicosis_cluster_merge.rds")
All.merge$Cell_subtype=All.merge$cell.type
table(Idents(All.merge))
subset_data=All.merge
代码语言:javascript
复制
##############################################dimp plot 
if(TRUE){
  pdf("2__all_celltype_tsne.pdf",width = 8,height = 6)
  p=DimPlot(subset_data,label = T,group.by = "Cell_subtype",reduction = "tsne",raster=FALSE,repel = T)
  print(p)
  dev.off()
 
  pdf("2__all_cellsubtype_tsne_split.pdf",width = 18,height = 6)
  p=DimPlot(subset_data,label = T,group.by = "Cell_subtype",split.by = "stim",reduction = "tsne",raster=FALSE,repel = T)
  print(p)
  dev.off()
 
  pdf("2_all_cellsubtype_umap.pdf",width = 8,height = 6)
  p=DimPlot(subset_data,label = T,group.by = "Cell_subtype",raster=FALSE,repel = T)
  print(p)
  dev.off()
 
  pdf("2_all_cellsubtype_umap_split.pdf",width = 18,height = 6)
  p=DimPlot(subset_data,label = T,group.by = "Cell_subtype",split.by = "stim",raster=FALSE,repel = T,reduction = "umap")
  print(p)
  dev.off()
 
}

展示其中一张

2.2 heatmap

代码语言:javascript
复制
markers=FindAllMarkers(subset_data,only.pos = T,logfc.threshold = 1.1)
head(markers)
markers$gene=rownames(markers)
write.csv(markers,file = "markers.csv")
openxlsx::write.xlsx(markers,file = "markers.xlsx")
getwd()

markers %>%
  group_by(cluster) %>%
  top_n(n = 20, wt = avg_log2FC) -> top20
DoHeatmap(subset_data, features = top20$gene)   #+ NoLegend()

pdf("top20_heatmap_celltypes_Legend__order_by_averageExpression.pdf")
p=DoHeatmap(subset_data, features = top20$gene)  #+  # scale_color_discrete(name = "Identity", labels = rep("",13))
print(p)
dev.off()

head(markers_for_Three_cd8subset)
#--------------------------------------------------
markers%>%
  group_by(cluster) %>%
  top_n(n = 5, wt = -p_val_adj) -> top5
DoHeatmap(subset_data, features = top5$gene)   #+ NoLegend()

pdf("top5_heatmap_celltypes_Legend__order_by_p_val_adj.pdf")
p=DoHeatmap(subset_data, features = top5$gene)  #+  # scale_color_discrete(name = "Identity", labels = rep("",13))
print(p)
dev.off()

3.如何利用单细胞数据画下面的富集分析图

3.1首先进行批量差异分析

输入数据要求

代码语言:javascript
复制
#--------------------------------------=======================--------批量差异分析差异分析
DimPlot(subset_data,label = T)
table(Idents(subset_data))
head(subset_data@meta.data)
all_degs=data.frame()

gsub(pattern = "/",replacement = "_",x = "yofibroblast/vascular smooth muscle cell degs.csv")

for (eachgroup in levels(subset_data$Cell_subtype) ) {
 
 # # eachgroup="nLung"
 #  subset_data2=subset_data
 #  Idents(subset_data2)=subset_data2$stim
 #  subset_data2=subset(subset_data2,idents=eachgroup)
 #  
 #  Idents(subset_data2)=subset_data2$Cell_subtype
 #  table(Idents(subset_data2))
 #  degs=FindMarkers(subset_data2,ident.1 = "SiO2_56",ident.2 = "NS_56")
 
  subset_data2=subset(subset_data,idents = eachgroup)
  Idents(subset_data2)=subset_data2$stim
  degs=FindMarkers(subset_data2,ident.1 = "SiO2_56",ident.2 = "NS_56")
 
   degs$gene=rownames(degs)
  degs$group=eachgroup
  #防止类似文件名称引起的报错 Myofibroblast/vascular smooth muscle cell degs.csv': No such file or directory
  write.csv(degs,file = paste(gsub(pattern = "/","_",x = eachgroup),"degs.csv"))
  print(paste0(eachgroup,"__done"))
  all_degs=rbind(degs,all_degs)
}
head(all_degs)
dim(all_degs)
write.csv(all_degs,file = "./all_degs.csv")

3.2 使用上述的all_degs进行分组批量富集分析

代码语言:javascript
复制
DimPlot(subset_data,label = T)
table(Idents(subset_data))
head(subset_data@meta.data)
all_degs=data.frame()
gsub(pattern = "/",replacement = "_",x = "yofibroblast/vascular smooth muscle cell degs.csv")

for (eachgroup in levels(subset_data$Cell_subtype) ) {
 
 # # eachgroup="nLung"
 #  subset_data2=subset_data
 #  Idents(subset_data2)=subset_data2$stim
 #  subset_data2=subset(subset_data2,idents=eachgroup)
 #  
 #  Idents(subset_data2)=subset_data2$Cell_subtype
 #  table(Idents(subset_data2))
 #  degs=FindMarkers(subset_data2,ident.1 = "SiO2_56",ident.2 = "NS_56")
 
  subset_data2=subset(subset_data,idents = eachgroup)
  Idents(subset_data2)=subset_data2$stim
  degs=FindMarkers(subset_data2,ident.1 = "SiO2_56",ident.2 = "NS_56")
 
   degs$gene=rownames(degs)
  degs$group=eachgroup
  #防止类似文件名称引起的报错 Myofibroblast/vascular smooth muscle cell degs.csv': No such file or directory
  write.csv(degs,file = paste(gsub(pattern = "/","_",x = eachgroup),"degs.csv"))
  print(paste0(eachgroup,"__done"))
  all_degs=rbind(degs,all_degs)
}

head(all_degs)
dim(all_degs)
write.csv(all_degs,file = "./all_degs.csv")
getwd()

#all_degs=read.csv()
##########################----------------------enrichment analysis==================================================
#https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg
df=all_degs
##筛选阈值确定:p<0.05,|log2FC|>1
p_val_adj = 0.05
avg_log2FC = 0.6
#根据阈值添加上下调分组标签:
df$direction <- case_when(
  df$avg_log2FC > avg_log2FC & df$p_val_adj < p_val_adj ~ "up",
  df$avg_log2FC < -avg_log2FC & df$p_val_adj < p_val_adj ~ "down",
  TRUE ~ 'none'
)
head(df)

df=df[df$direction!="none",]
head(df)
dim(df)
df$mygroup=paste(df$group,df$direction,sep = "_")
head(df)
dim(df)
##########################----------------------enrichment analysis==================================================
#https://mp.weixin.qq.com/s/WyT-7yKB9YKkZjjyraZdPg
{
  library(clusterProfiler)
  library(org.Hs.eg.db) #人
  library(org.Mm.eg.db) #鼠
  library(ggplot2)
 # degs_for_nlung_vs_tlung$gene=rownames(degs_for_nlung_vs_tlung)
  sce.markers=df
  head(sce.markers)
 
  ids=bitr(sce.markers$gene,'SYMBOL','ENTREZID',"org.Mm.eg.db") #'org.Hs.eg.db'
  head(ids)
  head(sce.markers)
  sce.markers=merge(sce.markers,ids,by.x='gene',by.y='SYMBOL')
  head(sce.markers)
  dim(sce.markers)
  sce.markers=sce.markers[sce.markers$group!="none",]
  dim(sce.markers)
  head(sce.markers)
  sce.markers$cluster=sce.markers$mygroup
  dim(sce.markers)
 gcSample=split(sce.markers$ENTREZID, sce.markers$cluster)
  gcSample # entrez id , compareCluster 
   print("===========开始go============")
  xx <- compareCluster(gcSample, fun="enrichGO",OrgDb="org.Mm.eg.db" ,   #'org.Hs.eg.db',
                       pvalueCutoff=0.05) #organism="hsa",
 
   print("===========开始 kegg============")
  gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG",
                                      keyType = 'kegg',  #KEGG 富集
                                      organism='mmu',#"rno",
                                      pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部
  )
 
  p=dotplot(xx) 
  p=p+ theme(axis.text.x = element_text(angle = 90, 
                                      vjust = 0.5, hjust=0.5))
  p
  ggsave('degs_compareCluster-GO_enrichment.pdf',plot = p,width = 17,height = 25,limitsize = F)
  ggsave('degs_compareCluster-GO_enrichment.png',plot = p,width = 17,height = 25,limitsize = F)
 
  xx
  write.csv(xx,file = "compareCluster-GO_enrichment.csv")
 
  p=dotplot(gg) 
  p4=p+ theme(axis.text.x = element_text(angle = 90, 
                                         vjust = 0.5, hjust=0.5))
  p4
  print(paste("保存位置",getwd(),sep = "  :   "))
  ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 16,height = 25,limitsize = F)
  ggsave('degs_compareCluster-KEGG_enrichment-2.png',plot = p4,width = 16,height = 25,limitsize = F)
 
  gg
  openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")
  
 
  if (F) { #大鼠
    print("===========开始go============")
    xx <-clusterProfiler::compareCluster(gcSample, fun="enrichGO",OrgDb="org.Rn.eg.db",
                                         readable=TRUE,
                                         ont = 'ALL',  #GO Ontology,可选 BP、MF、CC,也可以指定 ALL 同时计算 3 者
                                         pvalueCutoff=0.05) #organism="hsa", #'org.Hs.eg.db',
 
    print("===========开始 kegg============")
    gg<-clusterProfiler::compareCluster(gcSample,fun = "enrichKEGG",
                                        keyType = 'kegg',  #KEGG 富集
                                        organism="rno",
                                        pvalueCutoff = 0.05 #指定 p 值阈值(可指定 1 以输出全部
    )
 
    p=dotplot(xx) 
    p2=p+ theme(axis.text.x = element_text(angle = 90, 
                                           vjust = 0.5, hjust=0.5))
    p2
    ggsave('degs_compareCluster-GO_enrichment-2.pdf',plot = p2,width = 6,height = 20,limitsize = F)
    xx
    openxlsx::write.xlsx(xx,file = "compareCluster-GO_enrichment.xlsx")
    # -----  
    p=dotplot(gg) 
    p4=p+ theme(axis.text.x = element_text(angle = 90, 
                                           vjust = 0.5, hjust=0.5))
    p4
    print(paste("保存位置",getwd(),sep = "  :   "))
    ggsave('degs_compareCluster-KEGG_enrichment-2.pdf',plot = p4,width = 6,height = 12,limitsize = F)
    gg
    openxlsx::write.xlsx(gg,file = "compareCluster-KEGG_enrichment.xlsx")   
}  
}
本文参与 腾讯云自媒体同步曝光计划,分享自微信公众号。
原始发表:2023-07-02,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 生信菜鸟团 微信公众号,前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档