用clusterProfiler对单细胞的基因做功能富集分析超简单！！

生信菜鸟团

发布于 2023-09-09 16:47:51

2.4K00

代码可运行

文章被收录于专栏：生信菜鸟团生信菜鸟团

运行总次数：0

代码可运行

❝在单细胞转录组测序数据分析的过程中常用到的几大高级分析包括：富集分析，细胞通讯，拟时序分析，CNV, SNV, 基因调控网络等等。 ❞

❝今天我就拿一篇Nature Communication的单细胞转录组数据来做一下功能富集分析，在做功能富集分析之前，要先得到单细胞亚群中特异性高表达的基因，现在主流方法是FindAllMarker函数，由于我这个数据集数据比较大，用该方法耗时耗力，我就在这里采用了cosg方法，得到top gene的列表数据。 ❞

对于功能富集来说，常用的数据库包括KEGG，GO还有ReactomePA数据库，我把示例代码都展示如下。

rm(list=ls())

library(Seurat)
library(ggplot2)
library(dplyr)
library(clusterProfiler)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ReactomePA)
library(ggplot2)
library(stringr)

#BiocManager::install("ReactomePA")
#BiocManager::install("DOSE")

getwd()
dir.create("./3-cell")
setwd("./3-cell")  
load("sce.all_by_celltype.Rdata")

DimPlot(sce.all, reduction = "umap", group.by = "celltype",label = T)

sce.all
table(Idents(sce.all))  
Idents(sce.all)=sce.all$celltype
table(Idents(sce.all)) 

#COSG
pro = 'cosg_celltype_'
library(COSG)
marker_cosg <- cosg(
  sce.all,
  groups='all',
  assay='RNA',
  slot='data',
  mu=1,
  n_genes_user=100)

save(marker_cosg,file = paste0(pro,'_marker_cosg.Rdata'))

导入数据及GO功能富集

#导入前期数据
load("cosg_celltype__marker_cosg.Rdata")

head(marker_cosg[[1]])

##GO
#BP
x_BP = compareCluster(marker_cosg[[1]], fun='enrichGO', 
                   OrgDb = 'org.Hs.eg.db', keyType = 'SYMBOL', ont="BP")

p<-dotplot(x_BP) + theme(axis.text.x = element_text(angle=45, hjust=1),
                      axis.text.y = element_text(size=12),
                      panel.spacing = unit(5, "mm"))+
  scale_colour_gradientn(colours =c("#E54924","#498EA4"));p

ggsave(file="BP.pdf",height= 15,width = 8)

「用dotplot画出来的图纵坐标重叠部分比较高，我没进行调整，下面我就使用ggplot又美化了一下」

####提取数据####
data<-p$data

colnames(data)

p1 = ggplot(data,aes(Cluster,Description,size = GeneRatio))+
  geom_point(shape=21,aes(fill= pvalue),position =position_dodge(0))+
  theme_minimal()+xlab(NULL)+ylab(NULL) +
  scale_size_continuous(range=c(1,10))+
  theme_bw()+
  scale_fill_gradient(low = "#E54924", high = "#498EA4")+
  theme(legend.position = "right",legend.box = "vertical",
        legend.margin=margin(t= 0, unit='cm'),
       legend.spacing = unit(0,"in"),
        axis.text.x  = element_text(color="black",size=16,angle=45, hjust=1),
        axis.text.y  = element_text(color="black",size=12),
        legend.text = element_text(size =12,color="black"),
        legend.title = element_text(size =12,color="black")
  ) +
scale_y_discrete(labels= data$Description) 

p1

ggsave(file="BP-1.pdf",height= 10,width = 15)

#MF
x_MF = compareCluster(marker_cosg[[1]], fun='enrichGO', 
                    OrgDb = 'org.Hs.eg.db', keyType = 'SYMBOL', ont="MF")
dotplot(x_MF) + theme(axis.text.x = element_text(angle=45, hjust=1))



#CC
x_CC = compareCluster(marker_cosg[[1]], fun='enrichGO', 
                    OrgDb = 'org.Hs.eg.db', keyType = 'SYMBOL', ont="CC")
dotplot(x_CC) + theme(axis.text.x = element_text(angle=45, hjust=1)

基因名字转为 entrezID

symbols_list<-marker_cosg[[1]]
## 全部的symbol需要转为 entrezID
gene = lapply(symbols_list, function(y){ 
  y=as.character(na.omit(select(org.Hs.eg.db,
                                keys = y,
                                columns = 'ENTREZID',
                                keytype = 'SYMBOL')[,2])
  )
  y
})

KEGG

##KEGG通路富集网站应该还有问题,我把代码先放在这里~
# x2 <- compareCluster(gene, fun="enrichKEGG",
#                      organism='hsa', pvalueCutoff=0.05)
# p<-dotplot(x2) + theme(axis.text.x = element_text(angle=45, hjust=1),
#                          axis.text.y = element_text(size=12),
#                          panel.spacing = unit(5, "mm"))+
#   scale_colour_gradientn(colours =c("#E54924","#498EA4"));p

ReactomePA

##ReactomePA 
x3 <- compareCluster(gene, fun="enrichPathway",
                     organism = 'human',
                     pvalueCutoff=0.05)
p<-dotplot(x3) + theme(axis.text.x = element_text(angle=45, hjust=1),
                       axis.text.y = element_text(size=12),
                       panel.spacing = unit(5, "mm"))+
  scale_colour_gradientn(colours =c("#E54924","#498EA4"));p

####提取数据####
data<-p$data

colnames(data)

p1 = ggplot(data,aes(Cluster,Description,size = GeneRatio))+
  geom_point(shape=21,aes(fill= pvalue),position =position_dodge(0))+
  theme_minimal()+xlab(NULL)+ylab(NULL) +
  scale_size_continuous(range=c(1,10))+
  theme_bw()+
  scale_fill_gradient(low = "#E54924", high = "#498EA4")+
  theme(legend.position = "right",legend.box = "vertical",
        legend.margin=margin(t= 0, unit='cm'),
        legend.spacing = unit(0,"in"),
        axis.text.x  = element_text(color="black",size=16,angle=45, hjust=1),
        axis.text.y  = element_text(color="black",size=12),
        legend.text = element_text(size =12,color="black"),
        legend.title = element_text(size =12,color="black")
  ) +
  scale_y_discrete(labels= data$Description) 

p1

ggsave(file="ReactomePA.pdf",height= 10,width = 16)