提取特定基因细胞亚群后富集分析-细胞相互作用

生信菜鸟团

发布于 2024-07-31 19:24:44

1570

发布于 2024-07-31 19:24:44

文章被收录于专栏：生信菜鸟团

下一次上课竟然是七夕，七夕还能有人听课？不过于我，差别不大哈哈哈哈哈

这一次的主题起源于，怎么说呢，随便的一个想法（做完了之后发现，很多事不是不能深挖和解读，而是责任方不在自己的话，不会放在心上） ①选取一个数据集，总注释 ②取出T&NK亚群，细分注释 ③取出Treg亚群，注释 ④提取出Treg亚群中某个基因阳性的细胞，GO和GSVA分析 ⑤这次不用合并的方式了，使用match ID的方式把后面细分的亚群注释挪到①的总注释中，cellchat相互作用分析

rm(list=ls())
options(stringsAsFactors = F)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
source('../scRNA_scripts/lib.R')
source('../scRNA_scripts/mycolors.R')
getwd()
set.seed(12345)
sce.all=sce
table(sce.all$celltype)
sce1 = sce.all[, sce.all$celltype %in% c( 'Treg' )]
LayerData(sce1, assay = "RNA", layer = "counts")
sce1 <- JoinLayers(sce1)

as.data.frame(sce1@assays$RNA$counts[1:10, 1:2])
head(sce1@meta.data, 10)
table(sce1$orig.ident) 

sce = sce1
sce <- NormalizeData(sce, normalization.method =  "LogNormalize",  
                     scale.factor = 1e4)
GetAssay(sce,assay = "RNA")
sce <- FindVariableFeatures(sce, 
                            selection.method = "vst", nfeatures = 2000)  
sce <- ScaleData(sce) 
sce <- RunPCA(object = sce, pc.genes = VariableFeatures(sce)) 
DimHeatmap(sce, dims = 1:12, cells = 100, balanced = TRUE)
ElbowPlot(sce) 

sce <- FindNeighbors(sce, dims = 1:15)
sce <- FindClusters(sce, resolution = 0.3)
table(sce@meta.data$RNA_snn_res.0.3)  

set.seed(321)
sce <- RunTSNE(object = sce, dims = 1:15, do.fast = TRUE)

sce <- RunUMAP(object = sce, dims = 1:5, do.fast = TRUE)

p = DimPlot(sce,label=T,cols = mycolors) 
p


###提取特定基因表达的细胞亚群
library(Seurat)
library(dplyr)
DimPlot(sce, label = T)+NoLegend()
FeaturePlot(sce, features = 'TNFRSF9', pt.size = 1)


expr <- sce@assays$RNA$data
gene_expression <- expr %>% .["TNFRSF9",] %>% as.data.frame()
colnames(gene_expression) <- "TNFRSF9"
gene_expression$cell <- rownames(gene_expression)



#选择4-1BB阳性细胞,提取阳性Treg亚群
gene_expression_sel <- gene_expression[which(gene_expression$TNFRSF9>0),]
phe = sce@meta.data
phe$celltype2 = 'NA'
table(phe$celltype2)

phe$celltype2 <- ifelse(rownames(phe) %in% rownames(gene_expression_sel), "TNFRSF9_Treg", "nonTNFRSF9_Treg")
table(phe$celltype2)
sce@meta.data = phe

###下面进行GO和GSVA分析
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
library(clusterProfiler)

source('../scRNA_scripts/lib.R')
source('../scRNA_scripts/mycolors.R')
Idents(sce)

###热图表示GO富集分析
library(grid)
library(ggplot2)
library(gridExtra)
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(ggplot2)
library(org.Hs.eg.db)
library(GOplot)
Idents(sce) = sce$celltype2

if (!file.exists('Tregmarkers.csv')) {
  markers <- FindAllMarkers(object = sce, only.pos = TRUE, 
                            min.pct = 0.25, 
                            thresh.use = 0.25)
  write.csv(markers,file='Tregmarkers.csv')
} else {

  markers = read.csv('Tregmarkers.csv',row.names = 1)
}

table(markers$cluster)
library(dplyr) 

ge = markers[markers$p_val <0.05 & markers$avg_log2FC >0.5,]
ge1 = ge[ge$cluster == 'TNFRSF9_Treg',]
symbol = ge1$gene

entrezIDs = bitr(symbol, fromType = "SYMBOL", toType = "ENTREZID", OrgDb= "org.Hs.eg.db", drop = TRUE)
gene<- entrezIDs$ENTREZID

marker_new = markers[markers$gene %in% entrezIDs$SYMBOL,]
marker_new = marker_new[!duplicated(marker_new$gene),]
identical(marker_new$gene, entrezIDs$SYMBOL)

p = identical(marker_new$gene, entrezIDs$SYMBOL);p
if(!p) entrezIDs = entrezIDs[match(marker_new$gene,entrezIDs$SYMBOL),]
marker_new$ID = entrezIDs$ENTREZID


## GO
gcSample=split(marker_new$ID, marker_new$cluster) 
###参数可以更改，看看?compareCluster
#One of "groupGO", "enrichGO", "enrichKEGG", "enrichDO" or "enrichPathway" 
xx <- compareCluster(gcSample,
                     fun = "enrichGO",
                     OrgDb = "org.Hs.eg.db",
                     ont = "BP",
                     pAdjustMethod = "BH",
                     pvalueCutoff = 0.01,
                     qvalueCutoff = 0.05
)

p <- dotplot(xx)
library(ggthemes)
p + theme(axis.text.x = element_text(
  angle = 45,
  vjust = 0.5, hjust = 0.5
))+theme_few()

###GSVA分析
library(Seurat)
library(msigdbr)
library(GSVA)
library(tidyverse)
library(clusterProfiler)
library(patchwork)
library(limma)

#这里可以选择通路H或者任意C，由于我想要总表格挑一挑，所以全选了
genesets <- msigdbr(species = "Homo sapiens") 
genesets <- subset(genesets, select = c("gs_name","gene_symbol")) %>% as.data.frame()
genesets <- split(genesets$gene_symbol, genesets$gs_name)

expr <- AverageExpression(sce, assays = "RNA", slot = "data")[[1]]
expr <- expr[rowSums(expr)>0,]  #选取非零基因
expr <- as.matrix(expr)
head(expr)

# gsva默认开启全部线程计算
gsva.res <- gsva(expr, genesets, method="gsva") 
saveRDS(gsva.res, "gsva.res.rds")
gsva.df <- data.frame(Genesets=rownames(gsva.res), gsva.res, check.names = F)
write.csv(gsva.df, "gsva_res.csv", row.names = F)
gsva_d = gsva.res[sample(nrow(gsva.res),30),]
pheatmap::pheatmap(gsva_d, show_colnames = T, 
                   scale = "row",angle_col = "45",
                   color = colorRampPalette(c("navy", "white", "firebrick3"))(50))


# 气泡图
gsva_long <- melt(gsva_d, id.vars = "Genesets")

# 创建气泡图
ggplot(gsva_long, aes(x = Var2, y = Var1, size = value, color = value)) +
  geom_point(alpha = 0.7) +  # 使用散点图层绘制气泡，alpha设置点的透明度
  scale_size_continuous(range = c(1, 6)) +  # 设置气泡大小的范围
  theme_minimal() + 
  scale_color_gradient(low='#427183',high='#D2D470') +
  labs(x = "Gene Set", y = "Sample", size = "GSVA Score")+
  theme(axis.text.x = element_text(angle = 45,vjust = 0.5,hjust = 0.5))


saveRDS(sce, "Treg_sce_celltype.rds")

CellChat可以回答以下生物学问题:

细胞与细胞之间的通讯是否增强?
细胞类型之间的相互作用发生显著变化?
主要来源和目标在不同情况下如何变化 ?

###突然想到一个事，可以不把sce合并进去，而是直接把注释完成的celltype2替换进去
#按道理来说跟在第一步直接注释没多大区别吧？
#细胞亚群之间相互作用
library(Seurat)
options(stringsAsFactors = F)
library(SeuratObject)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
library(CellChat)
Treg=sce
TNKsce=readRDS( "TNKsce_celltype.rds")
Allsce = readRDS( "sce_celltype.rds")
Treg_phe = sce@meta.data
TNK_phe = TNKsce@meta.data
All_phe = Allsce@meta.data
colnames(All_phe)
All_phe$celltype3 = All_phe$celltype
table(All_phe$celltype3)

####把Treg中的注释转换到TNK中
identical(rownames(Treg_phe),rownames(TNK_phe[TNK_phe$celltype == 'Treg',]))

indices <- match(rownames(TNK_phe[TNK_phe$celltype == 'Treg', ]), rownames(Treg_phe))
# 检查indices是否所有值都找到了对应的行名
if (any(is.na(indices))) {
  warning("TNK_phe中某些'Treg'类型的行名在Treg_phe中没有找到对应的行名")
}
table(TNK_phe$celltype)

# 使用Treg_phe中的celltype列的值来替换TNK_phe中的celltype
table(Treg_phe$celltype2)
TNK_phe$celltype[TNK_phe$celltype == 'Treg'] <- Treg_phe$celltype2[indices]
table(TNK_phe$celltype)

##随便验证一下，看转换的怎么样
#GSM6204111_P03_CTGCTCAAGGCGAACT-1
a = TNK_phe[ TNK_phe$celltype == 'TNFRSF9_Treg',]
identical(rownames(a),rownames(Treg_phe[Treg_phe$celltype2 == 'TNFRSF9_Treg',]))


####把TNK中的注释转换到All中
table(All_phe$celltype)
identical(rownames(TNK_phe),rownames(All_phe[All_phe$celltype == 'T&NK',]))
indices <- match(rownames(All_phe[All_phe$celltype == 'T&NK', ]), rownames(TNK_phe))

# 检查indices是否所有值都找到了对应的行名
if (any(is.na(indices))) {
  warning("All_phe中某些'TNK'类型的行名在TNK_phe中没有找到对应的行名")
}

table(All_phe$celltype)
table(TNK_phe$celltype)
All_phe$celltype[All_phe$celltype == 'T&NK'] <- TNK_phe$celltype[indices]
table(All_phe$celltype)

##验证
a = All_phe[ All_phe$celltype == 'Unkown',]
identical(rownames(a),rownames(TNK_phe[TNK_phe$celltype == 'Unkown',]))

Allsce@meta.data = All_phe

sce = Allsce
colnames(All_phe)
table(sce$patient)
table(sce$celltype)
Idents(sce) = sce$celltype
colnames(sce@meta.data)

names(sce@assays$RNA@layers)
sce[["RNA"]]$counts 
# Alternate accessor function with the same result
LayerData(sce, assay = "RNA", layer = "counts")
sce

table(sce$treat)
naive = sce[, sce$treat %in% c( 'naive')]
treat = sce[, sce$treat %in% c( 'treated')]

save(sce,naive,treat,file = 'celltype_input.Rdata')

cellchatA = function(sce){
  #创建cellchat对象
  cellchat <- createCellChat(sce@assays$RNA$data, meta = sce@meta.data, group.by = "celltype")
  levels(cellchat@idents)
  groupSize <- as.numeric(table(cellchat@idents)) 
  CellChatDB <- CellChatDB.human#导入配受体库
  showDatabaseCategory(CellChatDB) #查看描述该数据库组成的饼状图
  #直接使用CellChatDB全库进行细胞通讯分析：
  ##CellChatDB.use <- CellChatDB 
  CellChatDB.use <- subsetDB(CellChatDB, search = "Secreted Signaling")#选择特定的信号来进行分析，这里还可以选择ECM-receptor和Cell-Cell Contact。 
  cellchat@DB <- CellChatDB.use

  #预处理表达数据以进行细胞间通讯分析
  # subset the expression data of signaling genes for saving computation cost
  cellchat <- subsetData(cellchat)
  cellchat <- identifyOverExpressedGenes(cellchat)
  cellchat <- identifyOverExpressedInteractions(cellchat)
  # project gene expression data onto PPI network (optional)
  cellchat <- projectData(cellchat, PPI.human) #PPI.human PPI.mouse


  ###2.细胞通讯预测
  #计算通信概率并推断通信网络
  #Compute the communication probability and iC3D1er cellular communication network
  cellchat <- computeCommunProb(cellchat)
  # Filter out the cell-cell communication if there are only few number of cells in certain cell groups
  #这里还是不要过滤了，不然有些细胞亚群细胞数量过少被过滤掉之后会造成数据不一致而无法进行后续分析
  #cellchat <- filterCommunication(cellchat, min.cells = 10)
  #2）提取配受体对细胞通讯结果表：
  df.net <- subsetCommunication(cellchat, slot.name = 'net')
  head(df.net) #得到配受体对细胞通讯结果表
  # #或访问其它感兴趣/特定的细胞通讯结果：
  # df.net1 <- subsetCommunication(cellchat,
  #                                sources.use = c('LC'),
  #                                targets.use = c('FBN1+ FIB')) #访问特定细胞对子集
  # head(df.net1)
  df.net2 <- subsetCommunication(cellchat, signaling = c('MIF')) #访问特定信号通路子集
  head(df.net2)
  #3）提取信号通路水平的细胞通讯表：
  cellchat <- computeCommunProbPathway(cellchat) #计算信号通路水平上的通讯概率
  df.netp <- subsetCommunication(cellchat, slot.name = 'netP') #得到信号通路水平细胞通讯表
  head(df.netp)

  #4）细胞互作关系展示：
  #计算细胞对间通讯的数量和概率强度
  cellchat <- aggregateNet(cellchat)
  return(cellchat)
} 

allcellchat = cellchatA(sce)
naivecellchat = cellchatA(naive)
treatcellchat = cellchatA(treat)


object.list <- list(all = allcellchat,naive = naivecellchat,treat = treatcellchat )
cellchat <- mergeCellChat(object.list, add.names = names(object.list), cell.prefix = TRUE)

gg1 <- compareInteractions(cellchat, show.legend = F, group = c(1,2,3), measure = "count")
gg2 <- compareInteractions(cellchat, show.legend = F, group = c(1,2,3), measure = "weight")
p <- gg1 + gg2
p
ggsave("Overview_number_strength.pdf", p, width = 10, height = 7)

其实有个问题，这个通讯数量和通讯强度的分析跟细胞数量有关系么，如果单看all组的细胞之间关系，是没有联系的。给药后就是会减少细胞间相互作用的数量和强度。

par(mfrow = c(1,2))
##设置比较组：与2相比，3红色上调，蓝色下调
#这里就可以大概得到我的结论了，分析可以结束了，再就是进一步论证了
netVisual_diffInteraction(cellchat, weight.scale = T,comparison = c(2,3))
netVisual_diffInteraction(cellchat, weight.scale = T, measure = "weight",comparison = c(2,3))

相互作用的差异数量或相互作用强度。顶部的彩色条形图表示热图（incoming, 传入信号）中显示的数值列的和。右边的彩色条形图表示一行值的和（outgoing, 输出信号）。在颜色条中，红色(或蓝色)表示与第二个数据集相比，第三个数据集中的信号增加(或减少)。

#差异性heatmap
gg1 <- netVisual_heatmap(cellchat,comparison = c(2,3))
gg2 <- netVisual_heatmap(cellchat, measure = "weight",comparison = c(2,3))
gg1 + gg2

# 比较二维空间中传出和传入的交互强度，可以方便地识别在不同数据集之间发送或接收信号发生显著变化的细胞群。
#BiocManager::install('netAnalysis')
library(netAnalysis)
# 计算每个网络对象的中心性得分
for (i in 1:length(object.list)) {
  object.list[[i]] <- netAnalysis_computeCentrality(object.list[[i]])
}

# 计算链接的总数，不包括对角线上的值
num.link <- sapply(object.list, function(x) {rowSums(x@net$count) + colSums(x@net$count) - diag(x@net$count)})

# 设置权重的最小值和最大值，用于控制不同数据集中点的大小
weight.MinMax <- c(min(num.link), max(num.link))

gg <- list()
for (i in 1:length(object.list)) {
  gg[[i]] <- netAnalysis_signalingRole_scatter(object.list[[i]], 
                                               title = names(object.list)[i], 
                                               weight.MinMax = weight.MinMax)
}

patchwork::wrap_plots(plots = gg)

治疗后看起来没什么特异的source和targets，myeloid变化稍大。

## 通路信号强度对比分析
#comparison可以修改
#红色的顶级信号通路在naive富集，而这些绿色的信号通路在treat富集。
gg1 <- rankNet(cellchat, mode = "comparison", stacked = T, do.stat = TRUE,comparison = c(2,3))
gg2 <- rankNet(cellchat, mode = "comparison", stacked = F, do.stat = TRUE,comparison = c(2,3))
p <- gg1 + gg2
p

明显可以看到红色的顶级信号通路在naive富集，而这些绿色的信号通路在treat富集。

pathways.show <- c("CXCL") 
weight.max <- getMaxWeight(object.list, slot.name = c("netP"), attribute = pathways.show) 
par(mfrow = c(1,3), xpd=TRUE)
for (i in 1:length(object.list)) {
  netVisual_aggregate(object.list[[i]], signaling = pathways.show, layout = "circle", 
                      edge.weight.max = weight.max[1], edge.width.max = 10, 
                      signaling.name = paste(pathways.show, names(object.list)[i]))
}

如果拿上面的结果看CXCL，确实是在treat中增加