前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >单细胞二次分群和细胞周期

单细胞二次分群和细胞周期

原创
作者头像
用户10300557
发布2024-06-26 20:47:38
1210
发布2024-06-26 20:47:38
举报
文章被收录于专栏:生信学习111

二次分群

1 加载和分群

用的数据是降维聚类分群 + 自动注释的数据

代码语言:R
复制
seu.obj = scRNA
head(seu.obj@meta.data)
library(Seurat)
library(dplyr)
p1 = DimPlot(seu.obj, reduction = "umap",label=T)+NoLegend()
p1
levels(Idents(scRNA)) #看一下都有啥类型
my_sub = "Endothelial_cells"
seu.obj = scRNA
sub.cells <- subset(seu.obj, idents = my_sub)
f = "obj0626Rdata" # 新建一个
if(!file.exists(f)){
  sub.cells = sub.cells %>%
    NormalizeData() %>%
    FindVariableFeatures() %>%
    ScaleData(features = rownames(.)) %>%
    RunPCA(features = VariableFeatures(.))  %>%
    FindNeighbors(dims = 1:15) %>%
    FindClusters(resolution = 0.5) %>%
    RunUMAP(dims = 1:15) 
  save(sub.cells,file = f)
}
load(f)
DimPlot(sub.cells, reduction = 'umap',label = T)+NoLegend() #出图
sub.cells@meta.data$celltype = paste0("M",sub.cells$seurat_clusters) #把子对象的亚群注释结果添加到表格上面去。
save(sub.cells,file = "sub.cells.Rdata")

marker 基因及其可视化

找差异基因

代码语言:R
复制
sub.cells.markers <- FindAllMarkers(sub.cells, only.pos = TRUE,  
                                    min.pct = 0.25, logfc.threshold = 0.25)

top10 <- sub.cells.markers %>% 
  group_by(cluster) %>% 
  top_n(n = 10, wt = avg_log2FC) %>% 
  pull(gene);top10

VlnPlot(sub.cells, features = top10) #小提琴可视化,no.1


top5 <- sub.cells.markers %>% #减少一点看看
  group_by(cluster) %>% 
  top_n(n = 5, wt = avg_log2FC) %>% 
  pull(gene); top5

VlnPlot(sub.cells, features = top5)#小提琴可视化 no.2
RidgePlot(sub.cells, features = top5)#山脊图可视化 no.3
FeaturePlot(sub.cells, features = top5) #特征点图 no.4
DotPlot(sub.cells,features = top10)+ RotatedAxis() #no.5
DoHeatmap(sub.cells, features = top5) + NoLegend()#no.6
no.1
no.1
no.2
no.2
no.3
no.3
no.4
no.4
no.5
no.5
no.6
no.6

上面是单独显示也可以放回去

match函数时匹配,用于找出一个向量中的元素在另一个向量中的对应位置

代码语言:R
复制
seu.obj$celltype = as.character(Idents(seu.obj))
seu.obj$celltype = ifelse(seu.obj$celltype==my_sub,
                          sub.cells$celltype[match(colnames(seu.obj),colnames(sub.cells))],
                          seu.obj$celltype) #使用ifelse函数实现了分情况讨论:判断seu.obj的每个细胞是否是my_sub(本例是Fibroblasts),如果是的话,就替换成sub.cells里面匹配的每个细胞对应的celltype,不是就不管

Idents(seu.obj) = seu.obj$celltype
p2 = DimPlot(seu.obj,label = T)+NoLegend()
p1+p2

2 细胞周期

细胞的有丝分裂,分为分裂期(M)和分裂间期(G1,S,G2),细胞处于不同的细胞周期时,代谢活跃状态和染色体的状态大不相同,所以在不同周期表达量是有差异的

直接比较表达量是不可取的,不过得分情况,有些对下游的聚类和分群影响比较大,有些影响比较小。

质控前
质控前
 质控后
质控后

2.1 1 marrow

代码语言:R
复制
> dir.create("day9")
> save(ct,/day9)
Error: unexpected '/' in "save(ct,/"
> save(ct, file = "day9/day9.RData")
> exp.mat <- read.delim("day9/nestorawa_forcellcycle_expressionMatrix.txt",row.names = 1)
> marrow <- CreateSeuratObject(counts = exp.mat,
+                              project = "b",
+                              min.cells = 3, 
+                              min.features = 200) #nestorawa_forcellcycle_expressionMatrix.txt,我觉得这个的意义在于比较
Warning: Data is of class data.frame. Coercing to dgCMatrix.
> marrow[["percent.mt"]] <- PercentageFeatureSet(marrow, pattern = "^MT-")
> head(marrow@meta.data, 3)
         orig.ident nCount_RNA nFeature_RNA percent.mt
Prog_013       Prog    2563086        10208   5.187809
Prog_019       Prog    3030619         9990   5.965877
Prog_031       Prog    1293487        10192   5.664456
> VlnPlot(marrow, 
+         features = c("nFeature_RNA",
+                      "nCount_RNA", 
+                      "percent.mt"), 
+         ncol = 3,pt.size = 0.5)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.
受细胞周期影响比较大的这套数据明显看出来细胞
受细胞周期影响比较大的这套数据明显看出来细胞

2.1.2 计算细胞周期评分

代码语言:R
复制
# 计算细胞周期评分
check_cc =  function(ob){
  s.genes <- intersect(cc.genes$s.genes,rownames(ob))
  g2m.genes <- intersect(cc.genes$g2m.genes,rownames(ob))
  ob = ob %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    CellCycleScoring(s.features = s.genes, #计算细胞周期评分的函数
                     g2m.features = g2m.genes) %>%
    ScaleData(features = rownames(.)) %>%  
    RunPCA(features = c(s.genes,g2m.genes))
  return(ob)
}
ch1 = check_cc(seu.obj)
head(ch1@meta.data)
                      orig.ident nCount_RNA nFeature_RNA percent.mt     S.Score
AAACCCACAGTCGGTC-1 SeuratProject       4243         1256   6.292717 -0.02545080
AAACGAAAGAATCGCG-1 SeuratProject       7307         2577   2.572875  0.03719043
AAAGAACAGCTTACGT-1 SeuratProject       8154         1881   4.083885  0.04363704
AAAGAACAGGTTCATC-1 SeuratProject       8223         2182   4.377964 -0.05930417
AAAGAACAGTCTGTAC-1 SeuratProject       3884         1377   4.763131 -0.02251541
AAAGAACTCCACCTCA-1 SeuratProject       3997         1307   3.402552  0.01273964
                     G2M.Score Phase
AAACCCACAGTCGGTC-1  0.05694222   G2M
AAACGAAAGAATCGCG-1 -0.12520510     S
AAAGAACAGCTTACGT-1 -0.02283322     S
AAAGAACAGGTTCATC-1 -0.03018975    G1
AAAGAACAGTCTGTAC-1  0.02168140   G2M
AAAGAACTCCACCTCA-1 -0.13158839     S

2.1.3 比较两个数据的细胞周期评分和PCA

函数处理之后meta.data多了3列,分别是s和g2m的评分以及推断的细胞周期

代码语言:R
复制
table(ch1$Phase)
ch2 = check_cc(marrow)
table(ch2$Phase)
PCAPlot(ch1,group.by = "Phase")+ PCAPlot(ch2,group.by = "Phase") # no.2

library(patchwork)
#横纵坐标保持一致,更加直观一点
PCAPlot(ch1,group.by = "Phase")+ 
  PCAPlot(ch2,group.by = "Phase")&
  xlim(-60,15)&
  ylim(-10,15) #no.2
#比较S.Score和G2M.Score
p1 = VlnPlot(ch1,"S.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"S.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.6,0.6) #S.Score no.3
p1 = VlnPlot(ch1,"G2M.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"G2M.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.5,1) #G2M.Score,no.4
no.1
no.1
no.2
no.2
no.3
no.3
no.4
no.4

2.1.4 比较去除和不去处细胞周期影响的下游注释

代码语言:R
复制
f = "ob1.Rdata"
if(!file.exists(f)){
  ob1 = seu.obj %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    ScaleData(features = rownames(.)) %>%  
    RunPCA(features = VariableFeatures(.))  %>%
    FindNeighbors(dims = 1:15) %>% 
    FindClusters(resolution = 0.5) %>% 
    RunUMAP(dims = 1:15) %>% 
    RunTSNE(dims = 1:15)
  save(ob1,file = f)
}
load(f) 
#根据s期和g2m期各自有代表性的基因来打分,在ScaleData函数中加上vars.to.regress参数来消除影响
s.genes <- intersect(cc.genes$s.genes,rownames(seu.obj))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(seu.obj)) #cc.genes是自带的数据
f = "ob2.Rdata"
if(!file.exists(f)){
  ob2 = seu.obj %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    CellCycleScoring(s.features = s.genes, g2m.features = g2m.genes) %>%
    ScaleData(vars.to.regress = c("S.Score", "G2M.Score"),features = rownames(.)) %>%  #运行极其慢
    RunPCA(features = VariableFeatures(.))  %>%
    FindNeighbors(dims = 1:15) %>% 
    FindClusters(resolution = 0.5) %>% 
    RunUMAP(dims = 1:15) %>% 
    RunTSNE(dims = 1:15)
  save(ob2,file = f)
}
load(f) 
p1 <- DimPlot(ob1, reduction = "umap",label = T)+NoLegend()
p2 <- DimPlot(ob2, reduction = "umap",label = T)+NoLegend()
p1+p2

#注释用single
library(celldex)
library(SingleR)
ls("package:celldex")

f = "single_ref/ref_BlueprintEncode.RData"
if(!file.exists(f)){
  ref <-  celldex::BlueprintEncodeData()
  save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = ob1
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
m = scRNA
p3 #b不考虑细胞周期的注释
scRNA = ob2
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p4 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p4 #考虑细胞周期的注释
p3+p4

统计考虑和不考虑细胞周期的变化,要是false太多就得采用去除后的结果更为可信一点

代码语言:R
复制
> table(as.character(Idents(m))==as.character(Idents(scRNA)))

FALSE  TRUE 
  151  2723 
左不考虑细胞周期,右考虑
左不考虑细胞周期,右考虑
左不考虑细胞周期,右考虑
左不考虑细胞周期,右考虑

多样本的细胞周期

代码语言:R
复制
#rm(list = ls())
library(tidyverse)
library(Seurat)
load("sce.little.Rdata") #day7读取并抽样的数据
sce.all[["percent.mt"]] <- PercentageFeatureSet(sce.all, pattern = "^MT-")
sce.all[["percent.rp"]] <- PercentageFeatureSet(sce.all, pattern = "^RP[SL]")
sce.all[["percent.hb"]] <- PercentageFeatureSet(sce.all, pattern = "^HB[^(P)]")

VlnPlot(sce.all, 
        features = c("nFeature_RNA",
                     "nCount_RNA", 
                     "percent.mt",
                     "percent.rp",
                     "percent.hb"),
        ncol = 3,pt.size = 0.5, group.by = "orig.ident")
        #根据小提琴图指定指标去掉离群值
sce.all = subset(sce.all,percent.mt < 20&
                   nCount_RNA < 40000 &
                   nFeature_RNA < 6000)
table(sce.all@meta.data$orig.ident)
seu.obj = sce.all
#单样本的marrow那步,再做一下
check_cc =  function(ob){
  s.genes <- intersect(cc.genes$s.genes,rownames(ob))
  g2m.genes <- intersect(cc.genes$g2m.genes,rownames(ob))
  ob = ob %>% 
  NormalizeData() %>%  
  FindVariableFeatures() %>%  
  CellCycleScoring(s.features = s.genes, 
                   g2m.features = g2m.genes) %>%
  ScaleData(features = rownames(.)) %>%  
  RunPCA(features = c(s.genes,g2m.genes))
  return(ob)
}
ch1 = check_cc(seu.obj)
head(ch1@meta.data)
table(ch1$Phase)
ch2 = check_cc(marrow)
table(ch2$Phase)
PCAPlot(ch1,group.by = "Phase")+ PCAPlot(ch2,group.by = "Phase")
library(patchwork)
PCAPlot(ch1,group.by = "Phase")+ 
  PCAPlot(ch2,group.by = "Phase")&
  xlim(-50,15)&
  ylim(-20,20)
  p1 = VlnPlot(ch1,"S.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"S.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.6,0.6)
p1 = VlnPlot(ch1,"G2M.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"G2M.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.5,1)
#不考虑细胞周期的降维数据分析
library(harmony)
f = "ob1m.Rdata"
if(!file.exists(f)){
  ob1 = seu.obj %>% 
  NormalizeData() %>%  
  FindVariableFeatures() %>%  
  ScaleData(features = rownames(.)) %>%  
  RunPCA(pc.genes = VariableFeatures(.))  %>%
  RunHarmony("orig.ident") %>%
  FindNeighbors(dims = 1:15, reduction = "harmony") %>% 
  FindClusters(resolution = 0.5) %>% 
  RunUMAP(dims = 1:15,reduction = "harmony") %>% 
  RunTSNE(dims = 1:15,reduction = "harmony")
  save(ob1,file = f)
}
load(f)
#考虑细胞周期的降维聚类分群
s.genes <- intersect(cc.genes$s.genes,rownames(seu.obj))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(seu.obj))
f = "ob2m.Rdata"
if(!file.exists(f)){
  ob2 = seu.obj %>% 
  NormalizeData() %>%  
  FindVariableFeatures() %>%  
  CellCycleScoring(s.features = s.genes, g2m.features = g2m.genes) %>%
  ScaleData(vars.to.regress = c("S.Score", "G2M.Score"),features = rownames(.)) %>%  #运行极其慢
  RunPCA(pc.genes = VariableFeatures(.))  %>%
  RunHarmony("orig.ident") %>%
  FindNeighbors(dims = 1:15, reduction = "harmony") %>% 
  FindClusters(resolution = 0.5) %>% 
  RunUMAP(dims = 1:15,reduction = "harmony") %>% 
  RunTSNE(dims = 1:15,reduction = "harmony")
  save(ob2,file = f)
}
load(f)
p1 <- DimPlot(ob1, reduction = "umap",label = T)+NoLegend() #不考虑
p2 <- DimPlot(ob2, reduction = "umap",label = T)+NoLegend() #考虑
p1+p2
#singleR注释
library(celldex)
library(SingleR)
ls("package:celldex")
f = "single_ref/ref_BlueprintEncode.RData"
if(!file.exists(f)){
  ref <-  celldex::BlueprintEncodeData()
  save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = ob1
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
m = scRNA
scRNA = ob2
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p4 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p3+p4
table(as.character(Idents(m))==as.character(Idents(scRNA)))

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 二次分群
    • 1 加载和分群
      • marker 基因及其可视化
        • 上面是单独显示也可以放回去
        • 2 细胞周期
          • 2.1 1 marrow
            • 2.1.2 计算细胞周期评分
              • 2.1.3 比较两个数据的细胞周期评分和PCA
                • 2.1.4 比较去除和不去处细胞周期影响的下游注释
                • 多样本的细胞周期
                相关产品与服务
                腾讯云 BI
                腾讯云 BI(Business Intelligence,BI)提供从数据源接入、数据建模到数据可视化分析全流程的BI能力,帮助经营者快速获取决策数据依据。系统采用敏捷自助式设计,使用者仅需通过简单拖拽即可完成原本复杂的报表开发过程,并支持报表的分享、推送等企业协作场景。
                领券
                问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档