单细胞二次分群和细胞周期

原创

用户10300557

发布于 2024-06-26 20:47:38

1210

发布于 2024-06-26 20:47:38

文章被收录于专栏：生信学习111

二次分群

1 加载和分群

用的数据是降维聚类分群 + 自动注释的数据

seu.obj = scRNA
head(seu.obj@meta.data)
library(Seurat)
library(dplyr)
p1 = DimPlot(seu.obj, reduction = "umap",label=T)+NoLegend()
p1
levels(Idents(scRNA)) #看一下都有啥类型
my_sub = "Endothelial_cells"
seu.obj = scRNA
sub.cells <- subset(seu.obj, idents = my_sub)
f = "obj0626Rdata" # 新建一个
if(!file.exists(f)){
  sub.cells = sub.cells %>%
    NormalizeData() %>%
    FindVariableFeatures() %>%
    ScaleData(features = rownames(.)) %>%
    RunPCA(features = VariableFeatures(.))  %>%
    FindNeighbors(dims = 1:15) %>%
    FindClusters(resolution = 0.5) %>%
    RunUMAP(dims = 1:15) 
  save(sub.cells,file = f)
}
load(f)
DimPlot(sub.cells, reduction = 'umap',label = T)+NoLegend() #出图
sub.cells@meta.data$celltype = paste0("M",sub.cells$seurat_clusters) #把子对象的亚群注释结果添加到表格上面去。
save(sub.cells,file = "sub.cells.Rdata")

marker 基因及其可视化

找差异基因

sub.cells.markers <- FindAllMarkers(sub.cells, only.pos = TRUE,  
                                    min.pct = 0.25, logfc.threshold = 0.25)

top10 <- sub.cells.markers %>% 
  group_by(cluster) %>% 
  top_n(n = 10, wt = avg_log2FC) %>% 
  pull(gene);top10

VlnPlot(sub.cells, features = top10) #小提琴可视化，no.1


top5 <- sub.cells.markers %>% #减少一点看看
  group_by(cluster) %>% 
  top_n(n = 5, wt = avg_log2FC) %>% 
  pull(gene); top5

VlnPlot(sub.cells, features = top5)#小提琴可视化 no.2
RidgePlot(sub.cells, features = top5)#山脊图可视化 no.3
FeaturePlot(sub.cells, features = top5) #特征点图 no.4
DotPlot(sub.cells,features = top10)+ RotatedAxis() #no.5
DoHeatmap(sub.cells, features = top5) + NoLegend()#no.6

上面是单独显示也可以放回去

match函数时匹配，用于找出一个向量中的元素在另一个向量中的对应位置

seu.obj$celltype = as.character(Idents(seu.obj))
seu.obj$celltype = ifelse(seu.obj$celltype==my_sub,
                          sub.cells$celltype[match(colnames(seu.obj),colnames(sub.cells))],
                          seu.obj$celltype) #使用ifelse函数实现了分情况讨论：判断seu.obj的每个细胞是否是my_sub(本例是Fibroblasts),如果是的话，就替换成sub.cells里面匹配的每个细胞对应的celltype,不是就不管

Idents(seu.obj) = seu.obj$celltype
p2 = DimPlot(seu.obj,label = T)+NoLegend()
p1+p2

2 细胞周期

细胞的有丝分裂，分为分裂期（M）和分裂间期（G1，S，G2），细胞处于不同的细胞周期时，代谢活跃状态和染色体的状态大不相同，所以在不同周期表达量是有差异的

直接比较表达量是不可取的，不过得分情况，有些对下游的聚类和分群影响比较大，有些影响比较小。

2.1 1 marrow

> dir.create("day9")
> save(ct,/day9)
Error: unexpected '/' in "save(ct,/"
> save(ct, file = "day9/day9.RData")
> exp.mat <- read.delim("day9/nestorawa_forcellcycle_expressionMatrix.txt",row.names = 1)
> marrow <- CreateSeuratObject(counts = exp.mat,
+                              project = "b",
+                              min.cells = 3, 
+                              min.features = 200) #nestorawa_forcellcycle_expressionMatrix.txt，我觉得这个的意义在于比较
Warning: Data is of class data.frame. Coercing to dgCMatrix.
> marrow[["percent.mt"]] <- PercentageFeatureSet(marrow, pattern = "^MT-")
> head(marrow@meta.data, 3)
         orig.ident nCount_RNA nFeature_RNA percent.mt
Prog_013       Prog    2563086        10208   5.187809
Prog_019       Prog    3030619         9990   5.965877
Prog_031       Prog    1293487        10192   5.664456
> VlnPlot(marrow, 
+         features = c("nFeature_RNA",
+                      "nCount_RNA", 
+                      "percent.mt"), 
+         ncol = 3,pt.size = 0.5)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.

2.1.2 计算细胞周期评分

# 计算细胞周期评分
check_cc =  function(ob){
  s.genes <- intersect(cc.genes$s.genes,rownames(ob))
  g2m.genes <- intersect(cc.genes$g2m.genes,rownames(ob))
  ob = ob %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    CellCycleScoring(s.features = s.genes, #计算细胞周期评分的函数
                     g2m.features = g2m.genes) %>%
    ScaleData(features = rownames(.)) %>%  
    RunPCA(features = c(s.genes,g2m.genes))
  return(ob)
}
ch1 = check_cc(seu.obj)
head(ch1@meta.data)
                      orig.ident nCount_RNA nFeature_RNA percent.mt     S.Score
AAACCCACAGTCGGTC-1 SeuratProject       4243         1256   6.292717 -0.02545080
AAACGAAAGAATCGCG-1 SeuratProject       7307         2577   2.572875  0.03719043
AAAGAACAGCTTACGT-1 SeuratProject       8154         1881   4.083885  0.04363704
AAAGAACAGGTTCATC-1 SeuratProject       8223         2182   4.377964 -0.05930417
AAAGAACAGTCTGTAC-1 SeuratProject       3884         1377   4.763131 -0.02251541
AAAGAACTCCACCTCA-1 SeuratProject       3997         1307   3.402552  0.01273964
                     G2M.Score Phase
AAACCCACAGTCGGTC-1  0.05694222   G2M
AAACGAAAGAATCGCG-1 -0.12520510     S
AAAGAACAGCTTACGT-1 -0.02283322     S
AAAGAACAGGTTCATC-1 -0.03018975    G1
AAAGAACAGTCTGTAC-1  0.02168140   G2M
AAAGAACTCCACCTCA-1 -0.13158839     S

2.1.3 比较两个数据的细胞周期评分和PCA

函数处理之后meta.data多了3列，分别是s和g2m的评分以及推断的细胞周期

table(ch1$Phase)
ch2 = check_cc(marrow)
table(ch2$Phase)
PCAPlot(ch1,group.by = "Phase")+ PCAPlot(ch2,group.by = "Phase") # no.2

library(patchwork)
#横纵坐标保持一致，更加直观一点
PCAPlot(ch1,group.by = "Phase")+ 
  PCAPlot(ch2,group.by = "Phase")&
  xlim(-60,15)&
  ylim(-10,15) #no.2
#比较S.Score和G2M.Score
p1 = VlnPlot(ch1,"S.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"S.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.6,0.6) #S.Score no.3
p1 = VlnPlot(ch1,"G2M.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"G2M.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.5,1) #G2M.Score，no.4

2.1.4 比较去除和不去处细胞周期影响的下游注释

f = "ob1.Rdata"
if(!file.exists(f)){
  ob1 = seu.obj %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    ScaleData(features = rownames(.)) %>%  
    RunPCA(features = VariableFeatures(.))  %>%
    FindNeighbors(dims = 1:15) %>% 
    FindClusters(resolution = 0.5) %>% 
    RunUMAP(dims = 1:15) %>% 
    RunTSNE(dims = 1:15)
  save(ob1,file = f)
}
load(f) 
#根据s期和g2m期各自有代表性的基因来打分，在ScaleData函数中加上vars.to.regress参数来消除影响
s.genes <- intersect(cc.genes$s.genes,rownames(seu.obj))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(seu.obj)) #cc.genes是自带的数据
f = "ob2.Rdata"
if(!file.exists(f)){
  ob2 = seu.obj %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    CellCycleScoring(s.features = s.genes, g2m.features = g2m.genes) %>%
    ScaleData(vars.to.regress = c("S.Score", "G2M.Score"),features = rownames(.)) %>%  #运行极其慢
    RunPCA(features = VariableFeatures(.))  %>%
    FindNeighbors(dims = 1:15) %>% 
    FindClusters(resolution = 0.5) %>% 
    RunUMAP(dims = 1:15) %>% 
    RunTSNE(dims = 1:15)
  save(ob2,file = f)
}
load(f) 
p1 <- DimPlot(ob1, reduction = "umap",label = T)+NoLegend()
p2 <- DimPlot(ob2, reduction = "umap",label = T)+NoLegend()
p1+p2

#注释用single
library(celldex)
library(SingleR)
ls("package:celldex")

f = "single_ref/ref_BlueprintEncode.RData"
if(!file.exists(f)){
  ref <-  celldex::BlueprintEncodeData()
  save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = ob1
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
m = scRNA
p3 #b不考虑细胞周期的注释
scRNA = ob2
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p4 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p4 #考虑细胞周期的注释
p3+p4

统计考虑和不考虑细胞周期的变化，要是false太多就得采用去除后的结果更为可信一点

> table(as.character(Idents(m))==as.character(Idents(scRNA)))

FALSE  TRUE 
  151  2723

多样本的细胞周期

#rm(list = ls())
library(tidyverse)
library(Seurat)
load("sce.little.Rdata") #day7读取并抽样的数据
sce.all[["percent.mt"]] <- PercentageFeatureSet(sce.all, pattern = "^MT-")
sce.all[["percent.rp"]] <- PercentageFeatureSet(sce.all, pattern = "^RP[SL]")
sce.all[["percent.hb"]] <- PercentageFeatureSet(sce.all, pattern = "^HB[^(P)]")

VlnPlot(sce.all, 
        features = c("nFeature_RNA",
                     "nCount_RNA", 
                     "percent.mt",
                     "percent.rp",
                     "percent.hb"),
        ncol = 3,pt.size = 0.5, group.by = "orig.ident")
        #根据小提琴图指定指标去掉离群值
sce.all = subset(sce.all,percent.mt < 20&
                   nCount_RNA < 40000 &
                   nFeature_RNA < 6000)
table(sce.all@meta.data$orig.ident)
seu.obj = sce.all
#单样本的marrow那步，再做一下
check_cc =  function(ob){
  s.genes <- intersect(cc.genes$s.genes,rownames(ob))
  g2m.genes <- intersect(cc.genes$g2m.genes,rownames(ob))
  ob = ob %>% 
  NormalizeData() %>%  
  FindVariableFeatures() %>%  
  CellCycleScoring(s.features = s.genes, 
                   g2m.features = g2m.genes) %>%
  ScaleData(features = rownames(.)) %>%  
  RunPCA(features = c(s.genes,g2m.genes))
  return(ob)
}
ch1 = check_cc(seu.obj)
head(ch1@meta.data)
table(ch1$Phase)
ch2 = check_cc(marrow)
table(ch2$Phase)
PCAPlot(ch1,group.by = "Phase")+ PCAPlot(ch2,group.by = "Phase")
library(patchwork)
PCAPlot(ch1,group.by = "Phase")+ 
  PCAPlot(ch2,group.by = "Phase")&
  xlim(-50,15)&
  ylim(-20,20)
  p1 = VlnPlot(ch1,"S.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"S.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.6,0.6)
p1 = VlnPlot(ch1,"G2M.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"G2M.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.5,1)
#不考虑细胞周期的降维数据分析
library(harmony)
f = "ob1m.Rdata"
if(!file.exists(f)){
  ob1 = seu.obj %>% 
  NormalizeData() %>%  
  FindVariableFeatures() %>%  
  ScaleData(features = rownames(.)) %>%  
  RunPCA(pc.genes = VariableFeatures(.))  %>%
  RunHarmony("orig.ident") %>%
  FindNeighbors(dims = 1:15, reduction = "harmony") %>% 
  FindClusters(resolution = 0.5) %>% 
  RunUMAP(dims = 1:15,reduction = "harmony") %>% 
  RunTSNE(dims = 1:15,reduction = "harmony")
  save(ob1,file = f)
}
load(f)
#考虑细胞周期的降维聚类分群
s.genes <- intersect(cc.genes$s.genes,rownames(seu.obj))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(seu.obj))
f = "ob2m.Rdata"
if(!file.exists(f)){
  ob2 = seu.obj %>% 
  NormalizeData() %>%  
  FindVariableFeatures() %>%  
  CellCycleScoring(s.features = s.genes, g2m.features = g2m.genes) %>%
  ScaleData(vars.to.regress = c("S.Score", "G2M.Score"),features = rownames(.)) %>%  #运行极其慢
  RunPCA(pc.genes = VariableFeatures(.))  %>%
  RunHarmony("orig.ident") %>%
  FindNeighbors(dims = 1:15, reduction = "harmony") %>% 
  FindClusters(resolution = 0.5) %>% 
  RunUMAP(dims = 1:15,reduction = "harmony") %>% 
  RunTSNE(dims = 1:15,reduction = "harmony")
  save(ob2,file = f)
}
load(f)
p1 <- DimPlot(ob1, reduction = "umap",label = T)+NoLegend() #不考虑
p2 <- DimPlot(ob2, reduction = "umap",label = T)+NoLegend() #考虑
p1+p2
#singleR注释
library(celldex)
library(SingleR)
ls("package:celldex")
f = "single_ref/ref_BlueprintEncode.RData"
if(!file.exists(f)){
  ref <-  celldex::BlueprintEncodeData()
  save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = ob1
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
m = scRNA
scRNA = ob2
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p4 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p3+p4
table(as.character(Idents(m))==as.character(Idents(scRNA)))

原创声明：本文系作者授权腾讯云开发者社区发表，未经许可，不得转载。

如有侵权，请联系 cloudcommunity@tencent.com 删除。

学习笔记

原创声明：本文系作者授权腾讯云开发者社区发表，未经许可，不得转载。

如有侵权，请联系 cloudcommunity@tencent.com 删除。

学习笔记

登录后参与评论

0 条评论

热度

单细胞二次分群和细胞周期

单细胞二次分群和细胞周期

二次分群

1 加载和分群

marker 基因及其可视化

上面是单独显示也可以放回去

2 细胞周期

2.1 1 marrow

2.1.2 计算细胞周期评分

2.1.3 比较两个数据的细胞周期评分和PCA

2.1.4 比较去除和不去处细胞周期影响的下游注释

多样本的细胞周期

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐