前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >day9 二次分群和细胞周期

day9 二次分群和细胞周期

原创
作者头像
昆兰
发布2024-07-01 15:33:23
570
发布2024-07-01 15:33:23
举报

二次分群

输入数据并指定感兴趣的细胞

这里我选择CD8T细胞

代码语言:R
复制
rm(list = ls())
library(Seurat)
library(dplyr)

load("../day5-6/sce.Rdata")
seu.obj = sce
p1 = DimPlot(seu.obj, reduction = "umap",label=T)+NoLegend();p1

my_sub = "CD8 T"
sub.cells <- subset(seu.obj, idents = my_sub)

寻常差异基因

sub.cells.markers

代码语言:R
复制
f = "obj.Rdata"
if(!file.exists(f)){
  sub.cells = sub.cells %>%
    NormalizeData() %>%
    FindVariableFeatures() %>%
    ScaleData(features = rownames(.)) %>%
    RunPCA(features = VariableFeatures(.))  %>%
    FindNeighbors(dims = 1:15) %>%
    FindClusters(resolution = 0.5) %>%
    RunUMAP(dims = 1:15) 
  save(sub.cells,file = f)
}

load(f)
DimPlot(sub.cells, reduction = 'umap',label = T)+NoLegend()
sub.cells@meta.data$celltype = paste0("M",sub.cells$seurat_clusters) #把子对象的亚群注释结果添加到表格上面去。
save(sub.cells,file = "sub.cells.Rdata")

sub.cells.markers <- FindAllMarkers(sub.cells, only.pos = TRUE,  
                                    min.pct = 0.25, logfc.threshold = 0.25)
#每个亚群选前3
top10 <- sub.cells.markers %>% 
  group_by(cluster) %>% 
  top_n(n = 3, wt = avg_log2FC) %>% 
  pull(gene);top10

可视化

代码语言:R
复制
VlnPlot(sub.cells, features = top10)
RidgePlot(sub.cells, features = top10)
FeaturePlot(sub.cells, features = top10)
DotPlot(sub.cells,features = top10)+ RotatedAxis()
DoHeatmap(sub.cells, features = top10) + NoLegend()

二次分群的注释(可以看到下图分了3群)

代码语言:R
复制
seu.obj$celltype = as.character(Idents(seu.obj))
seu.obj$celltype = ifelse(seu.obj$celltype==my_sub,
                          sub.cells$celltype[match(colnames(seu.obj),colnames(sub.cells))],
                          seu.obj$celltype)
Idents(seu.obj) = seu.obj$celltype
p2 = DimPlot(seu.obj,label = T)+NoLegend()
p1+p2

细胞周期

输入数据(两份)

  • 一份是我们前面的数据seu.obj,分为单样本和多样本的两种情况
代码语言:R
复制
#如果是单样本
load("../day5-6/sce.Rdata")
#如果是多样本
load("../day7/sce.all.Rdata")
  • 另一份是受细胞周期影响大的marroww数据,表达矩阵下载后读取并创建对象
代码语言:R
复制
exp.mat <- read.delim("nestorawa_forcellcycle_expressionMatrix.txt",row.names = 1)
marrow <- CreateSeuratObject(counts = exp.mat,
                             project = "b",
                             min.cells = 3, 
                             min.features = 200)
marrow[["percent.mt"]] <- PercentageFeatureSet(marrow, pattern = "^MT-") 
#提琴图可以看出是一个过滤后的数据
VlnPlot(marrow, 
        features = c("nFeature_RNA","nCount_RNA", "percent.mt", 
        ncol = 3,pt.size = 0.5) 

细胞周期周期评分

自定义一个评分函数;主要用到CellCycleScoring

代码语言:R
复制
check_cc =  function(ob){
  s.genes <- intersect(cc.genes$s.genes,rownames(ob))
  g2m.genes <- intersect(cc.genes$g2m.genes,rownames(ob))
  ob = ob %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    CellCycleScoring(s.features = s.genes, 
                     g2m.features = g2m.genes) %>%
    ScaleData(features = rownames(.)) %>%  
    RunPCA(features = c(s.genes,g2m.genes))
  return(ob)
}
#对两份数据进行评分
ch1 = check_cc(seu.obj);table(ch1$Phase)
ch2 = check_cc(marrow);table(ch2$Phase)

可视化

  • 调整坐标轴比较两个数据的评分和PCA
代码语言:R
复制
library(patchwork)
PCAPlot(ch1,group.by = "Phase")+ 
  PCAPlot(ch2,group.by = "Phase")&
  xlim(-60,15)&
  ylim(-10,15)
  • 再比较一下S.Score和G2M.Score
代码语言:R
复制
p1 = VlnPlot(ch1,"S.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"S.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.6,0.6)
p1 = VlnPlot(ch1,"G2M.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"G2M.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.5,1)

去除细胞周期的影响

如果是大多数点都集中在0点附近的,就可以不用去除细胞周期的影响!,分布范围较广或者是有较多的离群值那就需要要去除。

ob1对象1

代码语言:R
复制
### 不考虑细胞周期
f = "ob1.Rdata"
if(!file.exists(f)){
  ob1 = seu.obj %>% 
    NormalizeData() %>%  
    FindVariableFeatures() %>%  
    ScaleData(features = rownames(.)) %>%  
    RunPCA(features = VariableFeatures(.))  %>%
    FindNeighbors(dims = 1:15) %>% 
    FindClusters(resolution = 0.5) %>% 
    RunUMAP(dims = 1:15) %>% 
    RunTSNE(dims = 1:15)
  save(ob1,file = f)
}
load(f)

ob2对象2

代码语言:R
复制
### 考虑细胞周期
#cc.genes是Seurat包里自带的数据,无需任何赋值或加载的操作
s.genes <- intersect(cc.genes$s.genes,rownames(seu.obj))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(seu.obj))

f = "ob2.Rdata"
if(!file.exists(f)){
  ob2 = seu.obj %>% 
  NormalizeData() %>%  
  FindVariableFeatures() %>%  
  CellCycleScoring(s.features = s.genes, g2m.features = g2m.genes) %>%
  ScaleData(vars.to.regress = c("S.Score", "G2M.Score"),features = rownames(.)) %>%  #运行极其慢
  RunPCA(features = VariableFeatures(.))  %>%
  FindNeighbors(dims = 1:15) %>% 
  FindClusters(resolution = 0.5) %>% 
  RunUMAP(dims = 1:15) %>% 
  RunTSNE(dims = 1:15)
  save(ob2,file = f)
}

可视化比较

代码语言:R
复制
p1 <- DimPlot(ob1, reduction = "umap",label = T)+NoLegend()
p2 <- DimPlot(ob2, reduction = "umap",label = T)+NoLegend()
p1+p2

#加注释版本
library(celldex)
library(SingleR)

f = "../day5-6/ref_BlueprintEncode.RData"
if(!file.exists(f)){
  ref <- celldex::HumanPrimaryCellAtlasData()
  save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = ob1  #1
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
m = scRNA

scRNA = ob2 #2
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test, 
                      ref = ref,
                      labels = ref$label.main, 
                      clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p4 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p3+p4

table(as.character(Idents(m))==as.character(Idents(scRNA)))
#查看区别

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
作者已关闭评论
0 条评论
热度
最新
推荐阅读
目录
  • 二次分群
    • 输入数据并指定感兴趣的细胞
      • 寻常差异基因
        • 可视化
        • 细胞周期
          • 输入数据(两份)
            • 细胞周期周期评分
              • 可视化
                • 去除细胞周期的影响
                领券
                问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档