用的数据是降维聚类分群 + 自动注释的数据
seu.obj = scRNA
head(seu.obj@meta.data)
library(Seurat)
library(dplyr)
p1 = DimPlot(seu.obj, reduction = "umap",label=T)+NoLegend()
p1
levels(Idents(scRNA)) #看一下都有啥类型
my_sub = "Endothelial_cells"
seu.obj = scRNA
sub.cells <- subset(seu.obj, idents = my_sub)
f = "obj0626Rdata" # 新建一个
if(!file.exists(f)){
sub.cells = sub.cells %>%
NormalizeData() %>%
FindVariableFeatures() %>%
ScaleData(features = rownames(.)) %>%
RunPCA(features = VariableFeatures(.)) %>%
FindNeighbors(dims = 1:15) %>%
FindClusters(resolution = 0.5) %>%
RunUMAP(dims = 1:15)
save(sub.cells,file = f)
}
load(f)
DimPlot(sub.cells, reduction = 'umap',label = T)+NoLegend() #出图
sub.cells@meta.data$celltype = paste0("M",sub.cells$seurat_clusters) #把子对象的亚群注释结果添加到表格上面去。
save(sub.cells,file = "sub.cells.Rdata")
找差异基因
sub.cells.markers <- FindAllMarkers(sub.cells, only.pos = TRUE,
min.pct = 0.25, logfc.threshold = 0.25)
top10 <- sub.cells.markers %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC) %>%
pull(gene);top10
VlnPlot(sub.cells, features = top10) #小提琴可视化,no.1
top5 <- sub.cells.markers %>% #减少一点看看
group_by(cluster) %>%
top_n(n = 5, wt = avg_log2FC) %>%
pull(gene); top5
VlnPlot(sub.cells, features = top5)#小提琴可视化 no.2
RidgePlot(sub.cells, features = top5)#山脊图可视化 no.3
FeaturePlot(sub.cells, features = top5) #特征点图 no.4
DotPlot(sub.cells,features = top10)+ RotatedAxis() #no.5
DoHeatmap(sub.cells, features = top5) + NoLegend()#no.6
match函数时匹配,用于找出一个向量中的元素在另一个向量中的对应位置
seu.obj$celltype = as.character(Idents(seu.obj))
seu.obj$celltype = ifelse(seu.obj$celltype==my_sub,
sub.cells$celltype[match(colnames(seu.obj),colnames(sub.cells))],
seu.obj$celltype) #使用ifelse函数实现了分情况讨论:判断seu.obj的每个细胞是否是my_sub(本例是Fibroblasts),如果是的话,就替换成sub.cells里面匹配的每个细胞对应的celltype,不是就不管
Idents(seu.obj) = seu.obj$celltype
p2 = DimPlot(seu.obj,label = T)+NoLegend()
p1+p2
细胞的有丝分裂,分为分裂期(M)和分裂间期(G1,S,G2),细胞处于不同的细胞周期时,代谢活跃状态和染色体的状态大不相同,所以在不同周期表达量是有差异的
直接比较表达量是不可取的,不过得分情况,有些对下游的聚类和分群影响比较大,有些影响比较小。
> dir.create("day9")
> save(ct,/day9)
Error: unexpected '/' in "save(ct,/"
> save(ct, file = "day9/day9.RData")
> exp.mat <- read.delim("day9/nestorawa_forcellcycle_expressionMatrix.txt",row.names = 1)
> marrow <- CreateSeuratObject(counts = exp.mat,
+ project = "b",
+ min.cells = 3,
+ min.features = 200) #nestorawa_forcellcycle_expressionMatrix.txt,我觉得这个的意义在于比较
Warning: Data is of class data.frame. Coercing to dgCMatrix.
> marrow[["percent.mt"]] <- PercentageFeatureSet(marrow, pattern = "^MT-")
> head(marrow@meta.data, 3)
orig.ident nCount_RNA nFeature_RNA percent.mt
Prog_013 Prog 2563086 10208 5.187809
Prog_019 Prog 3030619 9990 5.965877
Prog_031 Prog 1293487 10192 5.664456
> VlnPlot(marrow,
+ features = c("nFeature_RNA",
+ "nCount_RNA",
+ "percent.mt"),
+ ncol = 3,pt.size = 0.5)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.
# 计算细胞周期评分
check_cc = function(ob){
s.genes <- intersect(cc.genes$s.genes,rownames(ob))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(ob))
ob = ob %>%
NormalizeData() %>%
FindVariableFeatures() %>%
CellCycleScoring(s.features = s.genes, #计算细胞周期评分的函数
g2m.features = g2m.genes) %>%
ScaleData(features = rownames(.)) %>%
RunPCA(features = c(s.genes,g2m.genes))
return(ob)
}
ch1 = check_cc(seu.obj)
head(ch1@meta.data)
orig.ident nCount_RNA nFeature_RNA percent.mt S.Score
AAACCCACAGTCGGTC-1 SeuratProject 4243 1256 6.292717 -0.02545080
AAACGAAAGAATCGCG-1 SeuratProject 7307 2577 2.572875 0.03719043
AAAGAACAGCTTACGT-1 SeuratProject 8154 1881 4.083885 0.04363704
AAAGAACAGGTTCATC-1 SeuratProject 8223 2182 4.377964 -0.05930417
AAAGAACAGTCTGTAC-1 SeuratProject 3884 1377 4.763131 -0.02251541
AAAGAACTCCACCTCA-1 SeuratProject 3997 1307 3.402552 0.01273964
G2M.Score Phase
AAACCCACAGTCGGTC-1 0.05694222 G2M
AAACGAAAGAATCGCG-1 -0.12520510 S
AAAGAACAGCTTACGT-1 -0.02283322 S
AAAGAACAGGTTCATC-1 -0.03018975 G1
AAAGAACAGTCTGTAC-1 0.02168140 G2M
AAAGAACTCCACCTCA-1 -0.13158839 S
函数处理之后meta.data多了3列,分别是s和g2m的评分以及推断的细胞周期
table(ch1$Phase)
ch2 = check_cc(marrow)
table(ch2$Phase)
PCAPlot(ch1,group.by = "Phase")+ PCAPlot(ch2,group.by = "Phase") # no.2
library(patchwork)
#横纵坐标保持一致,更加直观一点
PCAPlot(ch1,group.by = "Phase")+
PCAPlot(ch2,group.by = "Phase")&
xlim(-60,15)&
ylim(-10,15) #no.2
#比较S.Score和G2M.Score
p1 = VlnPlot(ch1,"S.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"S.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.6,0.6) #S.Score no.3
p1 = VlnPlot(ch1,"G2M.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"G2M.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.5,1) #G2M.Score,no.4
f = "ob1.Rdata"
if(!file.exists(f)){
ob1 = seu.obj %>%
NormalizeData() %>%
FindVariableFeatures() %>%
ScaleData(features = rownames(.)) %>%
RunPCA(features = VariableFeatures(.)) %>%
FindNeighbors(dims = 1:15) %>%
FindClusters(resolution = 0.5) %>%
RunUMAP(dims = 1:15) %>%
RunTSNE(dims = 1:15)
save(ob1,file = f)
}
load(f)
#根据s期和g2m期各自有代表性的基因来打分,在ScaleData函数中加上vars.to.regress参数来消除影响
s.genes <- intersect(cc.genes$s.genes,rownames(seu.obj))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(seu.obj)) #cc.genes是自带的数据
f = "ob2.Rdata"
if(!file.exists(f)){
ob2 = seu.obj %>%
NormalizeData() %>%
FindVariableFeatures() %>%
CellCycleScoring(s.features = s.genes, g2m.features = g2m.genes) %>%
ScaleData(vars.to.regress = c("S.Score", "G2M.Score"),features = rownames(.)) %>% #运行极其慢
RunPCA(features = VariableFeatures(.)) %>%
FindNeighbors(dims = 1:15) %>%
FindClusters(resolution = 0.5) %>%
RunUMAP(dims = 1:15) %>%
RunTSNE(dims = 1:15)
save(ob2,file = f)
}
load(f)
p1 <- DimPlot(ob1, reduction = "umap",label = T)+NoLegend()
p2 <- DimPlot(ob2, reduction = "umap",label = T)+NoLegend()
p1+p2
#注释用single
library(celldex)
library(SingleR)
ls("package:celldex")
f = "single_ref/ref_BlueprintEncode.RData"
if(!file.exists(f)){
ref <- celldex::BlueprintEncodeData()
save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = ob1
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test,
ref = ref,
labels = ref$label.main,
clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
m = scRNA
p3 #b不考虑细胞周期的注释
scRNA = ob2
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test,
ref = ref,
labels = ref$label.main,
clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p4 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p4 #考虑细胞周期的注释
p3+p4
统计考虑和不考虑细胞周期的变化,要是false太多就得采用去除后的结果更为可信一点
> table(as.character(Idents(m))==as.character(Idents(scRNA)))
FALSE TRUE
151 2723
#rm(list = ls())
library(tidyverse)
library(Seurat)
load("sce.little.Rdata") #day7读取并抽样的数据
sce.all[["percent.mt"]] <- PercentageFeatureSet(sce.all, pattern = "^MT-")
sce.all[["percent.rp"]] <- PercentageFeatureSet(sce.all, pattern = "^RP[SL]")
sce.all[["percent.hb"]] <- PercentageFeatureSet(sce.all, pattern = "^HB[^(P)]")
VlnPlot(sce.all,
features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt",
"percent.rp",
"percent.hb"),
ncol = 3,pt.size = 0.5, group.by = "orig.ident")
#根据小提琴图指定指标去掉离群值
sce.all = subset(sce.all,percent.mt < 20&
nCount_RNA < 40000 &
nFeature_RNA < 6000)
table(sce.all@meta.data$orig.ident)
seu.obj = sce.all
#单样本的marrow那步,再做一下
check_cc = function(ob){
s.genes <- intersect(cc.genes$s.genes,rownames(ob))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(ob))
ob = ob %>%
NormalizeData() %>%
FindVariableFeatures() %>%
CellCycleScoring(s.features = s.genes,
g2m.features = g2m.genes) %>%
ScaleData(features = rownames(.)) %>%
RunPCA(features = c(s.genes,g2m.genes))
return(ob)
}
ch1 = check_cc(seu.obj)
head(ch1@meta.data)
table(ch1$Phase)
ch2 = check_cc(marrow)
table(ch2$Phase)
PCAPlot(ch1,group.by = "Phase")+ PCAPlot(ch2,group.by = "Phase")
library(patchwork)
PCAPlot(ch1,group.by = "Phase")+
PCAPlot(ch2,group.by = "Phase")&
xlim(-50,15)&
ylim(-20,20)
p1 = VlnPlot(ch1,"S.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"S.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.6,0.6)
p1 = VlnPlot(ch1,"G2M.Score",group.by = "Phase")
p2 = VlnPlot(ch2,"G2M.Score",group.by = "Phase")
wrap_plots(p1,p2,nrow = 1) & ylim(-0.5,1)
#不考虑细胞周期的降维数据分析
library(harmony)
f = "ob1m.Rdata"
if(!file.exists(f)){
ob1 = seu.obj %>%
NormalizeData() %>%
FindVariableFeatures() %>%
ScaleData(features = rownames(.)) %>%
RunPCA(pc.genes = VariableFeatures(.)) %>%
RunHarmony("orig.ident") %>%
FindNeighbors(dims = 1:15, reduction = "harmony") %>%
FindClusters(resolution = 0.5) %>%
RunUMAP(dims = 1:15,reduction = "harmony") %>%
RunTSNE(dims = 1:15,reduction = "harmony")
save(ob1,file = f)
}
load(f)
#考虑细胞周期的降维聚类分群
s.genes <- intersect(cc.genes$s.genes,rownames(seu.obj))
g2m.genes <- intersect(cc.genes$g2m.genes,rownames(seu.obj))
f = "ob2m.Rdata"
if(!file.exists(f)){
ob2 = seu.obj %>%
NormalizeData() %>%
FindVariableFeatures() %>%
CellCycleScoring(s.features = s.genes, g2m.features = g2m.genes) %>%
ScaleData(vars.to.regress = c("S.Score", "G2M.Score"),features = rownames(.)) %>% #运行极其慢
RunPCA(pc.genes = VariableFeatures(.)) %>%
RunHarmony("orig.ident") %>%
FindNeighbors(dims = 1:15, reduction = "harmony") %>%
FindClusters(resolution = 0.5) %>%
RunUMAP(dims = 1:15,reduction = "harmony") %>%
RunTSNE(dims = 1:15,reduction = "harmony")
save(ob2,file = f)
}
load(f)
p1 <- DimPlot(ob1, reduction = "umap",label = T)+NoLegend() #不考虑
p2 <- DimPlot(ob2, reduction = "umap",label = T)+NoLegend() #考虑
p1+p2
#singleR注释
library(celldex)
library(SingleR)
ls("package:celldex")
f = "single_ref/ref_BlueprintEncode.RData"
if(!file.exists(f)){
ref <- celldex::BlueprintEncodeData()
save(ref,file = f)
}
ref <- get(load(f))
library(BiocParallel)
scRNA = ob1
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test,
ref = ref,
labels = ref$label.main,
clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p3 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
m = scRNA
scRNA = ob2
test = scRNA@assays$RNA$data
pred.scRNA <- SingleR(test = test,
ref = ref,
labels = ref$label.main,
clusters = scRNA@active.ident)
new.cluster.ids <- pred.scRNA$pruned.labels
names(new.cluster.ids) <- levels(scRNA)
scRNA <- RenameIdents(scRNA,new.cluster.ids)
p4 <- DimPlot(scRNA, reduction = "umap",label = T,pt.size = 0.5) + NoLegend()
p3+p4
table(as.character(Idents(m))==as.character(Idents(scRNA)))
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。