今天跟大家分享的是新鲜出炉的一篇骨髓单细胞测序文章【Mapping the cellular biogeography of human bone marrow niches using single-cell transcriptomics and proteomic imaging】,作者这次将目光聚焦于骨髓的间充质干细胞而非免疫细胞,此外还提供了分群策略,值得反复学习~
之前在群里看到有小伙伴提到了这篇文章,其独特之处还在于样本的整合没有用到harmony。出于好奇,又恰好作者提供了非常详尽的代码和数据,因此决定将这篇文章好好消化一下,包括思路和分析过程,强推!
Single-cell RNA-seq data can be downloaded from NCBI Gene Expression Omnibus (GEO) database with the accession number GSE253355
作者不仅提供了原始的features|barcodes|matrix文件,还直接提供了seurat文件,也方便我们对比。
GitHub at https://github.com/ tanlabcode/spatial-bonemarrow-atlas
管中窥豹,看:
文章的每一处figure的出处都可以找到对应的代码,还有比这更生动的”学习实践“吗?而这仅仅是其中一个小小的部分,如果肯花时间把整个流程多捋几遍,收获肯定很大~
当然,要说明的是,一开始合并样本的部分,代码实在是有一丢丢繁琐,应该是可以更精简一点的:
# Load the cellranger output files
MACS <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/SB51_H21andH23_scRNA/H14/With_Introns/MACS/filtered_feature_bc_matrix/")
H21 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/SB51_H21andH23_scRNA/H21/filtered_feature_bc_matrix/")
H23 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/SB51_H21andH23_scRNA/H23/filtered_feature_bc_matrix/")
H24 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/SB62_scRNASeq_S2/H24/filtered_feature_bc_matrix/")
H32 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/SB62_scRNASeq_S2/H32/filtered_feature_bc_matrix/")
H33 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/SB62_scRNASeq_S2/H33/filtered_feature_bc_matrix/")
H34 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/SB62_scRNASeq_S2/H34/filtered_feature_bc_matrix/")
H35 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/SB62_scRNASeq_S2/H35/filtered_feature_bc_matrix/")
H36 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/NBM_Atlas_scRNA/H36/filtered_feature_bc_matrix/")
H38 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/NBM_Atlas_scRNA/H38/filtered_feature_bc_matrix/")
H39 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/NBM_Atlas_scRNA/H39/filtered_feature_bc_matrix/")
H41 <- Read10X(data.dir = "~/Documents/NBM_Microenvironment/NBM_Atlas_scRNA/H41/filtered_feature_bc_matrix/")
# Initialize the Seurat object with the raw (non-normalized data).
MACS <- CreateSeuratObject(counts = MACS, project = "H14_MACS", min.cells = 3, min.features = 100)
MACS
H21 <- CreateSeuratObject(counts = H21, project = "H21", min.cells = 3, min.features = 100)
H21
H23 <- CreateSeuratObject(counts = H23, project = "H23", min.cells = 3, min.features = 100)
H23
H24 <- CreateSeuratObject(counts = H24, project = "H24", min.cells = 3, min.features = 100)
H24
H32 <- CreateSeuratObject(counts = H32, project = "H32", min.cells = 3, min.features = 100)
H32
H33 <- CreateSeuratObject(counts = H33, project = "H33", min.cells = 3, min.features = 100)
H33
H34 <- CreateSeuratObject(counts = H34, project = "H34", min.cells = 3, min.features = 100)
H34
H35 <- CreateSeuratObject(counts = H35, project = "H35", min.cells = 3, min.features = 100)
H35
H36 <- CreateSeuratObject(counts = H36, project = "H36", min.cells = 3, min.features = 100)
H36
H38 <- CreateSeuratObject(counts = H38, project = "H38", min.cells = 3, min.features = 100)
H38
H39 <- CreateSeuratObject(counts = H39, project = "H39", min.cells = 3, min.features = 100)
H39
H41 <- CreateSeuratObject(counts = H41, project = "H41", min.cells = 3, min.features = 100)
H41
然后,就是反复×3:
ob.list <- list(MACS, H21, H23,H24,H32,H33,H34,H35, H36, H38, H39, H41)
for (i in 1:length(ob.list)) {
ob.list[[i]][["percent.mt"]] <- PercentageFeatureSet(ob.list[[i]], pattern = "^MT-")
ob.list[[i]] <- subset(ob.list[[i]], subset = nFeature_RNA > 100 & nFeature_RNA < 10000 & percent.mt < 10)
# ob.list[[i]] <- subset(ob.list[[i]], cells = cells.use)
ob.list[[i]] <- NormalizeData(ob.list[[i]])
ob.list[[i]] <- FindVariableFeatures(ob.list[[i]])
ob.list[[i]] <- ScaleData(ob.list[[i]])
ob.list[[i]] <- RunPCA(ob.list[[i]], features = VariableFeatures(object = ob.list[[i]]))
ob.list[[i]] <- RunUMAP(ob.list[[i]], dims = 1:30, reduction.name = "UMAP_dim30", reduction.key = "UMAP_dim30_")
ob.list[[i]] <- RunUMAP(ob.list[[i]], dims = 1:50, reduction.name = "UMAP_dim50", reduction.key = "UMAP_dim50_")
ob.list[[i]] <- FindNeighbors(ob.list[[i]], dims = 1:30)
ob.list[[i]] <- FindClusters(ob.list[[i]], resolution = 1)
ob.list[[i]] <- FindDoublets(ob.list[[i]], PCs = 1:30, sct = FALSE, exp_rate = (length(colnames(ob.list[[i]]))/125000))
}
MACS <- ob.list[[1]]
H21 <- ob.list[[2]]
H23 <- ob.list[[3]]
H24 <- ob.list[[4]]
H32 <- ob.list[[5]]
H33 <- ob.list[[6]]
H34 <- ob.list[[7]]
H35 <- ob.list[[8]]
H36 <- ob.list[[9]]
H38 <- ob.list[[10]]
H39 <- ob.list[[11]]
H41 <- ob.list[[12]]
combined <- merge(x = MACS, y = c(H21, H23,H24, H32, H33,H34,H35,H36, H38, H39, H41), add.cell.ids = c("H14_MACS", "H21", "H23","H24", "H32", "H33", "H34", "H35", "H36", "H38", "H39","H41"), project = "SB62_NBM")
ob.list <- list(combined)
for (i in 1:length(ob.list)) {
ob.list[[i]][["percent.mt"]] <- PercentageFeatureSet(ob.list[[i]], pattern = "^MT-")
ob.list[[i]] <- subset(ob.list[[i]], subset = nFeature_RNA > 100 & nFeature_RNA < 10000 & percent.mt < 10)
ob.list[[i]] <- subset(ob.list[[i]], subset = Doublet_Singlet == "Singlet")
ob.list[[i]] <- NormalizeData(ob.list[[i]])
ob.list[[i]] <- FindVariableFeatures(ob.list[[i]])
ob.list[[i]] <- ScaleData(ob.list[[i]])
ob.list[[i]] <- RunPCA(ob.list[[i]], features = VariableFeatures(object = ob.list[[i]]))
ob.list[[i]] <- RunUMAP(ob.list[[i]], dims = 1:30, reduction.name = "UMAP_dim30", reduction.key = "UMAP_dim30_")
ob.list[[i]] <- RunUMAP(ob.list[[i]], dims = 1:50, reduction.name = "UMAP_dim50", reduction.key = "UMAP_dim50_")
ob.list[[i]] <- FindNeighbors(ob.list[[i]], dims = 1:30)
ob.list[[i]] <- FindClusters(ob.list[[i]], algorithm = 2, resolution = 1)
}
combined <- FindClusters(combined, algorithm = 2, resolution = 1.5)
combined <- ob.list[[1]]
combined <- subset(combined, subset = seurat_clusters != 46 & seurat_clusters != 41 & seurat_clusters != 45)
ob.list <- list(combined)
for (i in 1:length(ob.list)) {
ob.list[[i]][["percent.mt"]] <- PercentageFeatureSet(ob.list[[i]], pattern = "^MT-")
ob.list[[i]] <- subset(ob.list[[i]], nFeature_RNA > 300 & nCount_RNA > 1000 & nFeature_RNA < 10000 & percent.mt < 10)
ob.list[[i]] <- subset(ob.list[[i]], subset = Doublet_Singlet == "Singlet")
ob.list[[i]] <- NormalizeData(ob.list[[i]])
ob.list[[i]] <- FindVariableFeatures(ob.list[[i]])
ob.list[[i]] <- ScaleData(ob.list[[i]])
ob.list[[i]] <- RunPCA(ob.list[[i]], features = VariableFeatures(object = ob.list[[i]]))
ob.list[[i]] <- RunUMAP(ob.list[[i]], dims = 1:30, reduction.name = "UMAP_dim30", reduction.key = "UMAP_dim30_")
ob.list[[i]] <- RunUMAP(ob.list[[i]], dims = 1:50, reduction.name = "UMAP_dim50", reduction.key = "UMAP_dim50_")
ob.list[[i]] <- FindNeighbors(ob.list[[i]], dims = 1:30)
ob.list[[i]] <- FindClusters(ob.list[[i]], algorithm = 2, resolution = 1.5)
}
combined <- ob.list[[1]]
如果这一套在本地跑的话,一定要对自己的电脑有很大的信心才行😀
如果不想从开始跑的话,就直接下载rds文件赋值为combined,然后从
# Find and save all markers for combined UMAP
combined_markers <- FindAllMarkers(combined, max.cells.per.ident = 1000) # Downsample 1000 cells per cluster
write.csv(combined_markers, "SB66_combined_markers.csv")
这一步往下走😄
每出一个图可以直接和原文对比,比较异同。
此外,作者还在补充材料中提供了细胞亚群的marker,这可以成为后续我们对骨髓组织分群的依据
好记性不如烂笔头,我是手写方便随时翻阅啦~
除了作者自己的数据以外,作者还将自己的数据与另一个数据库中骨髓的数据进行了对比,这无疑又是另一个学习的资源
Azimuth (hubmapconsortium.org)