小绿书:生信医道
数据号:GSE163558
用此篇文献提供数据处理,单细胞下游处理代码
https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE163558 ,GEO数据库搜索GEO数据号,下载并整理成Seurat所需的格式。
rm(list=ls())
setwd("./11.29-scRNAseq-GSE163558")
library(data.table)
library(stringr)
library(data.table)
library(Seurat)
library(dplyr)
library(Seurat)
#library(infercnv)
library(dplyr)
library(ggplot2)
library(ggplotify)
library(NMF)
library(ggalluvial)
library(CellChat)
library(Seurat)
options("repos" = c(CRAN = "https://mirrors.westlake.edu.cn/CRAN/"))
getwd()
dir <- "./11.29-scRNAseq-GSE163558/GSE163558"
# 获取样本列表
samples <- c("PT1", "PT2", "PT3", "NT1", "LN1", "LN2", "O1", "P1", "Li1", "Li2")
# 设置全局选项以创建V5版本的assays
options(Seurat.object.assay.version = "v5")
# 使用lapply函数遍历样本列表
sceList <- lapply(samples, function(pro) {
print(pro) # 打印当前处理的样本名称
# 读取10X Genomics数据
tmp <- Read10X(file.path(dir, pro))
# 根据Read10X的返回值创建Seurat对象
# 如果返回两个对象(细胞和特征),则使用第一个
if (length(tmp) == 2) {
ct <- tmp[[1]]
} else {
ct <- tmp
}
# 使用CreateSeuratObject函数创建Seurat对象
sce <- CreateSeuratObject(counts = ct,
project = gsub('GSM[0-9]*_rep[0-9]_','', pro),
min.cells = 0,
min.features = 0)
return(sce)
})
# 打印Seurat对象列表
print(sceList)
# 方法1:显式指定 reference
do.call(rbind,lapply(sceList,dim))
scRNAlist=merge(x=sceList[[1]],y=sceList[-1],add.cell.ids=samples)
names(scRNAlist@assays$RNA@layers)
dim(table(scRNAlist@meta.data$orig.ident))
# 计算线粒体比例
scRNAlist[["percent.mt"]] <- PercentageFeatureSet(scRNAlist, pattern = "^MT-")
# QC筛选
# 过滤细胞
scRNAlist <- subset(scRNAlist,
subset = nFeature_RNA > 200 & # 至少200个基因
nFeature_RNA < 5000 & # 原文是5000,你的是7000
percent.mt < 20) # 原文是20%线粒体基因
# 标准化
scRNAlist <- NormalizeData(scRNAlist, normalization.method = "LogNormalize")
# 找到2000个高可变基因
scRNAlist <- FindVariableFeatures(scRNAlist, nfeatures = 2000)
# 细胞周期基因
# 1. 加载细胞周期基因
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
# 先进行标准化,保证有data层
for(i in 1:length(sceList)){
sceList[[i]] <- NormalizeData(sceList[[i]])
sceList[[i]] <- CellCycleScoring(sceList[[i]],
s.features = s.genes,
g2m.features = g2m.genes,
set.ident = TRUE)
}
# 回归UMI数量、线粒体污染和细胞周期的影响
scRNAlist <- ScaleData(scRNAlist,
vars.to.regress = c("nCount_RNA",
"percent.mt",
"S.Score",
"G2M.Score"))
# 先运行PCA
scRNAlist <- RunPCA(scRNAlist, verbose = FALSE)
# 然后进行整合
scRNAlist <- IntegrateLayers(object = scRNAlist,
method = HarmonyIntegration,
orig.reduction = "pca",
new.reduction = "integrated.Harmony",
verbose = FALSE)
# 重新连接层
scRNAlist[["RNA"]] <- JoinLayers(scRNAlist[["RNA"]])
# 选择合适的PC数量
Seurat::ElbowPlot(scRNAlist, ndims = 50)
# 聚类分析
scRNAlist <- FindNeighbors(scRNAlist,
reduction = "integrated.Harmony",
dims = 1:30)
scRNAlist <- FindClusters(scRNAlist, resolution = 0.4)
# 降维可视化
scRNAlist <- RunTSNE(scRNAlist,
dims = 1:30,
reduction = "integrated.Harmony")
DimPlot(scRNAlist, reduction = "tsne", label = TRUE,group.by = "seurat_clusters")
DimPlot(scRNAlist, reduction = "tsne", group.by = "orig.ident")
# 查看细胞周期的影响
save(scRNAlist,file="11.29-Rdata.Rdata")
load(file="11.29-Rdata.Rdata")
cell_types_markers <- list(
Myeloid_cell = c("CSF1R", "CSF3R", "CD68"),
Epithelial_cell = c("MUC1", "KRT18", "KRT19", "CLDN4", "EPCAM"),
T_cell = c("CD3E", "CD2","CD3G","CD3D"),
Stromal_cell = c("COL1A2", "DCN", "PECAM1", "VCAM1","VWF"),
Proliferative_cell = c("MK67", "PCNA", "STMN1"),
B_cell = c("CD79A", "IGHG1", "MS4A1"),
NK_cell = c("GNLY", "KLRF1","KLRD1")
)
dot_plot <- DotPlot(scRNAlist, features = cell_types_markers) +
scale_color_gradientn(colors = c("blue", "white", "red"))
# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
dot_plot
table(scRNAlist$seurat_clusters)
# 添加celltype列
scRNAlist$celltype <- "Unassigned" # 先将所有细胞标记为未分配
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(3,7))] <- "Myeloid_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(6))] <- "Epithelial_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(0,1,5))] <- "T_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(11,13,15))] <- "Stromal_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(10))] <- "Proliferative_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(2,9,12,14))] <- "B_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(8))] <- "NK_cell"
# 将celltype设置为活跃标识
Idents(scRNAlist) <- "celltype"
# 可视化注释结果
DimPlot(scRNAlist, reduction = "tsne", label = TRUE, pt.size = 0.5)
# 删除未分配的细胞
scRNAlist <- subset(scRNAlist, celltype != "Unassigned")
# 检查结果
table(scRNAlist$celltype)
table(scRNAlist$seurat_clusters,scRNAlist$celltype)
Idents(scRNAlist) <- 'celltype'
table(scRNAlist$celltype)
DimPlot(scRNAlist, reduction = "tsne", label = T)
pdf("24-11-29-celltype.pdf",width = 8,height = 6)
DimPlot(scRNAlist, reduction = "tsne", label = T,group.by = "celltype",pt.size = 0.5)
dev.off()
pdf("24-11-29-sample.pdf",width = 8,height = 6)
DimPlot(scRNAlist, reduction = "tsne", label = F,group.by = "orig.ident",pt.size = 0.5)
dev.off()
head(scRNAlist@meta.data)
table(scRNAlist$orig.ident)
# 添加样本分组信息
scRNAlist$group = scRNAlist$orig.ident
# 添加转移部位分组信息
scRNAlist$sample = scRNAlist$orig.ident
scRNAlist$sample = gsub("PT1|PT2|PT3", "Primary_Tumor", scRNAlist$sample)
scRNAlist$sample = gsub("LN1|LN2", "Lymph_Node_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("O1", "Ovary_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("P1", "Peritoneum_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("Li1|Li2", "Liver_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("NT1", "Adjacent_Nontumor", scRNAlist$sample)
# 添加患者来源信息
scRNAlist$patient = case_when(
scRNAlist$orig.ident %in% c("PT1", "Li1") ~ "Patient1",
scRNAlist$orig.ident %in% c("PT2", "NT1") ~ "Patient2",
scRNAlist$orig.ident == "O1" ~ "Patient3",
scRNAlist$orig.ident == "LN2" ~ "Patient5",
scRNAlist$orig.ident == "P1" ~ "Patient6",
TRUE ~ "Patient4"
)
# 添加组织类型信息
scRNAlist$tissue <- case_when(
grepl("^PT", scRNAlist$orig.ident) ~ "Primary_Tumor",
grepl("^NT", scRNAlist$orig.ident) ~ "Adjacent_Nontumor",
grepl("^LN|^O1|^P1|^Li", scRNAlist$orig.ident) ~ "Metastasis",
TRUE ~ "Unknown"
)
# 查看分组情况
table(scRNAlist$group)
table(scRNAlist$sample)
table(scRNAlist$patient)
table(scRNAlist$tissue)
pdf("24-11-29-tissue.pdf",width = 8,height = 6)
DimPlot(scRNAlist, reduction = "tsne", label = F,group.by = "tissue",pt.size = 0.5)
dev.off()
cell_types_markers <- c(
"CD3E", "CD2","CD3G","CD3D",
"COL1A2", "DCN", "PECAM1", "VCAM1","VWF",
"MK67", "PCNA", "STMN1",
"GNLY", "KLRF1","KLRD1",
"CSF1R", "CSF3R", "CD68",
"MUC1", "KRT18", "KRT19", "CLDN4", "EPCAM",
"CD79A", "IGHG1", "MS4A1"
)
dot_plot <- DotPlot(scRNAlist, features = cell_types_markers,group.by ="celltype" ) +
scale_color_gradientn(colors = c("blue", "white", "red"))
# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
pdf("24-11-29-marker.pdf",width = 10,height = 6)
dot_plot
dev.off()
每一个cluster的marker基因点图
注释过的tsne聚类图
细胞亚群marker基因表达点图
不同疾病状态的tsne图
Myeloid=subset(scRNAlist,celltype=="Myeloid_cell")
Myeloid <- NormalizeData(Myeloid)
Myeloid <- FindVariableFeatures(Myeloid)
Myeloid <- ScaleData(Myeloid, vars.to.regress = c("percent.mt"))
Myeloid <- RunPCA(Myeloid, verbose=F)
Seurat::ElbowPlot(Myeloid, ndims = 50)
Myeloid <- FindNeighbors(Myeloid, reduction = "integrated.Harmony", dims = 1:20)
Myeloid <- FindClusters(Myeloid, resolution = 0.2)
Myeloid <- RunTSNE(Myeloid, dims = 1:20, reduction = "integrated.Harmony")
DimPlot(Myeloid, reduction = "tsne", label = T,pt.size = 0.6)
cell_types_markers <- list(
Monocytes = c("MS4A4A", "LYZ", "S100A8", "S100A9"),
Macrophages = c( "VCAN", "FCN1", "C1QB", "C1QC", "MSR1", "APOE", "CD163", "MRC1"),
Dendritic_cell = c("CD1E", "CD1C", "FCER1A", "CLEC10A"),
Mast_cell = c("TPSAB1", "KIT", "CPA3")
)
cell_types_markers <- list(
Monocytes = c("S100A8", "S100A9"),
Macrophages = c( "C1QB", "C1QC", "MSR1", "APOE", "CD163", "MRC1"),
Dendritic_cell = c("BIRC3","HLA-DPB1","CLEC10A")
)
#a=as.data.frame(rownames(Myeloid))
dot_plot <- DotPlot(Myeloid, features = cell_types_markers) +
scale_color_gradientn(colors = c("blue", "white", "red"))
# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
dot_plot
table(Myeloid$seurat_clusters)
# 添加celltype列
Myeloid$celltype <- "Unassigned" # 先将所有细胞标记为未分配
Myeloid$celltype[which(Myeloid$seurat_clusters %in% c(0,1))] <- "Monocytes"
Myeloid$celltype[which(Myeloid$seurat_clusters %in% c(3))] <- "Macrophages"
Myeloid$celltype[which(Myeloid$seurat_clusters %in% c(2))] <- "Dendritic_cell"
# 将celltype设置为活跃标识
Idents(Myeloid) <- "celltype"
save(Myeloid,file="11-29-Myeloid.Rdata")
# 可视化注释结果
pdf("24-11-29-Myeloid.pdf",width = 8,height = 6)
DimPlot(Myeloid, reduction = "tsne", label = TRUE, pt.size = 1)
# 删除未分配的细胞
dev.off()
cell_types_markers <- c(
"S100A8", "S100A9",
"C1QB", "C1QC", "MSR1", "APOE", "CD163", "MRC1",
"BIRC3","HLA-DPB1","CLEC10A"
)
dot_plot <- DotPlot(Myeloid, features = cell_types_markers,group.by = "celltype") +
scale_color_gradientn(colors = c("blue", "white", "red"))
# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
pdf("24-11-29-Myeloid-marker.pdf",width = 10,height = 6)
dot_plot
dev.off()
髓系细胞celltype的tsne图
髓系细胞marker基因图
本次数据处理和代码分享就到此结束~
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。