GSE163558单细胞数据处理流程分享

原创

生信医道

修改于 2025-02-06 11:14:45

1540

文章被收录于专栏：单细胞分享单细胞分享

前言

小绿书：生信医道

数据号：GSE163558

用此篇文献提供数据处理，单细胞下游处理代码

处理流程

数据下载

https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE163558 ,GEO数据库搜索GEO数据号，下载并整理成Seurat所需的格式。

加载R包

rm(list=ls())
setwd("./11.29-scRNAseq-GSE163558")
library(data.table)
library(stringr)
library(data.table)
library(Seurat)
library(dplyr)
library(Seurat)
#library(infercnv)
library(dplyr)
library(ggplot2)
library(ggplotify)
library(NMF)
library(ggalluvial)
library(CellChat)
library(Seurat)
options("repos" = c(CRAN = "https://mirrors.westlake.edu.cn/CRAN/"))
getwd()

读取数据

dir <- "./11.29-scRNAseq-GSE163558/GSE163558"
# 获取样本列表
samples <- c("PT1", "PT2", "PT3", "NT1", "LN1", "LN2", "O1", "P1", "Li1", "Li2")
# 设置全局选项以创建V5版本的assays
options(Seurat.object.assay.version = "v5")
# 使用lapply函数遍历样本列表
sceList <- lapply(samples, function(pro) {
  print(pro)  # 打印当前处理的样本名称
  
  # 读取10X Genomics数据
  tmp <- Read10X(file.path(dir, pro))
  
  # 根据Read10X的返回值创建Seurat对象
  # 如果返回两个对象（细胞和特征），则使用第一个
  if (length(tmp) == 2) {
    ct <- tmp[[1]]
  } else {
    ct <- tmp
  }
  
  # 使用CreateSeuratObject函数创建Seurat对象
  sce <- CreateSeuratObject(counts = ct,
                            project = gsub('GSM[0-9]*_rep[0-9]_','', pro),
                            min.cells = 0,
                            min.features = 0)
  
  return(sce)
})

# 打印Seurat对象列表
print(sceList)

数据质量控制

# 方法1：显式指定 reference
do.call(rbind,lapply(sceList,dim))
scRNAlist=merge(x=sceList[[1]],y=sceList[-1],add.cell.ids=samples)
names(scRNAlist@assays$RNA@layers)
dim(table(scRNAlist@meta.data$orig.ident))

# 计算线粒体比例
scRNAlist[["percent.mt"]] <- PercentageFeatureSet(scRNAlist, pattern = "^MT-")

# QC筛选
# 过滤细胞
scRNAlist <- subset(scRNAlist, 
                    subset = nFeature_RNA > 200 &   # 至少200个基因
                             nFeature_RNA < 5000 &  # 原文是5000，你的是7000
                             percent.mt < 20)       # 原文是20%线粒体基因

# 标准化
scRNAlist <- NormalizeData(scRNAlist, normalization.method = "LogNormalize")

# 找到2000个高可变基因
scRNAlist <- FindVariableFeatures(scRNAlist, nfeatures = 2000)

# 细胞周期基因
# 1. 加载细胞周期基因
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes


# 先进行标准化，保证有data层
for(i in 1:length(sceList)){
  sceList[[i]] <- NormalizeData(sceList[[i]])
  sceList[[i]] <- CellCycleScoring(sceList[[i]], 
                                   s.features = s.genes, 
                                   g2m.features = g2m.genes, 
                                   set.ident = TRUE)
}

# 回归UMI数量、线粒体污染和细胞周期的影响
scRNAlist <- ScaleData(scRNAlist, 
                       vars.to.regress = c("nCount_RNA", 
                                           "percent.mt", 
                                           "S.Score", 
                                           "G2M.Score"))

# 先运行PCA
scRNAlist <- RunPCA(scRNAlist, verbose = FALSE)

# 然后进行整合
scRNAlist <- IntegrateLayers(object = scRNAlist, 
                             method = HarmonyIntegration, 
                             orig.reduction = "pca", 
                             new.reduction = "integrated.Harmony",
                             verbose = FALSE)
# 重新连接层
scRNAlist[["RNA"]] <- JoinLayers(scRNAlist[["RNA"]])
# 选择合适的PC数量
Seurat::ElbowPlot(scRNAlist, ndims = 50)
# 聚类分析
scRNAlist <- FindNeighbors(scRNAlist, 
                           reduction = "integrated.Harmony", 
                           dims = 1:30)
scRNAlist <- FindClusters(scRNAlist, resolution = 0.4)
# 降维可视化
scRNAlist <- RunTSNE(scRNAlist, 
                     dims = 1:30, 
                     reduction = "integrated.Harmony")

DimPlot(scRNAlist, reduction = "tsne", label = TRUE,group.by = "seurat_clusters")
DimPlot(scRNAlist, reduction = "tsne", group.by = "orig.ident")

# 查看细胞周期的影响


save(scRNAlist,file="11.29-Rdata.Rdata")
load(file="11.29-Rdata.Rdata")

细胞注释

cell_types_markers <- list(
    Myeloid_cell = c("CSF1R", "CSF3R", "CD68"),
    Epithelial_cell = c("MUC1", "KRT18", "KRT19", "CLDN4", "EPCAM"),
    T_cell = c("CD3E", "CD2","CD3G","CD3D"),
    Stromal_cell = c("COL1A2", "DCN", "PECAM1", "VCAM1","VWF"),
    Proliferative_cell = c("MK67", "PCNA", "STMN1"),
    B_cell = c("CD79A", "IGHG1", "MS4A1"),
    NK_cell = c("GNLY", "KLRF1","KLRD1")
)

dot_plot <- DotPlot(scRNAlist, features = cell_types_markers) + 
  scale_color_gradientn(colors = c("blue", "white", "red"))

# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

dot_plot




table(scRNAlist$seurat_clusters)

# 添加celltype列
scRNAlist$celltype <- "Unassigned"  # 先将所有细胞标记为未分配
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(3,7))] <- "Myeloid_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(6))] <- "Epithelial_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(0,1,5))] <- "T_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(11,13,15))] <- "Stromal_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(10))] <- "Proliferative_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(2,9,12,14))] <- "B_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(8))] <- "NK_cell"

# 将celltype设置为活跃标识
Idents(scRNAlist) <- "celltype"

# 可视化注释结果
DimPlot(scRNAlist, reduction = "tsne", label = TRUE, pt.size = 0.5)
# 删除未分配的细胞
scRNAlist <- subset(scRNAlist, celltype != "Unassigned")

# 检查结果
table(scRNAlist$celltype)

table(scRNAlist$seurat_clusters,scRNAlist$celltype)
Idents(scRNAlist) <- 'celltype'
table(scRNAlist$celltype)
DimPlot(scRNAlist, reduction = "tsne", label = T)
pdf("24-11-29-celltype.pdf",width = 8,height = 6)
DimPlot(scRNAlist, reduction = "tsne", label = T,group.by = "celltype",pt.size = 0.5)
dev.off()
pdf("24-11-29-sample.pdf",width = 8,height = 6)

DimPlot(scRNAlist, reduction = "tsne", label = F,group.by = "orig.ident",pt.size = 0.5)
dev.off()
head(scRNAlist@meta.data)
table(scRNAlist$orig.ident)

# 添加样本分组信息
scRNAlist$group = scRNAlist$orig.ident

# 添加转移部位分组信息
scRNAlist$sample = scRNAlist$orig.ident
scRNAlist$sample = gsub("PT1|PT2|PT3", "Primary_Tumor", scRNAlist$sample)
scRNAlist$sample = gsub("LN1|LN2", "Lymph_Node_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("O1", "Ovary_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("P1", "Peritoneum_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("Li1|Li2", "Liver_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("NT1", "Adjacent_Nontumor", scRNAlist$sample)

# 添加患者来源信息
scRNAlist$patient = case_when(
  scRNAlist$orig.ident %in% c("PT1", "Li1") ~ "Patient1",
  scRNAlist$orig.ident %in% c("PT2", "NT1") ~ "Patient2",
  scRNAlist$orig.ident == "O1" ~ "Patient3",
  scRNAlist$orig.ident == "LN2" ~ "Patient5",
  scRNAlist$orig.ident == "P1" ~ "Patient6",
  TRUE ~ "Patient4"
)

# 添加组织类型信息
scRNAlist$tissue <- case_when(
  grepl("^PT", scRNAlist$orig.ident) ~ "Primary_Tumor",
  grepl("^NT", scRNAlist$orig.ident) ~ "Adjacent_Nontumor",
  grepl("^LN|^O1|^P1|^Li", scRNAlist$orig.ident) ~ "Metastasis",
  TRUE ~ "Unknown"
)

# 查看分组情况
table(scRNAlist$group)
table(scRNAlist$sample)
table(scRNAlist$patient)
table(scRNAlist$tissue)
pdf("24-11-29-tissue.pdf",width = 8,height = 6)
DimPlot(scRNAlist, reduction = "tsne", label = F,group.by = "tissue",pt.size = 0.5)
dev.off()


cell_types_markers <- c(
    "CD3E", "CD2","CD3G","CD3D",
    "COL1A2", "DCN", "PECAM1", "VCAM1","VWF",
    "MK67", "PCNA", "STMN1",
    "GNLY", "KLRF1","KLRD1",
    "CSF1R", "CSF3R", "CD68",
    "MUC1", "KRT18", "KRT19", "CLDN4", "EPCAM",
    "CD79A", "IGHG1", "MS4A1"

)


dot_plot <- DotPlot(scRNAlist, features = cell_types_markers,group.by ="celltype" ) + 
  scale_color_gradientn(colors = c("blue", "white", "red"))

# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
pdf("24-11-29-marker.pdf",width = 10,height = 6)
dot_plot
dev.off()

每一个cluster的marker基因点图

注释过的tsne聚类图

细胞亚群marker基因表达点图

不同疾病状态的tsne图

提取髓系细胞重新分群

Myeloid=subset(scRNAlist,celltype=="Myeloid_cell")

Myeloid <- NormalizeData(Myeloid)
Myeloid <- FindVariableFeatures(Myeloid)
Myeloid <- ScaleData(Myeloid, vars.to.regress = c("percent.mt"))
Myeloid <- RunPCA(Myeloid, verbose=F)
Seurat::ElbowPlot(Myeloid, ndims = 50)
Myeloid <- FindNeighbors(Myeloid, reduction = "integrated.Harmony", dims = 1:20)

Myeloid <- FindClusters(Myeloid, resolution = 0.2)
Myeloid <- RunTSNE(Myeloid, dims = 1:20, reduction = "integrated.Harmony")
DimPlot(Myeloid, reduction = "tsne", label = T,pt.size = 0.6)

cell_types_markers <- list(
  Monocytes = c("MS4A4A", "LYZ", "S100A8", "S100A9"),
  Macrophages = c( "VCAN", "FCN1", "C1QB", "C1QC", "MSR1", "APOE", "CD163", "MRC1"),
  Dendritic_cell = c("CD1E", "CD1C", "FCER1A", "CLEC10A"),
  Mast_cell = c("TPSAB1", "KIT", "CPA3")
)
cell_types_markers <- list(
  Monocytes = c("S100A8", "S100A9"),
  Macrophages = c( "C1QB", "C1QC", "MSR1", "APOE", "CD163", "MRC1"),
  Dendritic_cell = c("BIRC3","HLA-DPB1","CLEC10A")
)
#a=as.data.frame(rownames(Myeloid))
dot_plot <- DotPlot(Myeloid, features = cell_types_markers) + 
  scale_color_gradientn(colors = c("blue", "white", "red"))

# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

dot_plot



table(Myeloid$seurat_clusters)

# 添加celltype列
Myeloid$celltype <- "Unassigned"  # 先将所有细胞标记为未分配
Myeloid$celltype[which(Myeloid$seurat_clusters %in% c(0,1))] <- "Monocytes"
Myeloid$celltype[which(Myeloid$seurat_clusters %in% c(3))] <- "Macrophages"
Myeloid$celltype[which(Myeloid$seurat_clusters %in% c(2))] <- "Dendritic_cell"

# 将celltype设置为活跃标识
Idents(Myeloid) <- "celltype"
save(Myeloid,file="11-29-Myeloid.Rdata")
# 可视化注释结果
pdf("24-11-29-Myeloid.pdf",width = 8,height = 6)
DimPlot(Myeloid, reduction = "tsne", label = TRUE, pt.size = 1)
# 删除未分配的细胞
dev.off()
cell_types_markers <- c(
  "S100A8", "S100A9",
   "C1QB", "C1QC", "MSR1", "APOE", "CD163", "MRC1",
  "BIRC3","HLA-DPB1","CLEC10A"
)

dot_plot <- DotPlot(Myeloid, features = cell_types_markers,group.by = "celltype") + 
  scale_color_gradientn(colors = c("blue", "white", "red"))

# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
pdf("24-11-29-Myeloid-marker.pdf",width = 10,height = 6)
dot_plot
dev.off()

髓系细胞celltype的tsne图

髓系细胞marker基因图

总结

本次数据处理和代码分享就到此结束～

原创声明：本文系作者授权腾讯云开发者社区发表，未经许可，不得转载。

如有侵权，请联系 cloudcommunity@tencent.com 删除。

生物基因

原创声明：本文系作者授权腾讯云开发者社区发表，未经许可，不得转载。

如有侵权，请联系 cloudcommunity@tencent.com 删除。

生物基因

登录后参与评论

0 条评论

热度

GSE163558单细胞数据处理流程分享

GSE163558单细胞数据处理流程分享

前言

处理流程

数据下载

加载R包

读取数据

数据质量控制

细胞注释

提取髓系细胞重新分群

总结

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐