前往小程序,Get更优阅读体验!
立即前往
发布
社区首页 >专栏 >GSE163558单细胞数据处理流程分享

GSE163558单细胞数据处理流程分享

原创
作者头像
生信医道
修改2025-02-06 11:14:45
修改2025-02-06 11:14:45
1540
举报
文章被收录于专栏:单细胞分享单细胞分享

前言

小绿书:生信医道

数据号:GSE163558

用此篇文献提供数据处理,单细胞下游处理代码

处理流程

数据下载

https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE163558 ,GEO数据库搜索GEO数据号,下载并整理成Seurat所需的格式。

加载R包

代码语言:r
复制
rm(list=ls())
setwd("./11.29-scRNAseq-GSE163558")
library(data.table)
library(stringr)
library(data.table)
library(Seurat)
library(dplyr)
library(Seurat)
#library(infercnv)
library(dplyr)
library(ggplot2)
library(ggplotify)
library(NMF)
library(ggalluvial)
library(CellChat)
library(Seurat)
options("repos" = c(CRAN = "https://mirrors.westlake.edu.cn/CRAN/"))
getwd()

读取数据

代码语言:r
复制
dir <- "./11.29-scRNAseq-GSE163558/GSE163558"
# 获取样本列表
samples <- c("PT1", "PT2", "PT3", "NT1", "LN1", "LN2", "O1", "P1", "Li1", "Li2")
# 设置全局选项以创建V5版本的assays
options(Seurat.object.assay.version = "v5")
# 使用lapply函数遍历样本列表
sceList <- lapply(samples, function(pro) {
  print(pro)  # 打印当前处理的样本名称
  
  # 读取10X Genomics数据
  tmp <- Read10X(file.path(dir, pro))
  
  # 根据Read10X的返回值创建Seurat对象
  # 如果返回两个对象(细胞和特征),则使用第一个
  if (length(tmp) == 2) {
    ct <- tmp[[1]]
  } else {
    ct <- tmp
  }
  
  # 使用CreateSeuratObject函数创建Seurat对象
  sce <- CreateSeuratObject(counts = ct,
                            project = gsub('GSM[0-9]*_rep[0-9]_','', pro),
                            min.cells = 0,
                            min.features = 0)
  
  return(sce)
})

# 打印Seurat对象列表
print(sceList)

数据质量控制

代码语言:r
复制
# 方法1:显式指定 reference
do.call(rbind,lapply(sceList,dim))
scRNAlist=merge(x=sceList[[1]],y=sceList[-1],add.cell.ids=samples)
names(scRNAlist@assays$RNA@layers)
dim(table(scRNAlist@meta.data$orig.ident))

# 计算线粒体比例
scRNAlist[["percent.mt"]] <- PercentageFeatureSet(scRNAlist, pattern = "^MT-")

# QC筛选
# 过滤细胞
scRNAlist <- subset(scRNAlist, 
                    subset = nFeature_RNA > 200 &   # 至少200个基因
                             nFeature_RNA < 5000 &  # 原文是5000,你的是7000
                             percent.mt < 20)       # 原文是20%线粒体基因

# 标准化
scRNAlist <- NormalizeData(scRNAlist, normalization.method = "LogNormalize")

# 找到2000个高可变基因
scRNAlist <- FindVariableFeatures(scRNAlist, nfeatures = 2000)

# 细胞周期基因
# 1. 加载细胞周期基因
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes


# 先进行标准化,保证有data层
for(i in 1:length(sceList)){
  sceList[[i]] <- NormalizeData(sceList[[i]])
  sceList[[i]] <- CellCycleScoring(sceList[[i]], 
                                   s.features = s.genes, 
                                   g2m.features = g2m.genes, 
                                   set.ident = TRUE)
}

# 回归UMI数量、线粒体污染和细胞周期的影响
scRNAlist <- ScaleData(scRNAlist, 
                       vars.to.regress = c("nCount_RNA", 
                                           "percent.mt", 
                                           "S.Score", 
                                           "G2M.Score"))

# 先运行PCA
scRNAlist <- RunPCA(scRNAlist, verbose = FALSE)

# 然后进行整合
scRNAlist <- IntegrateLayers(object = scRNAlist, 
                             method = HarmonyIntegration, 
                             orig.reduction = "pca", 
                             new.reduction = "integrated.Harmony",
                             verbose = FALSE)
# 重新连接层
scRNAlist[["RNA"]] <- JoinLayers(scRNAlist[["RNA"]])
# 选择合适的PC数量
Seurat::ElbowPlot(scRNAlist, ndims = 50)
# 聚类分析
scRNAlist <- FindNeighbors(scRNAlist, 
                           reduction = "integrated.Harmony", 
                           dims = 1:30)
scRNAlist <- FindClusters(scRNAlist, resolution = 0.4)
# 降维可视化
scRNAlist <- RunTSNE(scRNAlist, 
                     dims = 1:30, 
                     reduction = "integrated.Harmony")

DimPlot(scRNAlist, reduction = "tsne", label = TRUE,group.by = "seurat_clusters")
DimPlot(scRNAlist, reduction = "tsne", group.by = "orig.ident")

# 查看细胞周期的影响


save(scRNAlist,file="11.29-Rdata.Rdata")
load(file="11.29-Rdata.Rdata")

细胞注释

代码语言:r
复制
cell_types_markers <- list(
    Myeloid_cell = c("CSF1R", "CSF3R", "CD68"),
    Epithelial_cell = c("MUC1", "KRT18", "KRT19", "CLDN4", "EPCAM"),
    T_cell = c("CD3E", "CD2","CD3G","CD3D"),
    Stromal_cell = c("COL1A2", "DCN", "PECAM1", "VCAM1","VWF"),
    Proliferative_cell = c("MK67", "PCNA", "STMN1"),
    B_cell = c("CD79A", "IGHG1", "MS4A1"),
    NK_cell = c("GNLY", "KLRF1","KLRD1")
)

dot_plot <- DotPlot(scRNAlist, features = cell_types_markers) + 
  scale_color_gradientn(colors = c("blue", "white", "red"))

# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

dot_plot




table(scRNAlist$seurat_clusters)

# 添加celltype列
scRNAlist$celltype <- "Unassigned"  # 先将所有细胞标记为未分配
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(3,7))] <- "Myeloid_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(6))] <- "Epithelial_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(0,1,5))] <- "T_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(11,13,15))] <- "Stromal_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(10))] <- "Proliferative_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(2,9,12,14))] <- "B_cell"
scRNAlist$celltype[which(scRNAlist$seurat_clusters %in% c(8))] <- "NK_cell"

# 将celltype设置为活跃标识
Idents(scRNAlist) <- "celltype"

# 可视化注释结果
DimPlot(scRNAlist, reduction = "tsne", label = TRUE, pt.size = 0.5)
# 删除未分配的细胞
scRNAlist <- subset(scRNAlist, celltype != "Unassigned")

# 检查结果
table(scRNAlist$celltype)

table(scRNAlist$seurat_clusters,scRNAlist$celltype)
Idents(scRNAlist) <- 'celltype'
table(scRNAlist$celltype)
DimPlot(scRNAlist, reduction = "tsne", label = T)
pdf("24-11-29-celltype.pdf",width = 8,height = 6)
DimPlot(scRNAlist, reduction = "tsne", label = T,group.by = "celltype",pt.size = 0.5)
dev.off()
pdf("24-11-29-sample.pdf",width = 8,height = 6)

DimPlot(scRNAlist, reduction = "tsne", label = F,group.by = "orig.ident",pt.size = 0.5)
dev.off()
head(scRNAlist@meta.data)
table(scRNAlist$orig.ident)

# 添加样本分组信息
scRNAlist$group = scRNAlist$orig.ident

# 添加转移部位分组信息
scRNAlist$sample = scRNAlist$orig.ident
scRNAlist$sample = gsub("PT1|PT2|PT3", "Primary_Tumor", scRNAlist$sample)
scRNAlist$sample = gsub("LN1|LN2", "Lymph_Node_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("O1", "Ovary_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("P1", "Peritoneum_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("Li1|Li2", "Liver_Metastasis", scRNAlist$sample)
scRNAlist$sample = gsub("NT1", "Adjacent_Nontumor", scRNAlist$sample)

# 添加患者来源信息
scRNAlist$patient = case_when(
  scRNAlist$orig.ident %in% c("PT1", "Li1") ~ "Patient1",
  scRNAlist$orig.ident %in% c("PT2", "NT1") ~ "Patient2",
  scRNAlist$orig.ident == "O1" ~ "Patient3",
  scRNAlist$orig.ident == "LN2" ~ "Patient5",
  scRNAlist$orig.ident == "P1" ~ "Patient6",
  TRUE ~ "Patient4"
)

# 添加组织类型信息
scRNAlist$tissue <- case_when(
  grepl("^PT", scRNAlist$orig.ident) ~ "Primary_Tumor",
  grepl("^NT", scRNAlist$orig.ident) ~ "Adjacent_Nontumor",
  grepl("^LN|^O1|^P1|^Li", scRNAlist$orig.ident) ~ "Metastasis",
  TRUE ~ "Unknown"
)

# 查看分组情况
table(scRNAlist$group)
table(scRNAlist$sample)
table(scRNAlist$patient)
table(scRNAlist$tissue)
pdf("24-11-29-tissue.pdf",width = 8,height = 6)
DimPlot(scRNAlist, reduction = "tsne", label = F,group.by = "tissue",pt.size = 0.5)
dev.off()


cell_types_markers <- c(
    "CD3E", "CD2","CD3G","CD3D",
    "COL1A2", "DCN", "PECAM1", "VCAM1","VWF",
    "MK67", "PCNA", "STMN1",
    "GNLY", "KLRF1","KLRD1",
    "CSF1R", "CSF3R", "CD68",
    "MUC1", "KRT18", "KRT19", "CLDN4", "EPCAM",
    "CD79A", "IGHG1", "MS4A1"

)


dot_plot <- DotPlot(scRNAlist, features = cell_types_markers,group.by ="celltype" ) + 
  scale_color_gradientn(colors = c("blue", "white", "red"))

# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
pdf("24-11-29-marker.pdf",width = 10,height = 6)
dot_plot
dev.off()

每一个cluster的marker基因点图

注释过的tsne聚类图

细胞亚群marker基因表达点图

不同疾病状态的tsne图

提取髓系细胞重新分群

代码语言:r
复制
Myeloid=subset(scRNAlist,celltype=="Myeloid_cell")

Myeloid <- NormalizeData(Myeloid)
Myeloid <- FindVariableFeatures(Myeloid)
Myeloid <- ScaleData(Myeloid, vars.to.regress = c("percent.mt"))
Myeloid <- RunPCA(Myeloid, verbose=F)
Seurat::ElbowPlot(Myeloid, ndims = 50)
Myeloid <- FindNeighbors(Myeloid, reduction = "integrated.Harmony", dims = 1:20)

Myeloid <- FindClusters(Myeloid, resolution = 0.2)
Myeloid <- RunTSNE(Myeloid, dims = 1:20, reduction = "integrated.Harmony")
DimPlot(Myeloid, reduction = "tsne", label = T,pt.size = 0.6)

cell_types_markers <- list(
  Monocytes = c("MS4A4A", "LYZ", "S100A8", "S100A9"),
  Macrophages = c( "VCAN", "FCN1", "C1QB", "C1QC", "MSR1", "APOE", "CD163", "MRC1"),
  Dendritic_cell = c("CD1E", "CD1C", "FCER1A", "CLEC10A"),
  Mast_cell = c("TPSAB1", "KIT", "CPA3")
)
cell_types_markers <- list(
  Monocytes = c("S100A8", "S100A9"),
  Macrophages = c( "C1QB", "C1QC", "MSR1", "APOE", "CD163", "MRC1"),
  Dendritic_cell = c("BIRC3","HLA-DPB1","CLEC10A")
)
#a=as.data.frame(rownames(Myeloid))
dot_plot <- DotPlot(Myeloid, features = cell_types_markers) + 
  scale_color_gradientn(colors = c("blue", "white", "red"))

# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

dot_plot



table(Myeloid$seurat_clusters)

# 添加celltype列
Myeloid$celltype <- "Unassigned"  # 先将所有细胞标记为未分配
Myeloid$celltype[which(Myeloid$seurat_clusters %in% c(0,1))] <- "Monocytes"
Myeloid$celltype[which(Myeloid$seurat_clusters %in% c(3))] <- "Macrophages"
Myeloid$celltype[which(Myeloid$seurat_clusters %in% c(2))] <- "Dendritic_cell"

# 将celltype设置为活跃标识
Idents(Myeloid) <- "celltype"
save(Myeloid,file="11-29-Myeloid.Rdata")
# 可视化注释结果
pdf("24-11-29-Myeloid.pdf",width = 8,height = 6)
DimPlot(Myeloid, reduction = "tsne", label = TRUE, pt.size = 1)
# 删除未分配的细胞
dev.off()
cell_types_markers <- c(
  "S100A8", "S100A9",
   "C1QB", "C1QC", "MSR1", "APOE", "CD163", "MRC1",
  "BIRC3","HLA-DPB1","CLEC10A"
)

dot_plot <- DotPlot(Myeloid, features = cell_types_markers,group.by = "celltype") + 
  scale_color_gradientn(colors = c("blue", "white", "red"))

# 调整基因标签竖着排列
dot_plot <- dot_plot + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
pdf("24-11-29-Myeloid-marker.pdf",width = 10,height = 6)
dot_plot
dev.off()

髓系细胞celltype的tsne图

髓系细胞marker基因图

总结

本次数据处理和代码分享就到此结束~

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 前言
  • 处理流程
    • 数据下载
    • 加载R包
    • 读取数据
    • 数据质量控制
    • 图片
    • 图片
    • 细胞注释
    • 图片
    • 图片
    • 图片
    • 图片
    • 提取髓系细胞重新分群
  • 总结
领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档