最近在做反卷积,效果都不是特别好。🫠
其中比较能用的就是MuSiC
跑出来的结果了。💪
rm(list = ls())
#yulab.utils::install_zip_gh('xuranw/MuSiC')
library(MuSiC)
library(tidyverse)
library(reshape2)
library(cowplot)
GSE50244.bulk.eset = readRDS('./GSE50244bulkeset.rds')
GSE50244.bulk.eset
EMTAB.sce = readRDS('./EMTABsce_healthy.rds')
EMTAB.sce
XinT2D.sce = readRDS('./XinT2Dsce.rds')
XinT2D.sce
1️⃣ 89
个受试者的Bulk
数据,GSE50244.bulk.eset
;
2️⃣ 单细胞参考数据EMTAB.eset
进行的;
我们将估计限制在6
种主要细胞类型上:α、β、δ、γ、腺泡和导管,它们占整个胰岛的 90% 以上。😂
# Bulk expression matrix
bulk.mtx = exprs(GSE50244.bulk.eset)
# Estimate cell type proportions
Est.prop.GSE50244 = music_prop(bulk.mtx = bulk.mtx,
sc.sce = EMTAB.sce,
clusters = 'cellType',
samples = 'sampleID',
select.ct = c('alpha', 'beta', 'delta', 'gamma','acinar', 'ductal'),
verbose = F)
names(Est.prop.GSE50244)
可视化!~
这里我们用Jitter_Est
显示不同估计方法之间的差异。🧐
# Jitter plot of estimated cell type proportions
jitter.fig = Jitter_Est(list(data.matrix(Est.prop.GSE50244$Est.prop.weighted),
data.matrix(Est.prop.GSE50244$Est.prop.allgene)),
method.name = c('MuSiC', 'NNLS'), title = 'Jitter plot of Est Proportions')
m.prop.GSE50244 = rbind(melt(Est.prop.GSE50244$Est.prop.weighted),
melt(Est.prop.GSE50244$Est.prop.allgene))
colnames(m.prop.GSE50244) = c('Sub', 'CellType', 'Prop')
m.prop.GSE50244$CellType = factor(m.prop.GSE50244$CellType, levels = c('alpha', 'beta', 'delta', 'gamma', 'acinar', 'ductal'))
m.prop.GSE50244$Method = factor(rep(c('MuSiC', 'NNLS'), each = 89*6), levels = c('MuSiC', 'NNLS'))
m.prop.GSE50244$HbA1c = rep(GSE50244.bulk.eset$hba1c, 2*6)
m.prop.GSE50244 = m.prop.GSE50244[!is.na(m.prop.GSE50244$HbA1c), ]
m.prop.GSE50244$Disease = factor(c('Normal', 'T2D')[(m.prop.GSE50244$HbA1c > 6.5)+1], levels = c('Normal', 'T2D'))
m.prop.GSE50244$D = (m.prop.GSE50244$Disease == 'T2D')/5
m.prop.GSE50244 = rbind(subset(m.prop.GSE50244, Disease == 'Normal'), subset(m.prop.GSE50244, Disease != 'Normal'))
jitter.new = ggplot(m.prop.GSE50244, aes(Method, Prop)) +
geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease),
size = 2, alpha = 0.7, position = position_jitter(width=0.25, height=0)) +
facet_wrap(~ CellType, scales = 'free') +
scale_colour_manual( values = c('white', "gray20"))+
scale_shape_manual(values = c(21, 24))+
theme_minimal()
plot_grid(jitter.fig, jitter.new, labels = 'auto')
众所周知,β 细胞
比例与T2D
疾病状态有关。😘
在T2D
的进展中,β 细胞
的数量减少。🙊
T2D
最重要的指标之一是HbA1c
(糖化血红蛋白),当HbA1c
水平大于6.5%
时,患者被诊断为T2D
。😏
让我们看看β 细胞
比例与HbA1c
水平。💪
# Create dataframe for beta cell proportions and HbA1c levels
m.prop.ana = data.frame(pData(GSE50244.bulk.eset)[rep(1:89, 2), c('age', 'bmi', 'hba1c', 'gender')],
ct.prop = c(Est.prop.GSE50244$Est.prop.weighted[, 2],
Est.prop.GSE50244$Est.prop.allgene[, 2]),
Method = factor(rep(c('MuSiC', 'NNLS'), each = 89),
levels = c('MuSiC', 'NNLS')))
colnames(m.prop.ana)[1:4] = c('Age', 'BMI', 'HbA1c', 'Gender')
m.prop.ana = subset(m.prop.ana, !is.na(HbA1c))
m.prop.ana$Disease = factor( c('Normal', 'T2D')[(m.prop.ana$HbA1c > 6.5) + 1], c('Normal', 'T2D') )
m.prop.ana$D = (m.prop.ana$Disease == 'T2D')/5
ggplot(m.prop.ana, aes(HbA1c, ct.prop)) +
geom_smooth(method = 'lm', se = FALSE, col = 'black', lwd = 0.25) +
geom_point(aes(fill = Method, color = Disease, stroke = D, shape = Disease), size = 2, alpha = 0.7) + facet_wrap(~ Method) +
ggtitle('HbA1c vs Beta Cell Type Proportion') + theme_minimal() +
scale_colour_manual( values = c('white', "gray20")) +
scale_shape_manual(values = c(21, 24))
线性回归显示,年龄、BMI 和性别校正后, HbA1c
水平与β 细胞
比例之间存在显著的负相关。😘
lm.beta.MuSiC = lm(ct.prop ~ HbA1c + Age + BMI + Gender, data = subset(m.prop.ana, Method == 'MuSiC'))
summary(lm.beta.MuSiC)
lm.beta.NNLS = lm(ct.prop ~ HbA1c + Age + BMI + Gender, data = subset(m.prop.ana, Method == 'NNLS'))
summary(lm.beta.NNLS)
Mouse.bulk.eset = readRDS('./Mousebulkeset.rds')
Mouse.bulk.eset
单细胞数据集有16
种细胞类型,包括2
种新型细胞类型和一种过渡细胞类型 (CD-Trans
)。😘
我们在分析中排除了这3
种细胞类型。🫠
Mousesub.sce = readRDS('./Mousesub_sce.rds')
Mousesub.sce
levels(Mousesub.sce$cellType)
Screenshot 2025-07-06 at 13.35.58
我们先用hclust
函数对细胞类型进行聚类。😘
我们将13
种细胞类型分为4
组:👇
Group 1: Neutro;
Group 2: Podo;
Group 3: Endo, CD-PC, CD-IC, LOH, DCT, PT;
Group 4: Fib, Macro, NK, B lymph, T lymph;
# Produce the first step information
Mousesub.basis = music_basis(Mousesub.sce, clusters = 'cellType', samples = 'sampleID',
select.ct = c('Endo', 'Podo', 'PT', 'LOH', 'DCT', 'CD-PC', 'CD-IC', 'Fib',
'Macro', 'Neutro','B lymph', 'T lymph', 'NK'))
# Plot the dendrogram of design matrix and cross-subject mean of realtive abundance
par(mfrow = c(1, 2))
d <- dist(t(log(Mousesub.basis$Disgn.mtx + 1e-6)), method = "euclidean")
# Hierarchical clustering using Complete Linkage
hc1 <- hclust(d, method = "complete" )
# Plot the obtained dendrogram
plot(hc1, cex = 0.6, hang = -1, main = 'Cluster log(Design Matrix)')
d <- dist(t(log(Mousesub.basis$M.theta + 1e-8)), method = "euclidean")
# Hierarchical clustering using Complete Linkage
hc2 <- hclust(d, method = "complete")
# Plot the obtained dendrogram
plot(hc2, cex = 0.6, hang = -1, main = 'Cluster log(Mean of RA)')
用于小鼠肾脏分析的树导向递归估计包括 2 个步骤:
1️⃣ 步骤 1.估计每个高级集群的比例; (𝑝1,𝑝2,𝑝3,𝑝4)
2️⃣ 步骤 2.估计每个聚类中的细胞类型比例; (𝑝31,𝑝32,.,𝑝36,𝑝41,.,𝑝45)
clusters.type = list(C1 = 'Neutro', C2 = 'Podo', C3 = c('Endo', 'CD-PC', 'LOH', 'CD-IC', 'DCT', 'PT'), C4 = c('Macro', 'Fib', 'B lymph', 'NK', 'T lymph'))
cl.type = as.character(Mousesub.sce$cellType)
for(cl in 1:length(clusters.type)){
cl.type[cl.type %in% clusters.type[[cl]]] = names(clusters.type)[cl]
}
Mousesub.sce$clusterType = factor(cl.type, levels = c(names(clusters.type), 'CD-Trans', 'Novel1', 'Novel2'))
s.mouse = unlist(clusters.type)
s.mouse
# Construct artificial bulk dataset. Use all 4 cell types: alpha, beta, gamma, delta
XinT2D.construct.full = bulk_construct(XinT2D.sce, clusters = 'cellType', samples = 'SubjectName')
names(XinT2D.construct.full)
dim(XinT2D.construct.full$bulk.counts)
XinT2D.construct.full$bulk.counts[1:5, 1:5]
head(XinT2D.construct.full$num.real)
XinT2D.construct.full$prop.real = relative.ab(XinT2D.construct.full$num.real, by.col = FALSE)
head(XinT2D.construct.full$prop.real)
# Estimate cell type proportions of artificial bulk data
Est.prop.Xin = music_prop(bulk.mtx = XinT2D.construct.full$bulk.counts, sc.sce = EMTAB.sce,
clusters = 'cellType', samples = 'sampleID',
select.ct = c('alpha', 'beta', 'delta', 'gamma'))
# Estimation evaluation
Eval_multi(prop.real = data.matrix(XinT2D.construct.full$prop.real),
prop.est = list(data.matrix(Est.prop.Xin$Est.prop.weighted),
data.matrix(Est.prop.Xin$Est.prop.allgene)),
method.name = c('MuSiC', 'NNLS'))
library(cowplot)
prop.comp.fig = Prop_comp_multi(prop.real = data.matrix(XinT2D.construct.full$prop.real),
prop.est = list(data.matrix(Est.prop.Xin$Est.prop.weighted),
data.matrix(Est.prop.Xin$Est.prop.allgene)),
method.name = c('MuSiC', 'NNLS'),
title = 'Heatmap of Real and Est. Prop' )
abs.diff.fig = Abs_diff_multi(prop.real = data.matrix(XinT2D.construct.full$prop.real),
prop.est = list(data.matrix(Est.prop.Xin$Est.prop.weighted),
data.matrix(Est.prop.Xin$Est.prop.allgene)),
method.name = c('MuSiC', 'NNLS'),
title = 'Abs.Diff between Real and Est. Prop' )
plot_grid(prop.comp.fig, abs.diff.fig, labels = "auto", rel_widths = c(4,3))