前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >课前准备-单细胞velocity分析(贝叶斯模型)

课前准备-单细胞velocity分析(贝叶斯模型)

原创
作者头像
追风少年i
发布2024-06-16 11:25:21
710
发布2024-06-16 11:25:21
作者,Evil Genius
速率
  • Probabilistic modeling of RNA velocity
  • Direct modeling of raw spliced and unspliced read count
  • Multiple uncertainty diagnostics analysis and visualizations
  • Synchronized cell time estimation across genes
  • Multivariate denoised gene expression and velocity prediction

image

image

image

image

实现目标

image

整体框架
1、前处理
代码语言:javascript
复制
import scvelo as scv
adata = scv.read("local_file.h5ad")

adata.layers['raw_spliced']   = adata.layers['spliced']
adata.layers['raw_unspliced'] = adata.layers['unspliced']
adata.obs['u_lib_size_raw'] = adata.layers['raw_unspliced'].toarray().sum(-1)
adata.obs['s_lib_size_raw'] = adata.layers['raw_spliced'].toarray().sum(-1)
scv.pp.filter_and_normalize(adata, min_shared_counts=30, n_top_genes=2000)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
2、训练模型
代码语言:javascript
复制
from pyrovelocity.api import train_model
from pyrovelocity.plot import vector_field_uncertainty
num_epochs = 1000 # large data
# num_epochs = 4000 # small data
# Model 1
adata_model_pos = train_model(adata,
                               max_epochs=num_epochs, svi_train=True, log_every=100,
                               patient_init=45,
                               batch_size=4000, use_gpu=0, cell_state='state_info',
                               include_prior=True,
                               offset=False,
                               library_size=True,
                               patient_improve=1e-3,
                               model_type='auto',
                               guide_type='auto_t0_constraint',
                               train_size=1.0)
# Or Model 2
adata_model_pos = train_model(adata,
                               max_epochs=num_epochs, svi_train=True, log_every=100,
                               patient_init=45,
                               batch_size=4000, use_gpu=0, cell_state='state_info',
                               include_prior=True,
                               offset=True,
                               library_size=True,
                               patient_improve=1e-3,
                               model_type='auto',
                               guide_type='auto',
                               train_size=1.0)

# adata_model_pos is a returned list in which 0th element is the trained model,
# the 1st element is the posterior samples of all random variables

trained_model = adata_model_pos[0]
posterior_samples = adata_model_pos[1]
v_map_all, embeds_radian, fdri = vector_field_uncertainty(
    adata,
    posterior_samples=posterior_samples,
    basis="umap"
)

save_res = True
if save_res:
    trained_model.save('saved_model', overwrite=True)
    result_dict = {"adata_model_pos": posterior_samples,
                   "v_map_all": v_map_all,
                   "embeds_radian": embeds_radian, "fdri": fdri} #, "embed_mean": embed_mean}
    import pickle
    with open("posterior_samples.pkl", "wb") as f:
         pickle.dump(result_dict, f)
3、速率估计
代码语言:javascript
复制
from pyrovelocity.plot import plot_state_uncertainty
from pyrovelocity.plot import plot_posterior_time, plot_gene_ranking,\
      vector_field_uncertainty, plot_vector_field_uncertain,\
      plot_mean_vector_field, project_grid_points,rainbowplot,denoised_umap,\
      us_rainbowplot, plot_arrow_examples, set_colorbar

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

embedding = 'umap' # change to umap or tsne based on your embedding method

# This generates the posterior samples of all vector fields
# and statistical testing results from Rayleigh test
v_map_all, embeds_radian, fdri = vector_field_uncertainty(adata, posterior_samples,
                                                          basis=embedding, denoised=False, n_jobs=30)
fig, ax = plt.subplots()
# This returns the posterior mean of the vector field
embed_mean = plot_mean_vector_field(posterior_samples, adata, ax=ax, n_jobs=30, basis=embedding)
# This plot single-cell level vector field uncertainty
# and averaged cell vector field uncertainty on the grid points
# based on angular standard deviation
fig, ax = plt.subplots(1, 2)
fig.set_size_inches(11.5, 5)
plot_vector_field_uncertain(adata, embed_mean, embeds_radian,
                            ax=ax,
                            fig=fig, cbar=False, basis=embedding, scale=None)

# This generates shared time uncertainty plot with contour lines
fig, ax = plt.subplots(1, 3)
fig.set_size_inches(12, 2.8)
adata.obs['shared_time_uncertain'] = posterior_samples['cell_time'].std(0).flatten()
ax_cb = scv.pl.scatter(adata, c='shared_time_uncertain', ax=ax[0], show=False, cmap='inferno', fontsize=7, s=20, colorbar=True, basis=embedding)
select = adata.obs['shared_time_uncertain'] > np.quantile(adata.obs['shared_time_uncertain'], 0.9)
sns.kdeplot(adata.obsm[f'X_{embedding}'][:, 0][select],
            adata.obsm[f'X_{embedding}'][:, 1][select],
            ax=ax[0], levels=3, fill=False)

# This generates vector field uncertainty based on Rayleigh test.
adata.obs.loc[:, 'vector_field_rayleigh_test'] = fdri
im = ax[1].scatter(adata.obsm[f'X_{embedding}'][:, 0],
                   adata.obsm[f'X_{embedding}'][:, 1], s=3, alpha=0.9,
                   c=adata.obs['vector_field_rayleigh_test'], cmap='inferno_r',
                   linewidth=0)
set_colorbar(im, ax[1], labelsize=5, fig=fig, position='right')
select = adata.obs['vector_field_rayleigh_test'] > np.quantile(adata.obs['vector_field_rayleigh_test'], 0.95)
sns.kdeplot(adata.obsm[f'X_{embedding}'][:, 0][select],
            adata.obsm[f'X_{embedding}'][:, 1][select], ax=ax[1], levels=3, fill=False)
ax[1].axis('off')
ax[1].set_title("vector field\nrayleigh test\nfdr<0.05: %s%%" % (round((fdri < 0.05).sum()/fdri.shape[0], 2)*100), fontsize=7)
4、可视化
代码语言:javascript
复制
fig = plt.figure(figsize=(7.07, 4.5))
subfig = fig.subfigures(1, 2, wspace=0.0, hspace=0, width_ratios=[1.6, 4])
ax = fig.subplots(1)
# This generates the selected cell fate markers and output in DataFrame
volcano_data, _ = plot_gene_ranking([posterior_samples], [adata], ax=ax,
                                     time_correlation_with='st', assemble=True)
# This generates the rainbow plots for the selected markers.
_ = rainbowplot(volcano_data, adata, posterior_samples,
                subfig[1], data=['st', 'ut'], num_genes=4)
方法示例
代码语言:javascript
复制
%load_ext autoreload
%autoreload 2
import scvelo as scv
import numpy as np
import torch
import matplotlib.pyplot as plt
from pyrovelocity.data import load_data
from scipy.stats import spearmanr, pearsonr
from pyrovelocity.api import train_model
import seaborn as sns
import pandas as pd
from pyrovelocity.plot import plot_posterior_time, plot_gene_ranking,\
      vector_field_uncertainty, plot_vector_field_uncertain,\
      plot_mean_vector_field, project_grid_points,rainbowplot,denoised_umap,\
      us_rainbowplot, plot_arrow_examples
from pyrovelocity.utils import mae, mae_evaluate

adata = load_data(top_n=2000, min_shared_counts=30)
模型训练
代码语言:javascript
复制
adata_model_pos_split = train_model(adata, max_epochs=4000, svi_train=False, log_every=100,
                                    patient_init=45, batch_size=-1, use_gpu=1,
                                    include_prior=True, offset=False, library_size=True,
                                    patient_improve=1e-4, guide_type='auto_t0_constraint', train_size=0.67)

mae_evaluate(adata_model_pos_split, adata)

adata_model_pos = train_model(adata, max_epochs=4000, svi_train=False, log_every=100,
                              patient_init=45, batch_size=-1, use_gpu=0,
                              include_prior=True, offset=False, library_size=True,
                              patient_improve=1e-4, guide_type='auto_t0_constraint', train_size=1.0)

mae_evaluate(adata_model_pos[1], adata)

v_map_all, embeds_radian, fdri = vector_field_uncertainty(adata, adata_model_pos[1], basis='umap', n_jobs=10)
可视化
代码语言:javascript
复制
fig, ax = plt.subplots()
embed_mean = plot_mean_vector_field(adata_model_pos[1], adata, ax=ax, n_jobs=10)
图片排列
代码语言:javascript
复制
fig = plt.figure(figsize=(7.07, 4.5))
subfig_A = fig.subfigures(2, 1, wspace=0.0, hspace=0, height_ratios=[1.2, 3])
dot_size = 3
font_size = 7

ress = pd.DataFrame({"cell_type": adata.obs['clusters'].values,
                     "X1": adata.obsm['X_umap'][:,0],
                     "X2": adata.obsm['X_umap'][:,1]})
subfig_A0 = subfig_A[0].subfigures(1, 2, wspace=0.0, hspace=0, width_ratios=[4, 2])

ax = subfig_A0[0].subplots(1, 4)
sns.scatterplot(x='X1', y='X2', data=ress, alpha=0.9, s=dot_size,
                linewidth=0, edgecolor="none", hue='cell_type',
                ax=ax[0], legend='brief')
ax[0].axis('off')
ax[0].set_title("Cell types\n", fontsize=font_size)
ax[0].legend(bbox_to_anchor=[2.9, -0.01], ncol=4, prop={'size': font_size}, fontsize=font_size, frameon=False)
kwargs = dict(color='gray', density=.8, add_margin=.1, s=dot_size,
              show=False, alpha=.2, min_mass=3.5, frameon=False)
scv.pl.velocity_embedding_stream(adata, fontsize=font_size, ax=ax[1], title='', **kwargs)
ax[1].set_title("Scvelo\n", fontsize=7)

scv.pl.velocity_embedding_stream(adata, fontsize=font_size, basis='umap',
                                 title='', ax=ax[2], vkey='velocity_pyro', **kwargs)
ax[2].set_title("Pyro-Velocity\n", fontsize=7)

# plot_arrow_examples(adata_train, np.transpose(v_map_all, (1, 2, 0)), embeds_radian, ax=ax[3],
#                     n_sample=30, fig=fig, basis='umap', scale=0.004, alpha=0.18)
plot_arrow_examples(adata, np.transpose(v_map_all, (1, 2, 0)), embeds_radian, ax=ax[3],
                    n_sample=30, fig=fig, basis='umap', scale=0.008, alpha=0.2, index=5,
                    scale2=0.015, num_certain=6)
ax[3].set_title("Single cell\nvector field", fontsize=7)

plot_vector_field_uncertain(adata, embed_mean, embeds_radian,
                            fig=subfig_A0[1], cbar=True, basis='umap', scale=0.05)

subfig_A0[0].subplots_adjust(hspace=0.2, wspace=0.1, left=0.01, right=0.99, top=0.99, bottom=0.35)
subfig_A0[1].subplots_adjust(hspace=0.2, wspace=0.1, left=0.01, right=0.99, top=0.99, bottom=0.35)

subfig_A[0].text(-0.06, 0.58, "Pancreas", size=7, rotation="vertical", va="center")

subfig_B = subfig_A[1].subfigures(1, 2, wspace=0.0, hspace=0, width_ratios=[1.6, 4])
##subfig_B[0].subplots_adjust(hspace=0.2, wspace=0.1, left=0.01, right=0.99, top=0.99, bottom=0.1)

ax = subfig_B[0].subplots(2, 1)
plot_posterior_time(adata_model_pos[1], adata, ax=ax[0], fig=subfig_B[0], addition=False)
subfig_B[0].subplots_adjust(hspace=0.3, wspace=0.1, left=0.01, right=0.8, top=0.92, bottom=0.17)

volcano_data2, _ = plot_gene_ranking([adata_model_pos[1]], [adata], ax=ax[1],
                                    time_correlation_with='st', assemble=True)
_ = rainbowplot(volcano_data2, adata, adata_model_pos[1], subfig_B[1], data=['st', 'ut'], num_genes=4)

image

代码语言:javascript
复制
fig.savefig("pancreas_output_figure1.tif", facecolor=fig.get_facecolor(), bbox_inches='tight', edgecolor='none', dpi=300)

fig, ax = plt.subplots(1, 2)
fig.set_size_inches(11.5, 5)
plot_vector_field_uncertain(adata, embed_mean, embeds_radian,
                            ax=ax,
                            fig=fig, cbar=False, basis='umap', scale=0.05)

image

More uncertainty evaluation metrics
代码语言:javascript
复制
from pyrovelocity.plot import plot_state_uncertainty
fig, ax = plt.subplots(2, 3)
fig.set_size_inches(7.07, 3.5)
pos = adata_model_pos[1]
bin = 30
adata.obs['cell_time_uncertain'] = adata_model_pos[1]['cell_time'].std(0).flatten()
scv.pl.scatter(adata, c='cell_time_uncertain', ax=ax[0][0], show=False, cmap='inferno', fontsize=7)

_ = ax[1][0].hist(adata.obs.cell_time_uncertain, bins=bin, color='black', alpha=0.9)
ax[1][0].set_xlabel("shared time\nstandard deviation", fontsize=7)
select = adata.obs['cell_time_uncertain'] > np.quantile(adata.obs['cell_time_uncertain'], 0.9)
sns.kdeplot(adata.obsm['X_umap'][:, 0][select],
            adata.obsm['X_umap'][:, 1][select], ax=ax[0][0], levels=3, fill=False)

adata.obs['vector_field_rayleigh_test'] = fdri
scv.pl.scatter(adata, c='vector_field_rayleigh_test', ax=ax[0][1], show=False, cmap='inferno_r', fontsize=7)
select = adata.obs['vector_field_rayleigh_test'] > np.quantile(adata.obs['vector_field_rayleigh_test'], 0.9)
sns.kdeplot(adata.obsm['X_umap'][:, 0][select],
            adata.obsm['X_umap'][:, 1][select], ax=ax[0][1], levels=3, fill=False)

_ = ax[1][1].hist(adata.obs.vector_field_rayleigh_test, bins=bin, color='black', alpha=0.9)
ax[1][1].set_xlabel("vector field\nrayleigh test fdr", fontsize=7)
ax[1][1].text(0.08, 2000, "<1%% FDR:%s%%" % int((fdri < 0.01).sum()/fdri.shape[0]*100), fontsize=7)
ax[1][1].text(0.08, 1500, "<5%% FDR:%s%%" % int((fdri < 0.05).sum()/fdri.shape[0]*100), fontsize=7)

fig.subplots_adjust(hspace=0.3, wspace=0.7, left=0.01, right=0.8, top=0.92, bottom=0.17)
plot_state_uncertainty(pos, adata, kde=False, data='denoised', top_percentile=0.9, ax=ax[0][2])

_ = ax[1][2].hist(adata.obs['state_uncertain'], bins=bin, color='black', alpha=0.9)
ax[1][2].set_xlabel("averaged state uncertainty", fontsize=7)
select = adata.obs['state_uncertain'] > np.quantile(adata.obs['state_uncertain'], 0.9)
sns.kdeplot(adata.obsm['X_umap'][:, 0][select],
            adata.obsm['X_umap'][:, 1][select], ax=ax[0][2], levels=3, fill=False)

ax[1][0].set_ylabel("Frequency", fontsize=7)
fig.tight_layout()
生活很好,有你更好

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。

如有侵权,请联系 cloudcommunity@tencent.com 删除。

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • 作者,Evil Genius
  • 速率
  • 实现目标
  • 整体框架
    • 1、前处理
      • 2、训练模型
        • 3、速率估计
          • 4、可视化
            • 方法示例
              • 模型训练
                • 可视化
                • 图片排列
                  • More uncertainty evaluation metrics
                  • 生活很好,有你更好
                  领券
                  问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档