资料:
Monocle 3被重新设计用于分析大型、复杂的单细胞数据集,核心算法具有高度可扩展性,可以处理百万级别单细胞数据。Monocle 3增加了一些强大的新功能:
# Install BiocManager only when it is not already available.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# Select Bioconductor release 3.10 for the installs below.
BiocManager::install(version = "3.10")

# Install the dependencies first.
BiocManager::install(c(
  "BiocGenerics", "DelayedArray", "DelayedMatrixStats",
  "limma", "S4Vectors", "SingleCellExperiment",
  "SummarizedExperiment", "batchelor", "Matrix.utils"
))

# Install monocle3 (and leidenbase) from GitHub.
# devtools is installed only when missing, so re-running this script does not
# reinstall it every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("cole-trapnell-lab/leidenbase")
devtools::install_github("cole-trapnell-lab/monocle3")

# Load the package to check that the installation succeeded.
library(monocle3)
monocle3的工作原理流程:

### ======== Store data in a cell_data_set object
# expression_matrix, cell_metadata and gene_annotation must already exist in
# the session; they are not created in this outline.
cds <- new_cell_data_set(expression_matrix,
                         cell_metadata = cell_metadata,
                         gene_metadata = gene_annotation)
## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 100)
### ======== Remove batch effects (optional)
## Step 2: Remove batch effects with cell alignment
cds <- align_cds(cds, alignment_group = "batch")
### ======== Cluster your cells
## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds)
## Step 4: Cluster the cells
cds <- cluster_cells(cds)
### ======== Order cells in pseudotime along a trajectory (optional)
## Step 5: Learn a graph
cds <- learn_graph(cds)
## Step 6: Order cells
# NOTE(review): no root cells/nodes are passed here; confirm how order_cells()
# picks the root in your monocle3 version (it may prompt interactively).
cds <- order_cells(cds)
plot_cells(cds)
### ======== Perform differential expression analysis (optional)
# With regression:
gene_fits <- fit_models(cds, model_formula_str = "~embryo.time")
fit_coefs <- coefficient_table(gene_fits)
# The dplyr verbs are namespace-qualified so this section runs with only
# monocle3 attached: a bare `filter` would resolve to stats::filter, and `%>%`
# is not guaranteed to be in scope. dplyr itself is expected to be installed
# as a monocle3 dependency.
emb_time_terms <- dplyr::filter(fit_coefs, term == "embryo.time")
# Recompute q-values over the retained term only (p.adjust default method).
emb_time_terms <- dplyr::mutate(emb_time_terms, q_value = p.adjust(p_value))
sig_genes <- dplyr::pull(
  dplyr::filter(emb_time_terms, q_value < 0.05),
  gene_short_name
)
# With graph autocorrelation:
pr_test_res <- graph_test(cds, neighbor_graph = "principal_graph", cores = 4)
pr_deg_ids <- row.names(subset(pr_test_res, q_value < 0.05))
monocle3使用基因表达矩阵作为输入:
Monocle使用cell_data_set类对象保存单细胞表达数据。该类派生自Bioconductor的SingleCellExperiment类,提供了一个公共接口,这个类需要三个输入文件:
创建方式:
# Read the three example inputs (expression matrix, per-cell metadata,
# per-gene annotation) from their remote .rds files.
expression_matrix <- readRDS(
  url("http://staff.washington.edu/hpliner/data/cao_l2_expression.rds")
)
cell_metadata <- readRDS(
  url("http://staff.washington.edu/hpliner/data/cao_l2_colData.rds")
)
gene_annotation <- readRDS(
  url("http://staff.washington.edu/hpliner/data/cao_l2_rowData.rds")
)
# Assemble the cell_data_set from the three pieces.
cds <- new_cell_data_set(
  expression_data = expression_matrix,
  cell_metadata = cell_metadata,
  gene_metadata = gene_annotation
)
output结构:10x_data/outs/filtered_feature_bc_matrix/
# Option 1: build the CDS from the top-level Cell Ranger output directory.
cds <- load_cellranger_data(
  "~/Downloads/10x_data"
)
# Option 2: build the CDS from a MatrixMarket file plus its feature and
# cell annotation files.
cds <- load_mm_data(
  mat_path = "~/Downloads/matrix.mtx",
  feature_anno_path = "~/Downloads/features.tsv",
  cell_anno_path = "~/Downloads/barcodes.tsv"
)
注意:无需将稀疏矩阵转换为普通的matrix对象,可以直接以稀疏矩阵作为输入:

# Coerce the UMI counts to a sparse matrix and build the CDS from it.
cds <- new_cell_data_set(
  expression_data = as(umi_matrix, "sparseMatrix"),
  cell_metadata = cell_metadata,
  gene_metadata = gene_metadata
)
# For demonstration only: a second CDS made of the first 100 rows of cds.
cds2 <- cds[seq_len(100), ]
# Merge both objects into a single CDS.
big_cds <- combine_cds(list(cds, cds2))
[1] "approximate graph abstraction": https://www.biorxiv.org/content/early/2017/10/25/208819