癌症相关成纤维细胞(cancer-associated fibroblasts,CAFs)是肿瘤微环境(TME)中的关键成分,在其中发挥着关键作用,能够通过细胞间相互作用影响免疫逃逸、药物耐受和转移。然而,CAFs 的空间异质性及其生物学功能尚未被完全阐明。2025年3月27日,Cancer Cell 在线发表了题为 Conserved spatial subtypes and cellular neighborhoods of cancer-associated fibroblasts revealed by single-cell spatial multi-omics 的最新研究成果。该研究整合了来自10种癌症、7个空间转录组及蛋白组学平台、超过1400万个细胞的单细胞空间多组学数据,对其中的肿瘤相关成纤维细胞(CAFs)进行分析,成功识别出四种具有不同空间分布、细胞组成和功能特征的 CAFs 亚型。这些亚型在不同癌症类型中普遍存在,并且与肿瘤微环境特性、免疫细胞浸润及患者生存期密切相关。该研究为理解 CAFs 在肿瘤中的作用以及开发针对 CAFs 的治疗策略提供了重要依据。
CAF空间亚型识别流程
代码实现
import numpy as np
import pandas as pd
import scanpy as sc
from sklearn.neighbors import KDTree
from sklearn.decomposition import NMF
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
# 函数封装
def _neighborhood_composition(adata, celltype_name, celltype_col, sample_col, radius):
    """Return a (target cell x cell type) table of neighbourhood proportions.

    For every cell whose ``celltype_col`` equals ``celltype_name``, the cells
    of the same sample lying within ``radius`` of it (the cell itself
    included) are collected and the fraction of each cell type among them is
    computed. Rows are indexed by ``adata.obs_names``; columns are the
    non-null cell-type labels found in ``adata.obs[celltype_col]``.

    Raises:
        ValueError: if no cell of type ``celltype_name`` is found.
    """
    # Plain object labels: keeps the column index a regular Index (not a
    # CategoricalIndex) and ignores unused categories of a categorical column.
    cell_types = pd.Index(pd.unique(adata.obs[celltype_col].dropna().astype(object)))
    n_types = len(cell_types)
    all_coords = np.asarray(adata.obsm['spatial'])

    per_sample = []
    for sample in adata.obs[sample_col].unique():
        sample_mask = (adata.obs[sample_col] == sample).to_numpy()
        if not sample_mask.any():
            # e.g. a NaN sample label never compares equal to itself
            continue
        obs_sample = adata.obs.loc[sample_mask]
        target_pos = np.flatnonzero((obs_sample[celltype_col] == celltype_name).to_numpy())
        if target_pos.size == 0:
            continue

        coords = all_coords[sample_mask]
        # Integer code of each cell's type; -1 marks a missing label.
        codes = cell_types.get_indexer(obs_sample[celltype_col].astype(object))

        # One batched radius query for all target cells of this sample.
        tree = KDTree(coords)
        neighbor_lists = tree.query_radius(coords[target_pos], r=radius)

        proportions = np.zeros((target_pos.size, n_types), dtype=float)
        for row, neighbors in enumerate(neighbor_lists):
            neighbor_codes = codes[neighbors]
            neighbor_codes = neighbor_codes[neighbor_codes >= 0]
            counts = np.bincount(neighbor_codes, minlength=n_types)
            total = counts.sum()
            if total:
                proportions[row] = counts / total

        per_sample.append(
            pd.DataFrame(proportions, index=obs_sample.index[target_pos], columns=cell_types)
        )

    if not per_sample:
        raise ValueError(
            f"No cells with {celltype_col} == {celltype_name!r} were found in adata.obs."
        )
    return pd.concat(per_sample)


def _plot_neighborhood_pies(avg_neighbor_composition):
    """Draw one pie chart per subtype showing its mean neighbourhood composition.

    Each panel title reports the dominant neighbouring cell type with its
    percentage and the subtype name. The figure is created on the current
    matplotlib backend; it is neither shown nor saved here.
    """
    # Cluster labels are strings; order them numerically so that "10" does not
    # come before "2".
    if all(str(label).isdigit() for label in avg_neighbor_composition.index):
        ordered = sorted(avg_neighbor_composition.index, key=int)
        avg_neighbor_composition = avg_neighbor_composition.loc[ordered]

    n_subtypes = avg_neighbor_composition.shape[0]
    # squeeze=False keeps `axes` two-dimensional even for a single subtype.
    fig, axes = plt.subplots(1, n_subtypes, figsize=(n_subtypes * 3, 3), squeeze=False)

    # One colour per cell type, cycling when there are more types than colours.
    cmap = plt.get_cmap('tab20')
    cell_type_colors = {
        cell: cmap(i % cmap.N) for i, cell in enumerate(avg_neighbor_composition.columns)
    }
    # Keys are strings because the cluster labels are strings.
    subtype_names = {str(i): f"s{i+1}-CAFs" for i in range(n_subtypes)}

    for ax, (subtype, row) in zip(axes[0], avg_neighbor_composition.iterrows()):
        values = row.values
        labels = row.index
        ax.pie(
            values,
            colors=[cell_type_colors[ct] for ct in labels],
            startangle=90,
            counterclock=False,
            wedgeprops={'linewidth': 0.5, 'edgecolor': 'white'},
        )
        dominant_type = labels[values.argmax()]
        percent = values.max() * 100
        ax.set_title(
            f"{percent:.1f}% {dominant_type}\n{subtype_names.get(str(subtype), str(subtype))}",
            fontsize=10,
        )
    fig.tight_layout()


def identify_spatial_subtypes(adata, celltype_name='Fibroblasts', celltype_col='celltype', sample_col='sample', radius=80, n_components=5, resolution=0.1):
    """Cluster cells of one type into spatial subtypes by neighbourhood composition.

    Steps:
      1. For every ``celltype_name`` cell, compute the cell-type proportions of
         all cells of the same sample within ``radius`` of it.
      2. Factorise that cell x cell-type matrix with NMF.
      3. Build a cosine k-NN graph (20 neighbours) on the NMF factors and run
         Leiden clustering; the cluster label is the spatial subtype.
      4. Plot, per subtype, a pie chart of the mean neighbourhood composition.

    Args:
        adata: AnnData-like object providing ``obs``, ``obs_names`` and
            ``obsm['spatial']``. It is modified in place.
        celltype_name: Label in ``celltype_col`` of the cells to subtype.
        celltype_col: ``adata.obs`` column holding cell-type labels.
        sample_col: ``adata.obs`` column holding sample identifiers;
            neighbourhoods are computed within each sample separately.
        radius: Neighbourhood radius, in the units of ``adata.obsm['spatial']``.
        n_components: Number of NMF factors.
        resolution: Resolution passed to ``sc.tl.leiden``.

    Returns:
        The same ``adata``, with a new ``obs`` column ``f"{celltype_name}_sub"``
        holding the subtype label (as a string) for the clustered cells; other
        cells are left without a value in that column.

    Raises:
        ValueError: if ``adata`` contains no cell of type ``celltype_name``.
    """
    # Step 1: neighbourhood composition of every target cell.
    neighbor_matrix = _neighborhood_composition(
        adata, celltype_name, celltype_col, sample_col, radius
    )

    # Step 2: NMF decomposition (rows of W are cells, columns are factors).
    model = NMF(n_components=n_components, init='nndsvda', random_state=0)
    W = model.fit_transform(neighbor_matrix.values)
    W_df = pd.DataFrame(
        W,
        index=neighbor_matrix.index,
        columns=[f'Factor_{i}' for i in range(n_components)],
    )

    # Step 3: identify spatial subtypes on the NMF embedding.
    caf_adata = adata[adata.obs_names.isin(W_df.index)].copy()
    caf_adata.obsm['X_nmf'] = W_df.loc[caf_adata.obs_names].values
    sc.pp.neighbors(caf_adata, n_neighbors=20, use_rep='X_nmf', metric='cosine')
    sc.tl.leiden(caf_adata, resolution=resolution)
    # Write the labels back to the main AnnData.
    subtype_col = celltype_name + '_sub'
    adata.obs.loc[caf_adata.obs_names, subtype_col] = caf_adata.obs['leiden'].astype(str)

    # Step 4: summarise and plot the mean neighbourhood of each subtype.
    # Grouping is positional so a cell type named 'subtype' cannot be clobbered.
    subtypes = adata.obs.loc[neighbor_matrix.index, subtype_col].to_numpy()
    avg_neighbor_composition = neighbor_matrix.groupby(subtypes).mean()
    _plot_neighborhood_pies(avg_neighbor_composition)

    return adata
4种空间成纤维细胞亚群