This procedure avoids calling false positive CNV regions due to cell-type specific expression of clustered gene regions (e.g. Immunoglobulin- or HLA genes in different immune cell types).
#! /bin/python
### 20240618
### zhaoyunfei
### https://infercnvpy.readthedocs.io/en/latest/notebooks/tutorial_3k.html
import pandas as pd
import numpy as np
import sklearn
import scanpy as sc
import infercnvpy as cnv
import matplotlib.pyplot as plt
import warnings
import argparse
warnings.simplefilter("ignore")
sc.settings.set_figure_params(figsize=(5, 5))
parse=argparse.ArgumentParser(description='scvelo')
parse.add_argument('--input',help='the anndata',type=str,required = True)
parse.add_argument('--outdir',help='the analysis dir',type=str)
parse.add_argument('--sample',help='the sample name',type=str,required = True)
parse.add_argument('--ref',help='the ref celltype;eg T,B',type=str,required = True)
parse.add_argument('--windiow',help='the bin size',type=int,default = 250)
argv = parse.parse_args()
adata = argv.input
outdir = argv.outdir
sample = argv.sample
ref = argv.ref
windiow = argv.windiow
adata = sc.read(adata)
sc.pp.normalize_total(adata)
adata.var['symbol'] = adata.var.index
adata.var.index = adata.var['gene_ids']
gene_order = pd.read_csv('/home/samples/DB/scRNA/inferCNV/gene_order.txt',sep = '\t',index_col = 0)
gene_intersect = list(set(adata.var.index) & set(gene_order.index))
adata = adata[:,gene_intersect]
gene_order = gene_order.loc[gene_intersect,:]
adata.var.index.name = 'gene'
tmp = pd.merge(adata.var,gene_order,on = 'gene_ids')
tmp.index = tmp['symbol']
adata.var = tmp
adata.var['chromosome'] = 'chr' + adata.var['chr']
adata.var['ensg'] = adata.var['gene_ids']
adata.var['start'] = adata.var['Start']
adata.var['end'] = adata.var['End']
reference = ref.split(',')
cnv.tl.infercnv(adata,reference_key="celltype",reference_cat=reference,window_size=windiow)
cnv.pl.chromosome_heatmap(adata, groupby="celltype")
plt.savefig(outdir + '/' + sample + '.cnv.heatmap.png',bbox_inches = 'tight')
####cnv cluster
cnv.tl.pca(adata)
cnv.pp.neighbors(adata)
cnv.tl.leiden(adata)
cnv.pl.chromosome_heatmap(adata, groupby="cnv_leiden", dendrogram=True)
plt.savefig(outdir + '/' + sample + '.cnv.leiden.heatmap.png',bbox_inches = 'tight')
####UMAP plot of CNV profiles
cnv.tl.umap(adata)
cnv.tl.cnv_score(adata)
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(11, 11))
ax4.axis("off")
cnv.pl.umap(adata,color="cnv_leiden",legend_loc="on data",legend_fontoutline=2,ax=ax1,show=False,)
cnv.pl.umap(adata, color="cnv_score", ax=ax2, show=False)
cnv.pl.umap(adata, color="celltype", ax=ax3)
plt.savefig(outdir + '/' + sample + '.cnv.celltype.umap.CNV.png',bbox_inches = 'tight')
####visualize the CNV score and clusters on the transcriptomics-based UMAP plot
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(12, 11), gridspec_kw={"wspace": 0.5})
ax4.axis("off")
sc.pl.umap(adata, color="cnv_leiden", ax=ax1, show=False)
sc.pl.umap(adata, color="cnv_score", ax=ax2, show=False)
sc.pl.umap(adata, color="celltype", ax=ax3)
plt.savefig(outdir + '/' + sample + '.cnv.celltype.umap.RNA.png',bbox_inches = 'tight')
####Classifying tumor cells
tumor = []
for i in adata.obs['cnv_score']:
if i > 0.2 :
tumor.append('tumor')
else :
tumor.append('normal')
adata.obs["cnv_status"] = tumor
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), gridspec_kw={"wspace": 0.5})
cnv.pl.umap(adata, color="cnv_status", ax=ax1, show=False)
sc.pl.umap(adata, color="cnv_status", ax=ax2)
plt.savefig(outdir + '/' + sample + '.cnv.status.png',bbox_inches = 'tight')
cnv.pl.chromosome_heatmap(adata[adata.obs["cnv_status"] == "tumor", :])
plt.savefig(outdir + '/' + sample + 'tumor.cnv.status.png',bbox_inches = 'tight')
cnv.pl.chromosome_heatmap(adata[adata.obs["cnv_status"] == "normal", :])
plt.savefig(outdir + '/' + sample + 'normal.cnv.status.png',bbox_inches = 'tight')
adata.write(outdir + '/' + sample + '.h5ad')
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。