











import imageio as io
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import tifffile
from tqdm.notebook import tqdm
import pathlib
from cellpose import models, core
import json
import cv2
import glob
# experiment paths
visium_path = "VisiumHD_data/LJI_001_visiumhd_SI"
experiment = "SI_d8pi"
output_path = "visium_hd/segmentation"
experiment_spatial_path = os.path.join(
visium_path, "count_outputs", f"visium_hd_count_{experiment}", "outs", "spatial"
)
highres_image = glob.glob(os.path.join(experiment_spatial_path, "*used.tif"))[0]
img_fpath = pathlib.Path(highres_image)
img = io.imread(highres_image)
####Write out chunks of the histology image to train a cellpose model
chunk_size = (500, 500)
for i in range(0, img.shape[1], int(chunk_size[1])):
for j in range(0, img.shape[0], int(chunk_size[0])):
# Define the coordinates for cropping
left = i
upper = j
right = i + chunk_size[1]
lower = j + chunk_size[0]
# Crop the img chunk
chunk = img[upper:lower, left:right]
cv2.imwrite(
os.path.join(output_path, "histology_images", f"chunk_{i}_{j}.png"),
chunk,
)
print(os.path.join(output_path, "histology_images", f"chunk_{i}_{j}.png"))import imageio as io
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import tifffile
from tqdm.notebook import tqdm
import pathlib
import geopandas as gpd
from cellpose import models, core
import json
from shapely.geometry import Polygon, Point
import scanpy as sc
import glob
experiment = "SI_d90pi"
visium_path = "VisiumHD_data/LJI_001_visiumhd_SI"
# model path
mp = r"visium_hd/segmentation/models/day8_SI_DAPI"
experiment_spatial_path = os.path.join(
visium_path, "count_outputs", f"visium_hd_count_{experiment}", "outs", "spatial"
)
highres_image = glob.glob(os.path.join(experiment_spatial_path, "*used.tif"))[0]
import PIL
PIL.Image.MAX_IMAGE_PIXELS = 2382717010
def read_dapi_image(path: str, downscale_factor: int = 2) -> np.ndarray:
img_fpath = pathlib.Path(path)
img = io.imread(img_fpath)
print(img.shape)
return downscale_image(img, downscale_factor=downscale_factor)
def downscale_image(img: np.ndarray, downscale_factor: int = 2) -> np.ndarray:
# Calculate the amount
#
# of padding needed for each axis
pad_height = (downscale_factor - img.shape[0] % downscale_factor) % downscale_factor
pad_width = (downscale_factor - img.shape[1] % downscale_factor) % downscale_factor
new_shape = (img.shape[0] + pad_height, img.shape[1] + pad_width, img.shape[2])
new_image = np.zeros(new_shape, dtype=img.dtype)
for channel in range(img.shape[2]):
new_image[:, :, channel] = np.pad(
img[:, :, channel], ((0, pad_height), (0, pad_width)), mode="constant"
)
# Pad the array with zeros
return new_image
maxed_visium = read_dapi_image(highres_image, downscale_factor=1)
def run_cellpose(
img: np.ndarray, model_path: str
) -> (np.ndarray, np.ndarray, np.ndarray):
use_GPU = core.use_gpu()
model = models.CellposeModel(gpu=use_GPU, pretrained_model=model_path)
channels = [0, 0]
masks, flows, styles = model.eval(
[img],
channels=channels,
diameter=model.diam_labels,
flow_threshold=1,
cellprob_threshold=0,
batch_size=4,
)
return (masks, flows, styles)
chunk_per_axis = 2
masks_top_left, flows, styles = run_cellpose(
maxed_visium[
: np.shape(maxed_visium)[0] // chunk_per_axis,
: np.shape(maxed_visium)[1] // chunk_per_axis,
:,
],
model_path=mp,
)
masks_top_right, flows, styles = run_cellpose(
maxed_visium[
: np.shape(maxed_visium)[0] // chunk_per_axis,
np.shape(maxed_visium)[1] // chunk_per_axis :,
:,
],
model_path=mp,
)
masks_bottom_left, flows, styles = run_cellpose(
maxed_visium[
np.shape(maxed_visium)[0] // chunk_per_axis :,
: np.shape(maxed_visium)[1] // chunk_per_axis,
:,
],
model_path=mp,
)
masks_bottom_right, flows, styles = run_cellpose(
maxed_visium[
np.shape(maxed_visium)[0] // chunk_per_axis :,
np.shape(maxed_visium)[1] // chunk_per_axis :,
:,
],
model_path=mp,
)
####Plot and save segmentation
constant = 1000000
full_mask = np.zeros_like(maxed_visium[:, :, 0], dtype=np.uint32)
full_mask[
: np.shape(maxed_visium)[0] // chunk_per_axis,
: np.shape(maxed_visium)[1] // chunk_per_axis,
] = masks_top_left[0]
full_mask[
: np.shape(maxed_visium)[0] // chunk_per_axis,
np.shape(maxed_visium)[1] // chunk_per_axis :,
] = masks_top_right[0] + (constant)
full_mask[
np.shape(maxed_visium)[0] // chunk_per_axis :,
: np.shape(maxed_visium)[1] // chunk_per_axis,
] = masks_bottom_left[0] + (constant * 2)
full_mask[
np.shape(maxed_visium)[0] // chunk_per_axis :,
np.shape(maxed_visium)[1] // chunk_per_axis :,
] = masks_bottom_right[0] + (constant * 3)
# when fullmask % constant == 0, set the value to 0
full_mask = np.where(full_mask % constant == 0, 0, full_mask)
# save np array as png
tifffile.imsave(
f"visium_hd/segmentation/segmentation_outputs/{experiment}_segmentation.png",
full_mask,
)
####Assign barcodes to cells
full_mask = tifffile.imread(
f"visium_hd/segmentation/segmentation_outputs/{experiment}_segmentation.png"
)
dir_base = "VisiumHD_data/LJI_001_visiumhd_SI/count_outputs/visium_hd_count_SI_d90pi/outs/binned_outputs/square_002um/"
# Load Visium HD data
raw_h5_file = dir_base + "filtered_feature_bc_matrix.h5"
adata = sc.read_10x_h5(raw_h5_file)
# Load the Spatial Coordinates
tissue_position_file = dir_base + "spatial/tissue_positions.parquet"
df_tissue_positions = pd.read_parquet(tissue_position_file)
# Set the index of the dataframe to the barcodes
df_tissue_positions = df_tissue_positions.set_index("barcode")
# Create an index in the dataframe to check joins
df_tissue_positions["index"] = df_tissue_positions.index
# Adding the tissue positions to the meta data
adata.obs = pd.merge(adata.obs, df_tissue_positions, left_index=True, right_index=True)
# Create a GeoDataFrame from the DataFrame of coordinates
geometry = [
Point(xy)
for xy in zip(
df_tissue_positions["pxl_col_in_fullres"],
df_tissue_positions["pxl_row_in_fullres"],
)
]
gdf_coordinates = gpd.GeoDataFrame(df_tissue_positions, geometry=geometry)
plt.imshow(full_mask)
plt.scatter(
list(adata.obs["pxl_col_in_fullres"].values)[::10],
list(adata.obs["pxl_row_in_fullres"].values)[::10],
s=1,
alpha=0.01,
c="r",
)
plt.show()
####assigning capture regions to individual nuclei
point_mask = (
gdf_coordinates["pxl_row_in_fullres"].values.astype(int) < np.shape(full_mask)[0]
) & (gdf_coordinates["pxl_col_in_fullres"].values.astype(int) < np.shape(full_mask)[1])
cells = full_mask[
gdf_coordinates["pxl_row_in_fullres"].values.astype(int)[point_mask],
gdf_coordinates["pxl_col_in_fullres"].values.astype(int)[point_mask],
]
gdf_coordinates = gdf_coordinates[point_mask]
gdf_coordinates["cells"] = cells
assigned_regions = gdf_coordinates[gdf_coordinates["cells"] > 0]
adata.obs = adata.obs.merge(
assigned_regions[["cells"]], left_index=True, right_index=True, how="left"
)
adata = adata[~pd.isna(adata.obs["cells"])]
adata.obs["cells"] = adata.obs["cells"].astype(int)
####Summing the transcript counts in capture regions assigned to each nucleus
import anndata
from scipy import sparse
# Group the data by unique nucleous IDs
groupby_object = adata.obs.groupby(["cells"], observed=True)
# Extract the gene expression counts from the AnnData object
counts = adata.X
spatial_coords = adata.obs[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values
# Obtain the number of unique nuclei and the number of genes in the expression data
N_groups = groupby_object.ngroups
N_genes = counts.shape[1]
# Initialize a sparse matrix to store the summed gene counts for each nucleus
summed_counts = sparse.lil_matrix((N_groups, N_genes))
# Lists to store the IDs of polygons and the current row index
polygon_id = []
cell_coords = []
row = 0
# Iterate over each unique polygon to calculate the sum of gene counts.
for polygons, idx_ in groupby_object.indices.items():
summed_counts[row] = counts[idx_].sum(0)
cell_coords.append(np.mean(spatial_coords[idx_], axis=0))
row += 1
polygon_id.append(polygons)
cell_coords = np.array(cell_coords)
# Create and AnnData object from the summed count matrix
summed_counts = summed_counts.tocsr()
grouped_adata = anndata.AnnData(
X=summed_counts,
obs=pd.DataFrame(polygon_id, columns=["cells"], index=polygon_id),
var=adata.var,
)
%store grouped_adata
# Calculate quality control metrics for the AnnData object
sc.pp.calculate_qc_metrics(grouped_adata, inplace=True)
grouped_adata.obsm["X_spatial"] = cell_coords
sc.set_figure_params(dpi=300)
sc.pl.embedding(
grouped_adata, basis="X_spatial", color="Acta2", alpha=1, vmax=1, cmap="Blues"
)
####Save out the single cell visiumHD adata
grouped_adata.write(
f"visium_hd/segmentation/segmentation_outputs/{experiment}_single_cell_adata.h5ad"
)原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。