Ken Furudate
import scanpy as sc
import anndata as ad
import squidpy as sq
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib.font_manager
plt.rcParams['font.sans-serif'] = ['Arial'] + plt.rcParams['font.sans-serif']
plt.rcParams["font.size"] = 20
import os
import seaborn as sns
from pathlib import Path
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))
import warnings
warnings.filterwarnings('ignore')
sc.logging.print_header()
print(f"squidpy=={sq.__version__}")
from anndata import AnnData
import skimage
import tangram as tg
datadir="/data/spatial/"
in_f = "integrated_data.h5ad"
data = sc.read_h5ad(datadir + in_f)
data
in_f1 = "SampleA_cluster_patho.annot_region.txt"
in_f2 = "SampleB_cluster_patho.annot_region.txt"
in_f3 = "SampleC_cluster_patho.annot_region.txt"
in_f4 = "SampleD_cluster_patho.annot_region.txt"
a_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f1), index_col="Barcode")
b_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f2), index_col="Barcode")
c_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f3), index_col="Barcode")
d_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f4), index_col="Barcode")
adata.obs['pathology'] = a_annot["patho_diag"]
adata.obs['category'] = a_annot["category"]
adata.obs['cluster'] = a_annot["graph-based"]
bdata.obs['pathology'] = b_annot["patho_diag"]
bdata.obs['category'] = b_annot["category"]
bdata.obs['cluster'] = b_annot["graph-based"]
cdata.obs['pathology'] = c_annot["patho_diag"]
cdata.obs['category'] = c_annot["category"]
cdata.obs['cluster'] = c_annot["graph-based"]
ddata.obs['pathology'] = d_annot["patho_diag"]
ddata.obs['category'] = d_annot["category"]
ddata.obs['cluster'] = d_annot["graph-based"]
sample_name1="A"
sample_name2="B"
sample_name3="C"
sample_name4="D"
adata.obs['sample'] = sample_name1
bdata.obs['sample'] = sample_name2
cdata.obs['sample'] = sample_name3
ddata.obs['sample'] = sample_name4
adata_tumor = adata[adata.obs['category']=="tumor"]
bdata_tumor = bdata[bdata.obs['category']=="tumor"]
cdata_tumor = cdata[cdata.obs['category']=="tumor"]
ddata_tumor = ddata[ddata.obs['category']=="tumor"]
sc.pp.filter_cells(adata_tumor, min_counts=500)
sc.pp.filter_cells(bdata_tumor, min_counts=500)
sc.pp.filter_cells(cdata_tumor, min_counts=500)
sc.pp.filter_cells(ddata_tumor, min_counts=500)
sc.pp.filter_genes(adata_tumor, min_cells=2)
sc.pp.filter_genes(bdata_tumor, min_cells=2)
sc.pp.filter_genes(cdata_tumor, min_cells=2)
sc.pp.filter_genes(ddata_tumor, min_cells=2)
def set_param(input_data, sample):
scale = input_data.uns['spatial'][f"{sample}"]['scalefactors']['tissue_hires_scalef']
img = sq.im.ImageContainer(input_data.uns['spatial'][f"{sample}"]['images']['hires'],
scale=scale,
library_id=f"{sample}")
img.show()
return scale, img
scale_a, img_a = set_param(adata_tumor, sample_name1)
scale_b, img_b = set_param(bdata_tumor, sample_name2)
scale_c, img_c = set_param(cdata_tumor, sample_name3)
scale_d, img_d = set_param(ddata_tumor, sample_name4)
fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_a.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_a["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_b["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_a["image"][:, :, 0, 0]<0.6)
plt.tight_layout()
fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_b.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_b["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_b["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_b["image"][:, :, 0, 0]<0.58)
plt.tight_layout()
fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_c.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_c["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_c["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_c["image"][:, :, 0, 0]<0.78)
plt.tight_layout()
fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_d.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_d["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_d["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_d["image"][:, :, 0, 0]<0.65)
plt.tight_layout()
def segmentation_features(img, input_data, thresh):
sq.im.process(img=img, layer="image", method="smooth")
sq.im.segment(
img=img,
layer="image",
method="watershed",
channel=0,
thresh=thresh,
geq=False
)
# define image layer to use for segmentation
features_kwargs = {
"segmentation": {
"label_layer": "segmented_watershed",
"props": ["label", "centroid"],
"channels": [1, 2],
}
}
# calculate segmentation features
sq.im.calculate_image_features(
adata=input_data,
img=img,
layer="image",
key_added="image_features",
features_kwargs=features_kwargs,
features="segmentation",
mask_circle=True,
n_jobs=2
)
segmentation_features(img_a, adata_tumor, 0.6)
segmentation_features(img_b, bdata_tumor, 0.58)
segmentation_features(img_c, cdata_tumor, 0.78)
segmentation_features(img_d, ddata_tumor, 0.65)
adata_tumor.obs["cell_count"] = adata_tumor.obsm["image_features"]["segmentation_label"]
bdata_tumor.obs["cell_count"] = bdata_tumor.obsm["image_features"]["segmentation_label"]
cdata_tumor.obs["cell_count"] = cdata_tumor.obsm["image_features"]["segmentation_label"]
ddata_tumor.obs["cell_count"] = ddata_tumor.obsm["image_features"]["segmentation_label"]
in_f = "oscc_sc_data.h5ad"
adata_sc = sc.read_h5ad(datadir + in_f)
sc.tl.rank_genes_groups(adata_sc, groupby="label", method="wilcoxon", use_raw=False)
markers_df = pd.DataFrame(adata_sc.uns["rank_genes_groups"]["names"]).iloc[0:100, :]
markers_df
genes_sc = np.unique(markers_df.melt().value.values)
genes_st = adata_tumor.var_names.values
markers = list(set(genes_sc).intersection(set(genes_st)))
len(markers)
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
device
data_ = None
data_ = bdata_tumor.copy()
ad_map = None
annotation_list = list()
df_all_genes = pd.DataFrame()
tg.pp_adatas(adata_sc, data_, genes=markers)
ad_map = tg.map_cells_to_space(adata_sc=adata_sc, # scRNA-seq
adata_sp=data_, # spatial data
mode="constrained",
cluster_label='label', # .obs field w cell types
target_count=data_.obs.cell_count.sum(),
density_prior=np.array(data_.obs.cell_count) / data_.obs.cell_count.sum(),
scale=True,
num_epochs=10000,
device=device,
learning_rate=0.1,
random_state=42
)
tg.project_cell_annotations(adata_map=ad_map, adata_sp=data_, annotation="label", threshold=0.5)
annotation_list = list(pd.unique(adata_sc.obs['label']))
data_.obs = pd.concat([data_.obs, data_.obsm["tangram_ct_pred"]], axis=1)
sc.pl.spatial(data_,
color=annotation_list,
vmax="p85",
color_map="viridis",
legend_fontsize=20,
)
tg.plot_training_scores(ad_map, bins=20, alpha=.5)
ad_ge = tg.project_genes(adata_map=ad_map, adata_sc=adata_sc)
df_all_genes = tg.compare_spatial_geneexp(ad_ge, data_, adata_sc)
tg.plot_auc(df_all_genes)
tg.create_segment_cell_df(data_)
tg.count_cell_annotations(
ad_map,
adata_sc,
data_,
annotation="label",
)
data_segment = tg.deconvolve_cell_annotations(data_)
sc.pl.spatial(
data_segment[(data_segment.obs["cluster"] == "CRC" ) | (data_segment.obs["cluster"] == "CAF") | (data_segment.obs["cluster"] == "T cell")],
color="cluster",
size=0.05,
show=True,
frameon=True,
alpha_img=0.05,
legend_fontsize=20,
)
plt.show()