Ken Furudate

import scanpy as sc
import anndata as ad
import squidpy as sq

import numpy as np
import pandas as pd

import matplotlib as mpl
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib.font_manager
plt.rcParams['font.sans-serif'] = ['Arial'] + plt.rcParams['font.sans-serif']
plt.rcParams["font.size"] = 20

import os
import seaborn as sns
from pathlib import Path

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

import warnings
warnings.filterwarnings('ignore')

sc.logging.print_header()
print(f"squidpy=={sq.__version__}")

from anndata import AnnData

import skimage
import tangram as tg

/usr/local/lib/python3.7/dist-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.
  defaults = yaml.load(f)

scanpy==1.9.1 anndata==0.8.0 umap==0.5.3 numpy==1.21.6 scipy==1.4.1 pandas==1.3.5 scikit-learn==1.0.2 statsmodels==0.13.2 python-igraph==0.9.11 pynndescent==0.5.7
squidpy==1.1.2

datadir="/data/spatial/"

in_f = "integrated_data.h5ad"

data = sc.read_h5ad(datadir + in_f)
data

AnnData object with n_obs × n_vars = 3637 × 4000
    obs: 'in_tissue', 'array_row', 'array_col', 'imagecol', 'imagerow', 'pathology', 'category', 'cluster', 'sample', 'n_counts', 'batch', '_scvi_batch', '_scvi_labels', 'leiden', 'sample_density', '_scvi_raw_norm_scaling'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells-0-0', 'n_cells-1-0', 'n_cells-1', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'highly_variable_nbatches'
    uns: '_scvi', 'dendrogram_leiden', 'hvg', 'leiden', 'leiden_colors', 'neighbors', 'pathology_colors', 'sample_colors', 'sample_density_params', 'umap', 'wilcoxon'
    obsm: 'X_pca', 'X_scvi', 'X_umap', 'spatial'
    layers: 'counts', 'scvi_expr'
    obsp: 'connectivities', 'distances'

in_f1 = "SampleA_cluster_patho.annot_region.txt"
in_f2 = "SampleB_cluster_patho.annot_region.txt"
in_f3 = "SampleC_cluster_patho.annot_region.txt"
in_f4 = "SampleD_cluster_patho.annot_region.txt"

a_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f1), index_col="Barcode")
b_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f2), index_col="Barcode")
c_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f3), index_col="Barcode")
d_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f4), index_col="Barcode")

adata.obs['pathology'] = a_annot["patho_diag"]
adata.obs['category'] = a_annot["category"]
adata.obs['cluster'] = a_annot["graph-based"]

bdata.obs['pathology'] = b_annot["patho_diag"]
bdata.obs['category'] = b_annot["category"]
bdata.obs['cluster'] = b_annot["graph-based"]

cdata.obs['pathology'] = c_annot["patho_diag"]
cdata.obs['category'] = c_annot["category"]
cdata.obs['cluster'] = c_annot["graph-based"]

ddata.obs['pathology'] = d_annot["patho_diag"]
ddata.obs['category'] = d_annot["category"]
ddata.obs['cluster'] = d_annot["graph-based"]

sample_name1="A"
sample_name2="B"
sample_name3="C"
sample_name4="D"

adata.obs['sample'] = sample_name1
bdata.obs['sample'] = sample_name2
cdata.obs['sample'] = sample_name3
ddata.obs['sample'] = sample_name4

adata_tumor = adata[adata.obs['category']=="tumor"]
bdata_tumor = bdata[bdata.obs['category']=="tumor"]
cdata_tumor = cdata[cdata.obs['category']=="tumor"]
ddata_tumor = ddata[ddata.obs['category']=="tumor"]

sc.pp.filter_cells(adata_tumor, min_counts=500)
sc.pp.filter_cells(bdata_tumor, min_counts=500)
sc.pp.filter_cells(cdata_tumor, min_counts=500)
sc.pp.filter_cells(ddata_tumor, min_counts=500)

sc.pp.filter_genes(adata_tumor, min_cells=2)
sc.pp.filter_genes(bdata_tumor, min_cells=2)
sc.pp.filter_genes(cdata_tumor, min_cells=2)
sc.pp.filter_genes(ddata_tumor, min_cells=2)

filtered out 4 cells that have less than 500 counts

def set_param(input_data, sample):
  scale = input_data.uns['spatial'][f"{sample}"]['scalefactors']['tissue_hires_scalef']
  img = sq.im.ImageContainer(input_data.uns['spatial'][f"{sample}"]['images']['hires'],
                           scale=scale, 
                           library_id=f"{sample}")
  img.show()
  return scale, img

scale_a, img_a = set_param(adata_tumor, sample_name1)
scale_b, img_b = set_param(bdata_tumor, sample_name2)
scale_c, img_c = set_param(cdata_tumor, sample_name3)
scale_d, img_d = set_param(ddata_tumor, sample_name4)

Adding image layer `image`
Adding image layer `image`
Adding image layer `image`
Adding image layer `image`

fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_a.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_a["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_b["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_a["image"][:, :, 0, 0]<0.6)
plt.tight_layout()

fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_b.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_b["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_b["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_b["image"][:, :, 0, 0]<0.58)
plt.tight_layout()

fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_c.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_c["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_c["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_c["image"][:, :, 0, 0]<0.78)
plt.tight_layout()

fig, axes = plt.subplots(1, 4, figsize=(20, 4))
img_d.show("image", cmap="gray", ax=axes[0])
axes[1].imshow(img_d["image"][:, :, 0, 0])
_ = sns.histplot(np.array(img_d["image"]).flatten(), ax=axes[2])
axes[3].imshow(img_d["image"][:, :, 0, 0]<0.65)
plt.tight_layout()

def segmentation_features(img, input_data, thresh):
  sq.im.process(img=img, layer="image", method="smooth")
  sq.im.segment(
      img=img,
      layer="image",
      method="watershed",
      channel=0,
      thresh=thresh, 
      geq=False
      )

  # define image layer to use for segmentation
  features_kwargs = {
      "segmentation": {
        "label_layer": "segmented_watershed",
        "props": ["label", "centroid"],
        "channels": [1, 2],
        }
      }

# calculate segmentation features
  sq.im.calculate_image_features(
      adata=input_data,
      img=img,
      layer="image",
      key_added="image_features",
      features_kwargs=features_kwargs,
      features="segmentation",
      mask_circle=True,
      n_jobs=2
      )

segmentation_features(img_a, adata_tumor, 0.6)
segmentation_features(img_b, bdata_tumor, 0.58)
segmentation_features(img_c, cdata_tumor, 0.78)
segmentation_features(img_d, ddata_tumor, 0.65)

Processing image using `smooth` method
Adding image layer `image`
Finish (0:00:00)
Overwriting image layer `image_smooth`
Segmenting an image of shape `(1907, 2000, 1, 3)` using `SegmentationWatershed`
Adding image layer `image`
Finish (0:01:31)
Adding image layer `segmented_watershed`
Calculating features `['segmentation']` using `2` core(s)

Adding `adata.obsm['image_features']`
Finish (0:00:13)
Processing image using `smooth` method
Adding image layer `image`
Finish (0:00:00)
Overwriting image layer `image_smooth`
Segmenting an image of shape `(1789, 2000, 1, 3)` using `SegmentationWatershed`
Adding image layer `image`
Finish (0:01:01)
Adding image layer `segmented_watershed`
Calculating features `['segmentation']` using `2` core(s)

Adding `adata.obsm['image_features']`
Finish (0:00:04)
Processing image using `smooth` method
Adding image layer `image`
Finish (0:00:00)
Overwriting image layer `image_smooth`
Segmenting an image of shape `(1788, 2000, 1, 3)` using `SegmentationWatershed`
Adding image layer `image`
Finish (0:01:05)
Adding image layer `segmented_watershed`
Calculating features `['segmentation']` using `2` core(s)

Adding `adata.obsm['image_features']`
Finish (0:00:14)
Processing image using `smooth` method
Adding image layer `image`
Finish (0:00:00)
Overwriting image layer `image_smooth`
Segmenting an image of shape `(1978, 2000, 1, 3)` using `SegmentationWatershed`
Adding image layer `image`
Finish (0:01:29)
Adding image layer `segmented_watershed`
Calculating features `['segmentation']` using `2` core(s)

Adding `adata.obsm['image_features']`
Finish (0:00:07)

adata_tumor.obs["cell_count"] = adata_tumor.obsm["image_features"]["segmentation_label"]
bdata_tumor.obs["cell_count"] = bdata_tumor.obsm["image_features"]["segmentation_label"]
cdata_tumor.obs["cell_count"] = cdata_tumor.obsm["image_features"]["segmentation_label"]
ddata_tumor.obs["cell_count"] = ddata_tumor.obsm["image_features"]["segmentation_label"]

in_f = "oscc_sc_data.h5ad"
adata_sc = sc.read_h5ad(datadir + in_f)
sc.tl.rank_genes_groups(adata_sc, groupby="label", method="wilcoxon", use_raw=False)
markers_df = pd.DataFrame(adata_sc.uns["rank_genes_groups"]["names"]).iloc[0:100, :]
markers_df

ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:10)

genes_sc = np.unique(markers_df.melt().value.values)
genes_st = adata_tumor.var_names.values
markers = list(set(genes_sc).intersection(set(genes_st)))
len(markers)

338

import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

data_ = None
data_ = bdata_tumor.copy()

ad_map = None
annotation_list = list()
df_all_genes = pd.DataFrame()
  
tg.pp_adatas(adata_sc, data_, genes=markers)

ad_map = tg.map_cells_to_space(adata_sc=adata_sc,     # scRNA-seq
                               adata_sp=data_,          # spatial data 
                               mode="constrained",
                               cluster_label='label',    # .obs field w cell types
                               target_count=data_.obs.cell_count.sum(),
                               density_prior=np.array(data_.obs.cell_count) / data_.obs.cell_count.sum(),
                               scale=True,
                               num_epochs=10000,
                               device=device,
                               learning_rate=0.1,
                               random_state=42
                               )

tg.project_cell_annotations(adata_map=ad_map, adata_sp=data_, annotation="label", threshold=0.5)
annotation_list = list(pd.unique(adata_sc.obs['label']))
data_.obs = pd.concat([data_.obs, data_.obsm["tangram_ct_pred"]], axis=1)

sc.pl.spatial(data_,
              color=annotation_list,
              vmax="p85",
              color_map="viridis",
              legend_fontsize=20,
              )

tg.plot_training_scores(ad_map, bins=20, alpha=.5)
ad_ge = tg.project_genes(adata_map=ad_map, adata_sc=adata_sc)
df_all_genes = tg.compare_spatial_geneexp(ad_ge, data_, adata_sc)
tg.plot_auc(df_all_genes)

tg.create_segment_cell_df(data_)
tg.count_cell_annotations(
    ad_map,
    adata_sc,
    data_,
    annotation="label",
    )

data_segment = tg.deconvolve_cell_annotations(data_)
sc.pl.spatial(
    data_segment[(data_segment.obs["cluster"] == "CRC" ) | (data_segment.obs["cluster"] == "CAF") | (data_segment.obs["cluster"] == "T cell")],
    color="cluster",
    size=0.05,
    show=True,
    frameon=True,
    alpha_img=0.05,
    legend_fontsize=20,
    )

plt.show()

Score: 0.692, KL reg: 0.038, Count reg: 1858.553, Lambda f reg: 1131.242
Score: 0.850, KL reg: 0.003, Count reg: 0.584, Lambda f reg: 202.166
Score: 0.852, KL reg: 0.003, Count reg: 0.060, Lambda f reg: 76.547
Score: 0.853, KL reg: 0.003, Count reg: 0.360, Lambda f reg: 57.193
Score: 0.853, KL reg: 0.003, Count reg: 0.210, Lambda f reg: 47.202
Score: 0.853, KL reg: 0.003, Count reg: 0.036, Lambda f reg: 39.972
Score: 0.853, KL reg: 0.003, Count reg: 0.180, Lambda f reg: 34.049
Score: 0.853, KL reg: 0.003, Count reg: 0.425, Lambda f reg: 30.705
Score: 0.853, KL reg: 0.003, Count reg: 0.052, Lambda f reg: 28.616
Score: 0.853, KL reg: 0.003, Count reg: 0.091, Lambda f reg: 26.902
Score: 0.853, KL reg: 0.003, Count reg: 0.663, Lambda f reg: 24.871
Score: 0.853, KL reg: 0.003, Count reg: 0.160, Lambda f reg: 22.332
Score: 0.853, KL reg: 0.003, Count reg: 0.803, Lambda f reg: 20.649
Score: 0.853, KL reg: 0.003, Count reg: 0.306, Lambda f reg: 18.362
Score: 0.853, KL reg: 0.003, Count reg: 0.467, Lambda f reg: 17.860
Score: 0.854, KL reg: 0.003, Count reg: 0.097, Lambda f reg: 18.176
Score: 0.854, KL reg: 0.003, Count reg: 0.453, Lambda f reg: 17.781
Score: 0.854, KL reg: 0.003, Count reg: 0.199, Lambda f reg: 17.061
Score: 0.854, KL reg: 0.003, Count reg: 0.689, Lambda f reg: 15.504
Score: 0.854, KL reg: 0.003, Count reg: 0.829, Lambda f reg: 14.351
Score: 0.854, KL reg: 0.003, Count reg: 0.044, Lambda f reg: 14.575
Score: 0.854, KL reg: 0.003, Count reg: 0.089, Lambda f reg: 13.723
Score: 0.854, KL reg: 0.003, Count reg: 0.622, Lambda f reg: 12.515
Score: 0.854, KL reg: 0.003, Count reg: 0.424, Lambda f reg: 12.695
Score: 0.854, KL reg: 0.003, Count reg: 0.191, Lambda f reg: 12.913
Score: 0.854, KL reg: 0.003, Count reg: 0.544, Lambda f reg: 12.545
Score: 0.854, KL reg: 0.003, Count reg: 0.668, Lambda f reg: 11.423
Score: 0.854, KL reg: 0.003, Count reg: 0.071, Lambda f reg: 10.677
Score: 0.854, KL reg: 0.003, Count reg: 0.522, Lambda f reg: 9.563
Score: 0.854, KL reg: 0.003, Count reg: 0.388, Lambda f reg: 9.688
Score: 0.854, KL reg: 0.003, Count reg: 0.224, Lambda f reg: 8.851
Score: 0.854, KL reg: 0.003, Count reg: 0.158, Lambda f reg: 8.867
Score: 0.854, KL reg: 0.003, Count reg: 0.298, Lambda f reg: 7.769
Score: 0.854, KL reg: 0.003, Count reg: 0.224, Lambda f reg: 7.837
Score: 0.854, KL reg: 0.003, Count reg: 0.144, Lambda f reg: 7.912
Score: 0.854, KL reg: 0.003, Count reg: 0.056, Lambda f reg: 7.994
Score: 0.854, KL reg: 0.003, Count reg: 0.187, Lambda f reg: 7.859
Score: 0.854, KL reg: 0.003, Count reg: 0.093, Lambda f reg: 7.949
Score: 0.854, KL reg: 0.003, Count reg: 0.183, Lambda f reg: 7.855
Score: 0.854, KL reg: 0.003, Count reg: 0.130, Lambda f reg: 7.905
Score: 0.854, KL reg: 0.003, Count reg: 0.197, Lambda f reg: 7.831
Score: 0.854, KL reg: 0.003, Count reg: 0.676, Lambda f reg: 6.359
Score: 0.854, KL reg: 0.003, Count reg: 0.611, Lambda f reg: 6.421
Score: 0.854, KL reg: 0.003, Count reg: 0.541, Lambda f reg: 6.489
Score: 0.854, KL reg: 0.003, Count reg: 0.461, Lambda f reg: 6.565
Score: 0.854, KL reg: 0.003, Count reg: 0.370, Lambda f reg: 6.654
Score: 0.854, KL reg: 0.003, Count reg: 0.043, Lambda f reg: 6.187
Score: 0.854, KL reg: 0.003, Count reg: 0.654, Lambda f reg: 5.371
Score: 0.854, KL reg: 0.003, Count reg: 0.601, Lambda f reg: 5.421
Score: 0.854, KL reg: 0.003, Count reg: 0.544, Lambda f reg: 5.477
Score: 0.854, KL reg: 0.003, Count reg: 0.480, Lambda f reg: 5.538
Score: 0.854, KL reg: 0.003, Count reg: 0.409, Lambda f reg: 5.608
Score: 0.854, KL reg: 0.003, Count reg: 0.313, Lambda f reg: 5.701
Score: 0.854, KL reg: 0.003, Count reg: 0.269, Lambda f reg: 4.749
Score: 0.854, KL reg: 0.003, Count reg: 0.220, Lambda f reg: 4.796
Score: 0.854, KL reg: 0.003, Count reg: 0.166, Lambda f reg: 4.849
Score: 0.854, KL reg: 0.003, Count reg: 0.105, Lambda f reg: 4.908
Score: 0.854, KL reg: 0.003, Count reg: 0.035, Lambda f reg: 4.976
Score: 0.854, KL reg: 0.003, Count reg: 0.214, Lambda f reg: 4.796
Score: 0.854, KL reg: 0.003, Count reg: 0.480, Lambda f reg: 3.533
Score: 0.854, KL reg: 0.003, Count reg: 0.454, Lambda f reg: 3.558
Score: 0.854, KL reg: 0.003, Count reg: 0.427, Lambda f reg: 3.585
Score: 0.854, KL reg: 0.003, Count reg: 0.396, Lambda f reg: 3.615
Score: 0.854, KL reg: 0.003, Count reg: 0.362, Lambda f reg: 3.647
Score: 0.854, KL reg: 0.003, Count reg: 0.326, Lambda f reg: 3.683
Score: 0.854, KL reg: 0.003, Count reg: 0.287, Lambda f reg: 3.722
Score: 0.854, KL reg: 0.003, Count reg: 0.243, Lambda f reg: 3.764
Score: 0.854, KL reg: 0.003, Count reg: 0.195, Lambda f reg: 3.812
Score: 0.854, KL reg: 0.003, Count reg: 0.139, Lambda f reg: 3.867
Score: 0.854, KL reg: 0.003, Count reg: 0.020, Lambda f reg: 4.014
Score: 0.854, KL reg: 0.003, Count reg: 0.705, Lambda f reg: 2.303
Score: 0.854, KL reg: 0.003, Count reg: 0.695, Lambda f reg: 2.313
Score: 0.854, KL reg: 0.003, Count reg: 0.682, Lambda f reg: 2.325
Score: 0.854, KL reg: 0.003, Count reg: 0.667, Lambda f reg: 2.339
Score: 0.854, KL reg: 0.003, Count reg: 0.653, Lambda f reg: 2.353
Score: 0.854, KL reg: 0.003, Count reg: 0.637, Lambda f reg: 2.369
Score: 0.854, KL reg: 0.003, Count reg: 0.620, Lambda f reg: 2.386
Score: 0.854, KL reg: 0.003, Count reg: 0.601, Lambda f reg: 2.404
Score: 0.854, KL reg: 0.003, Count reg: 0.581, Lambda f reg: 2.424
Score: 0.854, KL reg: 0.003, Count reg: 0.559, Lambda f reg: 2.445
Score: 0.854, KL reg: 0.003, Count reg: 0.536, Lambda f reg: 2.468
Score: 0.854, KL reg: 0.003, Count reg: 0.510, Lambda f reg: 2.494
Score: 0.854, KL reg: 0.003, Count reg: 0.482, Lambda f reg: 2.521
Score: 0.854, KL reg: 0.003, Count reg: 0.451, Lambda f reg: 2.552
Score: 0.854, KL reg: 0.003, Count reg: 0.418, Lambda f reg: 2.585
Score: 0.854, KL reg: 0.003, Count reg: 0.380, Lambda f reg: 2.622
Score: 0.854, KL reg: 0.003, Count reg: 0.339, Lambda f reg: 2.664
Score: 0.854, KL reg: 0.003, Count reg: 0.291, Lambda f reg: 2.711
Score: 0.854, KL reg: 0.003, Count reg: 0.235, Lambda f reg: 2.767
Score: 0.854, KL reg: 0.003, Count reg: 0.078, Lambda f reg: 2.014
Score: 0.854, KL reg: 0.003, Count reg: 0.514, Lambda f reg: 1.490
Score: 0.854, KL reg: 0.003, Count reg: 0.504, Lambda f reg: 1.499
Score: 0.854, KL reg: 0.003, Count reg: 0.495, Lambda f reg: 1.508
Score: 0.854, KL reg: 0.003, Count reg: 0.484, Lambda f reg: 1.518
Score: 0.854, KL reg: 0.003, Count reg: 0.474, Lambda f reg: 1.529
Score: 0.854, KL reg: 0.003, Count reg: 0.462, Lambda f reg: 1.540
Score: 0.854, KL reg: 0.003, Count reg: 0.449, Lambda f reg: 1.553
Score: 0.854, KL reg: 0.003, Count reg: 0.436, Lambda f reg: 1.566
Score: 0.854, KL reg: 0.003, Count reg: 0.421, Lambda f reg: 1.581
Score: 0.854, KL reg: 0.003, Count reg: 0.404, Lambda f reg: 1.597

<Figure size 800x640 with 0 Axes>

	B cell	CAF	CRC	DC	EC	MAF	MSC	Mac	Mst	Myo	T cell
0	SSR4	SERPING1	KRT5	LAMP3	PECAM1	TAGLN	NNMT	HLA-DRA	TPSB2	NEB	PTPRC
1	MZB1	DCN	KRT17	CCR7	S1PR1	TNS1	CHRNA1	HLA-DRB1	TPSAB1	ACTC1	TMSB4X
2	DERL3	C1R	FXYD3	BIRC3	HYAL2	MYL9	RASD1	HLA-DQB1	CPA3	DES	ARHGDIB
3	FKBP11	C1S	PERP	LY75	CDH5	ADAMTS1	CD63	CD74	TPSD1	FLNC	STK17B
4	HERPUD1	COL1A2	DSP	CD74	ENG	CALD1	MEG3	HLA-DPA1	KIT	H19	B2M
...	...	...	...	...	...	...	...	...	...	...	...
95	MALAT1	PRKCDBP	TKT	PAN3	COL4A1	MAP1B	CLU	CECR1	PAG1	APOBEC2	DAND5
96	ICAM2	TIMP3	ANXA8L1	TMSB4X	PRSS23	EDNRA	IP6K3	TLR2	CDK15	PADI2	L1TD1
97	RRBP1	DIO2	TNS4	HCK	SPRY1	GSN	MT1M	SRGN	MLPH	MLIP	CYTIP
98	RNF122	ARID5B	SNRPE	GLS	DARC	HSPB8	FOXO3	MSR1	LAX1	PGAM2	AKAP5
99	KLHL6	CD248	KRT6B	STARD7	SCARF1	MOCS1	IFITM2	PPT1	RIN3	TBX15	AGMAT