Ken Furudate

In [ ]:
import scanpy as sc
import stlearn as st

import scvi
import scib

import numpy as np
import pandas as pd

from tqdm import tqdm
from matplotlib import pyplot as plt
%matplotlib inline

import os
from pathlib import Path

import seaborn as sns

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

import warnings
warnings.filterwarnings('ignore')
In [ ]:
scvi.settings.seed = 42
Global seed set to 42
In [ ]:
sample_name1="A"
sample_name2="B"
sample_name3="C"
In [ ]:
datadir="/data/spatial/"
In [ ]:
in_f1 = "SampleA_cluster_patho.annot_region.txt"
in_f2 = "SampleB_cluster_patho.annot_region.txt"
in_f3 = "SampleC_cluster_patho.annot_region.txt"
In [ ]:
adata = st.Read10X(datadir + f"{sample_name1}/")
adata.var_names_make_unique()
adata.var_names
adata
In [ ]:
bdata = st.Read10X(datadir + f"{sample_name2}/")
bdata.var_names_make_unique()
bdata.var_names
bdata
In [ ]:
cdata = st.Read10X(datadir + f"{sample_name3}/")
cdata.var_names_make_unique()
cdata.var_names
cdata
In [ ]:
a_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f1), index_col="Barcode")
b_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f2), index_col="Barcode")
c_annot = pd.read_table(os.path.join(datadir, "patho_annot", in_f3), index_col="Barcode")
In [ ]:
adata.obs['sample'] = "A"
bdata.obs['sample'] = "B"
cdata.obs['sample'] = "C"
In [ ]:
adata_tumor = adata[adata.obs['category']=="tumor"]
bdata_tumor = bdata[bdata.obs['category']=="tumor"]
cdata_tumor = cdata[cdata.obs['category']=="tumor"]

adata_tumor
Out[ ]:
View of AnnData object with n_obs × n_vars = 984 × 17943
    obs: 'in_tissue', 'array_row', 'array_col', 'imagecol', 'imagerow', 'pathology', 'category', 'cluster', 'sample'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'
In [ ]:
adata_uns_spatial = adata_tumor.uns["spatial"]
bdata_uns_spatial = bdata_tumor.uns["spatial"]
cdata_uns_spatial = cdata_tumor.uns["spatial"]
In [ ]:
sc.pp.filter_cells(adata_tumor, min_counts=500)
sc.pp.filter_cells(bdata_tumor, min_counts=500)
sc.pp.filter_cells(cdata_tumor, min_counts=500)

sc.pp.filter_genes(adata_tumor, min_cells=3)
sc.pp.filter_genes(bdata_tumor, min_cells=3)
sc.pp.filter_genes(cdata_tumor, min_cells=3)
filtered out 1 cells that have less than 500 counts
Trying to set attribute `.obs` of view, copying.
Trying to set attribute `.obs` of view, copying.
filtered out 18 cells that have less than 500 counts
Trying to set attribute `.obs` of view, copying.
filtered out 213 genes that are detected in less than 3 cells
filtered out 738 genes that are detected in less than 3 cells
filtered out 136 genes that are detected in less than 3 cells
In [ ]:
data = adata_tumor.concatenate(bdata_tumor)
In [ ]:
data = data.concatenate(cdata_tumor)
In [ ]:
data.layers["counts"] = data.X.copy()
In [ ]:
sc.pp.normalize_total(data, target_sum=1e4)
sc.pp.log1p(data)
data.raw = data  # keep full dimension safe
normalizing counts per cell
    finished (0:00:00)
In [ ]:
sc.pp.highly_variable_genes(
    data,
    flavor="seurat_v3",
    n_top_genes=4000,
    layer="counts",
    batch_key="sample",
    subset=True
    )
If you pass `n_top_genes`, all cutoffs are ignored.
extracting highly variable genes
--> added
    'highly_variable', boolean vector (adata.var)
    'highly_variable_rank', float vector (adata.var)
    'means', float vector (adata.var)
    'variances', float vector (adata.var)
    'variances_norm', float vector (adata.var)
In [ ]:
scvi.data.setup_anndata(data, 
                        layer="counts", 
                        batch_key="sample"
                        )
INFO     Using batches from adata.obs["sample"]                                              
INFO     No label_key inputted, assuming all cells have same label                           
INFO     Using data from adata.layers["counts"]                                              
INFO     Successfully registered anndata object containing 3637 cells, 4000 vars, 3 batches, 
         1 labels, and 0 proteins. Also registered 0 extra categorical covariates and 0 extra
         continuous covariates.                                                              
INFO     Please do not further modify adata until model is trained.                          
In [ ]:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
device
Out[ ]:
'cuda'
In [ ]:
model = scvi.model.SCVI(data,
                        n_hidden=128,
                        n_latent=30,
                        n_layers=2,
                        dropout_rate=0.2,
                        dispersion="gene",
                        gene_likelihood="nb"
                        )
model.to_device(device)
model.train(use_gpu=True)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Epoch 400/400: 100%|██████████| 400/400 [02:45<00:00,  2.42it/s, loss=3.14e+03, v_num=1]
In [ ]:
scvi.data.view_anndata_setup(model.adata)
Anndata setup with scvi-tools version 0.14.5.
              Data Summary              
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓
┃             Data              Count ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩
│            Cells              3637  │
│             Vars              4000  │
│            Labels               1   │
│           Batches               3   │
│           Proteins              0   │
│ Extra Categorical Covariates    0   │
│ Extra Continuous Covariates     0   │
└──────────────────────────────┴───────┘
             SCVI Data Registry              
┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃     Data          scvi-tools Location    ┃
┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│       X         adata.layers['counts']   │
│ batch_indices  adata.obs['_scvi_batch']  │
│    labels      adata.obs['_scvi_labels'] │
└───────────────┴───────────────────────────┘
                        Label Categories                        
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃      Source Location       Categories  scvi-tools Encoding ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ adata.obs['_scvi_labels']      0                0          │
└───────────────────────────┴────────────┴─────────────────────┘
                     Batch Categories                     
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
┃   Source Location    Categories  scvi-tools Encoding ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
│ adata.obs['sample']      A                0          │
│                          B                1          │
│                          C                2          │
└─────────────────────┴────────────┴─────────────────────┘
In [ ]:
model.save(outdir+f"scvi.train.model.{Version}", overwrite=True, save_anndata=True)
... storing 'pathology' as categorical
... storing 'category' as categorical
... storing 'cluster' as categorical
... storing 'sample' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
... storing 'feature_types' as categorical
... storing 'genome' as categorical
In [ ]:
data.obsm["X_scvi"] = model.get_latent_representation()
In [ ]:
data.layers["scvi_expr"] = model.get_normalized_expression(data)
In [ ]:
try:
  sc.pp.neighbors(data, use_rep="X_scvi")
  sc.tl.leiden(data, key_added='leiden', resolution=0.5)
  sc.tl.umap(data)
computing neighbors
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:11)
running Leiden clustering
    finished: found 12 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:00)
In [ ]:
data
Out[ ]:
AnnData object with n_obs × n_vars = 3637 × 4000
    obs: 'in_tissue', 'array_row', 'array_col', 'imagecol', 'imagerow', 'pathology', 'category', 'cluster', 'sample', 'n_counts', 'batch', '_scvi_batch', '_scvi_labels', 'leiden'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells-0-0', 'n_cells-1-0', 'n_cells-1', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'highly_variable_nbatches'
    uns: 'log1p', 'hvg', '_scvi', 'neighbors', 'leiden', 'umap'
    obsm: 'spatial', 'X_scvi', 'X_umap'
    layers: 'counts', 'scvi_expr'
    obsp: 'distances', 'connectivities'
In [ ]:
sc.set_figure_params(facecolor="white", figsize=(12, 8), dpi_save=300)
sc.pl.umap(data, 
           color=["sample", "pathology", "leiden"],
           frameon=False,
           ncols=1,
           save=f"{DAY}_{sample_name}.After_batch_effect_correction.spatialplot.{Version}.pdf"
           )
WARNING: saving figure to file figures/umap20220220_A2B2C2_tumor.After_batch_effect_correction.spatialplot.v20.pdf
In [ ]:
sc.tl.embedding_density(data, groupby="sample", key_added="sample_density")

sc.set_figure_params(facecolor="white", figsize=(10, 8), dpi_save=300)
sc.pl.embedding_density(data, 
                        key="sample_density",
                        save=f"{DAY}_{sample_name}.sampledensity.batch_effect_correction.{Version}.pdf")
computing density on 'umap'
--> added
    'sample_density', densities (adata.obs)
    'sample_density_params', parameter (adata.uns)
WARNING: saving figure to file figures/sample_density_20220220_A2B2C2_tumor.sampledensity.batch_effect_correction.v20.pdf

scVI DE

The change mode follows protocol described in Boyeau et al.

In [ ]:
# one vs all, over all groups
full_de_res = model.differential_expression(groupby='leiden', 
                                            mode='change', # default
                                            fdr_target=0.1)
full_de_res.head(20)
DE...: 100%|██████████| 12/12 [00:52<00:00,  4.41s/it]
Out[ ]:
proba_de proba_not_de bayes_factor scale1 scale2 pseudocounts delta lfc_mean lfc_median lfc_std lfc_min lfc_max raw_mean1 raw_mean2 non_zeros_proportion1 non_zeros_proportion2 raw_normalized_mean1 raw_normalized_mean2 is_de_fdr_0.1 comparison group1 group2
IGHD 0.9614 0.0386 3.215138 5.014777e-05 0.000155 0.0 0.25 -1.961476 -2.218686 4.679548 -12.442281 13.102363 0.373913 0.302417 0.163478 0.214892 0.613425 1.429468 True 0 vs Rest 0 Rest
KRT1 0.9612 0.0388 3.209762 1.328588e-05 0.000056 0.0 0.25 -2.762277 -2.990346 3.740437 -11.404661 8.144420 0.071304 0.184194 0.062609 0.141737 0.125630 0.490895 True 0 vs Rest 0 Rest
IGLJ6 0.9574 0.0426 3.112367 2.107955e-06 0.000008 0.0 0.25 -1.984414 -2.068377 3.130071 -12.411051 8.842839 0.015652 0.016982 0.015652 0.016329 0.017529 0.075899 True 0 vs Rest 0 Rest
TNNI1 0.9572 0.0428 3.107474 3.670819e-04 0.000227 0.0 0.25 1.782276 1.949461 4.909887 -13.054656 12.058186 3.626084 1.035924 0.570435 0.291313 4.967675 2.516599 True 0 vs Rest 0 Rest
SCNN1G 0.9568 0.0432 3.097754 2.088196e-06 0.000013 0.0 0.25 -2.366047 -2.489151 2.380850 -9.531041 5.713491 0.019130 0.037231 0.019130 0.033638 0.028537 0.113567 True 0 vs Rest 0 Rest
NCCRP1 0.9564 0.0436 3.088119 3.245742e-05 0.000098 0.0 0.25 -2.199116 -2.482291 4.045037 -12.061623 10.157679 0.186087 0.260614 0.139130 0.171130 0.330292 0.912463 True 0 vs Rest 0 Rest
OR1A2 0.9556 0.0444 3.069100 1.031632e-06 0.000007 0.0 0.25 -2.211638 -2.214776 2.158102 -9.141550 5.730095 0.003478 0.017636 0.003478 0.016656 0.003897 0.063990 True 0 vs Rest 0 Rest
ACTC1 0.9548 0.0452 3.050405 2.031874e-04 0.000111 0.0 0.25 1.122663 1.213439 3.782610 -10.068547 9.856674 1.886959 0.481710 0.467826 0.222077 2.365514 1.137099 True 0 vs Rest 0 Rest
IGHG4 0.9538 0.0462 3.027474 5.267077e-04 0.001180 0.0 0.25 -2.009769 -2.003742 5.203235 -12.787839 14.891382 5.542608 5.183572 0.422609 0.589157 5.509517 10.980305 True 0 vs Rest 0 Rest
IGLV9-49 0.9536 0.0464 3.022945 2.598359e-05 0.000129 0.0 0.25 -2.360946 -2.420740 3.946391 -11.608186 10.199312 0.206956 0.628672 0.085217 0.191378 0.233483 1.104792 True 0 vs Rest 0 Rest
NAA11 0.9532 0.0468 3.013941 1.065899e-06 0.000008 0.0 0.25 -1.962109 -2.047962 3.067196 -10.694289 9.241531 0.010435 0.014696 0.010435 0.013390 0.009031 0.095068 True 0 vs Rest 0 Rest
ACAN 0.9532 0.0468 3.013941 4.309137e-07 0.000003 0.0 0.25 -2.434399 -2.467591 2.752172 -10.397037 7.943695 0.003478 0.006858 0.003478 0.006205 0.007703 0.020119 True 0 vs Rest 0 Rest
CHIT1 0.9530 0.0470 3.009467 2.191571e-06 0.000009 0.0 0.25 -1.990829 -2.111579 2.539829 -9.544723 6.760153 0.010435 0.034291 0.010435 0.031352 0.013659 0.095675 True 0 vs Rest 0 Rest
IGLC1 0.9528 0.0472 3.005011 2.073162e-04 0.000596 0.0 0.25 -2.049844 -2.378872 4.033913 -10.686590 12.256248 1.895654 2.242341 0.372174 0.555193 2.079543 5.583817 True 0 vs Rest 0 Rest
CBLL2 0.9520 0.0480 2.987364 2.197803e-06 0.000013 0.0 0.25 -1.821943 -1.919060 2.468560 -8.889462 7.225986 0.017391 0.026780 0.017391 0.026127 0.019305 0.116162 True 0 vs Rest 0 Rest
DES 0.9520 0.0480 2.987364 6.165525e-04 0.000327 0.0 0.25 1.998889 2.131765 4.721220 -12.210159 12.086747 6.502599 1.616600 0.613913 0.339974 8.122251 3.658348 True 0 vs Rest 0 Rest
HAVCR1 0.9518 0.0482 2.982996 2.777010e-06 0.000006 0.0 0.25 -1.968219 -2.368101 2.826574 -9.780298 7.036253 0.015652 0.018289 0.015652 0.016656 0.031786 0.059277 True 0 vs Rest 0 Rest
CKM 0.9514 0.0486 2.974311 1.391557e-04 0.000215 0.0 0.25 1.697646 1.937233 4.156939 -13.632011 10.582188 1.177394 0.671783 0.500870 0.259634 1.457761 2.155419 True 0 vs Rest 0 Rest
GPA33 0.9514 0.0486 2.974311 1.714058e-06 0.000007 0.0 0.25 -1.927718 -2.083632 2.597942 -8.731029 8.907793 0.006957 0.018289 0.006957 0.016982 0.011737 0.065187 True 0 vs Rest 0 Rest
IFNA8 0.9508 0.0492 2.961410 2.628014e-06 0.000009 0.0 0.25 -1.650592 -1.822725 2.898028 -11.695060 8.233589 0.015652 0.020248 0.015652 0.018615 0.026451 0.092085 True 0 vs Rest 0 Rest
In [ ]:
from copy import deepcopy
markers = {}

cats = data.obs.leiden.cat.categories

dup_lst = []

for i, c in enumerate(cats):
    cnt = 0
    gene_lst = []
    cid = "{} vs Rest".format(c)
    cell_type_df = full_de_res.loc[full_de_res.comparison == cid]
    cell_type_df = cell_type_df[cell_type_df["is_de_fdr_0.1"] == True]
    cell_type_df = cell_type_df.sort_values("lfc_mean", ascending=False)

    # those genes with higher expression in group 1
    cell_type_df = cell_type_df[cell_type_df.lfc_mean > 0]

    # significance
    cell_type_df = cell_type_df[cell_type_df["bayes_factor"] >= 2]

    # genes with sufficient expression
    cell_type_df = cell_type_df[cell_type_df["non_zeros_proportion1"] >= 0.1]

    while len(gene_lst) < 5:
      if cell_type_df.index[cnt] in dup_lst: 
        cnt += 1
      else: 
        gene_lst.append(cell_type_df.index[cnt])
        cnt += 1

    markers[c] = deepcopy(gene_lst)
    dup_lst += deepcopy(gene_lst)
    


markers
Out[ ]:
{'0': ['DES', 'TNNI1', 'CKM', 'SOX11', 'COL11A1'],
 '1': ['IGHD', 'NCCRP1', 'SPIB', 'CD22', 'XIRP2'],
 '10': ['SAMD11', 'KRT4', 'CYP7A1', 'CCR5', 'OR4N2'],
 '11': ['TCAP', 'ACTC1', 'KLHL40', 'TRIM72', 'MYLPF'],
 '2': ['FLG', 'MFAP5', 'KRTAP2-3', 'CPA4', 'CNTNAP2'],
 '3': ['PAEP', 'CCL21', 'NLRP14', 'HMGA2', 'ACTN2'],
 '4': ['PI3', 'SERPINB3', 'S100A9', 'TMPRSS11D', 'MSLN'],
 '5': ['VPREB3', 'DUXA', 'EIF5AL1', 'XCL1', 'LIPJ'],
 '6': ['IGLV9-49', 'IGHG4', 'IGLV3-1', 'IGHA1', 'IGHV5-10-1'],
 '7': ['IGLV6-57', 'KRT13', 'IGKC', 'IGHG2', 'MPPED2'],
 '8': ['SPINK6', 'CXCL14', 'IGHV6-1', 'PTPRZ1', 'KRT1'],
 '9': ['STRIP2', 'FGFBP1', 'BLNK', 'CLCA2', 'LRRC15']}
In [ ]:
sc.tl.dendrogram(data, 
                 groupby='leiden',
                 cor_method='pearson', # default
                 linkage_method='complete', # default
                 inplace=True
                 )
WARNING: You’re trying to run this on 4000 dimensions of `.X`, if you really want this, set `use_rep='X'`.
         Falling back to preprocessing with `sc.pp.pca` and default params.
computing PCA
    with n_comps=50
    finished (0:00:08)
Storing dendrogram info using `.uns['dendrogram_leiden']`
In [ ]:
sc.set_figure_params(facecolor="white", figsize=(20, 8), dpi_save=300)

import matplotlib.font_manager
plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = ['Times New Roman'] + plt.rcParams['font.serif']
plt.rcParams["font.size"] = 16


sc.pl.matrixplot(
    data, 
    markers,
    groupby='leiden',
    standard_scale="var",
    layer="scvi_expr",
    dendrogram=True,
    save=f"{DAY}_{sample_name}.scvi.DE.leiden.matrixplot.{Version}.pdf"
    )
WARNING: saving figure to file figures/matrixplot_20220220_A2B2C2_tumor.scvi.DE.leiden.matrixplot.v20.pdf
In [ ]:
# First, we need input_df

select_col1 = "leiden"
select_col2 = "sample"
input_df = data.obs[[select_col1, select_col2 ]]
input_df
Out[ ]:
leiden sample
AAACAGCTTTCAGAAG-1-0-0 2 A
AAACAGGGTCTATATT-1-0-0 2 A
AAACCGGGTAGGTACC-1-0-0 2 A
AAACCTCATGAAGTTG-1-0-0 2 A
AAACTTGCAAACGTAT-1-0-0 2 A
... ... ...
TTGTTAGCAAATTCGA-1-1 6 C
TTGTTCAGTGTGCTAC-1-1 8 C
TTGTTGTGTGTCAAGA-1-1 8 C
TTGTTTCACATCCAGG-1-1 1 C
TTGTTTCATTAGTCTA-1-1 5 C

3637 rows × 2 columns

In [ ]:
# One hot enchoding
one_hot_df = pd.get_dummies(input_df, columns=[select_col2])
one_hot_df.columns = ["cluster", "sample_A", "sample_B", "sample_C"]
one_hot_df
Out[ ]:
cluster sample_A sample_B sample_C
AAACAGCTTTCAGAAG-1-0-0 2 1 0 0
AAACAGGGTCTATATT-1-0-0 2 1 0 0
AAACCGGGTAGGTACC-1-0-0 2 1 0 0
AAACCTCATGAAGTTG-1-0-0 2 1 0 0
AAACTTGCAAACGTAT-1-0-0 2 1 0 0
... ... ... ... ...
TTGTTAGCAAATTCGA-1-1 6 0 0 1
TTGTTCAGTGTGCTAC-1-1 8 0 0 1
TTGTTGTGTGTCAAGA-1-1 8 0 0 1
TTGTTTCACATCCAGG-1-1 1 0 0 1
TTGTTTCATTAGTCTA-1-1 5 0 0 1

3637 rows × 4 columns

In [ ]:
one_hot_df = one_hot_df.groupby(by="cluster").sum()
one_hot_df
Out[ ]:
sample_A sample_B sample_C
cluster
0 257.0 137.0 181.0
1 164.0 30.0 282.0
2 168.0 172.0 11.0
3 87.0 73.0 177.0
4 47.0 57.0 219.0
5 29.0 23.0 270.0
6 35.0 3.0 245.0
7 17.0 20.0 229.0
8 5.0 7.0 243.0
9 59.0 3.0 107.0
10 27.0 25.0 115.0
11 88.0 22.0 3.0
In [ ]:
sns.set(font_scale=1.5, style='white')
plt.figure(figsize=(20, 10), dpi=300)
one_hot_df.plot.bar(stacked=True)
plt.legend(fontsize=16, loc='upper right', bbox_to_anchor=(1, 1))
plt.xticks(rotation=0)
plt.xlabel('cluster')
sns.despine()
plt.show()
<Figure size 6000x3000 with 0 Axes>
In [ ]:
input_df = one_hot_df.copy()
clusters = list(input_df.index)

for cluster_ in clusters:
  Sum_cluser = one_hot_df.sum(axis=1)[cluster_]
  N_of_cluster = one_hot_df.iloc[cluster_]
  input_df.iloc[cluster_] = N_of_cluster/Sum_cluser
  input_df.iloc[cluster_] *= 100
  
input_df
Out[ ]:
sample_A sample_B sample_C
cluster
0 44.695652 23.826087 31.478261
1 34.453782 6.302521 59.243697
2 47.863248 49.002849 3.133903
3 25.816024 21.661721 52.522255
4 14.551084 17.647059 67.801858
5 9.006211 7.142857 83.850932
6 12.367491 1.060071 86.572438
7 6.390977 7.518797 86.090226
8 1.960784 2.745098 95.294118
9 34.911243 1.775148 63.313609
10 16.167665 14.970060 68.862275
11 77.876106 19.469027 2.654867
In [ ]:
input_df_sort = input_df.sort_values(by="sample_C")
input_df_sort 
Out[ ]:
sample_A sample_B sample_C
cluster
11 77.876106 19.469027 2.654867
2 47.863248 49.002849 3.133903
0 44.695652 23.826087 31.478261
3 25.816024 21.661721 52.522255
1 34.453782 6.302521 59.243697
9 34.911243 1.775148 63.313609
4 14.551084 17.647059 67.801858
10 16.167665 14.970060 68.862275
5 9.006211 7.142857 83.850932
7 6.390977 7.518797 86.090226
6 12.367491 1.060071 86.572438
8 1.960784 2.745098 95.294118
In [ ]:
sns.set(font_scale=1.5, style='white')
plt.figure(figsize=(20, 10), dpi=300)
input_df_sort.plot.bar(stacked=True)
plt.legend(fontsize=16, loc='upper right', bbox_to_anchor=(1.2, 1))
plt.xticks(rotation=0)
plt.xlabel('cluster')
sns.despine()
plt.show()
<Figure size 6000x3000 with 0 Axes>
In [ ]:
# First, we need input_df

select_col1 = "leiden"
select_col2 = "pathology"
input_df = data.obs[[select_col1, select_col2 ]]
input_df
Out[ ]:
leiden pathology
AAACAGCTTTCAGAAG-1-0-0 2 CA_PD
AAACAGGGTCTATATT-1-0-0 2 CA_PD
AAACCGGGTAGGTACC-1-0-0 2 CA_PD_&_fibrosis
AAACCTCATGAAGTTG-1-0-0 2 CA_PD_&_fibrosis
AAACTTGCAAACGTAT-1-0-0 2 CA_PD_&_fibrosis
... ... ...
TTGTTAGCAAATTCGA-1-1 6 CA_PD
TTGTTCAGTGTGCTAC-1-1 8 CA_PD
TTGTTGTGTGTCAAGA-1-1 8 CA_PD
TTGTTTCACATCCAGG-1-1 1 CA_PD
TTGTTTCATTAGTCTA-1-1 5 CA_PD

3637 rows × 2 columns

In [ ]:
# One hot enchoding
one_hot_df = pd.get_dummies(input_df, columns=[select_col2])
one_hot_df.columns = ["cluster", "CA_MD_to_WD", "CA_PD", "CA_PD_&_fibrosis", "CA_PD_&_muscle", "CA_fibrosis_&_lymphocyte"]
one_hot_df["cluster"] = one_hot_df["cluster"].astype(int)
# check data type
print(one_hot_df.dtypes)
one_hot_df = one_hot_df.groupby(by="cluster").sum()
one_hot_df
cluster                     int64
CA_MD_to_WD                 uint8
CA_PD                       uint8
CA_PD_&_fibrosis            uint8
CA_PD_&_muscle              uint8
CA_fibrosis_&_lymphocyte    uint8
dtype: object
Out[ ]:
CA_MD_to_WD CA_PD CA_PD_&_fibrosis CA_PD_&_muscle CA_fibrosis_&_lymphocyte
cluster
0 34.0 102.0 260.0 106.0 73.0
1 109.0 156.0 83.0 77.0 51.0
2 2.0 152.0 191.0 6.0 0.0
3 24.0 26.0 149.0 1.0 137.0
4 146.0 87.0 48.0 42.0 0.0
5 17.0 70.0 37.0 12.0 186.0
6 113.0 118.0 23.0 7.0 22.0
7 180.0 22.0 30.0 7.0 27.0
8 124.0 108.0 7.0 0.0 16.0
9 2.0 90.0 52.0 6.0 19.0
10 45.0 71.0 42.0 1.0 8.0
11 0.0 4.0 52.0 55.0 2.0
In [ ]:
sns.set(font_scale=1.5, style='white')
plt.figure(figsize=(20, 10), dpi=300)
one_hot_df.plot.bar(stacked=True)
plt.legend(fontsize=16, loc='upper right', bbox_to_anchor=(1, 1))
plt.xticks(rotation=0)
plt.xlabel('cluster')
sns.despine()
plt.show()
<Figure size 6000x3000 with 0 Axes>
In [ ]:
adata = data[data.obs['sample']=="A"]
bdata = data[data.obs['sample']=="B"]
cdata = data[data.obs['sample']=="C"]
In [ ]:
adata.uns["spatial"] = adata_uns_spatial
bdata.uns["spatial"] = bdata_uns_spatial
cdata.uns["spatial"] = cdata_uns_spatial
Trying to set attribute `._uns` of view, copying.
Trying to set attribute `._uns` of view, copying.
Trying to set attribute `._uns` of view, copying.
In [ ]:
sc.pl.spatial(adata=adata, 
              color=['cluster', 'leiden', 'pathology'],
              save=f"{DAY}_{sample_name1}.cluster_pathlogy_category.spatialplot.{Version}.pdf"
              )
WARNING: saving figure to file figures/show20220220_A2.cluster_pathlogy_category.spatialplot.v20.pdf
In [ ]:
sc.pl.spatial(adata=bdata, 
              color=['cluster', 'leiden', 'pathology'],
              save=f"{DAY}_{sample_name2}.cluster_pathlogy_category.spatialplot.{Version}.pdf"
              )
WARNING: saving figure to file figures/show20220220_B2.cluster_pathlogy_category.spatialplot.v20.pdf
In [ ]:
sc.pl.spatial(adata=cdata, 
              color=['cluster', 'leiden', 'pathology'],
              save=f"{DAY}_{sample_name3}.cluster_pathlogy_category.spatialplot.{Version}.pdf"
              )
WARNING: saving figure to file figures/show20220220_C2.cluster_pathlogy_category.spatialplot.v20.pdf