Ken Furudate
import scanpy as sc
import squidpy as sq
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib.font_manager
plt.rcParams['font.sans-serif'] = ['Arial'] + plt.rcParams['font.sans-serif']
plt.rcParams["font.size"] = 20
plt.rcParams['pdf.fonttype'] = 42
from sklearn.utils import resample
from tqdm import tqdm
from collections import Counter
from collections import defaultdict
import os
from pathlib import Path
import pickle
def pickle_load(path):
with open(path, mode='rb') as f:
data = pickle.load(f)
return data
import seaborn as sns
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))
import warnings
warnings.filterwarnings('ignore')
sc.logging.print_header()
print(f"squidpy=={sq.__version__}")
datadir = "/data/spatial/"
in_f = "integrated_data.h5ad"
data = sc.read_h5ad(datadir + in_f)
data
adata = data[data.obs['sample']=="A"]
bdata = data[data.obs['sample']=="B"]
cdata = data[data.obs['sample']=="C"]
adata = adata[adata.obs['category']=="tumor"]
bdata = bdata[bdata.obs['category']=="tumor"]
cdata = cdata[cdata.obs['category']=="tumor"]
adata.obs["region"] = adata.obs["leiden"].copy().astype(str)
adata.obs.replace({"region": {"0": "Predominant_pri"}}, inplace=True)
adata.obs.replace({"region": {"2": "Predominant_pri"}}, inplace=True)
adata.obs.replace({"region": {"11": "Predominant_pri"}}, inplace=True)
adata.obs.replace({"region": {"1": "Metastatic_pri"}}, inplace=True)
adata.obs.replace({"region": {"3": "Metastatic_pri"}}, inplace=True)
adata.obs.replace({"region": {"4": "Metastatic_pri"}}, inplace=True)
adata.obs.replace({"region": {"5": "Metastatic_pri"}}, inplace=True)
adata.obs.replace({"region": {"6": "Metastatic_pri"}}, inplace=True)
adata.obs.replace({"region": {"7": "Metastatic_pri"}}, inplace=True)
adata.obs.replace({"region": {"8": "Metastatic_pri"}}, inplace=True)
adata.obs.replace({"region": {"9": "Metastatic_pri"}}, inplace=True)
adata.obs.replace({"region": {"10": "Metastatic_pri"}}, inplace=True)
bdata.obs["region"] = bdata.obs["leiden"].copy().astype(str)
bdata.obs.replace({"region": {"0": "Predominant_pri"}}, inplace=True)
bdata.obs.replace({"region": {"2": "Predominant_pri"}}, inplace=True)
bdata.obs.replace({"region": {"11": "Predominant_pri"}}, inplace=True)
bdata.obs.replace({"region": {"1": "Metastatic_pri"}}, inplace=True)
bdata.obs.replace({"region": {"3": "Metastatic_pri"}}, inplace=True)
bdata.obs.replace({"region": {"4": "Metastatic_pri"}}, inplace=True)
bdata.obs.replace({"region": {"5": "Metastatic_pri"}}, inplace=True)
bdata.obs.replace({"region": {"6": "Metastatic_pri"}}, inplace=True)
bdata.obs.replace({"region": {"7": "Metastatic_pri"}}, inplace=True)
bdata.obs.replace({"region": {"8": "Metastatic_pri"}}, inplace=True)
bdata.obs.replace({"region": {"9": "Metastatic_pri"}}, inplace=True)
bdata.obs.replace({"region": {"10": "Metastatic_pri"}}, inplace=True)
adata.uns["spatial"] = pickle_load(datadir + 'adata_uns_spatial.pickle')
bdata.uns["spatial"] = pickle_load(datadir + 'bdata_uns_spatial.pickle')
cdata.uns["spatial"] = pickle_load(datadir + 'cdata_uns_spatial.pickle')
ddata.uns["spatial"] = pickle_load(datadir + 'ddata_uns_spatial.pickle')
sample_lst = ["A", "B", "C"]
def set_param(input_data, sample):
scale = input_data.uns['spatial'][f"{sample}"]['scalefactors']['tissue_hires_scalef']
img = sq.im.ImageContainer(input_data.uns['spatial'][f"{sample}"]['images']['hires'],
scale=scale,
library_id=f"{sample}")
img.show()
return scale, img
scale_a, img_a = set_param(adata, "A")
scale_b, img_b = set_param(bdata, "B")
scale_c, img_c = set_param(cdata, "C")
sc.pl.spatial(adata=adata,
color=["integrated spatial transcriptome cluster", 'integration_analysis'],
na_in_legend=False,
)
sc.pl.spatial(adata=bdata,
color=["integrated spatial transcriptome cluster", 'integration_analysis'],
na_in_legend=False,
)
count_a = pd.read_table("Fig.3B_SampleA.txt")
count_b = pd.read_table("Fig.3B_SampleB.txt")
count_c = pd.read_table("Fig.3B_SampleC.txt")
count_a
count_b
count_c
sampleA_ = data.obs[data.obs["sample"] == "A"].copy()
sampleA_["Unnamed: 0"] = sampleA_.index
sampleA_
data_a_merge = pd.merge(count_a, sampleA_, on="Unnamed: 0", how='left')
data_a_merge
sampleB_ = data.obs[data.obs["sample"] == "B"].copy()
sampleB_["Unnamed: 0"] = sampleB_.index
sampleB_
data_b_merge = pd.merge(count_b, sampleB_, on="Unnamed: 0", how='left')
data_b_merge
sampleC_ = data.obs[data.obs["sample"] == "C"].copy()
sampleC_["Unnamed: 0"] = sampleC_.index
sampleC_
data_c_merge = pd.merge(count_c, sampleC_, on="Unnamed: 0", how='left')
data_c_merge
data_ab_merge = pd.concat([data_a_merge, data_b_merge])
data_ab_merge
cell_type = [
"OSCC cell", "CAF", "MAF"
]
analysis_df = data_ab_merge[cell_type+["leiden"]]
analysis_df.reset_index(inplace=True, drop=True)
analysis_df
# Remove non-OSCC cell
input_df = analysis_df.copy()
drop_idx = []
for idx, cnt_ in enumerate(analysis_df["OSCC cell"]):
if int(cnt_) == 0:
drop_idx.append(idx)
input_df.drop(index=drop_idx, inplace=True)
input_df.reset_index(drop=True, inplace=True)
input_df
cond_lst = []
for clu in input_df["leiden"]:
if int(clu) == 0:
cond_lst.append("Predominant_pri")
elif int(clu) == 2:
cond_lst.append("Predominant_pri")
elif int(clu) == 11:
cond_lst.append("Predominant_pri")
else:
cond_lst.append("Metastatic_pri")
input_df["area"] = cond_lst
input_df
input_df[cell_type] = input_df[cell_type].where(input_df[cell_type] < 1, 1)
input_df
input_df["colocalization"] = input_df[cell_type].sum(axis=1)
input_df
select_col1 = "colocalization"
select_col2 = "area"
input_df2 = input_df[[select_col1, select_col2]]
input_df2
input_df3 = input_df2[(input_df2["colocalization"]!=2)]
input_df3["colocalization"].replace(1, 0, inplace=True)
input_df3["colocalization"].replace(3, 1, inplace=True)
input_df3
Predominant_pri_lst = list(input_df3[input_df3["area"]=="Predominant_pri"]["colocalization"])
Metastatic_pri_lst = list(input_df3[input_df3["area"]=="Metastatic_pri"]["colocalization"])
def BootStrap(input_lst, n_iter=10000):
resamples = []
for i in tqdm(range(n_iter)):
resample_lst = resample(input_lst, replace=True, n_samples=len(input_lst), random_state=i)
resamples.append(np.mean(resample_lst))
return resamples
resample_means_0 = BootStrap(Predominant_pri_lst)
conf_prob = np.array([0.025, 0.975])
resample_conf_0 = np.percentile(resample_means_0, conf_prob*100)
resample_means_1 = BootStrap(Metastatic_pri_lst)
resample_conf_1 = np.percentile(resample_means_1, conf_prob*100)
bins = np.linspace(0.35, 0.75, 40)
plt.hist([resample_means_0, resample_means_1], bins=bins, ec='k', color=['#377EB8', '#E41A1C'])
plt.show()
def BootStrap(input_lst, n_iter=10000):
resamples_0 = []
resamples_1 = []
prob_df = pd.DataFrame()
frequency_lst = []
for i in tqdm(range(n_iter)):
resample_lst = resample(input_lst, replace=True, n_samples=len(input_lst), random_state=i)
cnt_resamples = Counter(resample_lst)
for k_, v_ in cnt_resamples.items():
if k_ == 0:
resamples_0.append(v_)
elif k_ == 1:
resamples_1.append(v_)
prob_df = pd.DataFrame({"OSCC cells alone":resamples_0, "high colocalization":resamples_1})
return prob_df
Predominant_pri_df = BootStrap(Predominant_pri_lst)
Predominant_pri_df
conf_prob = np.array([0.025, 0.975])
resample_conf_0 = np.percentile(Predominant_pri_df["OSCC cells alone"], conf_prob*100)
resample_conf_0
resample_conf_1 = np.percentile(Predominant_pri_df["high colocalization"], conf_prob*100)
resample_conf_1
Metastatic_pri_df = BootStrap(Metastatic_pri_lst)
Metastatic_pri_df
resample_conf_2 = np.percentile(Metastatic_pri_df["OSCC cells alone"], conf_prob*100)
resample_conf_2
resample_conf_3 = np.percentile(Metastatic_pri_df["high colocalization"], conf_prob*100)
resample_conf_3
res_df = pd.DataFrame(0, index=[0, 1], columns=["Predominant_pri","Metastatic_pri"])
res_df
res_df.iloc[0,0] = round(Predominant_pri_df["OSCC cells alone"].mean())
res_df.iloc[1,0] = round(Predominant_pri_df["high colocalization"].mean())
res_df.iloc[0,1] = round(Metastatic_pri_df["OSCC cells alone"].mean())
res_df.iloc[1,1] = round(Metastatic_pri_df["high colocalization"].mean())
res_df
sns.set(font_scale=2, style='white')
error_bar_set = dict(lw = 1, capthick = 1, capsize = 20)
res_df.plot.bar(color=['#377EB8', '#E41A1C'],
yerr=[[resample_conf_0[1]-res_df.iloc[0,0],
resample_conf_1[1]-res_df.iloc[1,0]],
[resample_conf_2[1]-res_df.iloc[0,1],
resample_conf_3[1]-res_df.iloc[1,1]]],
error_kw=error_bar_set
)
plt.legend("")
plt.xticks(ticks=[0,1],
labels=['OSCC cells alone', 'OSCC cells with \n CAF and MAF'],
rotation=0
)
plt.xlabel('')
plt.ylabel('Number of cell \n colocalization per spot')
sns.despine()
plt.show()
res_df
fisher.test(res_df)