# Ken Furudate
# --- Notebook environment setup ---------------------------------------------
# Analysis stack: scanpy for the spatial data, numpy/pandas for tables,
# matplotlib/seaborn for plotting.
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
# IPython magic (not plain Python): only valid inside a Jupyter/IPython session.
%matplotlib inline
import matplotlib.font_manager
# Put Arial at the front of the sans-serif font list and use a large base font size.
plt.rcParams['font.sans-serif'] = ['Arial'] + plt.rcParams['font.sans-serif']
plt.rcParams["font.size"] = 40
import os
import seaborn as sns
from pathlib import Path
from IPython.core.display import display, HTML
# Inject a CSS rule that sets the notebook's `.container` element to 80% width.
display(HTML("<style>.container { width:80% !important; }</style>"))
import warnings
# Suppress every Python warning from this point on.
warnings.filterwarnings('ignore')
# Emit scanpy's header (package version information) so the run is traceable.
sc.logging.print_header()
# Sample identifiers; each one names a sub-directory of `datadir` that is
# read further down in this file.
sample_name1, sample_name2, sample_name3, sample_name4 = "A", "B", "C", "D"

# Root directory of the data set (note the trailing slash).
datadir = "/data/spatial/"

# Per-sample annotation tables, read from `<datadir>/patho_annot/` further down.
in_f1, in_f2, in_f3, in_f4 = (
    "SampleA_cluster_patho.annot_region.txt",
    "SampleB_cluster_patho.annot_region.txt",
    "SampleC_cluster_patho.annot_region.txt",
    "SampleD_cluster_patho.annot_region.txt",
)
def _read_visium_sample(sample_name):
    """Load one Visium sample stored under ``<datadir>/<sample_name>``.

    Reads ``filtered_feature_bc_matrix.h5`` from the sample directory, loads
    the images from its ``spatial`` sub-directory, and makes the variable
    (gene) names unique in place.

    Args:
        sample_name: Name of the sample's sub-directory inside ``datadir``.

    Returns:
        The object returned by ``sc.read_visium`` after
        ``var_names_make_unique()`` has been applied to it.
    """
    # os.path.join keeps the path valid whether or not `datadir` ends in "/".
    sample_dir = os.path.join(datadir, sample_name)
    data = sc.read_visium(
        sample_dir,
        genome=None,
        count_file='filtered_feature_bc_matrix.h5',
        library_id=None,
        load_images=True,
        source_image_path=os.path.join(sample_dir, "spatial"),
    )
    data.var_names_make_unique()
    return data


# One dataset per sample. The module-level names are kept because the rest of
# the file refers to them directly.
adata = _read_visium_sample(sample_name1)
bdata = _read_visium_sample(sample_name2)
cdata = _read_visium_sample(sample_name3)
ddata = _read_visium_sample(sample_name4)
# Annotation tables, one per sample, indexed by the "Barcode" column.
_annot_dir = os.path.join(datadir, "patho_annot")
a_annot = pd.read_table(os.path.join(_annot_dir, in_f1), index_col="Barcode")
b_annot = pd.read_table(os.path.join(_annot_dir, in_f2), index_col="Barcode")
c_annot = pd.read_table(os.path.join(_annot_dir, in_f3), index_col="Barcode")
d_annot = pd.read_table(os.path.join(_annot_dir, in_f4), index_col="Barcode")

# Copy three annotation columns into each dataset's `.obs` table under new
# names. NOTE(review): the assignment is expected to align on the barcode
# index, so barcodes missing from a table would end up as NaN -- confirm the
# barcodes in the tables match the datasets.
_obs_from_annot = (
    ('pathology', "patho_diag"),
    ('category', "category"),
    ('cluster', "graph-based"),
)
_dataset_annot_pairs = (
    (adata, a_annot),
    (bdata, b_annot),
    (cdata, c_annot),
    (ddata, d_annot),
)
for _dataset, _annot in _dataset_annot_pairs:
    for _obs_col, _annot_col in _obs_from_annot:
        _dataset.obs[_obs_col] = _annot[_annot_col]
# Flag two gene groups on every dataset and compute QC metrics for them:
#   'mt' -- variable names starting with "mt-"
#   'hb' -- variable names matching the regex "^Hb.*-"
# (presumably mitochondrial and haemoglobin genes -- confirm against the
# gene naming of the reference used).
# The metrics are written back into each dataset (inplace=True); the
# top-gene percentages and the log1p-transformed columns are switched off.
for _dataset in (adata, bdata, cdata, ddata):
    _gene_names = _dataset.var_names
    _dataset.var['mt'] = _gene_names.str.startswith('mt-')
    _dataset.var['hb'] = _gene_names.str.contains("^Hb.*-")
    sc.pp.calculate_qc_metrics(
        _dataset,
        qc_vars=['mt', 'hb'],
        percent_top=None,
        log1p=False,
        inplace=True,
    )
# QC overview per dataset: a violin plot of the two count metrics, then the
# same metrics drawn on the tissue. The colour-scale caps (vmax) are chosen
# per dataset: the first two use [30000, 8000], the last two [20000, 6000].
_qc_keys = ['total_counts', 'n_genes_by_counts']
_qc_plot_plan = (
    (adata, [30000, 8000]),
    (bdata, [30000, 8000]),
    (cdata, [20000, 6000]),
    (ddata, [20000, 6000]),
)
for _dataset, _vmax in _qc_plot_plan:
    sc.pl.violin(
        adata=_dataset,
        keys=list(_qc_keys),
        jitter=0.3,
        size=1.2,
        ylabel="UMI count",
        xlabel="",
    )
    sc.pl.spatial(
        adata=_dataset,
        color=list(_qc_keys),
        color_map="viridis",
        vmax=_vmax,
    )
def draw_boxplot(input_df, x_axis, y_axis, sample):
    """Show a box plot (with overlaid points) of one ``.obs`` column per group.

    Observations whose ``category`` equals ``"uncategorized"`` are removed
    before plotting. The figure is displayed with ``plt.show()``; nothing is
    returned or saved.

    Args:
        input_df: Dataset exposing its per-observation table as ``.obs`` and
            supporting boolean-mask row selection followed by ``.copy()``.
        x_axis: ``.obs`` column used for grouping. ``"category"`` uses the
            fixed order non_tumor / peritumor / tumor; ``"cluster"`` is
            sorted naturally (so 2 comes before 10) with labels rotated 45
            degrees; ``"pathology"`` rotates labels 90 degrees. Any other
            column is drawn in seaborn's default order without rotation.
        y_axis: ``.obs`` column plotted on the y axis.
        sample: Sample label. Not used by the plotting code; kept so that
            existing callers continue to work.
    """
    import re  # local import: only the natural sort of cluster labels needs it

    def natural_key(label):
        # Split into digit / non-digit runs, e.g. "Cluster 10" ->
        # ["Cluster ", 10, ""]. str() lets numeric labels be sorted too.
        return [int(part) if part.isdigit() else part
                for part in re.split(r'(\d+)', str(label))]

    # Work on a filtered copy so the caller's dataset is left untouched.
    keep = input_df.obs['category'] != "uncategorized"
    obs = input_df[keep].copy().obs

    rotation = 0
    order = None
    if x_axis == "category":
        order = ["non_tumor", "peritumor", "tumor"]
    elif x_axis == "cluster":
        # Drop missing labels first: a NaN reaching the regex split raises
        # TypeError, and a missing label cannot be a plotted group anyway.
        order = sorted(obs["cluster"].dropna().unique(), key=natural_key)
        rotation = 45
    elif x_axis == "pathology":
        rotation = 90

    # NOTE: sns.set changes the global seaborn/matplotlib theme, so it also
    # affects figures drawn after this call.
    sns.set(font_scale=2, style='white')
    ax = sns.boxplot(data=obs, x=x_axis, y=y_axis, order=order)
    sns.stripplot(data=obs, x=x_axis, y=y_axis, color='k', size=1.5,
                  ax=ax, order=order)
    plt.xlabel('')
    plt.xticks(rotation=rotation)
    plt.show()
# Draw every (sample, grouping column, metric) combination as a box plot.
sample_lst = ["A", "B", "C", "D"]
x_axis_lst = ["category", "pathology", "cluster"]
y_axis_lst = ["total_counts", "n_genes_by_counts"]

# Explicit label -> dataset table. A label missing from it raises KeyError
# instead of silently re-plotting whichever dataset was selected last.
_dataset_by_sample = {"A": adata, "B": bdata, "C": cdata, "D": ddata}

for sample_ in sample_lst:
    # No defensive copy here: draw_boxplot filters into its own copy and does
    # not modify the dataset it is given.
    data_ = _dataset_by_sample[sample_]
    for x_ax_ in x_axis_lst:
        for y_ax_ in y_axis_lst:
            print(sample_)
            draw_boxplot(input_df=data_, x_axis=x_ax_, y_axis=y_ax_, sample=sample_)