Ken Furudate
import scanpy as sc
import anndata as ad
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib.font_manager
plt.rcParams['font.sans-serif'] = ['Arial'] + plt.rcParams['font.sans-serif']
plt.rcParams["font.size"] = 20
import os
from copy import copy
import seaborn as sns
from pathlib import Path
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))
import warnings
warnings.filterwarnings('ignore')
print(f"scanpy=={sc.__version__}")
sc.set_figure_params(facecolor="white", figsize=(8, 8), dpi_save=300)
sc.settings.verbosity = 3
datadir="/data/singlec-ell/"
in_f1 = "oscc_all.exp.txt"
in_f2 = "oscc_scRNA-seq_meta_data.txt"
df = pd.read_table(os.path.join(datadir, in_f1), index_col="Unnamed: 0")
df
adata = sc.AnnData(df)
adata
annot = pd.read_table(os.path.join(datadir, in_f2), index_col="Unnamed: 0")
adata.obs = annot
adata.obs
sc.pl.highest_expr_genes(adata, n_top=20)
# Prefiltering
sc.pp.filter_cells(adata, min_genes=100)
sc.pp.filter_genes(adata, min_cells=2)
mito_genes = adata.var_names.str.startswith('mt-')
adata.obs['percent_mito'] = np.sum(adata[:, mito_genes].X, axis=1) / np.sum(adata.X, axis=1)
adata.obs['n_counts'] = adata.X.sum(axis=1)
adata.var['mt'] = adata.var_names.str.startswith('mt-')
adata.var['hb'] = adata.var_names.str.contains(("^Hb.*-"))
sc.pp.calculate_qc_metrics(adata,
qc_vars=['mt','hb'],
percent_top=None,
log1p=False,
inplace=True
)
sc.pl.violin(adata=adata,
keys=['total_counts', 'n_genes_by_counts'],
jitter=0.4,rotation= 45, ylabel="UMI count", xlabel="",
)
sc.pl.violin(adata,
['n_genes', 'n_counts', 'percent_mito'],
jitter=0.4,
ylabel="UMI count",
xlabel="",
multi_panel=True
)
sc.pp.highly_variable_genes(adata, flavor="seurat")
sc.pl.highly_variable_genes(adata)
sc.tl.pca(adata, svd_solver='arpack')
sc.pl.pca_variance_ratio(adata, log=False)
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=20)
sc.tl.umap(adata)
cell_type_colors = {'B cell' : '#E41A1C',
'CAF' : '#377EB8',
'OSCC cell' : '#4DAF4A',
'DC' : '#984EA3',
'EC' : '#F29403',
'MAF' : '#F781BF',
'MSC' : '#BC9DCC',
'Mac' : '#A65628',
'Mst' : '#54B0E4',
'Myo' : '#222F75',
'T cell' : '#1B9E77'
}
sc.pl.umap(adata,
color="cell_types",
palette=cell_type_colors,
)
sample_colors ={
'oscc5': (0.5490196078431373, 0.6352941176470588, 0.3215686274509804, 1.0),
'oscc6': (0.7098039215686275, 0.8117647058823529, 0.4196078431372549, 1.0),
'oscc7': (0.807843137254902, 0.8588235294117647, 0.611764705882353, 1.0),
'oscc8': (0.2235294117647059, 0.23137254901960785, 0.4745098039215686, 1.0),
'oscc10': (0.5490196078431373, 0.42745098039215684, 0.19215686274509805, 1.0),
'oscc12': (0.3215686274509804, 0.32941176470588235, 0.6392156862745098, 1.0),
'oscc13': (0.7411764705882353, 0.6196078431372549, 0.2235294117647059, 1.0),
'oscc16': (0.4196078431372549, 0.43137254901960786, 0.8117647058823529, 1.0),
'oscc17': (0.611764705882353, 0.6196078431372549, 0.8705882352941177, 1.0),
'oscc18': (0.9058823529411765, 0.7294117647058823, 0.3215686274509804, 1.0),
'oscc20': (0.9058823529411765, 0.796078431372549, 0.5803921568627451, 1.0),
'oscc22': (0.38823529411764707, 0.4745098039215686, 0.2235294117647059, 1.0),
'oscc23': (0.5176470588235295, 0.23529411764705882, 0.2235294117647059, 1.0),
'oscc24': (0.6784313725490196, 0.28627450980392155, 0.2901960784313726, 1.0),
'oscc25': (0.8392156862745098, 0.3803921568627451, 0.4196078431372549, 1.0),
'oscc26': (0.9058823529411765, 0.5882352941176471, 0.611764705882353, 1.0),
'oscc28': (0.4823529411764706, 0.2549019607843137, 0.45098039215686275, 1.0),
}
sc.set_figure_params(facecolor="white", figsize=(8, 8))
sc.pl.umap(adata,
color="Sample_ID",
palette=sample_colors,
)
condition_colors = {"N0_pri":"#377EB8B3",
"LNM_met":"#99cc00",
"LNM_pri":"#E41A1CB3"}
sc.set_figure_params(facecolor="white", figsize=(8, 8))
sc.pl.umap(adata,
color="condition",
palette=condition_colors,
)
reds = copy(plt.cm.Reds)
reds.set_under("lightgray")
sc.pl.umap(adata,
color=['MYH11', 'MYL9', 'ACTA2'],
cmap=reds,
vmin=0.00001,
)
sc.pl.violin(adata,
['MYH11', 'MYL9', 'ACTA2'],
groupby='cell_types',
rotation=90,
)
sc.pl.correlation_matrix(adata,
groupby='cell_types'
)
sc.tl.rank_genes_groups(adata, 'cell_types', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=10, sharey=False)
sc.pl.rank_genes_groups_heatmap(adata,
n_genes=5,
swap_axes=True,
figsize=(25,20),
show_gene_labels=True
)