%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import glob
import os
from collections import OrderedDict
import pickle
import h5py
from matlas.performance_metrics.performance_metrics import plot_performances
# Base directory of the shared NSC ATAC-seq peak data; the path is reused by
# later cells to locate files underneath it.
root = "/mnt/lab_data/kundaje/users/msharmin/NSC_ATAC_PEAKS_to_share"

# Collect the auPRC-based performance table for the Basset-style
# classification model. plot=False is passed so that plotting is left to the
# dedicated cells further down.
# NOTE(review): foldcounts=10 presumably selects 10 cross-validation folds --
# confirm against plot_performances.
cdf = plot_performances(
    root=root,
    model_class="clb_basset_classification",
    metric_name="auprc",
    foldcounts=10,
    plot=False,
)

# Columns to keep for reporting, in display order.
report_columns = [
    'celltype', 'fold', 'auprc', 'Imbalance ratio', 'num_positives', 'num_negatives',
    'recall_at_fdr_50', 'recall_at_fdr_20', 'recall_at_fdr_10',
]

# Move the index into a regular column named 'celltype'
# (assumes cdf has an unnamed index holding the cell types -- TODO confirm),
# then restrict the frame to the reporting columns.
df = cdf.reset_index().rename(columns={'index': 'celltype'})
df = df[report_columns]
from matplotlib import pyplot as plt
import seaborn as sns

# Two side-by-side panels of the auPRC values held in df:
#   left  -- distribution per cell type,
#   right -- distribution per fold.
fig, axes = plt.subplots(1, 2, figsize=(16,6))
ax_cell, ax_fold = axes

sns.boxplot(x="celltype", y='auprc', data=df, ax=ax_cell)
# Rotate the cell-type labels so they do not overlap each other.
ax_cell.set_xticklabels(ax_cell.get_xticklabels(), rotation=90)
ax_cell.set_title("10-fold Model performances for each celltype")

sns.boxplot(x="fold", y='auprc', data=df, ax=ax_fold)
ax_fold.set_title("Model performances of celltypes across fold")

# Use plt.show() (as the scatter plot cell does) instead of fig.show():
# Figure.show() warns on non-GUI backends such as the notebook inline backend
# and does not start a GUI event loop when run as a plain script.
plt.show()
from matplotlib import pyplot as plt

# Scatter plot with one point per row of df: the row's imbalance ratio on the
# x axis against its auPRC on the y axis.
imbalance_values = df['Imbalance ratio'].values
auprc_values = df['auprc'].values

plt.scatter(imbalance_values, auprc_values)
plt.title('Dependence of Model performance on Imbalance ratio')
plt.xlabel('Imbalance ratio')
plt.ylabel('auprc')
plt.show()
from vdom.helpers import (h1, p, li, img, div, b, br, ul, a,
                          details, summary,
                          table, thead, th, tr, tbody, td, ol)
from IPython.display import display
from IPython.display import HTML
from matlas.reports import prepare_sorted_table

# Render df as a sortable HTML table hidden behind a collapsible <details>.
#
# Every column after the first is declared as 'number' for the table and is
# converted to strings before being handed to prepare_sorted_table; the first
# column is treated as the 'celltype' string column.
keep = df.columns.values[1:]
df_coltypes = {'celltype': 'string'}
df_coltypes.update({colname: 'number' for colname in keep})

# Stringify into a separate frame instead of overwriting the columns of df:
# df keeps its numeric dtypes, so the plotting cells can be re-run after this
# one, and no assignment is made onto a column-subset frame (which pandas may
# flag with SettingWithCopyWarning).
table_df = df.astype({colname: str for colname in keep})

html_str = prepare_sorted_table((table_df, df_coltypes))
metadata = HTML(html_str)

# The table markup is wrapped by hand because html_str is a raw HTML string,
# not a vdom element.
item = HTML("<details>" + summary(b("Click here for Metadata of Model and Performances")).to_html() + html_str + "</details>")
instructions = summary(b("Following link contains a sortable table of deep learning model information for each sample. "))
display(instructions, item)
# Load the tab-separated peak-count file found under root; its first column
# becomes the index and is therefore not rendered in the table below.
df = pd.read_csv("{}/gw_peaks/peak_counts.txt".format(root), index_col=0, sep="\t")
keep = df.columns.values

# Header: one <th> per column name.
header_cells = [th(colname) for colname in keep]

# Body: one <tr> per row of df, every cell rendered via str().
body_rows = [
    tr([td(str(row[colname])) for colname in keep])
    for _, row in df.iterrows()
]

tab = table(thead(header_cells), tbody(body_rows))

display(summary(b("Number of peaks per celltype")))
display(tab)
from vdom.helpers import (b, summary, p, a, details, div)
from IPython.display import display
import numpy as np
import pandas as pd
# NOTE(review): the star import is kept as-is because other cells may rely on
# names it provides; this cell needs MITRA_HTTP_PREFIX, which is presumably
# defined in matlas.genome_data -- confirm.
from matlas.genome_data import *

root = "/mnt/lab_data/kundaje/users/msharmin/NSC_ATAC_PEAKS_to_share"
df = pd.read_csv("{}/gw_peaks/peak_counts.txt".format(root), index_col=0, sep="\t")

# URL template shared by every report link: {0} is the HTTP prefix, {1} the
# report name (a cell type or a differential comparison).
report_url = "{0}/report/nsc_reports/{1}.html"

# One link per cell type listed in the peak-count table.
items_with_report = [
    p(a(celltype, href=report_url.format(MITRA_HTTP_PREFIX, celltype)))
    for celltype in df['celltype'].values
]

# Differential reports as (report name, reference the regions are compared
# against). Keeping each pair together avoids the two parallel lists that
# were previously matched up by position.
differential_reports = [
    ('aNSC_Old', 'aNSC_Young'),
    ('aNSC_Young', 'aNSC_Old'),
    ('qNSC_Old', 'qNSC_Young'),
    ('qNSC_Young', 'qNSC_Old'),
    ('aNSC_Young_Q_A', 'qNSC_Young'),
    ('qNSC_Young_Q_A', 'aNSC_Young'),
]
for celltype, reference in differential_reports:
    label = celltype + ' (differential regions w.r.t. {})'.format(reference)
    items_with_report.append(
        p(a(label, href=report_url.format(MITRA_HTTP_PREFIX, celltype)))
    )

# Collapsible section, rendered open. The links go into a <div>: a <details>
# element may contain only one <summary> (its first child), and <p> elements
# are not valid inside <summary>.
display(
    details(
        summary(b("Motif Reports")),
        div(items_with_report),
        attributes={"open":"true"}
    )
)