import matplotlib
import matplotlib.pyplot as plt
plt.ioff()
import os
import sys
sys.path.append("/oak/stanford/groups/akundaje/manyu/C2H2_ZNF_project/utils/modisco_utils/")
from modisco_results_utils import *
from reports import *
sys.path.append(os.path.abspath("/oak/stanford/groups/akundaje/manyu/softwares/tfmodisco_tf_models/src/"))
sys.path.append(os.path.abspath("/oak/stanford/groups/akundaje/manyu/softwares/tfmodisco_tf_models/notebooks/reports/"))
from tfmodisco.run_tfmodisco import import_shap_scores, import_tfmodisco_results
from motif.read_motifs import pfm_info_content, trim_motif_by_ic
import motif.moods as moods
import plot.viz_sequence as viz_sequence
from util import figure_to_vdom_image, import_peak_table
import h5py
import pandas as pd
import numpy as np
import modisco
import sklearn.decomposition
import umap
import matplotlib.font_manager as font_manager
import vdom.helpers as vdomh
from IPython.display import display
import tqdm
import io
import base64
import urllib
import deepdish as dd
import argparse
from reports import SaveAllModiscoResults
import vdom.helpers as vdomh
from vdom.helpers import h1, p, img, div, b
import sys
sys.path.append('/oak/stanford/groups/akundaje/manyu/C2H2_ZNF_project/utils')
from generalized_sequence_alignment import *
from plot_utils import *
import h5py
sys.path.append(os.path.abspath("/oak/stanford/groups/akundaje/manyu/softwares/tfmodisco_tf_models/src/"))
sys.path.append(os.path.abspath("/oak/stanford/groups/akundaje/manyu/softwares/tfmodisco_tf_models/notebooks/reports/"))
from util import figure_to_vdom_image
# Plotting defaults
# Register the fonts found under the project font directory so that the
# "font.family" setting below can resolve.
_font_files = font_manager.findSystemFonts(
    fontpaths="/oak/stanford/groups/akundaje/manyu/utils/fonts/"
)
if hasattr(font_manager.fontManager, "addfont"):
    # font_manager.createFontList was deprecated in Matplotlib 3.2 and removed
    # in a later release; FontManager.addfont is its documented replacement.
    for _font_file in _font_files:
        try:
            font_manager.fontManager.addfont(_font_file)
        except (OSError, RuntimeError, ValueError, KeyError, NotImplementedError):
            # Stay best-effort: a single unreadable font file should not
            # abort the whole report (the legacy helper appears to skip such
            # files as well -- TODO confirm against the installed version).
            continue
else:
    # Older Matplotlib without addfont: keep the original registration path.
    font_manager.fontManager.ttflist.extend(
        font_manager.createFontList(_font_files)
    )

# Global rcParams applied to every figure produced below.
plot_params = {
    "figure.titlesize": 22,
    "axes.titlesize": 22,
    "axes.labelsize": 20,
    "legend.fontsize": 18,
    "xtick.labelsize": 16,
    "ytick.labelsize": 16,
    "font.family": "Roboto",
    "font.weight": "bold",
}
plt.rcParams.update(plot_params)
# %load_ext autoreload
# %autoreload 2

# Run parameters are supplied through environment variables; a missing
# variable raises KeyError at this point.
dataset = os.environ["DATASET"]
tf_dataset = os.environ["TF_DATASET"]
tf_true_name = os.environ["TF_TRUE_NAME"]
savedir = os.environ["SAVEDIR"]

# Instantiate SaveAllModiscoResults (imported from `reports`) for this
# TF/dataset pair and invoke its three steps in order.
ob = SaveAllModiscoResults(
    tf=tf_dataset,
    dataset=dataset,
    savedir=savedir,
)
ob.save_main_patterns()
ob.plot_motif_heterogeneity()
ob.save_profiles_and_seqlet_histograms()
def generate_b1h_aligned_table(dataset, tf_name, tf_true_name):
    """Display a table of B1H-aligned motifs for one TF/dataset pair.

    For every top-level key in the alignment HDF5 file, the function reads the
    ``b1h_aligned`` and ``pfm_aligned`` entries, plots them together with
    ``plot_motifs_list`` and adds a table row holding the key, the matching
    ``n_seqlets`` value from the saved-motifs file, and the rendered figure.
    The finished table is shown with ``IPython.display.display``.

    Args:
        dataset: Dataset name; used as a path component under the report root.
        tf_name: TF name; used as a path component under the report root.
        tf_true_name: Accepted for call compatibility; not used by this
            function.

    Returns:
        None. The table is displayed as a side effect.

    Raises:
        AssertionError: If the filtered-pattern list or the saved-motifs file
            does not exist.
        OSError: Presumably raised by h5py if the alignment file cannot be
            opened (its existence is not checked beforehand).
    """
    plt.ioff()
    results_base = '/oak/stanford/groups/akundaje/manyu/C2H2_ZNF_project/train_profile_models_2020/modisco_reports'
    motifs_saved_file = '{}/{}/{}/main_patterns/all_motifs.h5'.format(results_base, dataset, tf_name)
    filtered_motif_file = '{}/{}/{}/filtered_patterns_list.txt'.format(results_base, dataset, tf_name)
    # The filtered-pattern list is only checked for existence; it is not read.
    assert os.path.exists(filtered_motif_file), filtered_motif_file
    assert os.path.exists(motifs_saved_file), motifs_saved_file
    # NOTE: 'b1h_aligments' is kept exactly as spelled; it is an on-disk path.
    alignments_dir = '{}/{}/{}/b1h_aligments'.format(results_base, dataset, tf_name)
    aligned_results_savefile = '{}/b1h_alignment.h5'.format(alignments_dir)

    centered = {"text-align": "center"}
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Pattern number:", style=centered),
            vdomh.th("n-Seqlets", style=centered),
            vdomh.th("Alignment", style=centered),
        )
    )

    table_rows = []
    # Context managers guarantee both HDF5 handles are released even if
    # reading or plotting fails partway through the loop.
    with h5py.File(aligned_results_savefile, 'r') as f, \
            h5py.File(motifs_saved_file, 'r') as g:
        for motif_idx in f:
            # `ds[()]` reads the whole dataset; it replaces `Dataset.value`,
            # which was removed in h5py 3.0, and also works on h5py 2.x.
            b1h_aligned = f[motif_idx]['b1h_aligned'][()]
            pfm_aligned = f[motif_idx]['pfm_aligned'][()]
            n_seqlets = g[motif_idx]['n_seqlets'][()]
            fig, _ = plot_motifs_list([b1h_aligned, pfm_aligned], ic_scale=True)
            table_rows.append(vdomh.tr(
                vdomh.td(motif_idx),
                vdomh.td(str(n_seqlets)),
                vdomh.td(figure_to_vdom_image(fig)),
            ))
            # Release the figure once it has been converted; otherwise one
            # open figure per motif accumulates in pyplot's registry.
            plt.close(fig)

    motif_table = vdomh.table(header, vdomh.tbody(*table_rows))
    display(motif_table)
# Render the B1H alignment table for the TF/dataset selected via the environment.
generate_b1h_aligned_table(dataset, tf_dataset, tf_true_name)
# Print the working directory. This replaces the IPython-only `!pwd` shell
# escape, which is a SyntaxError when the file is run as plain Python.
print(os.getcwd())