In [1]:
import matplotlib
import matplotlib.pyplot as plt
plt.ioff()
import os
import sys
sys.path.append("/oak/stanford/groups/akundaje/manyu/C2H2_ZNF_project/utils/modisco_utils/")
from modisco_results_utils import *
from reports import *
sys.path.append(os.path.abspath("/oak/stanford/groups/akundaje/manyu/softwares/tfmodisco_tf_models/src/"))
sys.path.append(os.path.abspath("/oak/stanford/groups/akundaje/manyu/softwares/tfmodisco_tf_models/notebooks/reports/"))
from tfmodisco.run_tfmodisco import import_shap_scores, import_tfmodisco_results
from motif.read_motifs import pfm_info_content, trim_motif_by_ic
import motif.moods as moods
import plot.viz_sequence as viz_sequence
from util import figure_to_vdom_image, import_peak_table
import h5py
import pandas as pd
import numpy as np
import modisco
import sklearn.decomposition
import umap

import matplotlib.font_manager as font_manager
import vdom.helpers as vdomh
from IPython.display import display
import tqdm
import io
import base64
import urllib
import deepdish as dd
import argparse
from reports import SaveAllModiscoResults
import vdom.helpers as vdomh
from vdom.helpers import h1, p, img, div, b
import sys
sys.path.append('/oak/stanford/groups/akundaje/manyu/C2H2_ZNF_project/utils')
from generalized_sequence_alignment import *
from plot_utils import *
import h5py 
sys.path.append(os.path.abspath("/oak/stanford/groups/akundaje/manyu/softwares/tfmodisco_tf_models/src/"))
sys.path.append(os.path.abspath("/oak/stanford/groups/akundaje/manyu/softwares/tfmodisco_tf_models/notebooks/reports/"))
from util import figure_to_vdom_image

    # Plotting defaults
font_manager.fontManager.ttflist.extend(
    font_manager.createFontList(
        font_manager.findSystemFonts(fontpaths="/oak/stanford/groups/akundaje/manyu/utils/fonts/")
    )
)
plot_params = {
    "figure.titlesize": 22,
    "axes.titlesize": 22,
    "axes.labelsize": 20,
    "legend.fontsize": 18,
    "xtick.labelsize": 16,
    "ytick.labelsize": 16,
    "font.family": "Roboto",
    "font.weight": "bold"
}
plt.rcParams.update(plot_params)
/oak/stanford/groups/akundaje/manyu/C2H2_ZNF_project/utils/modisco_utils/modisco_results_utils.py:32: MatplotlibDeprecationWarning: 
The createFontList function was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use FontManager.addfont instead.
  font_manager.findSystemFonts(fontpaths="/oak/stanford/groups/akundaje/manyu/utils/fonts/")
/oak/stanford/groups/akundaje/manyu/anaconda3/envs/basepairmodels_py3.6.8/lib/python3.6/site-packages/ipykernel_launcher.py:47: MatplotlibDeprecationWarning: 
The createFontList function was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use FontManager.addfont instead.
In [2]:
# %load_ext autoreload
# %autoreload 2

Fetch Environment variables

In [3]:
# Define parameters/fetch arguments
dataset = os.environ["DATASET"]
tf_dataset = os.environ["TF_DATASET"]
tf_true_name = os.environ["TF_TRUE_NAME"]
savedir = os.environ["SAVEDIR"]

Reports on TF motifs and subcluster diversity:

In [4]:
ob = SaveAllModiscoResults(tf=tf_dataset,dataset=dataset,savedir=savedir)
Loading shap scores 

Loading tfm object 

Motifs and subpattern diversity

In [5]:
ob.save_main_patterns()
Saving main patterns
Saving pattern: 0
In [6]:
ob.plot_motif_heterogeneity()

Pattern 1/1

/oak/stanford/groups/akundaje/manyu/C2H2_ZNF_project/utils/modisco_utils/reports.py:642: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  emb_fig, ax = plt.subplots()
SubpatternSeqletsEmbeddingsCWMPFM
Agg.2887
0596
1502
2487
3304
4290
5283
6178
7111
860
947
1017
1112

Profile heatmaps and motif distribution around peak summits

In [7]:
ob.save_profiles_and_seqlet_histograms()
Pattern 1
Total Seqlets 2887

Display B1H - aligned motifs:

In [8]:
def generate_b1h_aligned_table(dataset,tf_name,tf_true_name):
    

    plt.ioff()
    results_base = '/oak/stanford/groups/akundaje/manyu/C2H2_ZNF_project/train_profile_models_2020/modisco_reports'
    motifs_saved_file = '{}/{}/{}/main_patterns/all_motifs.h5'.format(results_base,dataset,tf_name)
    filtered_motif_file = '{}/{}/{}/filtered_patterns_list.txt'.format(results_base,dataset,tf_name)
    assert(os.path.exists(filtered_motif_file))
    assert(os.path.exists(motifs_saved_file))
    
    alignments_dir = '{}/{}/{}/b1h_aligments'.format(results_base,dataset,tf_name)
    aligned_results_savefile  = '{}/b1h_alignment.h5'.format(alignments_dir)
    
    header = vdomh.thead(
    vdomh.tr(
        vdomh.th("Pattern number:", style={"text-align": "center"}),
        vdomh.th("n-Seqlets", style={"text-align": "center"}),
        vdomh.th("Alignment", style={"text-align": "center"})

    )
)
    
    f = h5py.File(aligned_results_savefile,'r')
    g = h5py.File(motifs_saved_file,'r')
    
    table_rows = []
    for motif_idx in f.keys():
        b1h_aligned = f[motif_idx]['b1h_aligned'].value 
        pfm_aligned = f[motif_idx]['pfm_aligned'].value
        n_seqlets = g[motif_idx]['n_seqlets'].value
        fig,ax = plot_motifs_list([b1h_aligned,pfm_aligned],ic_scale=True)
        table_rows.append(vdomh.tr(
            vdomh.td(motif_idx),
            vdomh.td(str(n_seqlets)),
            vdomh.td(figure_to_vdom_image(fig))
        ))
    motif_table = vdomh.table(header,vdomh.tbody(*table_rows))
    display(motif_table)
    
    f.close()
    g.close()
    
In [9]:
generate_b1h_aligned_table(dataset,tf_dataset,tf_true_name)
/oak/stanford/groups/akundaje/manyu/anaconda3/envs/basepairmodels_py3.6.8/lib/python3.6/site-packages/ipykernel_launcher.py:28: H5pyDeprecationWarning: dataset.value has been deprecated. Use dataset[()] instead.
/oak/stanford/groups/akundaje/manyu/anaconda3/envs/basepairmodels_py3.6.8/lib/python3.6/site-packages/ipykernel_launcher.py:29: H5pyDeprecationWarning: dataset.value has been deprecated. Use dataset[()] instead.
/oak/stanford/groups/akundaje/manyu/anaconda3/envs/basepairmodels_py3.6.8/lib/python3.6/site-packages/ipykernel_launcher.py:30: H5pyDeprecationWarning: dataset.value has been deprecated. Use dataset[()] instead.
Pattern number:n-SeqletsAlignment
02887
In [10]:
!pwd
/oak/stanford/groups/akundaje/manyu/C2H2_ZNF_project/train_profile_models_2020/trained_models/train_related_scripts/run_alex_reports
In [ ]: