# Filepaths and Hard-coded Defaults
# Root of the project tree. It ends with "/", so relative names are appended
# to it directly.
proj_root = "/users/kcochran/projects/procap_models/"

# Genome files kept under <proj_root>genomes/.
sequence_path = f"{proj_root}genomes/GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta"
chrom_sizes = f"{proj_root}genomes/hg38.chrom.sizes.withrRNA"

# Window sizes handed to extract_peaks, load_coords and plot_all_metaclusters
# further down in this notebook.
in_window, out_window = 2114, 1000
# Settings read from the run's config file.
# The file is plain text with one whitespace-separated "<key> <value>" pair
# per line. Each line is split once; blank lines (for example a trailing
# newline at the end of the file) are skipped instead of raising IndexError.
with open("2022-07-04_06-07-54_run1_modisco_config_CALU3_profile.txt") as config_f:
    config_dict = {
        fields[0]: fields[1]
        for fields in (line.split() for line in config_f)
        if fields
    }

modisco_out_path = config_dict["modisco_out_path"]
scoring_type = config_dict["scoring_type"]
score_center_size = int(config_dict["score_center_size"])
profile_display_center_size = int(config_dict["profile_display_center_size"])

# Digest what's in the config file.
# The last five path components are
# <assay_type>/<model_type>/<cell>/<accession>/<modisco_dir_base>.
# Any trailing "/" is stripped first: otherwise split("/") ends with an empty
# string, every field shifts by one, and the unpacking below fails.
assay_type, model_type, cell, accession, modisco_dir_base = (
    modisco_out_path.rstrip("/").split("/")[-5:]
)

# modisco_dir_base looks like "<date>_<time>_run<N>_<suffix>"; the first two
# pieces together form the timestamp and the third carries the run number.
ts_part1, ts_part2, run_str, _ = modisco_dir_base.split("_")
timestamp = ts_part1 + "_" + ts_part2
run = int(run_str.replace("run", ""))

print(modisco_out_path)
print("cell_type:", cell, accession)
print("timestamp:", timestamp)
print("run:", run)
print("scoring_type:", scoring_type)
print("score_center_size:", score_center_size)
print("profile_display_center_size:", profile_display_center_size)
/users/kcochran//projects/procap_models/modisco_out/procap/bpnetlite_basic_v2/CALU3/ENCSR935RNW/2022-07-04_06-07-54_run1_modisco cell_type: CALU3 ENCSR935RNW timestamp: 2022-07-04_06-07-54 run: 1 scoring_type: profile score_center_size: 1000 profile_display_center_size: 400
# Processed input data for this cell type / accession.
# proj_root already ends with "/", so the sub-path is appended without a
# leading slash (the previous "/data/..." produced a doubled "//").
data_dir = proj_root + "data/procap/processed/" + cell + "/" + accession + "/"
plus_bw_path = data_dir + "final.5prime.pos.bigWig"
minus_bw_path = data_dir + "final.5prime.neg.bigWig"
val_peak_path = data_dir + "peaks_uni_and_bi_train_and_val.bed.gz"

# Model outputs and attribution scores live in parallel trees that differ only
# in their top-level directory ("model_out" vs "deepshap_out"). The deepshap
# path is built explicitly rather than with str.replace on the model path,
# because replace() would rewrite every "model_out" substring anywhere in the
# path, not just the intended directory component.
experiment_subdir = assay_type + "/" + model_type + "/" + cell + "/" + accession + "/"
run_prefix = timestamp + "_run" + str(run)

val_save_dir = proj_root + "model_out/" + experiment_subdir
val_save_path = val_save_dir + run_prefix + "_train_and_val"
attr_save_path = proj_root + "deepshap_out/" + experiment_subdir + run_prefix + "_deepshap"

# Later cells append file names directly to modisco_out_path, so make sure it
# ends with a path separator.
if not modisco_out_path.endswith("/"):
    modisco_out_path = modisco_out_path + "/"
# task-specific filepaths
import os

assert scoring_type in ["profile", "counts"], scoring_type

# File-name pieces for the selected task, in the order:
# scores suffix, one-hot scores suffix, modisco results prefix, seqlet file.
# "profile" selects the "prof" names; the only other value accepted above
# ("counts") selects the "count" names.
if scoring_type == "profile":
    name_parts = ("_prof.npy", "_prof_onehot.npy",
                  "results_allChroms_prof_slice", "seqlets_prof.txt")
else:
    name_parts = ("_count.npy", "_count_onehot.npy",
                  "results_allChroms_count_slice", "seqlets_count.txt")
scores_suffix, onehot_suffix, modisco_prefix, seqlet_name = name_parts

scores_path = attr_save_path + scores_suffix
onehot_scores_path = attr_save_path + onehot_suffix
modisco_obj_path = modisco_out_path + modisco_prefix + str(score_center_size) + ".hdf5"
seqlet_path = modisco_out_path + seqlet_name

# Both score files must already exist on disk; the failing path is reported
# as the assertion message.
for required_path in (scores_path, onehot_scores_path):
    assert os.path.exists(required_path), required_path
# Imports, Plotting Defaults
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager

# Global matplotlib defaults applied to every figure drawn after this cell:
# larger title / label / tick / legend fonts and bold text.
plot_params = {
    "figure.titlesize": 22,
    "axes.titlesize": 22,
    "axes.labelsize": 20,
    "legend.fontsize": 18,
    "xtick.labelsize": 16,
    "ytick.labelsize": 16,
    "font.weight": "bold",
}
plt.rcParams.update(plot_params)

from IPython.display import display

# `tqdm.tqdm_notebook` is deprecated and emits a TqdmDeprecationWarning
# announcing its removal in tqdm 5.0.0. `tqdm.notebook.tqdm` is the
# replacement named in that warning, and the submodule has to be imported
# explicitly before it can be referenced.
import tqdm
import tqdm.notebook
tqdm.notebook.tqdm()

import numpy as np

# NOTE(review): the helpers called later without an explicit import
# (load_coords, import_tfmodisco_results, plot_all_metaclusters,
# run_and_plot_tomtom) presumably come from this star-import - confirm.
from view_modisco_results_utils import *
/users/kcochran/miniconda3/envs/procap/lib/python3.7/site-packages/ipykernel_launcher.py:19: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0 Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
# Load in True Profiles and Sequences
import sys

# data_loading lives in the sibling training directory, so that directory is
# put on the import path first.
sys.path.append('../1_train_models')
from data_loading import extract_peaks

# max_jitter=0 is passed explicitly.
# NOTE(review): presumably this keeps every window centred exactly on its
# peak - confirm against extract_peaks.
one_hot_seqs, true_profs = extract_peaks(
    sequence_path,
    plus_bw_path, minus_bw_path, val_peak_path,
    in_window, out_window,
    max_jitter=0, verbose=True,
)

# Swap axes 1 and 2, then keep only the central slice of axis 1, spanning
# score_center_size // 2 positions on either side of in_window // 2.
# NOTE(review): assumes extract_peaks returns the sequences with position on
# the last axis - confirm.
_seq_mid = in_window // 2
_half_center = score_center_size // 2
one_hot_seqs = one_hot_seqs.swapaxes(1, 2)[:, _seq_mid - _half_center:_seq_mid + _half_center, :]
Reading FASTA: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:18<00:00, 1.27it/s] Loading Peaks: 42440it [01:25, 498.51it/s]
# Load in Coordinates of Examples
# Uses the same peak file and input window size that were passed to
# extract_peaks above.
# NOTE(review): load_coords is not imported by name in this notebook;
# presumably it comes from `from view_modisco_results_utils import *` - confirm.
coords = load_coords(val_peak_path, in_window)
# Import SHAP scores, predicted profiles
# The saved scores get the same treatment as the one-hot sequences: axes 1 and
# 2 are swapped, then axis 1 is cropped to score_center_size // 2 positions on
# either side of in_window // 2.
_seq_mid = in_window // 2
_half_center = score_center_size // 2
hyp_scores = np.swapaxes(np.load(scores_path), 1, 2)
hyp_scores = hyp_scores[:, _seq_mid - _half_center:_seq_mid + _half_center, :]

# The stored predictions are exponentiated on load.
# NOTE(review): presumably the model saves them in log space - confirm.
pred_profs_path = val_save_path + ".profs.npy"
pred_profs = np.exp(np.load(pred_profs_path))
# Load modisco results object
# The results file is opened together with the cropped scores and sequences
# prepared in the earlier cells.
tfm_obj = import_tfmodisco_results(modisco_obj_path, hyp_scores, one_hot_seqs)

# Plot every metacluster and keep the seven values the helper returns; the
# motif matrices and the metacluster count are reused by the TOMTOM cell.
(
    motif_pfms, motif_hcwms, motif_cwms,
    motif_pfms_short, num_seqlets,
    motif_seqlets, num_metaclusters,
) = plot_all_metaclusters(
    tfm_obj, one_hot_seqs, hyp_scores,
    true_profs, pred_profs, coords,
    in_window, out_window,
    score_center_size,
    profile_display_center_size,
)
15356 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
9294 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
7635 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
5747 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
5081 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
4155 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
3508 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
2248 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
1998 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
1654 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
1567 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
1394 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
1341 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
750 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
747 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
626 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
575 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
461 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
319 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
179 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
173 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
155 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
128 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
51 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
36 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
21 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
21 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
91 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
34 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
26 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
24 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
21 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
# Run TOMTOM on the motifs found above and display the matches.
# NOTE(review): judging by the recorded output, results are written to a
# "tomtom" directory under modisco_out_path (overwriting earlier contents) and
# one "Motif ID | q-val | PWM" table is shown per query motif - confirm
# against run_and_plot_tomtom.
run_and_plot_tomtom(modisco_out_path, motif_pfms, motif_hcwms, motif_pfms_short, num_metaclusters)
The output directory '/users/kcochran//projects/procap_models/modisco_out/procap/bpnetlite_basic_v2/CALU3/ENCSR935RNW/2022-07-04_06-07-54_run1_modisco/tomtom' already exists. Its contents will be overwritten. Processing query 1 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.974985 Processing query 2 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.982119 Processing query 3 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.969821 Processing query 4 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.986014 Processing query 5 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=1 Processing query 6 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.984986 Processing query 7 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.95846 Processing query 8 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.994092 Processing query 9 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.95285 Processing query 10 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.99435 Processing query 11 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.979116 Processing query 12 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.969283 Processing query 13 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=1 Processing query 14 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.957667 Processing query 15 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. 
Estimated pi_0=1 Processing query 16 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.96545 Processing query 17 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.966464 Processing query 18 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=1 Processing query 19 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=1 Processing query 20 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.954502 Processing query 21 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.998346 Processing query 22 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.981903 Processing query 23 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=1 Processing query 24 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.962674 Processing query 25 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.974239 Processing query 26 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=1 Processing query 27 out of 27 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.997061 Can't locate HtmlMonolithWr.pm: /root/lib/perl/HtmlMonolithWr.pm: Permission denied at /software/meme/4.11.2/bin/tomtom_xml_to_html line 48. Warning: tomtom_xml_to_html exited abnormally and may have failed to create HTML output.
Motif ID | q-val | PWM |
---|---|---|
MA0079.5-SP1 | 0.0155889 | |
MA0516.3-SP2 | 0.0155889 | |
MA0742.2-KLF12 | 0.0155889 | |
MA1511.2-KLF10 | 0.0155889 | |
MA1818.1-Zm00001d052229 | 0.0202029 |
Motif ID | q-val | PWM |
---|---|---|
MA1708.1-ETV7 | 0.263892 | |
MA1221.1-RAP2-6 | 0.263892 | |
MA1247.1-ERF087 | 0.263892 | |
MA1225.1-ERF5 | 0.263892 | |
MA1958.1-HOXD12::ELK1 | 0.263892 |
Motif ID | q-val | PWM |
---|---|---|
MA1513.1-KLF15 | 5.50158e-05 | |
MA0746.2-SP3 | 0.0001767 | |
MA0599.1-KLF5 | 0.0001767 | |
MA1564.1-SP9 | 0.0001767 | |
MA1961.1-PATZ1 | 0.0001767 |
Motif ID | q-val | PWM |
---|---|---|
MA0076.2-ELK4 | 4.6323e-05 | |
MA0750.2-ZBTB7A | 4.6323e-05 | |
MA0026.1-Eip74EF | 4.6323e-05 | |
MA0764.3-ETV4 | 0.000110984 | |
MA0759.2-ELK3 | 0.00013318 |
Motif ID | q-val | PWM |
---|---|---|
MA0314.2-HAP3 | 3.31232e-05 | |
MA0060.3-NFYA | 3.31232e-05 | |
MA1644.1-NFYC | 3.31232e-05 | |
MA0502.2-NFYB | 0.00708111 | |
MA0316.1-HAP5 | 0.0113831 |
Motif ID | q-val | PWM |
---|---|---|
MA1988.1-Atf3 | 7.24464e-05 | |
MA0489.2-Jun | 0.000130246 | |
MA1928.1-BNC2 | 0.000193325 | |
MA1448.1-fos-1 | 0.000219012 | |
MA0099.3-FOS::JUN | 0.000302937 |
Motif ID | q-val | PWM |
---|---|---|
MA1475.1-CREB3L4 | 0.000730204 | |
MA1438.1-atf-7 | 0.000915829 | |
MA1346.1-TGA10 | 0.00175037 | |
MA0609.2-CREM | 0.00175037 | |
MA1348.1-TGA9 | 0.00175037 |
Motif ID | q-val | PWM |
---|---|---|
MA0506.2-Nrf1 | 1.95985e-05 | |
MA1412.1-TSAR2 | 0.406194 | |
MA1560.1-SOHLH2 | 0.406194 | |
MA1826.1-bHLH145 | 0.406194 |
Motif ID | q-val | PWM |
---|---|---|
MA0535.1-Mad | 0.000690477 | |
MA1818.1-Zm00001d052229 | 0.00129126 | |
MA1820.1-Zm00001d024324 | 0.00129126 | |
MA1819.1-Zm00001d005892 | 0.00129126 | |
MA1833.1-Zm00001d049364 | 0.00129126 |
No TOMTOM matches passing threshold
Motif ID | q-val | PWM |
---|---|---|
MA1513.1-KLF15 | 0.00735434 | |
MA0742.2-KLF12 | 0.00735434 | |
MA1511.2-KLF10 | 0.00735434 | |
MA0079.5-SP1 | 0.00735434 | |
MA0516.3-SP2 | 0.00735434 |
Motif ID | q-val | PWM |
---|---|---|
MA0748.2-YY2 | 1.40496e-05 | |
MA1819.1-Zm00001d005892 | 4.04086e-05 | |
MA1833.1-Zm00001d049364 | 4.04086e-05 | |
MA1821.1-Zm00001d020595 | 7.32796e-05 | |
MA1832.1-Zm00001d002364 | 7.32796e-05 |
Motif ID | q-val | PWM |
---|---|---|
MA0108.2-TBP | 0.000851541 |
Motif ID | q-val | PWM |
---|---|---|
MA0139.1-CTCF | 9.94794e-08 | |
MA1929.1-CTCF | 2.71582e-06 | |
MA1930.1-CTCF | 2.71582e-06 | |
MA1102.2-CTCFL | 6.68715e-06 | |
MA0531.1-CTCF | 6.49032e-05 |
Motif ID | q-val | PWM |
---|---|---|
MA1573.2-Thap11 | 2.30879e-08 | |
MA0088.2-ZNF143 | 0.0200412 | |
MA1716.1-ZNF76 | 0.0300004 | |
MA1625.1-Stat5b | 0.314796 | |
MA0525.2-TP63 | 0.376267 |
Motif ID | q-val | PWM |
---|---|---|
MA0153.2-HNF1B | 2.49254e-05 | |
MA0046.2-HNF1A | 2.49254e-05 | |
MA0186.1-Dfd | 0.00219425 | |
MA1198.1-HAT2 | 0.0855313 | |
MA0203.1-Scr | 0.0877376 |
Motif ID | q-val | PWM |
---|---|---|
MA1833.1-Zm00001d049364 | 0.000498525 | |
MA1961.1-PATZ1 | 0.000852339 | |
MA1820.1-Zm00001d024324 | 0.000852339 | |
MA1713.1-ZNF610 | 0.000852339 | |
MA1513.1-KLF15 | 0.000852339 |
Motif ID | q-val | PWM |
---|---|---|
MA0527.1-ZBTB33 | 2.68474e-07 |
Motif ID | q-val | PWM |
---|---|---|
MA0379.1-MOT2 | 0.367171 |
Motif ID | q-val | PWM |
---|---|---|
MA1833.1-Zm00001d049364 | 0.000237566 | |
MA1819.1-Zm00001d005892 | 0.000354848 | |
MA1820.1-Zm00001d024324 | 0.00036996 | |
MA1817.1-Zm00001d020267 | 0.00036996 | |
MA1832.1-Zm00001d002364 | 0.000454617 |
Motif ID | q-val | PWM |
---|---|---|
MA0600.2-RFX2 | 3.02498e-07 | |
MA0509.3-RFX1 | 3.02498e-07 | |
MA0510.2-RFX5 | 3.02498e-07 | |
MA0798.3-RFX3 | 3.02498e-07 | |
MA1554.1-RFX7 | 0.0326337 |
Motif ID | q-val | PWM |
---|---|---|
MA0018.4-CREB1 | 0.000560801 | |
MA0488.1-JUN | 0.000937485 | |
MA0492.1-JUND | 0.00175052 | |
MA1475.1-CREB3L4 | 0.00175052 | |
MA1069.2-TGA6 | 0.00175052 |
No TOMTOM matches passing threshold
Motif ID | q-val | PWM |
---|---|---|
MA0506.2-Nrf1 | 0.000919615 | |
MA1880.1-Hey | 0.000919615 | |
MA1976.1-ZNF320 | 0.00132244 | |
MA1650.1-ZBTB14 | 0.00663839 | |
MA1877.1-Hes-b | 0.0143771 |
Motif ID | q-val | PWM |
---|---|---|
MA1833.1-Zm00001d049364 | 8.15337e-07 | |
MA2022.1-LOB | 2.8127e-06 | |
MA1239.1-ERF104 | 2.8127e-06 | |
MA1262.1-ERF2 | 6.60049e-06 | |
MA1257.1-ERF9 | 8.3076e-06 |
Motif ID | q-val | PWM |
---|---|---|
MA1684.1-Foxn1 | 0.34168 | |
MA1508.1-IKZF1 | 0.34168 |
Motif ID | q-val | PWM |
---|---|---|
MA1573.2-Thap11 | 0.0291774 | |
MA1716.1-ZNF76 | 0.0291774 | |
MA0399.1-SUT1 | 0.0365294 | |
MA0518.1-Stat4 | 0.0468401 | |
MA1285.1-TCP9 | 0.0602642 |
The output directory '/users/kcochran//projects/procap_models/modisco_out/procap/bpnetlite_basic_v2/CALU3/ENCSR935RNW/2022-07-04_06-07-54_run1_modisco/tomtom' already exists. Its contents will be overwritten. Processing query 1 out of 5 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.965142 Processing query 2 out of 5 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.989611 Processing query 3 out of 5 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.986862 Processing query 4 out of 5 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.968501 Processing query 5 out of 5 Estimating pi_0 from all 3912 observed p-values. Estimating pi_0. Estimated pi_0=0.942118 Can't locate HtmlMonolithWr.pm: /root/lib/perl/HtmlMonolithWr.pm: Permission denied at /software/meme/4.11.2/bin/tomtom_xml_to_html line 48. Warning: tomtom_xml_to_html exited abnormally and may have failed to create HTML output.
Motif ID | q-val | PWM |
---|---|---|
MA1892.1-Klf5-like | 0.000154338 | |
MA0516.3-SP2 | 0.000154338 | |
MA0685.2-SP4 | 0.000154338 | |
MA0742.2-KLF12 | 0.000171891 | |
MA0079.5-SP1 | 0.000195002 |
Motif ID | q-val | PWM |
---|---|---|
MA1141.1-FOS::JUND | 0.00613298 | |
MA1448.1-fos-1 | 0.00613298 | |
MA1130.1-FOSL2::JUN | 0.00984781 | |
MA0591.1-Bach1::Mafk | 0.0134008 | |
MA1137.1-FOSL1::JUNB | 0.0134008 |
Motif ID | q-val | PWM |
---|---|---|
MA0916.1-Ets21C | 0.000287125 | |
MA0156.3-FEV | 0.00191169 | |
MA0641.1-ELF4 | 0.00269273 | |
MA0759.2-ELK3 | 0.00335445 | |
MA1483.2-ELF2 | 0.00351154 |
Motif ID | q-val | PWM |
---|---|---|
MA0506.2-Nrf1 | 0.000374622 | |
MA1880.1-Hey | 0.00407735 | |
MA1650.1-ZBTB14 | 0.0142805 | |
MA1893.1-Klf6-7-like | 0.0635705 | |
MA1713.1-ZNF610 | 0.0635705 |
Motif ID | q-val | PWM |
---|---|---|
MA0609.2-CREM | 0.000879226 | |
MA1348.1-TGA9 | 0.000879226 | |
MA1820.1-Zm00001d024324 | 0.00455812 | |
MA1817.1-Zm00001d020267 | 0.00455812 | |
MA0967.1-BZIP60 | 0.00455812 |