# Filepaths and Hard-coded Defaults
# Root of the project tree; note the trailing slash, which every path built
# from it relies on.
proj_root = "/home/users/kcochran/oak/kcochran/procap_models/"

# Reference genome FASTA and chromosome-size table, both under genomes/.
sequence_path = f"{proj_root}genomes/GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta"
chrom_sizes = f"{proj_root}genomes/hg38.chrom.sizes.withrRNA"

# Window widths handed to the data-loading and plotting helpers further down.
# NOTE(review): presumably the model's input sequence length and output
# profile length -- confirm against the training code.
in_window, out_window = 2114, 1000
# stuff to get from config file
# The config is plain text with one "<key> <value>" pair per line, separated
# by whitespace. Only the first two tokens of a line are used.
config_path = "2022-07-26_15-18-44_run1_modisco_config_K562_counts.txt"
config_dict = {}
with open(config_path) as config_f:
    for line_num, line in enumerate(config_f, start=1):
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no setting; skip them instead of failing on fields[0].
            continue
        if len(fields) < 2:
            # A key with no value is a malformed config; say where it is
            # rather than surfacing a bare IndexError.
            raise ValueError(
                f"{config_path}:{line_num}: expected '<key> <value>', got {line!r}"
            )
        config_dict[fields[0]] = fields[1]

# Required settings; a missing key raises KeyError naming the key.
modisco_out_path = config_dict["modisco_out_path"]
scoring_type = config_dict["scoring_type"]
score_center_size = int(config_dict["score_center_size"])
profile_display_center_size = int(config_dict["profile_display_center_size"])
train_val_type = config_dict["train_val_type"]
# digest what's in config file
# Guarantee a trailing slash so that splitting on "/" always ends with an
# empty component, which the [-6:-1] slice below then drops.
if not modisco_out_path.endswith("/"):
    modisco_out_path += "/"

# The five directory levels directly above the trailing slash are read as
# <assay_type>/<model_type>/<cell>/<accession>/<modisco_dir_base>/
(
    assay_type,
    model_type,
    cell,
    accession,
    modisco_dir_base,
) = modisco_out_path.split("/")[-6:-1]

# The run directory name must have exactly four "_"-separated pieces:
# two timestamp halves, "run<N>", and a trailing label that is discarded.
ts_part1, ts_part2, run_str, _ = modisco_dir_base.split("_")
timestamp = "_".join((ts_part1, ts_part2))
run = int(run_str.replace("run", ""))

# Echo the resolved settings so the notebook output records what was analysed.
print(modisco_out_path)
print("cell_type:", cell, accession)
print("timestamp:", timestamp)
print("run:", run)
print("scoring_type:", scoring_type)
print("score_center_size:", score_center_size)
print("profile_display_center_size:", profile_display_center_size)
/home/users/kcochran/oak/kcochran/procap_models/modisco_out/procap/bpnetlite_basic_v2_umap/K562/ENCSR261KBX/2022-07-26_15-18-44_run1_modisco/ cell_type: K562 ENCSR261KBX timestamp: 2022-07-26_15-18-44 run: 1 scoring_type: counts score_center_size: 1000 profile_display_center_size: 400
# Input data for this cell type / accession. proj_root already ends with "/",
# so no leading slash is added here (consistent with the other proj_root paths).
data_dir = proj_root + "data/procap/processed/" + cell + "/" + accession + "/"
plus_bw_path = data_dir + "final.5prime.pos.bigWig"
minus_bw_path = data_dir + "final.5prime.neg.bigWig"
val_peak_path = data_dir + "peaks_uni_and_bi_" + train_val_type + ".bed.gz"

# Model outputs and attribution scores share the same per-experiment
# sub-directory and run prefix; only the top-level directory differs.
experiment_subdir = assay_type + "/" + model_type + "/" + cell + "/" + accession + "/"
run_prefix = timestamp + "_run" + str(run)

val_save_dir = proj_root + "model_out/" + experiment_subdir
val_save_path = val_save_dir + run_prefix + "_" + train_val_type
# Built directly under deepshap_out/ rather than by string-replacing
# "model_out" in val_save_dir, which would also rewrite any other occurrence
# of that substring elsewhere in the path.
attr_save_path = proj_root + "deepshap_out/" + experiment_subdir + run_prefix + "_deepshap"
# task-specific filepaths
import os

# Only two scoring modes are supported; each maps to the short tag that is
# embedded in the file names below.
assert scoring_type in ["profile", "counts"], scoring_type
score_type_short = "prof" if scoring_type == "profile" else "count"

# Attribution score arrays for this run.
scores_path = f"{attr_save_path}_{score_type_short}.npy"
onehot_scores_path = f"{attr_save_path}_{score_type_short}_onehot.npy"

# Files and directories inside the modisco output directory.
modisco_obj_path = (
    f"{modisco_out_path}results_allChroms_{score_type_short}"
    f"_slice{score_center_size}.hdf5"
)
seqlet_path = f"{modisco_out_path}seqlets_{score_type_short}.txt"
tomtom_dir = f"{modisco_out_path}tomtom_{score_type_short}"

# Fail early, naming the missing file, if the score arrays are not on disk.
assert os.path.exists(scores_path), scores_path
assert os.path.exists(onehot_scores_path), onehot_scores_path
# Imports, Plotting Defaults
import matplotlib.font_manager as font_manager
import matplotlib.pyplot as plt

# Notebook-wide matplotlib text defaults, applied once through rcParams.
plot_params = {
    # all text is drawn bold
    "font.weight": "bold",
    # figure and axes titles
    "figure.titlesize": 22,
    "axes.titlesize": 22,
    # axis labels and legend entries
    "axes.labelsize": 20,
    "legend.fontsize": 18,
    # tick labels on both axes
    "xtick.labelsize": 16,
    "ytick.labelsize": 16,
}
plt.rcParams.update(plot_params)
from IPython.display import display
import tqdm
import tqdm.notebook

# tqdm.tqdm_notebook() is deprecated and scheduled for removal in tqdm 5.0.0;
# tqdm.notebook.tqdm is the replacement named by the deprecation warning.
# Called with no iterable, exactly as before, so the only change is that the
# TqdmDeprecationWarning is no longer emitted.
tqdm.notebook.tqdm()
import numpy as np
from view_modisco_results_utils import *
from tomtom_utils import *
/home/users/kcochran/miniconda3/envs/procap/lib/python3.7/site-packages/ipykernel_launcher.py:19: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0 Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
# Load in True Profiles and Sequences
import sys

# extract_peaks is defined in the sibling training directory, so that
# directory has to be on the import path first.
sys.path.append('../1_train_models')
from data_loading import extract_peaks

# max_jitter=0 is passed so that no jitter is applied by the loader.
# NOTE(review): presumably this keeps examples aligned with the saved
# attribution scores -- confirm in data_loading.
one_hot_seqs, true_profs = extract_peaks(
    sequence_path,
    plus_bw_path,
    minus_bw_path,
    val_peak_path,
    in_window,
    out_window,
    max_jitter=0,
    verbose=True,
)

# Swap axes 1 and 2, then keep only the positions around in_window // 2 along
# axis 1. The kept width is 2 * (score_center_size // 2), i.e. exactly
# score_center_size only when that value is even.
one_hot_seqs = one_hot_seqs.swapaxes(1, 2)[
    :,
    (in_window // 2 - score_center_size // 2):(in_window // 2 + score_center_size // 2),
    :
]
Reading FASTA: 100%|██████████| 24/24 [00:09<00:00, 2.43it/s] Loading Peaks: 27000it [00:17, 1577.42it/s]
# Load in Coordinates of Examples
coords = load_coords(val_peak_path, in_window)

# Import SHAP scores, predicted profiles
# The score array gets the same treatment as the one-hot sequences: swap
# axes 1 and 2, then crop axis 1 to the window around in_window // 2.
hyp_scores = np.load(scores_path).swapaxes(1, 2)[
    :,
    (in_window // 2 - score_center_size // 2):(in_window // 2 + score_center_size // 2),
    :
]

# The saved profile predictions are exponentiated on load.
# NOTE(review): presumably they were written in log space -- confirm with the
# script that produced the .profs.npy file.
pred_profs = np.exp(np.load(f"{val_save_path}.profs.npy"))
# Load modisco results object
tfm_obj = import_tfmodisco_results(modisco_obj_path, hyp_scores, one_hot_seqs)

# Plot every metacluster's patterns and keep the per-motif matrices and
# seqlet bookkeeping that the plotting helper returns.
(
    motif_pfms,
    motif_hcwms,
    motif_cwms,
    motif_pfms_short,
    num_seqlets,
    motif_seqlets,
    num_metaclusters,
) = plot_all_metaclusters(
    tfm_obj,
    one_hot_seqs,
    hyp_scores,
    true_profs,
    pred_profs,
    coords,
    in_window,
    out_window,
    score_center_size,
    profile_display_center_size,
)
9434 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
7826 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
6364 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
5823 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
3199 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
1645 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
1311 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
1282 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
1063 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
986 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
904 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
888 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
825 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
810 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
434 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
376 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
154 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
77 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
51 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
45 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
38 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
36 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
30 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
27 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
245 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
217 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
207 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
190 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
179 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
167 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
148 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
140 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
114 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
68 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
62 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
39 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
39 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
22 seqlets
Sequence (PFM) | |
Hypothetical contributions (hCWM) | |
Actual contributions (CWM) |
# Compare the discovered motifs against known motifs and display the matches.
# The helper receives the modisco output directory, the motif matrices
# returned by plot_all_metaclusters, the metacluster count, and the
# score-type-specific TOMTOM directory. For each motif it prints a
# "Motif ID | q-val | PWM" table, or "No TOMTOM matches passing threshold".
# NOTE(review): the MAxxxx.y IDs in the output look like JASPAR accessions;
# confirm the database and q-value threshold in tomtom_utils.
run_and_plot_tomtom(modisco_out_path, motif_pfms, motif_hcwms, motif_pfms_short, num_metaclusters, tomtom_dir)
Motif ID | q-val | PWM |
---|---|---|
MA0764.3 | 0.000162441 | |
MA1854.1 | 0.000162441 | |
MA0916.1 | 0.000162441 | |
MA0759.2 | 0.000162441 | |
MA0156.3 | 0.000294336 |
Motif ID | q-val | PWM |
---|---|---|
MA1513.1 | 7.05562e-05 | |
MA0742.2 | 0.000241713 | |
MA0741.1 | 0.000266099 | |
MA0685.2 | 0.000266099 | |
MA0079.5 | 0.000266099 |
Motif ID | q-val | PWM |
---|---|---|
MA0314.2 | 6.63569e-05 | |
MA0060.3 | 6.99574e-05 | |
MA1644.1 | 6.99574e-05 | |
MA0502.2 | 0.00822663 | |
MA0316.1 | 0.0200282 |
Motif ID | q-val | PWM |
---|---|---|
MA0506.2 | 0.00226446 | |
MA1826.1 | 0.280905 | |
MA0963.1 | 0.483915 | |
MA1411.1 | 0.483915 | |
MA0566.1 | 0.483915 |
Motif ID | q-val | PWM |
---|---|---|
MA0967.1 | 0.00539641 | |
MA1899.1 | 0.0204083 | |
MA1127.1 | 0.0226253 | |
MA1129.1 | 0.0226253 | |
MA1131.1 | 0.0226253 |
Motif ID | q-val | PWM |
---|---|---|
MA1573.2 | 2.18943e-09 | |
MA0088.2 | 0.0188641 | |
MA1716.1 | 0.0222196 | |
MA1625.1 | 0.258082 | |
MA0519.1 | 0.424395 |
Motif ID | q-val | PWM |
---|---|---|
MA0748.2 | 0.000291442 | |
MA0998.1 | 0.000291442 | |
MA0975.1 | 0.000682161 | |
MA0997.1 | 0.00121529 | |
MA1004.1 | 0.00124277 |
Motif ID | q-val | PWM |
---|---|---|
MA0062.3 | 1.22126e-06 | |
MA0598.3 | 3.51474e-06 | |
MA1992.1 | 3.51474e-06 | |
MA0473.3 | 6.69464e-06 | |
MA0474.3 | 6.69464e-06 |
Motif ID | q-val | PWM |
---|---|---|
MA0139.1 | 9.40082e-08 | |
MA1929.1 | 9.03979e-06 | |
MA1930.1 | 4.08527e-05 | |
MA1102.2 | 4.08527e-05 | |
MA0531.1 | 4.08527e-05 |
Motif ID | q-val | PWM |
---|---|---|
MA0501.1 | 0.000563504 | |
MA0591.1 | 0.000563504 | |
MA0150.2 | 0.000661069 | |
MA0089.2 | 0.000924499 | |
MA1448.1 | 0.00102698 |
Motif ID | q-val | PWM |
---|---|---|
MA1818.1 | 0.00155592 | |
MA1832.1 | 0.00155592 | |
MA1819.1 | 0.00155592 | |
MA1821.1 | 0.0017066 | |
MA1004.1 | 0.00309407 |
Motif ID | q-val | PWM |
---|---|---|
MA0527.1 | 0.000205718 |
Motif ID | q-val | PWM |
---|---|---|
MA0361.1 | 0.202231 | |
MA1713.1 | 0.395047 |
Motif ID | q-val | PWM |
---|---|---|
MA1053.1 | 2.36025e-05 | |
MA1051.1 | 0.000610616 | |
MA0531.1 | 0.00132933 | |
MA1102.2 | 0.00188639 | |
MA0975.1 | 0.00337625 |
Motif ID | q-val | PWM |
---|---|---|
MA0531.1 | 6.49531e-05 | |
MA0139.1 | 0.000503732 | |
MA1929.1 | 0.0014458 | |
MA1930.1 | 0.00159874 | |
MA1102.2 | 0.00177191 |
Motif ID | q-val | PWM |
---|---|---|
MA0429.1 | 0.00440271 | |
MA0361.1 | 0.017081 | |
MA0544.1 | 0.246562 |
Motif ID | q-val | PWM |
---|---|---|
MA1833.1 | 0.103082 | |
MA1820.1 | 0.103082 | |
MA1257.1 | 0.103082 | |
MA1053.1 | 0.103082 | |
MA1228.1 | 0.103082 |
Motif ID | q-val | PWM |
---|---|---|
MA0640.2 | 0.28707 | |
MA0060.3 | 0.28707 | |
MA1950.1 | 0.28707 | |
MA1946.1 | 0.28707 | |
MA0750.2 | 0.28707 |
Motif ID | q-val | PWM |
---|---|---|
MA1651.1 | 0.0302719 | |
MA1650.1 | 0.0302719 | |
MA1460.1 | 0.0800821 | |
MA1832.1 | 0.0800821 | |
MA1513.1 | 0.0800821 |
Motif ID | q-val | PWM |
---|---|---|
MA0986.1 | 0.00213016 | |
MA1475.1 | 0.00864253 | |
MA1007.1 | 0.00932555 | |
MA1023.1 | 0.00932555 | |
MA1951.1 | 0.0488629 |
Motif ID | q-val | PWM |
---|---|---|
MA1625.1 | 0.200861 | |
MA0532.1 | 0.377664 | |
MA0137.3 | 0.489665 | |
MA0519.1 | 0.489665 | |
MA0107.1 | 0.489665 |
Motif ID | q-val | PWM |
---|---|---|
MA0443.1 | 0.489425 | |
MA0197.2 | 0.489425 | |
MA2002.1 | 0.489425 | |
MA0740.2 | 0.489425 | |
MA1596.1 | 0.489425 |
Motif ID | q-val | PWM |
---|---|---|
MA0527.1 | 0.0875108 |
Motif ID | q-val | PWM |
---|---|---|
MA1066.1 | 0.180416 | |
MA1050.1 | 0.180416 | |
MA1097.1 | 0.295633 | |
MA1095.1 | 0.295633 | |
MA1098.1 | 0.295633 |
Motif ID | q-val | PWM |
---|---|---|
MA1820.1 | 0.000823024 | |
MA1819.1 | 0.0012159 | |
MA1833.1 | 0.00151157 | |
MA1513.1 | 0.00174568 | |
MA0146.2 | 0.00255733 |
Motif ID | q-val | PWM |
---|---|---|
MA1890.1 | 4.82112e-05 | |
MA1893.1 | 0.000145121 | |
MA1892.1 | 0.000145121 | |
MA1833.1 | 0.000295031 | |
MA1961.1 | 0.000541838 |
Motif ID | q-val | PWM |
---|---|---|
MA1890.1 | 6.94638e-07 | |
MA1892.1 | 7.96622e-06 | |
MA1893.1 | 1.78525e-05 | |
MA1961.1 | 3.57157e-05 | |
MA1713.1 | 0.000161049 |
Motif ID | q-val | PWM |
---|---|---|
MA1268.1 | 0.0212701 | |
MA1274.1 | 0.0212701 | |
MA1823.1 | 0.0212701 | |
MA1277.1 | 0.0212701 | |
MA1279.1 | 0.0212701 |
Motif ID | q-val | PWM |
---|---|---|
MA0538.1 | 0.000257144 | |
MA1107.2 | 0.311119 | |
MA1865.1 | 0.311119 |
Motif ID | q-val | PWM |
---|---|---|
MA1281.1 | 6.88853e-07 | |
MA1267.1 | 6.88853e-07 | |
MA1274.1 | 6.88853e-07 | |
MA1268.1 | 1.67476e-06 | |
MA1871.1 | 2.75635e-06 |
Motif ID | q-val | PWM |
---|---|---|
MA1890.1 | 1.18747e-05 | |
MA1892.1 | 0.000145535 | |
MA1893.1 | 0.000165245 | |
MA1880.1 | 0.000221964 | |
MA1833.1 | 0.000378314 |
Motif ID | q-val | PWM |
---|---|---|
MA1961.1 | 0.000168842 | |
MA1890.1 | 0.000168842 | |
MA1650.1 | 0.000168842 | |
MA1893.1 | 0.000229531 | |
MA1892.1 | 0.000259187 |
No TOMTOM matches passing threshold
Motif ID | q-val | PWM |
---|---|---|
MA1890.1 | 2.9359e-06 | |
MA1513.1 | 1.28468e-05 | |
MA1961.1 | 6.7422e-05 | |
MA1893.1 | 6.7422e-05 | |
MA1892.1 | 6.7422e-05 |
Motif ID | q-val | PWM |
---|---|---|
MA1890.1 | 5.88833e-07 | |
MA1892.1 | 2.57685e-05 | |
MA1961.1 | 2.57685e-05 | |
MA1893.1 | 2.57685e-05 | |
MA1513.1 | 0.000191278 |
Motif ID | q-val | PWM |
---|---|---|
MA0502.2 | 0.00113474 | |
MA0316.1 | 0.0113704 | |
MA0314.2 | 0.0113704 | |
MA0060.3 | 0.0275581 | |
MA1644.1 | 0.0275581 |
Motif ID | q-val | PWM |
---|---|---|
MA1115.1 | 0.465146 | |
MA0963.1 | 0.465146 | |
MA0957.1 | 0.465146 | |
MA0507.2 | 0.465146 | |
MA0322.1 | 0.465146 |
Motif ID | q-val | PWM |
---|---|---|
MA1403.1 | 2.21772e-15 | |
MA1404.1 | 6.64357e-13 | |
MA1402.1 | 1.01881e-10 | |
MA1416.1 | 6.38357e-07 | |
MA0205.2 | 0.000149371 |