from basepair.imports import *
from basepair.samplers import top_sum_count, top_max_count
from basepair.plot.tracks import plot_tracks, filter_tracks
from kipoi.data_utils import get_dataset_item
from basepair.modisco.table import ModiscoData
from plotnine import *
import plotnine
# Model and modisco output locations (Oct4 task first, then Nanog).
model_dir = Path(f"{ddir}/processed//chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
modisco_dir = model_dir / "modisco/by_peak_tasks/weighted/Oct4"
mr = ModiscoResult(modisco_dir / "modisco.h5")
mr.open()
# load all of modisco data (importance scores / gradients)
md = ModiscoData.load(modisco_dir, model_dir / "grad.all.h5")
# modisco result computed on the validation set (overrides `mr` above)
mr = ModiscoResult(model_dir / "modisco/valid/modisco.h5")
mr.open()
model_dir = Path(f"{ddir}/processed//chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
modisco_dir = model_dir / "modisco/by_peak_tasks/weighted/Nanog"
# Match each seqlet against its cluster centroid.
# Fixed: the original dump assigned the bound method (missing `()`) and had a
# dangling `dfm =` line, which was a syntax error.
dfm = md.get_centroid_seqlet_matches()
dfm.head()
# TODO - get the whole table
dfm = md.get_centroid_seqlet_matches()
dfm.to_csv(modisco_dir / "centroid_seqlet_matches.csv.bak")
# Pattern of interest plus the full list of (motif name, modisco pattern id) pairs.
pattern = 'metacluster_0/pattern_0'
pattern_names = [
    ("Oct4-Sox2", "metacluster_0/pattern_0"),
    ("Errb", "metacluster_0/pattern_1"),
    ("Sox2", "metacluster_0/pattern_2"),
    ("Nanog", "metacluster_0/pattern_3"),
    ("Klf4", "metacluster_2/pattern_0"),
]
# Cache location for the pre-computed genome-wide hit scores.
cached = True
cache_dir = Path(ddir) / "cache/dev"
cache_dir.mkdir(parents=True, exist_ok=True)
cache_file = cache_dir / "hit-scoring-contrib-only.hdf5"
# Load the cached genome-wide hit scores and derive per-TF score categories.
dfp = pd.read_hdf(cache_file, "/scores")
# add some additional features to dfp
dfp['idx'] = np.arange(len(dfp))
dfp = dfp[dfp.score_seq_match > 0]  # drop hits with a poor sequence match
# -----------------------------------------------------------------
# log1p-transform the weighted scores to tame the long right tail
# (np.log1p is the numerically-stable form of np.log(1 + x))
dfp['log_imp_weighted'] = np.log1p(dfp['imp_weighted'])
dfp['log_match_weighted'] = np.log1p(dfp['match_weighted'])
# partition the scores into 4 categories (according to the per-TF median)
dfp_medians = pd.DataFrame({'log_match_weighted': dfp.groupby("tf").log_match_weighted.median(),
                            'log_imp_weighted': dfp.groupby("tf").log_imp_weighted.median()}).reset_index()
log_imp_weighted_median = dfp.tf.map(dfp.groupby("tf").log_imp_weighted.median())
log_match_weighted_median = dfp.tf.map(dfp.groupby("tf").log_match_weighted.median())
dfp['imp_cat'] = (dfp.log_imp_weighted > log_imp_weighted_median).map({False: 'low', True: 'high'})
dfp['match_cat'] = (dfp.log_match_weighted > log_match_weighted_median).map({False: 'low', True: 'high'})
dfp_subset = dfp.sample(10000)  # focus on the subset of the data
# TODO: get the matrix of scores and scan these sequences using the scores
# (fixed: the two lines above were bare prose in the notebook dump — syntax errors)
# ---------------------------------------------------------------
# Scan the seqlet regions of one pattern and compare the score
# distributions against the cached genome-wide hits (dfp).
pattern_name = 'metacluster_0/pattern_0'
task = 'Oct4'
# Fixed: `tasks` was defined AFTER its first use in the comprehensions
# below, which raised a NameError in the original dump.
tasks = md.tasks
pattern = md.mr.get_pattern(pattern_name).trim_seq_ic(0.08)
i, j = md.get_trim_idx(pattern_name)
seq = md.get_seq(pattern_name)[:, i:j]
profile = {task: md.get_profile_wide(pattern_name, task) for task in tasks}
contrib = {task: md.get_imp(pattern_name, task, 'profile')[:, i:j] for task in tasks}
# sanity check: re-computed average contribution should match the stored one
plot_tracks(dict(recomputed=contrib[task].mean(axis=0), orig=pattern.contrib[task]));
from basepair.plot.heatmaps import heatmap_stranded_profile, multiple_heatmap_stranded_profile
from basepair.plot.profiles import plot_stranded_profile
plot_stranded_profile(profile[task].mean(axis=0))
heatmap_stranded_profile(profile[task][np.argsort(-profile[task].sum((1,2)))]);
# Scan the seqlets with the pattern: importance-weighted match + raw seq match
match, importance = pattern.scan_importance(contrib, hyp_contrib=None, tasks=tasks,
                                            n_jobs=1, verbose=True, pad_mode=None)
seq_match = pattern.scan_seq(seq, n_jobs=1, verbose=True, pad_mode=None)
dfm = pattern.get_instances(tasks, match, importance, seq_match, fdr=1, verbose=True)
print(f"Discarding {np.mean(dfm.score_seq_match < 0):.2%} of points with poor sequence match")
dfm = dfm[dfm.score_seq_match > 0]
dfm.head()
# Score histograms for the seqlet-derived hits
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
dfm.match_weighted.plot.hist(100, ax=axes[0]);
axes[0].set_xlabel("Match");
dfm.imp_weighted.plot.hist(100, ax=axes[1]);
axes[1].set_xlabel("Importance");
dfm.score_seq_match.plot.hist(100, ax=axes[2]);
axes[2].set_xlabel("Sequence match");
plt.tight_layout()
# Same histograms for the cached genome-wide hits of this pattern
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
# NOTE(review): the second assignment (full dfp, threshold 0.2) overrides the
# first — presumably intentional; confirm which subset was meant.
dfmp = dfp_subset[(dfp_subset.pattern == pattern.name) & (dfp_subset.match_weighted > 0.4)]
dfmp = dfp[(dfp.pattern == pattern.name) & (dfp.match_weighted > 0.2)]
dfmp.match_weighted.plot.hist(100, ax=axes[0]);
axes[0].set_xlabel("Match");
dfmp.imp_weighted.plot.hist(100, ax=axes[1]);
axes[1].set_xlabel("Importance");
dfmp.score_seq_match.plot.hist(100, ax=axes[2]);
axes[2].set_xlabel("Sequence match");
plt.tight_layout()
from tqdm import tqdm
def get_all_dfm_seqlets(md, trim_frac=0.08, n_jobs=1):
    """Scan every modisco pattern against its own seqlets and stack the results."""
    frames = []
    for pattern in tqdm(md.mr.patterns()):
        frames.append(get_dfm_seqlets(md, pattern, trim_frac, n_jobs))
    return pd.concat(frames)
def get_dfm_seqlets(md, pattern_name, trim_frac=0.08, n_jobs=1, verbose=False):
    """Scan the seqlets of a single modisco pattern with that same pattern.

    Args:
      md: ModiscoData instance
      pattern_name: modisco pattern id, e.g. 'metacluster_0/pattern_0'
      trim_frac: information-content fraction used to trim the pattern
      n_jobs: number of parallel jobs for the scanning calls
      verbose: also print/plot diagnostics from `get_instances`

    Returns:
      pd.DataFrame of pattern instances, restricted to score_seq_match > 0
    """
    tasks = md.tasks
    pattern = md.mr.get_pattern(pattern_name).trim_seq_ic(trim_frac)
    i, j = md.get_trim_idx(pattern_name)
    seq = md.get_seq(pattern_name)[:, i:j]
    # NOTE: the original also built a `profile` dict via md.get_profile_wide,
    # but it was never used — removed as dead work.
    contrib = {task: md.get_imp(pattern_name, task, 'profile')[:, i:j] for task in tasks}
    match, importance = pattern.scan_importance(contrib, hyp_contrib=None, tasks=tasks,
                                                n_jobs=n_jobs, verbose=False, pad_mode=None)
    seq_match = pattern.scan_seq(seq, n_jobs=n_jobs, verbose=False, pad_mode=None)
    dfm = pattern.get_instances(tasks, match, importance, seq_match, fdr=1,
                                verbose=verbose, plot=verbose)
    # keep only instances with a positive sequence-match score
    return dfm[dfm.score_seq_match > 0]
# --- quantile-bucketing scratch pad -------------------------------------
d = pd.Series(np.arange(10))
quantiles = [0, .33, .66, 1.0]
labels = ['low', 'medium', 'high']
# Percentile grid kept under its own name.
# Fixed: the original overwrote `labels` with the 100 percentile values,
# which made pd.cut below fail (3 bins need exactly 3 labels).
percentile_steps = np.arange(0, 1.01, 0.01)
percentile_labels = percentile_steps[1:]
dfm = pd.read_csv(f"{modisco_dir}/../Nanog/centroid_seqlet_matches.csv")
q = d.quantile(quantiles)
pd.cut(d, q.values, labels=labels, include_lowest=True, right=True)
# TODO - get the quantile compared to another distribution - p_seqlet
# (removed the dangling `d.buck` autocomplete fragment, which raised AttributeError)
dfm = get_all_dfm_seqlets(md)
md.mr.fpath
dfm.tail()
def plot(dfm_seqlets, dfm_orig, title):
    """Plot score histograms for seqlet hits vs genome-wide hits.

    First figure: match / importance / seq-match histograms of the
    seqlet-derived instances. Second figure: the same for the genome-wide
    instances, with red vertical lines at the low/high category boundaries.

    Args:
      dfm_seqlets: instance table from scanning the pattern's own seqlets
      dfm_orig: genome-wide instance table with 'imp_cat' / 'match_cat' columns
      title: motif name, used as the title of the first panel

    Raises:
      ValueError: if `dfm_orig` is empty (no genome-wide hits for the pattern).
    """
    dfm = dfm_seqlets
    fig, axes = plt.subplots(1, 3, figsize=(12, 3))
    dfm.match_weighted.plot.hist(100, ax=axes[0]);
    axes[0].set_xlabel("Match");
    axes[0].set_title(title);
    dfm.imp_weighted.plot.hist(100, ax=axes[1]);
    axes[1].set_xlabel("Importance");
    dfm.score_seq_match.plot.hist(100, ax=axes[2]);
    axes[2].set_xlabel("Sequence match");
    plt.tight_layout()
    fig, axes = plt.subplots(1, 3, figsize=(12, 3))
    dfmp = dfm_orig
    if len(dfmp) == 0:
        # Fixed: was a left-over `pdb.set_trace()` debugging breakpoint
        raise ValueError(f"No genome-wide hits provided for motif {title!r}")
    # red line = smallest score still classified as 'high'
    threshold = dfmp[dfmp['match_cat'] == 'high'].match_weighted.min()
    dfmp.match_weighted.plot.hist(100, ax=axes[0]);
    axes[0].set_xlabel("Match");
    axes[0].axvline(threshold, color='red')
    dfmp.imp_weighted.plot.hist(100, ax=axes[1]);
    # Fixed: the importance threshold was computed from match_weighted
    threshold = dfmp[dfmp['imp_cat'] == 'high'].imp_weighted.min()
    axes[1].set_xlabel("Importance");
    axes[1].axvline(threshold, color='red')
    dfmp.score_seq_match.plot.hist(100, ax=axes[2]);
    axes[2].set_xlabel("Sequence match");
    plt.tight_layout()
# Compare seqlet vs genome-wide score distributions for each motif.
pattern_names = [
    ("Oct4-Sox2", "metacluster_0/pattern_0"),
    ("Errb", "metacluster_0/pattern_1"),
    ("Sox2", "metacluster_0/pattern_2"),
    ("Nanog", "metacluster_0/pattern_3"),
    ("Klf4", "metacluster_2/pattern_0"),
]
for motif, pattern_name in pattern_names:
    print(motif)
    seqlet_hits = get_dfm_seqlets(md, pattern_name, verbose=True)
    genome_hits = dfp_subset[(dfp_subset.pattern == pattern_name)]
    plot(seqlet_hits, genome_hits, motif)
# Manually chosen per-motif match_weighted thresholds (read off the plots above).
thresholds = {"Klf4": 0.55,
              "Nanog": 0.45,
              "Sox2": 0.55,
              "Errb": 0.4,
              "Oct4-Sox2": 0.4
              }