import sys
import os
sys.path.append(os.path.abspath("/users/amtseng/tfmodisco/src/"))
from util import figure_to_vdom_image
import motif.read_motifs as read_motifs
from motif.read_motifs import pfm_to_pwm
import plot.viz_sequence as viz_sequence
import numpy as np
import matplotlib.pyplot as plt
import vdom.helpers as vdomh
from IPython.display import display


# Read the run configuration from environment variables.
# TFM_TF_NAME and TFM_MULTITASK_FOLD are always required.
tf_name = os.environ["TFM_TF_NAME"]
multitask_fold = int(os.environ["TFM_MULTITASK_FOLD"])

# The task index and single-task fold are only read when TFM_TASK_INDEX is
# set; otherwise both are None, which the rest of the notebook treats as
# "all tasks" with no single-task results to show.
task_index = (
    int(os.environ["TFM_TASK_INDEX"])
    if "TFM_TASK_INDEX" in os.environ else None
)
singletask_fold = (
    int(os.environ["TFM_SINGLETASK_FOLD"])
    if "TFM_TASK_INDEX" in os.environ else None
)

# Echo the configuration so it is recorded in the notebook output
print("TF name: %s" % tf_name)
print("Multi-task fold: %s" % multitask_fold)
print("Task index: %s" % task_index)
print("Single-task fold: %s" % singletask_fold)

TF name: MAFK
Multi-task fold: 7
Task index: 5
Single-task fold: 9


# Root directory holding all of the motif-finder results read below
base_path = "/users/amtseng/tfmodisco/results/classic_motifs/"

# Results computed on peaks: a task-specific directory when a task index
# was given, otherwise the "taskall" directory
peaks_path = os.path.join(
    base_path, "peaks", tf_name,
    ("%s_peaks_taskall" % tf_name) if task_index is None
    else ("%s_peaks_task%d" % (tf_name, task_index))
)

# Results computed on seqlets of the multi-task model, for the chosen fold;
# there is one subdirectory each for the profile and count seqlets
multitask_seqlets_dir = os.path.join(
    base_path, "seqlets", "multitask_profile_finetune",
    "%s_multitask_profile_finetune_fold%s" % (tf_name, multitask_fold)
)
multitask_profile_seqlets_path = os.path.join(
    multitask_seqlets_dir,
    ("%s_seqlets_profile_taskall" % tf_name) if task_index is None
    else ("%s_seqlets_profile_task%d" % (tf_name, task_index))
)
multitask_count_seqlets_path = os.path.join(
    multitask_seqlets_dir,
    ("%s_seqlets_count_taskall" % tf_name) if task_index is None
    else ("%s_seqlets_count_task%d" % (tf_name, task_index))
)

# Results computed on seqlets of the single-task model; these paths are
# only defined when a specific task was requested
if task_index is not None:
    singletask_seqlets_dir = os.path.join(
        base_path, "seqlets", "singletask_profile_finetune",
        "%s_singletask_profile_finetune_fold%s" % (tf_name, singletask_fold),
        "task_%d" % task_index
    )
    singletask_profile_seqlets_path = os.path.join(
        singletask_seqlets_dir,
        "%s_seqlets_profile_task%d" % (tf_name, task_index)
    )
    singletask_count_seqlets_path = os.path.join(
        singletask_seqlets_dir,
        "%s_seqlets_count_task%d" % (tf_name, task_index)
    )


def show_peaks_motif_table(results_path, mode):
    """
    Displays a table of the motifs found under `results_path`, with one row
    per motif showing the motif number, its score, and a plot of its PWM.
    Arguments:
        `results_path`: path to the motif-finder results; for `memechip`,
            the motifs are read from the `meme_out` subdirectory of this
            path
        `mode`: which tool produced the results; one of `dichipmunk`,
            `homer`, `meme`, or `memechip`
    The score column is labeled "Supporting sequences" for `dichipmunk`,
    "Log enrichment" for `homer`, and "E-value" for `meme`/`memechip`.
    Returns nothing; the table is rendered with `display`. All open
    matplotlib figures are closed before returning.
    """
    assert mode in ("dichipmunk", "homer", "meme", "memechip"), \
        "Unknown mode: %s" % mode
    if mode == "dichipmunk":
        score_name = "Supporting sequences"
        pfms, score_vals = read_motifs.import_dichipmunk_pfms(results_path)
    elif mode == "homer":
        score_name = "Log enrichment"
        pfms, score_vals = read_motifs.import_homer_pfms(results_path)
    elif mode == "meme":
        score_name = "E-value"
        pfms, score_vals = read_motifs.import_meme_pfms(results_path)
    else:
        # MEME-ChIP results keep the MEME output in a subdirectory
        score_name = "E-value"
        pfms, score_vals = read_motifs.import_meme_pfms(
            os.path.join(results_path, "meme_out")
        )

    colgroup = vdomh.colgroup(
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"})
    )
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Motif", style={"text-align": "center"}),
            vdomh.th(score_name, style={"text-align": "center"}),
            vdomh.th("PWM", style={"text-align": "center"})
        )
    )

    body = []
    for i, pfm in enumerate(pfms):
        pwm = pfm_to_pwm(pfm)
        # Columns 0 and 2 are taken to be the purines (A and G), i.e. the
        # columns are assumed to be ordered A, C, G, T -- TODO confirm
        if np.sum(pwm[:, [0, 2]]) < 0.5 * np.sum(pwm):
            # Flip to purine-rich version (reversing both axes is the
            # reverse complement under that column ordering)
            pwm = np.flip(pwm, axis=(0, 1))
        fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
        fig.tight_layout()

        body.append(
            vdomh.tr(
                vdomh.td(str(i + 1)),
                vdomh.td(str(score_vals[i])),
                vdomh.td(figure_to_vdom_image(fig))
            )
        )
        # Close each figure as soon as it has been converted, instead of
        # keeping all of them open until the end; otherwise large motif
        # sets trigger matplotlib's "More than 20 figures have been opened"
        # warning and hold on to memory.
        # NOTE(review): assumes `figure_to_vdom_image` renders the figure
        # eagerly and does not need it afterwards -- confirm
        plt.close(fig)

    display(vdomh.table(colgroup, header, vdomh.tbody(*body)))
    # Kept from the original: also closes any figures opened elsewhere
    plt.close("all")


def show_seqlets_motif_table(profile_results_path, count_results_path, mode):
    """
    Displays a side-by-side table of the motifs found under the two given
    results paths. Row `i` shows the `i`th profile motif (score and PWM
    plot) next to the `i`th count motif; if one side has fewer motifs, its
    cells are left empty for the remaining rows.
    Arguments:
        `profile_results_path`: path to the motif-finder results for the
            profile seqlets
        `count_results_path`: path to the motif-finder results for the
            count seqlets
        `mode`: which tool produced the results; one of `dichipmunk`,
            `homer`, `meme`, or `memechip`; for `memechip`, the motifs are
            read from the `meme_out` subdirectory of each path
    The score columns are labeled "Supporting sequences" for `dichipmunk`,
    "Log enrichment" for `homer`, and "E-value" for `meme`/`memechip`.
    Returns nothing; the table is rendered with `display`. All open
    matplotlib figures are closed before returning.
    """
    assert mode in ("dichipmunk", "homer", "meme", "memechip"), \
        "Unknown mode: %s" % mode
    if mode == "dichipmunk":
        score_name = "Supporting sequences"
        import_pfms = read_motifs.import_dichipmunk_pfms
    elif mode == "homer":
        score_name = "Log enrichment"
        import_pfms = read_motifs.import_homer_pfms
    else:
        score_name = "E-value"
        import_pfms = read_motifs.import_meme_pfms
        if mode == "memechip":
            # MEME-ChIP results keep the MEME output in a subdirectory
            profile_results_path = os.path.join(
                profile_results_path, "meme_out"
            )
            count_results_path = os.path.join(count_results_path, "meme_out")

    p_pfms, p_score_vals = import_pfms(profile_results_path)
    c_pfms, c_score_vals = import_pfms(count_results_path)

    def motif_cells(pfms, score_vals, i):
        # Builds the score cell and PWM cell for motif `i`, or two empty
        # cells if this side has no motif `i`
        if i >= len(pfms):
            return [vdomh.td(), vdomh.td()]
        pwm = pfm_to_pwm(pfms[i])
        # Columns 0 and 2 are taken to be the purines (A and G), i.e. the
        # columns are assumed to be ordered A, C, G, T -- TODO confirm
        if np.sum(pwm[:, [0, 2]]) < 0.5 * np.sum(pwm):
            # Flip to purine-rich version (reversing both axes is the
            # reverse complement under that column ordering)
            pwm = np.flip(pwm, axis=(0, 1))
        fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
        fig.tight_layout()
        cells = [
            vdomh.td(str(score_vals[i])),
            vdomh.td(figure_to_vdom_image(fig))
        ]
        # Close each figure as soon as it has been converted, instead of
        # keeping all of them open until the end; otherwise large motif
        # sets trigger matplotlib's "More than 20 figures have been opened"
        # warning and hold on to memory.
        # NOTE(review): assumes `figure_to_vdom_image` renders the figure
        # eagerly and does not need it afterwards -- confirm
        plt.close(fig)
        return cells

    colgroup = vdomh.colgroup(
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"})
    )
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Motif", style={"text-align": "center"}),
            vdomh.th(score_name + " (profile)", style={"text-align": "center"}),
            vdomh.th("PWM (profile)", style={"text-align": "center"}),
            vdomh.th(score_name + " (count)", style={"text-align": "center"}),
            vdomh.th("PWM (count)", style={"text-align": "center"})
        )
    )

    body = []
    for i in range(max(len(p_pfms), len(c_pfms))):
        cells = [vdomh.td(str(i + 1))]
        cells.extend(motif_cells(p_pfms, p_score_vals, i))
        cells.extend(motif_cells(c_pfms, c_score_vals, i))
        body.append(vdomh.tr(*cells))

    display(vdomh.table(colgroup, header, vdomh.tbody(*body)))
    # Kept from the original: also closes any figures opened elsewhere
    plt.close("all")


# DiChIPMunk motifs found from peaks
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "dichipmunk"),
    mode="dichipmunk"
)


# DiChIPMunk motifs found from the multi-task model's seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(
        multitask_profile_seqlets_path, "dichipmunk"
    ),
    count_results_path=os.path.join(
        multitask_count_seqlets_path, "dichipmunk"
    ),
    mode="dichipmunk"
)


# DiChIPMunk motifs found from the single-task model's seqlets; these only
# exist when a specific task was requested
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(
            singletask_profile_seqlets_path, "dichipmunk"
        ),
        count_results_path=os.path.join(
            singletask_count_seqlets_path, "dichipmunk"
        ),
        mode="dichipmunk"
    )


# HOMER motifs found from peaks
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "homer"),
    mode="homer"
)


# HOMER motifs found from the multi-task model's seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(multitask_profile_seqlets_path, "homer"),
    count_results_path=os.path.join(multitask_count_seqlets_path, "homer"),
    mode="homer"
)

/users/amtseng/tfmodisco/src/plot/viz_sequence.py:152: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  fig = plt.figure(figsize=figsize)


# HOMER motifs found from the single-task model's seqlets; these only exist
# when a specific task was requested
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(
            singletask_profile_seqlets_path, "homer"
        ),
        count_results_path=os.path.join(
            singletask_count_seqlets_path, "homer"
        ),
        mode="homer"
    )


# MEME motifs found from peaks; note that the peaks results are read in
# "memechip" mode, whereas the seqlet results below are read in "meme" mode
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "memechip"),
    mode="memechip"
)


# MEME motifs found from the multi-task model's seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(multitask_profile_seqlets_path, "meme"),
    count_results_path=os.path.join(multitask_count_seqlets_path, "meme"),
    mode="meme"
)


# MEME motifs found from the single-task model's seqlets
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(
            singletask_profile_seqlets_path, "meme"
        ),
        count_results_path=os.path.join(
            singletask_count_seqlets_path, "meme"
        ),
        mode="meme"
    )

Motif	Supporting sequences	PWM
1	1445
2	1997
3	2000
4	1997
5	1926
6	1092
7	635
8	293
9	131
10	43

Motif	Supporting sequences (profile)	Supporting sequences (count)
1	15294	16326
2	1639	364
3	59	77
4		88

Motif	Supporting sequences (profile)	Supporting sequences (count)
1	15824	16411
2	1295	638
3		65
4		9

Motif	Log enrichment	PWM
1	-44178.882737
2	-6407.262417
3	-6305.112128
4	-5342.12795
5	-4241.496721
6	-3436.004115
7	-2349.833067
8	-2158.943472
9	-1946.296803
10	-1666.867323
11	-1381.814375
12	-1142.499773
13	-466.982286
14	-342.78158
15	-178.27698
16	-108.793889

Motif	Log enrichment (profile)	Log enrichment (count)
1	-16916.90938	-19402.071429
2	-6129.986492	-5296.079776
3	-4773.911505	-816.517438
4	-1787.821141	-789.302171
5	-1145.337859	-267.334884
6	-1092.4533	-245.907199
7	-903.625136	-229.122402
8	-849.010872	-192.685011
9	-783.098085	-162.290105
10	-719.916161	-72.775645
11	-701.243367	-38.193051
12	-520.365848
13	-518.498878
14	-303.386847
15	-294.528424
16	-240.434349
17	-20.517145
18	-17.599526

Link to results

Helper functions

Show benchmark motifs

Motif	Log enrichment (profile)	Log enrichment (count)
1	-17782.728648	-18372.854221
2	-3406.457866	-3143.741964
3	-3107.951685	-794.789619
4	-1269.820715	-220.801506
5	-1103.541209	-197.794451
6	-990.888383	-134.627019
7	-475.991345	-118.397191
8	-446.72805	-93.141857
9	-423.718007	-91.989639
10	-371.95404	-68.117722
11	-253.845072	-40.009383
12	-253.845072
13	-225.315468
14	-225.315468
15	-187.678461
16	-132.87633
17	-132.87633
18	-128.053934
19	-109.072712
20	-61.033618
21	-39.634429

Motif	E-value	PWM
1	0.0
2	6.4e-29
3	2.2e-23
4	7.2e-20
5	5.6e-11
6	2.4e-10
7	2.5e-06
8	0.00055
9	0.0046
10	1200.0

Motif	E-value (profile)	E-value (count)
1	0.0	0.0
2	6e-48	2.2e-16
3	3.6e-41	2.4e-06
4	2.1e-33	1200.0
5	4.4e-12	510000.0
6	7.4e-10	830000.0
7	6.6e-07	1000000.0
8	8.6e-06	1000000.0
9	0.00077	1100000.0
10	0.075	1100000.0

Motif	E-value (profile)	E-value (count)
1	0.0	0.0
2	1.5e-53	3.1e-48
3	2.4e-23	16000.0
4	2.4e-23	1900000.0
5	2.4e-23	2000000.0
6	2.4e-23	2200000.0
7	2.4e-23	2300000.0
8	2.4e-23	2300000.0
9	2.4e-23	2600000.0
10	2.4e-23	3000000.0