import sys
import os
sys.path.append(os.path.abspath("/users/amtseng/tfmodisco/src/"))
from util import figure_to_vdom_image
import motif.read_motifs as read_motifs
from motif.read_motifs import pfm_to_pwm
import plot.viz_sequence as viz_sequence
import numpy as np
import matplotlib.pyplot as plt
import vdom.helpers as vdomh
from IPython.display import display


# Read the run configuration from environment variables
tf_name = os.environ["TFM_TF_NAME"]
multitask_fold = int(os.environ["TFM_MULTITASK_FOLD"])

# The task index and single-task fold are optional: both stay None unless
# TFM_TASK_INDEX is set, in which case TFM_SINGLETASK_FOLD is required too
task_index = None
singletask_fold = None
if "TFM_TASK_INDEX" in os.environ:
    task_index = int(os.environ["TFM_TASK_INDEX"])
    singletask_fold = int(os.environ["TFM_SINGLETASK_FOLD"])

# Echo the configuration that will be used below
print("TF name: %s" % tf_name)
print("Multi-task fold: %s" % multitask_fold)
print("Task index: %s" % task_index)
print("Single-task fold: %s" % singletask_fold)

TF name: GABPA
Multi-task fold: 7
Task index: 4
Single-task fold: 7


# Root directory under which all benchmark results are looked up
base_path = "/users/amtseng/tfmodisco/results/classic_motifs/"

# Directory of multi-task seqlet results for this TF and fold
multitask_seqlets_dir = os.path.join(
    base_path, "seqlets", "multitask_profile_finetune",
    "%s_multitask_profile_finetune_fold%s" % (tf_name, multitask_fold)
)

if task_index is not None:
    # A specific task was requested: every path carries the task index, and
    # the single-task paths are defined in addition to the multi-task ones
    profile_dirname = "%s_seqlets_profile_task%d" % (tf_name, task_index)
    count_dirname = "%s_seqlets_count_task%d" % (tf_name, task_index)

    peaks_path = os.path.join(
        base_path, "peaks", tf_name,
        "%s_peaks_task%d" % (tf_name, task_index)
    )
    multitask_profile_seqlets_path = os.path.join(
        multitask_seqlets_dir, profile_dirname
    )
    multitask_count_seqlets_path = os.path.join(
        multitask_seqlets_dir, count_dirname
    )

    singletask_seqlets_dir = os.path.join(
        base_path, "seqlets", "singletask_profile_finetune",
        "%s_singletask_profile_finetune_fold%s" % (tf_name, singletask_fold),
        "task_%d" % task_index
    )
    singletask_profile_seqlets_path = os.path.join(
        singletask_seqlets_dir, profile_dirname
    )
    singletask_count_seqlets_path = os.path.join(
        singletask_seqlets_dir, count_dirname
    )
else:
    # No task was requested: use the "taskall" results; the single-task
    # paths are left undefined in this case
    peaks_path = os.path.join(
        base_path, "peaks", tf_name, "%s_peaks_taskall" % tf_name
    )
    multitask_profile_seqlets_path = os.path.join(
        multitask_seqlets_dir, "%s_seqlets_profile_taskall" % tf_name
    )
    multitask_count_seqlets_path = os.path.join(
        multitask_seqlets_dir, "%s_seqlets_count_taskall" % tf_name
    )


def show_peaks_motif_table(results_path, mode):
    """
    Displays a table of the motifs imported from the given results path.
    Each row holds the motif's 1-based index, its score as reported by the
    importer, and a plot of its PWM.
    Arguments:
        `results_path`: path handed to the importer for `mode`; for
            `memechip`, the `meme_out` subdirectory of this path is read
        `mode`: either `dichipmunk`, `homer`, `meme`, or `memechip`; this
            selects the importer and the name of the score column
    Returns nothing; the table is rendered with `display`. All open
    matplotlib figures are closed before returning.
    """
    assert mode in ("dichipmunk", "homer", "meme", "memechip")
    if mode == "dichipmunk":
        score_name = "Supporting sequences"
        pfms, score_vals = read_motifs.import_dichipmunk_pfms(results_path)
    elif mode == "homer":
        score_name = "Log enrichment"
        pfms, score_vals = read_motifs.import_homer_pfms(results_path)
    elif mode == "meme":
        score_name = "E-value"
        pfms, score_vals = read_motifs.import_meme_pfms(results_path)
    else:
        score_name = "E-value"
        pfms, score_vals = read_motifs.import_meme_pfms(
            os.path.join(results_path, "meme_out")
        )

    colgroup = vdomh.colgroup(
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"})
    )
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Motif", style={"text-align": "center"}),
            vdomh.th(score_name, style={"text-align": "center"}),
            vdomh.th("PWM", style={"text-align": "center"})
        )
    )

    body = []
    for i, pfm in enumerate(pfms):
        pwm = pfm_to_pwm(pfm)
        # Columns 0 and 2 are presumably A and G (assuming ACGT column
        # order); if they hold less than half of the total weight, reverse
        # both axes to get the purine-rich orientation
        if np.sum(pwm[:, [0, 2]]) < 0.5 * np.sum(pwm):
            pwm = np.flip(pwm, axis=(0, 1))
        fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
        try:
            fig.tight_layout()
            image = figure_to_vdom_image(fig)
        finally:
            # Close each figure as soon as it has been converted, so that
            # long motif lists do not accumulate open pyplot figures (which
            # triggers matplotlib's "More than 20 figures" warning)
            plt.close(fig)

        body.append(
            vdomh.tr(
                vdomh.td(str(i + 1)),
                vdomh.td(str(score_vals[i])),
                vdomh.td(image)
            )
        )

    display(vdomh.table(colgroup, header, vdomh.tbody(*body)))
    # Kept from the original behavior: close anything that is still open
    plt.close("all")


def _seqlet_motif_cells(pfms, score_vals, index):
    """
    Returns the two table cells (score, PWM plot) for the motif at `index`
    in `pfms`, or two empty cells if `index` is past the end of `pfms`.
    """
    if index >= len(pfms):
        return [vdomh.td(), vdomh.td()]

    pwm = pfm_to_pwm(pfms[index])
    # Columns 0 and 2 are presumably A and G (assuming ACGT column order);
    # if they hold less than half of the total weight, reverse both axes to
    # get the purine-rich orientation
    if np.sum(pwm[:, [0, 2]]) < 0.5 * np.sum(pwm):
        pwm = np.flip(pwm, axis=(0, 1))
    fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
    try:
        fig.tight_layout()
        image = figure_to_vdom_image(fig)
    finally:
        # Close each figure as soon as it has been converted, so that long
        # motif lists do not accumulate open pyplot figures (which triggers
        # matplotlib's "More than 20 figures" warning)
        plt.close(fig)
    return [vdomh.td(str(score_vals[index])), vdomh.td(image)]


def show_seqlets_motif_table(profile_results_path, count_results_path, mode):
    """
    Displays a side-by-side table of the motifs imported from the profile
    and count results paths. Row `i` holds the 1-based index, then the score
    and PWM plot of the `i`-th profile motif, then the score and PWM plot of
    the `i`-th count motif. If one side has fewer motifs than the other, its
    cells are left empty in the extra rows.
    Arguments:
        `profile_results_path`: path handed to the importer for the
            profile-head motifs; for `memechip`, its `meme_out` subdirectory
            is read
        `count_results_path`: same, for the count-head motifs
        `mode`: either `dichipmunk`, `homer`, `meme`, or `memechip`; this
            selects the importer and the name of the score columns
    Returns nothing; the table is rendered with `display`. All open
    matplotlib figures are closed before returning.
    """
    assert mode in ("dichipmunk", "homer", "meme", "memechip")
    if mode == "dichipmunk":
        score_name = "Supporting sequences"
        p_pfms, p_score_vals = read_motifs.import_dichipmunk_pfms(profile_results_path)
        c_pfms, c_score_vals = read_motifs.import_dichipmunk_pfms(count_results_path)
    elif mode == "homer":
        score_name = "Log enrichment"
        p_pfms, p_score_vals = read_motifs.import_homer_pfms(profile_results_path)
        c_pfms, c_score_vals = read_motifs.import_homer_pfms(count_results_path)
    elif mode == "meme":
        score_name = "E-value"
        p_pfms, p_score_vals = read_motifs.import_meme_pfms(profile_results_path)
        c_pfms, c_score_vals = read_motifs.import_meme_pfms(count_results_path)
    else:
        score_name = "E-value"
        p_pfms, p_score_vals = read_motifs.import_meme_pfms(
            os.path.join(profile_results_path, "meme_out")
        )
        c_pfms, c_score_vals = read_motifs.import_meme_pfms(
            os.path.join(count_results_path, "meme_out")
        )

    colgroup = vdomh.colgroup(
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"})
    )
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Motif", style={"text-align": "center"}),
            vdomh.th(score_name + " (profile)", style={"text-align": "center"}),
            vdomh.th("PWM (profile)", style={"text-align": "center"}),
            vdomh.th(score_name + " (count)", style={"text-align": "center"}),
            vdomh.th("PWM (count)", style={"text-align": "center"})
        )
    )

    body = []
    # One row per motif index, up to the longer of the two motif lists
    for i in range(max(len(p_pfms), len(c_pfms))):
        cells = [vdomh.td(str(i + 1))]
        cells.extend(_seqlet_motif_cells(p_pfms, p_score_vals, i))
        cells.extend(_seqlet_motif_cells(c_pfms, c_score_vals, i))
        body.append(vdomh.tr(*cells))

    display(vdomh.table(colgroup, header, vdomh.tbody(*body)))
    # Kept from the original behavior: close anything that is still open
    plt.close("all")


# DiChIPMunk motifs called on peaks
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "dichipmunk"),
    mode="dichipmunk"
)


# DiChIPMunk motifs called on multi-task seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(
        multitask_profile_seqlets_path, "dichipmunk"
    ),
    count_results_path=os.path.join(
        multitask_count_seqlets_path, "dichipmunk"
    ),
    mode="dichipmunk"
)


# DiChIPMunk motifs called on single-task seqlets (only when a task is set)
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(
            singletask_profile_seqlets_path, "dichipmunk"
        ),
        count_results_path=os.path.join(
            singletask_count_seqlets_path, "dichipmunk"
        ),
        mode="dichipmunk"
    )


# HOMER motifs called on peaks
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "homer"),
    mode="homer"
)


# HOMER motifs called on multi-task seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(multitask_profile_seqlets_path, "homer"),
    count_results_path=os.path.join(multitask_count_seqlets_path, "homer"),
    mode="homer"
)

/users/amtseng/tfmodisco/src/plot/viz_sequence.py:152: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  fig = plt.figure(figsize=figsize)


# HOMER motifs called on single-task seqlets (only when a task is set)
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(
            singletask_profile_seqlets_path, "homer"
        ),
        count_results_path=os.path.join(
            singletask_count_seqlets_path, "homer"
        ),
        mode="homer"
    )


# MEME-ChIP motifs called on peaks
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "memechip"),
    mode="memechip"
)


# MEME motifs called on multi-task seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(multitask_profile_seqlets_path, "meme"),
    count_results_path=os.path.join(multitask_count_seqlets_path, "meme"),
    mode="meme"
)


# MEME motifs called on single-task seqlets (only when a task is set)
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(
            singletask_profile_seqlets_path, "meme"
        ),
        count_results_path=os.path.join(
            singletask_count_seqlets_path, "meme"
        ),
        mode="meme"
    )

Motif	Supporting sequences	PWM
1	2000
2	2000
3	2000
4	1993
5	1398
6	691
7	199
8	36
9	17
10	9

Motif	Supporting sequences (profile)	Supporting sequences (count)
1	11999	12184
2	907	367
3	1847	1037

Motif	Supporting sequences (profile)	Supporting sequences (count)
1	11483	11100
2	1864	1873
3		202
4		59
5		15
6		4

Motif	Log enrichment	PWM
1	-5948.253465
2	-4641.634084
3	-2037.456047
4	-1990.123471
5	-1621.477994
6	-1130.559476
7	-867.722193
8	-799.206508
9	-789.78852
10	-721.199406
11	-646.206798
12	-614.259887
13	-522.286811
14	-449.362971
15	-349.048879
16	-243.11734
17	-202.583585
18	-185.010414
19	-141.023013

Motif	Log enrichment (profile)	Log enrichment (count)
1	-6736.814265	-13320.873569
2	-3079.128383	-3877.837343
3	-960.611248	-904.784286
4	-866.242637	-638.365927
5	-696.538982	-337.268249
6	-660.702685	-336.830939
7	-623.010219	-330.969808
8	-603.161279	-71.033188
9	-524.79096	-49.401978
10	-483.524674	-45.274078
11	-362.970099	-39.668379
12	-316.978397	-29.286586
13	-311.2087
14	-294.332833
15	-283.103882
16	-249.817387
17	-238.863154
18	-227.161886
19	-225.258232
20	-202.350006
21	-201.133371
22	-147.313491
23	-115.683435
24	-41.958787

Link to results

Helper functions

Show benchmark motifs

Motif	Log enrichment (profile)	Log enrichment (count)
1	-8235.705849	-10314.223998
2	-864.359966	-544.747608
3	-448.795244	-392.467246
4	-390.084181	-312.473296
5	-305.369571	-190.62723
6	-223.130437	-166.394196
7	-204.185779	-152.820352
8	-93.779785	-151.835119
9	-93.572945	-73.498474
10	-84.537168	-72.436312
11	-73.981651	-56.072287
12	-63.80065	-48.494723
13	-45.718262	-46.133838
14	-7.098996	-25.820829

Motif	E-value	PWM
1	1.2e-224
2	6.9e-74
3	4.8e-69
4	1.9e-54
5	1.4e-65
6	1.1e-38
7	3.8e-05
8	8.5e-06
9	0.00024
10	0.0067

Motif	E-value (profile)	E-value (count)
1	0.0	0.0
2	1.2e-137	9.3e-88
3	1.7e-79	1500.0
4	2.2e-14	12000.0
5	8.3e-13	3000.0
6	2.6e-07	150000.0
7	7.8e-07	190000.0
8	1.4e-05	910000.0
9	54.0	940000.0
10	170.0	1000000.0

Motif	E-value (profile)	E-value (count)
1	0.0	0.0
2	4.8e-35	260.0
3	3.4e-11	15000.0
4	1300.0	29000.0
5	4800.0	140000.0
6	6900.0	160000.0
7	14000.0	210000.0
8	7600.0	580000.0
9	36000.0	580000.0
10	54000.0	580000.0