import sys
import os
sys.path.append(os.path.abspath("/users/amtseng/tfmodisco/src/"))
from util import figure_to_vdom_image
import motif.read_motifs as read_motifs
from motif.read_motifs import pfm_to_pwm
import plot.viz_sequence as viz_sequence
import numpy as np
import matplotlib.pyplot as plt
import vdom.helpers as vdomh
from IPython.display import display


# Define parameters/fetch arguments
# The TF name and the multi-task fold must always be supplied through the
# environment; a missing variable raises `KeyError`.
tf_name = os.environ["TFM_TF_NAME"]
multitask_fold = int(os.environ["TFM_MULTITASK_FOLD"])

# The task index is optional. When it is given, the single-task fold must be
# given as well; otherwise both stay `None`.
task_index, singletask_fold = None, None
if "TFM_TASK_INDEX" in os.environ:
    task_index = int(os.environ["TFM_TASK_INDEX"])
    singletask_fold = int(os.environ["TFM_SINGLETASK_FOLD"])

# Echo the resolved configuration, one setting per line
print("\n".join((
    "TF name: %s" % tf_name,
    "Multi-task fold: %s" % multitask_fold,
    "Task index: %s" % task_index,
    "Single-task fold: %s" % singletask_fold,
)))

TF name: GABPA
Multi-task fold: 7
Task index: 6
Single-task fold: 1


# Define paths and constants
# Everything below is located relative to this results directory
base_path = "/users/amtseng/tfmodisco/results/classic_motifs/"

# Seqlet results of the multi-task model for the selected TF and fold
multitask_seqlets_dir = os.path.join(
    base_path,
    "seqlets",
    "multitask_profile_finetune",
    "%s_multitask_profile_finetune_fold%s" % (tf_name, multitask_fold),
)

if task_index is not None:
    # A specific task was requested: use the task-specific peak and
    # multi-task seqlet results
    peaks_path = os.path.join(
        base_path, "peaks", tf_name,
        "%s_peaks_task%d" % (tf_name, task_index),
    )
    multitask_profile_seqlets_path = os.path.join(
        multitask_seqlets_dir, "%s_seqlets_profile_task%d" % (tf_name, task_index)
    )
    multitask_count_seqlets_path = os.path.join(
        multitask_seqlets_dir, "%s_seqlets_count_task%d" % (tf_name, task_index)
    )

    # Single-task results only exist when a task index is given, so these
    # names are left undefined in the all-tasks case
    singletask_seqlets_dir = os.path.join(
        base_path,
        "seqlets",
        "singletask_profile_finetune",
        "%s_singletask_profile_finetune_fold%s" % (tf_name, singletask_fold),
        "task_%d" % task_index,
    )
    singletask_profile_seqlets_path = os.path.join(
        singletask_seqlets_dir, "%s_seqlets_profile_task%d" % (tf_name, task_index)
    )
    singletask_count_seqlets_path = os.path.join(
        singletask_seqlets_dir, "%s_seqlets_count_task%d" % (tf_name, task_index)
    )
else:
    # No task index: use the results aggregated over all tasks
    peaks_path = os.path.join(
        base_path, "peaks", tf_name,
        "%s_peaks_taskall" % tf_name,
    )
    multitask_profile_seqlets_path = os.path.join(
        multitask_seqlets_dir, "%s_seqlets_profile_taskall" % tf_name
    )
    multitask_count_seqlets_path = os.path.join(
        multitask_seqlets_dir, "%s_seqlets_count_taskall" % tf_name
    )


def show_peaks_motif_table(results_path, mode):
    """
    Shows a table of motifs from the given results path.
    Arguments:
        `results_path`: path to the results of a single motif-discovery run;
            for `memechip`, the motifs are read from the `meme_out`
            subdirectory of this path
        `mode`: either `dichipmunk`, `homer`, `meme`, or `memechip`
    Displays an HTML table with one row per motif, containing the 1-based
    motif index, the score imported alongside the motif, and a plot of its
    PWM. Nothing is returned. On return, all open matplotlib figures have
    been closed.
    """
    assert mode in ("dichipmunk", "homer", "meme", "memechip")
    if mode == "dichipmunk":
        score_name = "Supporting sequences"
        pfms, score_vals = read_motifs.import_dichipmunk_pfms(results_path)
    elif mode == "homer":
        score_name = "Log enrichment"
        pfms, score_vals = read_motifs.import_homer_pfms(results_path)
    else:
        # MEME and MEME-ChIP share an importer; MEME-ChIP keeps its MEME
        # output in a `meme_out` subdirectory
        score_name = "E-value"
        if mode == "memechip":
            results_path = os.path.join(results_path, "meme_out")
        pfms, score_vals = read_motifs.import_meme_pfms(results_path)

    colgroup = vdomh.colgroup(
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"})
    )
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Motif", style={"text-align": "center"}),
            vdomh.th(score_name, style={"text-align": "center"}),
            vdomh.th("PWM", style={"text-align": "center"})
        )
    )

    body = []
    for i, pfm in enumerate(pfms):
        pwm = pfm_to_pwm(pfm)
        # Columns 0 and 2 are presumably A and G (assumes A, C, G, T column
        # order -- TODO confirm against `read_motifs`)
        if np.sum(pwm[:, [0, 2]]) < 0.5 * np.sum(pwm):
            # Flip to purine-rich version
            pwm = np.flip(pwm, axis=(0, 1))
        fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
        try:
            fig.tight_layout()
            # NOTE(review): assumes `figure_to_vdom_image` renders the figure
            # eagerly, so the figure is no longer needed afterwards
            image = figure_to_vdom_image(fig)
        finally:
            # Close each figure as soon as it has been rendered. Keeping
            # them all open until the end of the function triggers
            # matplotlib's "More than 20 figures have been opened" warning
            # on long motif lists and leaks the figures if rendering fails.
            plt.close(fig)

        body.append(
            vdomh.tr(
                vdomh.td(str(i + 1)),
                vdomh.td(str(score_vals[i])),
                vdomh.td(image)
            )
        )

    display(vdomh.table(colgroup, header, vdomh.tbody(*body)))
    # Kept for backward compatibility: also closes any figure that was
    # opened elsewhere before this call
    plt.close("all")


def show_seqlets_motif_table(profile_results_path, count_results_path, mode):
    """
    Shows a side-by-side table of the motifs from a profile-head run and a
    count-head run.
    Arguments:
        `profile_results_path`: path to the motif results for the profile
            seqlets; for `memechip`, the motifs are read from the `meme_out`
            subdirectory of this path
        `count_results_path`: path to the motif results for the count
            seqlets; treated the same way as `profile_results_path`
        `mode`: either `dichipmunk`, `homer`, `meme`, or `memechip`
    Displays an HTML table whose i-th row holds the score and PWM plot of
    the i-th profile motif and of the i-th count motif. If one run has fewer
    motifs than the other, its cells are left empty in the extra rows.
    Nothing is returned. On return, all open matplotlib figures have been
    closed.
    """
    assert mode in ("dichipmunk", "homer", "meme", "memechip")
    if mode == "dichipmunk":
        score_name = "Supporting sequences"
        import_pfms = read_motifs.import_dichipmunk_pfms
    elif mode == "homer":
        score_name = "Log enrichment"
        import_pfms = read_motifs.import_homer_pfms
    else:
        # MEME and MEME-ChIP share an importer; MEME-ChIP keeps its MEME
        # output in a `meme_out` subdirectory
        score_name = "E-value"
        import_pfms = read_motifs.import_meme_pfms
        if mode == "memechip":
            profile_results_path = os.path.join(profile_results_path, "meme_out")
            count_results_path = os.path.join(count_results_path, "meme_out")
    p_pfms, p_score_vals = import_pfms(profile_results_path)
    c_pfms, c_score_vals = import_pfms(count_results_path)

    def motif_cells(pfms, score_vals, index):
        # Returns the [score cell, PWM cell] pair for motif `index`, or two
        # empty cells if this run has no motif at that index
        if index >= len(pfms):
            return [vdomh.td(), vdomh.td()]
        pwm = pfm_to_pwm(pfms[index])
        # Columns 0 and 2 are presumably A and G (assumes A, C, G, T column
        # order -- TODO confirm against `read_motifs`)
        if np.sum(pwm[:, [0, 2]]) < 0.5 * np.sum(pwm):
            # Flip to purine-rich version
            pwm = np.flip(pwm, axis=(0, 1))
        fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
        try:
            fig.tight_layout()
            # NOTE(review): assumes `figure_to_vdom_image` renders the figure
            # eagerly, so the figure is no longer needed afterwards
            image = figure_to_vdom_image(fig)
        finally:
            # Close each figure as soon as it has been rendered. Keeping
            # them all open until the end of the function triggers
            # matplotlib's "More than 20 figures have been opened" warning
            # on long motif lists and leaks the figures if rendering fails.
            plt.close(fig)
        return [vdomh.td(str(score_vals[index])), vdomh.td(image)]

    colgroup = vdomh.colgroup(
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"})
    )
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Motif", style={"text-align": "center"}),
            vdomh.th(score_name + " (profile)", style={"text-align": "center"}),
            vdomh.th("PWM (profile)", style={"text-align": "center"}),
            vdomh.th(score_name + " (count)", style={"text-align": "center"}),
            vdomh.th("PWM (count)", style={"text-align": "center"})
        )
    )

    body = []
    for i in range(max(len(p_pfms), len(c_pfms))):
        cells = [vdomh.td(str(i + 1))]
        cells.extend(motif_cells(p_pfms, p_score_vals, i))
        cells.extend(motif_cells(c_pfms, c_score_vals, i))
        body.append(vdomh.tr(*cells))

    display(vdomh.table(colgroup, header, vdomh.tbody(*body)))
    # Kept for backward compatibility: also closes any figure that was
    # opened elsewhere before this call
    plt.close("all")


# DiChIPMunk motifs found in the peaks
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "dichipmunk"),
    mode="dichipmunk",
)


# DiChIPMunk motifs found in the multi-task seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(multitask_profile_seqlets_path, "dichipmunk"),
    count_results_path=os.path.join(multitask_count_seqlets_path, "dichipmunk"),
    mode="dichipmunk",
)


# DiChIPMunk motifs found in the single-task seqlets; the single-task paths
# only exist when a task index was given
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(singletask_profile_seqlets_path, "dichipmunk"),
        count_results_path=os.path.join(singletask_count_seqlets_path, "dichipmunk"),
        mode="dichipmunk",
    )


# HOMER motifs found in the peaks
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "homer"),
    mode="homer",
)


# HOMER motifs found in the multi-task seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(multitask_profile_seqlets_path, "homer"),
    count_results_path=os.path.join(multitask_count_seqlets_path, "homer"),
    mode="homer",
)

/users/amtseng/tfmodisco/src/plot/viz_sequence.py:152: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  fig = plt.figure(figsize=figsize)


# HOMER motifs found in the single-task seqlets; the single-task paths only
# exist when a task index was given
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(singletask_profile_seqlets_path, "homer"),
        count_results_path=os.path.join(singletask_count_seqlets_path, "homer"),
        mode="homer",
    )


# MEME-ChIP motifs found in the peaks
show_peaks_motif_table(
    results_path=os.path.join(peaks_path, "memechip"),
    mode="memechip",
)


# MEME motifs found in the multi-task seqlets
show_seqlets_motif_table(
    profile_results_path=os.path.join(multitask_profile_seqlets_path, "meme"),
    count_results_path=os.path.join(multitask_count_seqlets_path, "meme"),
    mode="meme",
)


# MEME motifs found in the single-task seqlets
if task_index is not None:
    show_seqlets_motif_table(
        profile_results_path=os.path.join(singletask_profile_seqlets_path, "meme"),
        count_results_path=os.path.join(singletask_count_seqlets_path, "meme"),
        mode="meme",
    )

Motif	Supporting sequences	PWM
1	2000
2	2000
3	2000
4	2000
5	1558
6	1105
7	493
8	212
9	119
10	19

Motif	Supporting sequences (profile)	Supporting sequences (count)
1	11697	10096
2	4928	344
3		628
4		2

Motif	Supporting sequences (profile)	Supporting sequences (count)
1	14797	15802
2	1364	1271
3	423	153
4		69
5		28
6		15

Motif	Log enrichment	PWM
1	-7025.960047
2	-3109.403489
3	-2877.848195
4	-2738.831672
5	-1988.416946
6	-1770.080232
7	-1591.38706
8	-1353.430446
9	-1345.511865
10	-1334.542853
11	-965.569012
12	-871.517103
13	-868.532955
14	-854.468978
15	-815.934739
16	-801.757654
17	-566.634192
18	-344.018989
19	-234.21327
20	-208.236916

Motif	Log enrichment (profile)	Log enrichment (count)
1	-8393.654678	-14558.697961
2	-3386.134064	-3131.073495
3	-1134.70508	-599.019754
4	-1001.497101	-510.47697
5	-831.068024	-357.412819
6	-709.408132	-306.71548
7	-661.974125	-132.719807
8	-368.984008	-61.855898
9	-339.499751	-60.727885
10	-163.926783	-57.562632
11	-154.304563	-45.075549
12	-143.592172	-45.039976
13	-111.452166	-36.557142
14	-104.724721
15	-95.209835

Link to results

Helper functions

Show benchmark motifs

Motif	Log enrichment (profile)	Log enrichment (count)
1	-11714.999656	-16071.112993
2	-2951.541607	-3644.097702
3	-1904.906974	-3504.521267
4	-1545.261506	-2764.645413
5	-1506.644505	-1072.003693
6	-941.820815	-772.459373
7	-890.817509	-405.611921
8	-448.20169	-256.690076
9	-355.940165	-163.064612
10	-104.533568	-140.904283
11	-88.681534	-110.054801
12	-29.866989

Motif	E-value	PWM
1	1.3e-183
2	9.2e-80
3	1.6e-75
4	5.6e-25
5	6.5e-25
6	2.9e-22
7	2.3e-10
8	3.1e-06
9	2.2e-06
10	0.00028

Motif	E-value (profile)	E-value (count)
1	0.0	0.0
2	4.3e-89	2.9e-105
3	2.6e-80	1.6e-16
4	3e-78	0.2
5	8.4e-75	200000.0
6	1.5e-21	210000.0
7	4.4e-07	280000.0
8	8.7e-05	360000.0
9	6.3e-06	360000.0
10	0.35	380000.0

Motif	E-value (profile)	E-value (count)
1	0.0	0.0
2	3.8e-118	2.3e-58
3	2e-29	1e-61
4	7.5e-28	2.1e-37
5	3.7e-05	8100.0
6	4.4e-05	100000.0
7	1.9e-07	380000.0
8	280.0	820000.0
9	4300.0	2300000.0
10	17000.0	2400000.0