import sys
import os
sys.path.append(os.path.abspath("/users/amtseng/tfmodisco/src/"))
from util import figure_to_vdom_image
import motif.read_motifs as read_motifs
from motif.read_motifs import pfm_to_pwm
import plot.viz_sequence as viz_sequence
import numpy as np
import matplotlib.pyplot as plt
import vdom.helpers as vdomh
from IPython.display import display


# Define parameters/fetch arguments
tf_name = os.environ["TFM_TF_NAME"]
multitask_fold = int(os.environ["TFM_MULTITASK_FOLD"])

if "TFM_TASK_INDEX" in os.environ:
    task_index = int(os.environ["TFM_TASK_INDEX"])
    singletask_fold = int(os.environ["TFM_SINGLETASK_FOLD"])
else:
    task_index = None
    singletask_fold = None
    
print("TF name: %s" % tf_name)
print("Multi-task fold: %s" % multitask_fold)
print("Task index: %s" % task_index)
print("Single-task fold: %s" % singletask_fold)

TF name: JUND
Multi-task fold: 7
Task index: 3
Single-task fold: 7


# Define paths and constants
base_path = "/users/amtseng/tfmodisco/results/classic_motifs/"

multitask_seqlets_dir = os.path.join(
    base_path, "seqlets", "multitask_profile_finetune",
    "%s_multitask_profile_finetune_fold%s" % (tf_name, multitask_fold)
)

if task_index is None:
    peaks_path = os.path.join(base_path, "peaks", tf_name, "%s_peaks_taskall" % tf_name)
    multitask_profile_seqlets_path = os.path.join(
        multitask_seqlets_dir,
        "%s_seqlets_profile_taskall" % tf_name
    )
    multitask_count_seqlets_path = os.path.join(
        multitask_seqlets_dir,
        "%s_seqlets_count_taskall" % tf_name
    )
else:
    peaks_path = os.path.join(base_path, "peaks", tf_name, "%s_peaks_task%d" % (tf_name, task_index))
    multitask_profile_seqlets_path = os.path.join(
        multitask_seqlets_dir,
        "%s_seqlets_profile_task%d" % (tf_name, task_index)
    )
    multitask_count_seqlets_path = os.path.join(
        multitask_seqlets_dir,
        "%s_seqlets_count_task%d" % (tf_name, task_index)
    )
    
    singletask_seqlets_dir = os.path.join(
        base_path, "seqlets", "singletask_profile_finetune",
        "%s_singletask_profile_finetune_fold%s" % (tf_name, singletask_fold),
        "task_%d" % task_index
    )
    singletask_profile_seqlets_path = os.path.join(
        singletask_seqlets_dir,
        "%s_seqlets_profile_task%d" % (tf_name, task_index)
    )
    singletask_count_seqlets_path = os.path.join(
        singletask_seqlets_dir,
        "%s_seqlets_count_task%d" % (tf_name, task_index)
    )


def show_peaks_motif_table(results_path, mode):
    """
    Shows a table of motifs from the given results path.
    `mode` is either `dichipmunk`, `homer`, `meme`, or `memechip`.
    """
    assert mode in ("dichipmunk", "homer", "meme", "memechip")
    if mode == "dichipmunk":
        score_name = "Supporting sequences"
        pfms, score_vals = read_motifs.import_dichipmunk_pfms(results_path)
    elif mode == "homer":
        score_name = "Log enrichment"
        pfms, score_vals = read_motifs.import_homer_pfms(results_path)
    elif mode == "meme":
        score_name = "E-value"
        pfms, score_vals = read_motifs.import_meme_pfms(results_path)
    else:
        score_name = "E-value"
        pfms, score_vals = read_motifs.import_meme_pfms(
            os.path.join(results_path, "meme_out")
        )
        
    colgroup = vdomh.colgroup(
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"})
    )
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Motif", style={"text-align": "center"}),
            vdomh.th(score_name, style={"text-align": "center"}),
            vdomh.th("PWM", style={"text-align": "center"})
        )
    )

    body = []
    for i, pfm in enumerate(pfms):
        pwm = pfm_to_pwm(pfm)
        if np.sum(pwm[:, [0, 2]]) < 0.5 * np.sum(pwm):
            # Flip to purine-rich version
            pwm = np.flip(pwm, axis=(0, 1))
        fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
        fig.tight_layout()

        body.append(
            vdomh.tr(
                vdomh.td(str(i + 1)),
                vdomh.td(str(score_vals[i])),
                vdomh.td(figure_to_vdom_image(fig))
            )
        )

    display(vdomh.table(colgroup, header, vdomh.tbody(*body)))
    plt.close("all")


def show_seqlets_motif_table(profile_results_path, count_results_path, mode):
    """
    Shows a table of motifs from the given results path.
    `mode` is either `dichipmunk`, `homer`, `meme`, or `memechip`
    """
    assert mode in ("dichipmunk", "homer", "meme", "memechip")
    if mode == "dichipmunk":
        score_name = "Supporting sequences"
        p_pfms, p_score_vals = read_motifs.import_dichipmunk_pfms(profile_results_path)
        c_pfms, c_score_vals = read_motifs.import_dichipmunk_pfms(count_results_path)
    elif mode == "homer":
        score_name = "Log enrichment"
        p_pfms, p_score_vals = read_motifs.import_homer_pfms(profile_results_path)
        c_pfms, c_score_vals = read_motifs.import_homer_pfms(count_results_path)
    elif mode == "meme":
        score_name = "E-value"
        p_pfms, p_score_vals = read_motifs.import_meme_pfms(profile_results_path)
        c_pfms, c_score_vals = read_motifs.import_meme_pfms(count_results_path)
    else:
        score_name = "E-value"
        p_pfms, p_score_vals = read_motifs.import_meme_pfms(
            os.path.join(profile_results_path, "meme_out")
        )
        c_pfms, c_score_vals = read_motifs.import_meme_pfms(
            os.path.join(count_results_path, "meme_out")
        )
        
    colgroup = vdomh.colgroup(
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"}),
        vdomh.col(style={"width": "5%"}),
        vdomh.col(style={"width": "40%"})
    )
    header = vdomh.thead(
        vdomh.tr(
            vdomh.th("Motif", style={"text-align": "center"}),
            vdomh.th(score_name + " (profile)", style={"text-align": "center"}),
            vdomh.th("PWM (profile)", style={"text-align": "center"}),
            vdomh.th(score_name + " (count)", style={"text-align": "center"}),
            vdomh.th("PWM (count)", style={"text-align": "center"})
        )
    )

    body = []
    for i in range(max(len(p_pfms), len(c_pfms))):
        rows = [vdomh.td(str(i + 1))]
        if i < len(p_pfms):
            pwm = pfm_to_pwm(p_pfms[i])
            if np.sum(pwm[:, [0, 2]]) < 0.5 * np.sum(pwm):
                # Flip to purine-rich version
                pwm = np.flip(pwm, axis=(0, 1))
            fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
            fig.tight_layout()
            rows.extend([
                vdomh.td(str(p_score_vals[i])),
                vdomh.td(figure_to_vdom_image(fig))
            ])
        else:
            rows.extend([vdomh.td(), vdomh.td()])
            
        if i < len(c_pfms):
            pwm = pfm_to_pwm(c_pfms[i])
            if np.sum(pwm[:, [0, 2]]) < 0.5 * np.sum(pwm):
                # Flip to purine-rich version
                pwm = np.flip(pwm, axis=(0, 1))
            fig = viz_sequence.plot_weights(pwm, figsize=(20, 4), return_fig=True)
            fig.tight_layout()
            rows.extend([
                vdomh.td(str(c_score_vals[i])),
                vdomh.td(figure_to_vdom_image(fig))
            ])
        else:
            rows.extend([vdomh.td(), vdomh.td()])
            

        body.append(vdomh.tr(*rows))

    display(vdomh.table(colgroup, header, vdomh.tbody(*body)))
    plt.close("all")


show_peaks_motif_table(os.path.join(peaks_path, "dichipmunk"), "dichipmunk")


show_seqlets_motif_table(
    os.path.join(multitask_profile_seqlets_path, "dichipmunk"),
    os.path.join(multitask_count_seqlets_path, "dichipmunk"),
    "dichipmunk"
)


if task_index is not None:
    show_seqlets_motif_table(
        os.path.join(singletask_profile_seqlets_path, "dichipmunk"),
        os.path.join(singletask_count_seqlets_path, "dichipmunk"),
        "dichipmunk"
    )


show_peaks_motif_table(os.path.join(peaks_path, "homer"), "homer")


show_seqlets_motif_table(
    os.path.join(multitask_profile_seqlets_path, "homer"),
    os.path.join(multitask_count_seqlets_path, "homer"),
    "homer"
)

/users/amtseng/tfmodisco/src/plot/viz_sequence.py:152: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  fig = plt.figure(figsize=figsize)


if task_index is not None:
    show_seqlets_motif_table(
        os.path.join(singletask_profile_seqlets_path, "homer"),
        os.path.join(singletask_count_seqlets_path, "homer"),
        "homer"
    )


show_peaks_motif_table(os.path.join(peaks_path, "memechip"), "memechip")


show_seqlets_motif_table(
    os.path.join(multitask_profile_seqlets_path, "meme"),
    os.path.join(multitask_count_seqlets_path, "meme"),
    "meme"
)


if task_index is not None:
    show_seqlets_motif_table(
        os.path.join(singletask_profile_seqlets_path, "meme"),
        os.path.join(singletask_count_seqlets_path, "meme"),
        "meme"
    )

Motif	Supporting sequences	PWM
1	1973
2	2000
3	2000
4	2000
5	1590
6	1037
7	309
8	204
9	113
10	69

Motif	Supporting sequences (profile)	Supporting sequences (count)
1	15249	12196
2	1973	4736
3	876	1886

Motif	Supporting sequences (profile)	Supporting sequences (count)
1	16525	10507
2	890	7336
3	413	621
4	207	57
5		71

Motif	Log enrichment	PWM
1	-39237.414075
2	-5805.914146
3	-5697.907448
4	-3693.369344
5	-3665.749334
6	-3640.358357
7	-3090.081214
8	-2375.924607
9	-1891.328485
10	-1606.540541
11	-1543.077613
12	-1296.749967
13	-1089.95991
14	-830.225688
15	-780.462404
16	-593.696972
17	-476.933091
18	-278.51172
19	-133.351758

Motif	Log enrichment (profile)	Log enrichment (count)
1	-40207.748201	-54024.383719
2	-1595.04051	-1653.178645
3	-1019.701547	-1416.189433
4	-1005.213916	-809.044276
5	-630.3491	-332.138962
6	-329.971283	-327.902643
7	-293.215544	-205.145716
8	-186.066875	-189.837288
9	-160.031236	-174.722936
10	-156.169914	-136.626896
11	-131.661023	-125.930835
12	-117.392488	-125.930835
13	-116.642653	-101.683795
14	-94.262702	-98.04754
15	-89.744826	-67.209118
16	-76.433149	-51.076628
17	-55.168214

Link to results¶

Helper functions¶

Show benchmark motifs¶

Motif	Log enrichment (profile)	Log enrichment (count)
1	-36274.194071	-43764.365037
2	-1334.826157	-2919.228164
3	-902.532045	-1533.12943
4	-494.686175	-891.873076
5	-480.662227	-529.744394
6	-403.301165	-379.140818
7	-269.952124	-304.156683
8	-210.010832	-175.279059
9	-186.444471	-140.754701
10	-184.47794	-127.508451
11	-98.860839	-116.906274
12	-94.303612	-108.931775
13	-89.784146	-101.765257
14	-72.727783
15	-47.06605
16	-46.350615
17	-28.243803

Motif	E-value	PWM
1	0.0
2	5.4e-173
3	7.2e-34
4	7.6e-26
5	1.2e-23
6	2.7e-20
7	5.3e-17
8	0.0018
9	8.7e-06
10	0.044

Motif	E-value (profile)	E-value (count)
1	0.0	0.0
2	7.1e-24	3.5e-43
3	0.0013	1.7e-31
4	13.0	6.3e-11
5	750.0	1.6e-10
6	7200.0	1.4
7	15000.0	5.8
8	29000.0	350.0
9	470000.0	1000.0
10	1000000.0	17000.0

Motif	E-value (profile)	E-value (count)
1	0.0	0.0
2	5.9e-16	7.3e-18
3	5.1e-10	0.00079
4	0.86	23.0
5	2.5	430.0
6	1.6	3600.0
7	17.0	5400.0
8	400.0	74000.0
9	7800.0	130000.0
10	53000.0	150000.0