%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np 
import glob
import os
from collections import OrderedDict
import pickle
import h5py

from matlas.performance_metrics.performance_metrics import plot_performances
# Root directory under which the model outputs and peak files are looked up.
root = "/mnt/lab_data/kundaje/users/msharmin/NSC_ATAC_PEAKS_to_share"

# Gather the AUPRC performance table for the 10 folds of the
# "clb_basset_classification" model; plot=False is passed so that only the
# returned table is used here.
cdf = plot_performances(
    root=root,
    foldcounts=10,
    model_class="clb_basset_classification",
    metric_name="auprc",
    plot=False,
)

# Columns kept for the plots and the report table, in display order.
report_columns = [
    'celltype', 'fold', 'auprc', 'Imbalance ratio',
    'num_positives', 'num_negatives',
    'recall_at_fdr_50', 'recall_at_fdr_20', 'recall_at_fdr_10',
]

# Turn the index into a regular column named 'celltype' (presumably the index
# of `cdf` holds the cell type labels -- confirm against plot_performances),
# then keep only the columns listed above.
df = cdf.reset_index().rename(columns={'index': 'celltype'})[report_columns]

from matplotlib import pyplot as plt
import seaborn as sns

# Two side-by-side panels: AUPRC grouped by cell type (left) and by fold (right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))
ax_cell, ax_fold = axes

# Left panel: one box per cell type.
sns.boxplot(data=df, x="celltype", y="auprc", ax=ax_cell)
# Re-apply the existing tick labels rotated by 90 degrees.
ax_cell.set_xticklabels(ax_cell.get_xticklabels(), rotation=90)
ax_cell.set_title("10-fold Model performances for each celltype")

# Right panel: one box per fold.
sns.boxplot(data=df, x="fold", y="auprc", ax=ax_fold)
ax_fold.set_title("Model performances of celltypes across fold")

fig.show()

from matplotlib import pyplot as plt
# Scatter AUPRC against the imbalance ratio, one point per row of `df`.
imbalance_ratio = df['Imbalance ratio'].values
auprc_scores = df['auprc'].values

plt.scatter(imbalance_ratio, auprc_scores)
plt.title('Dependence of Model performance on Imbalance ratio')
plt.xlabel('Imbalance ratio')
plt.ylabel('auprc')
plt.show()

from vdom.helpers import (h1, p, li, img, div, b, br, ul, img, a, 
                          details, summary,
                          table, thead, th, tr, tbody, td, ol)

from IPython.display import display
from IPython.display import HTML

from matlas.reports import prepare_sorted_table
# Render the performance table as a sortable HTML table placed inside a
# collapsible <details> element, preceded by a short bold caption.

# Column-type hints handed to prepare_sorted_table: 'celltype' is declared as
# text and every column after the first one is declared numeric.
df_coltypes = {'celltype': 'string'}
keep = df.columns.values[1:]
df_coltypes.update({colname: 'number' for colname in keep})

# Stringify the non-celltype columns on a *new* frame rather than assigning
# back into `df`. Writing into `df` (a column subset of another frame) can
# raise pandas' SettingWithCopyWarning, and it would leave the metric columns
# as strings so that the plotting cells could not be re-run afterwards.
table_df = df.astype({colname: str for colname in keep})

html_str = prepare_sorted_table((table_df, df_coltypes))
metadata = HTML(html_str)

# Wrap the table markup in <details> so it is collapsed until clicked.
item = HTML(
    "<details>"
    + summary(b("Click here for Metadata of Model and Performances")).to_html()
    + html_str
    + "</details>"
)

instructions = summary(b("Following link contains a sortable table of deep learning model information for each sample. "))
display(instructions, item)

# Load the peak-count table (tab-separated; the first file column becomes the
# index) and render every remaining column as a plain vdom table.
peak_counts_path = "{}/gw_peaks/peak_counts.txt".format(root)
df = pd.read_csv(peak_counts_path, sep="\t", index_col=0)
keep = df.columns.values

# Header: one <th> per column name.
header_cells = [th(colname) for colname in keep]

# Body: one <tr> per DataFrame row, each cell converted to its string form.
body_rows = [
    tr([td(str(record[colname])) for colname in keep])
    for _, record in df.iterrows()
]

tab = table(thead(header_cells), tbody(body_rows))

display(summary(b("Number of peaks per celltype")))
display(tab)

from vdom.helpers import (b, summary, p, a, details)
from IPython.display import display
import numpy as np
import pandas as pd
from matlas.genome_data import *

# Build an expanded "Motif Reports" section with one hyperlink per report:
# first a link for every cell type listed in the peak-count table, then links
# for the differential-region reports.

# Local import: this cell's own vdom import does not bring in `div`.
from vdom.helpers import div

root = "/mnt/lab_data/kundaje/users/msharmin/NSC_ATAC_PEAKS_to_share"
df = pd.read_csv("{}/gw_peaks/peak_counts.txt".format(root), index_col=0, sep="\t")

# MITRA_HTTP_PREFIX is not defined in this file; presumably it is supplied by
# the star import from matlas.genome_data -- confirm.
items_with_report = []
for celltype in df['celltype'].values:
    mitra_report_dest = "{0}/report/nsc_reports/{1}.html".format(MITRA_HTTP_PREFIX, celltype)
    items_with_report.append(p(a(celltype, href=mitra_report_dest)))

# Each differential report is kept next to its explanatory suffix so the two
# cannot drift out of step, as two position-matched lists could.
differential_reports = [
    ('aNSC_Old', ' (differential regions w.r.t. aNSC_Young)'),
    ('aNSC_Young', ' (differential regions w.r.t. aNSC_Old)'),
    ('qNSC_Old', ' (differential regions w.r.t. qNSC_Young)'),
    ('qNSC_Young', ' (differential regions w.r.t. qNSC_Old)'),
    ('aNSC_Young_Q_A', ' (differential regions w.r.t. qNSC_Young)'),
    ('qNSC_Young_Q_A', ' (differential regions w.r.t. aNSC_Young)'),
]
for celltype, explanation in differential_reports:
    mitra_report_dest = "{0}/report/nsc_reports/{1}.html".format(MITRA_HTTP_PREFIX, celltype)
    items_with_report.append(p(a(celltype + explanation, href=mitra_report_dest)))

# A <details> element may contain only one <summary>, as its first child, so
# the list of links goes into a <div> instead of a second <summary>.
display(
    details(
        summary(b("Motif Reports")),
        div(items_with_report),
        attributes={"open": "true"}
    )
)

celltype	fold	auprc	Imbalance ratio	num_positives	num_negatives	recall_at_fdr_50	recall_at_fdr_20	recall_at_fdr_10
O_Ast	0	0.3947056545307368	0.004738177373241835	18482.0	3882174.0	0.3513147927713451	0.14846878043501785	0.09842008440644953
O_Endo	0	0.20746558015134864	0.001472118822079683	5751.0	3900863.0	0.20431229351417146	0.11284993914101896	0.07233524604416622
O_NPC	0	0.4467214718238568	0.004407262307078148	17195.0	3884320.0	0.4207036929339925	0.1656295434719395	0.10660075603373073
O_aNSC	0	0.44758316826618216	0.00590076425746579	23006.0	3875811.0	0.444014604885682	0.15961053638181344	0.09441015387290272
O_qNSC	0	0.3528951441371785	0.0047252536247581545	18432.0	3882311.0	0.2960611979166667	0.1321072048611111	0.08349609375
Y_Ast	0	0.3524800266722345	0.0050516174740169026	19701.0	3880238.0	0.3226232170955789	0.13684584538855896	0.08908177249885793
Y_Endo	0	0.2116507077950218	0.0018433561986308472	7200.0	3898719.0	0.1897222222222222	0.08166666666666668	0.035277777777777776
Y_NPC	0	0.4541262477647871	0.0058161037106418505	22677.0	3876325.0	0.4374917317105437	0.15456189090267672	0.09851391277505844
Y_aNSC	0	0.4752009472057148	0.004723826496854671	18427.0	3882436.0	0.4492321050632224	0.1782167471644869	0.11526564280675095
Y_qNSC	0	0.32230747741148685	0.004635118669650958	18081.0	3882790.0	0.2681267629002821	0.12195121951219512	0.070681931309109
O_Ast	1	0.4589767967027184	0.005921987465521124	28765.0	4828557.0	0.4340691812967147	0.18901442725534506	0.12018077524769685
O_Endo	1	0.1902003168251522	0.001686720746932301	8209.0	4858631.0	0.194055305152881	0.054574247776830315	0.00048727006943598484
O_NPC	1	0.4737479455052102	0.006067970159602809	29474.0	4827834.0	0.4620682635543191	0.18735156409038475	0.11756124041528128
O_aNSC	1	0.4937825693103644	0.007618694840379473	36981.0	4817000.0	0.491441551066764	0.20369919688488686	0.11811470755252695
O_qNSC	1	0.42595917091386665	0.00616703498256359	29952.0	4826839.0	0.3873197115384616	0.16519764957264954	0.11017628205128203
Y_Ast	1	0.4440966331135148	0.005985655592146915	29072.0	4827873.0	0.4310676940011007	0.19221243808475508	0.12878370941111722
Y_Endo	1	0.24170424939479335	0.0024585665453244893	11961.0	4853069.0	0.2236435080678873	0.10801772427054594	0.057938299473288185
Y_NPC	1	0.49348779512132207	0.008088497263824143	39253.0	4813688.0	0.4911726492242631	0.19835426591598093	0.11395307365042162
Y_aNSC	1	0.5182877710466166	0.006431211793252386	31233.0	4825239.0	0.5059392309416323	0.22489034034514774	0.14350206512342714
Y_qNSC	1	0.3869937599921725	0.006027459984385758	29276.0	4827828.0	0.35059434348954777	0.15534909140592978	0.10206312337751057
O_Ast	2	0.4154704793961865	0.005452944586944694	17409.0	3175178.0	0.41461313113906606	0.15463266126716066	0.07082543511976562
O_Endo	2	0.1711397159276582	0.001939496587743145	6202.0	3191535.0	0.17929700096742987	0.04966139954853273	0.0008061915511125442
O_NPC	2	0.4787647567463306	0.0043575808288982745	13920.0	3180513.0	0.5005028735632184	0.16379310344827586	0.003232758620689655
O_aNSC	2	0.4838998863954862	0.005595161280218507	17863.0	3174717.0	0.5030509992722387	0.18921793651682248	0.0015115042266136704
O_qNSC	2	0.4158461947424184	0.004953744384409813	15819.0	3177523.0	0.4104557810228207	0.0007585814526834819	0.0694102029205386
Y_Ast	2	0.3835057608848144	0.005375299993798093	17161.0	3175405.0	0.3745702464891323	0.0006992599498863702	0.0015733348872443331
Y_Endo	2	0.25665905251496673	0.0018646705133769704	5963.0	3191921.0	0.24517860137514674	0.101291296327352	0.030186147912124768
Y_NPC	2	0.4798619997092476	0.005770342799899009	18421.0	3173937.0	0.4996471418489768	0.000651430432658379	0.059605884588241675
Y_aNSC	2	0.5017257262537616	0.004949124689133596	15805.0	3177689.0	0.5227459664663081	0.2022144890857324	0.08769376779500157
Y_qNSC	2	0.3780638479720128	0.005304769782638178	16937.0	3175850.0	0.3690145834563382	0.12918462537639488	0.06429710102143238
O_Ast	3	0.408679137144636	0.004961845160081514	26747.0	5363788.0	0.37297640856918535	0.15627920888323926	0.09589860545107863
O_Endo	3	0.2029852200401112	0.001469833500980043	7936.0	5391315.0	0.19909274193548387	0.09627016129032258	0.04649697580645161
O_NPC	3	0.4573198122875909	0.004841462102635064	26101.0	5365039.0	0.4459599249070917	0.17041492663116356	0.09137580935596336
O_aNSC	3	0.4589631712082473	0.006178743304118711	33290.0	5354537.0	0.4477020126164013	0.16948032442174826	0.09543406428356864
O_qNSC	3	0.3806734322292474	0.005107785365412025	27532.0	5362671.0	0.3228969925904402	0.14194391980241175	0.09251053319773354
Y_Ast	3	0.35021868668248696	0.005679872751253085	30606.0	5357895.0	0.3131738874730445	0.1370973011827746	0.08468927661242892
Y_Endo	3	0.2112790812502857	0.0020849756329820394	11254.0	5386411.0	0.19157632841656302	0.0698418340145726	0.026390616669628573
Y_NPC	3	0.4565318038653352	0.006329572081503193	34101.0	5353467.0	0.4442684965250286	0.16152019002375298	0.09501187648456057
Y_aNSC	3	0.4832899140308371	0.0053160333889865925	28653.0	5361268.0	0.4726904687118277	0.18734512965483546	0.11569469165532405
Y_qNSC	3	0.34148836162317	0.005432965408264713	29280.0	5360042.0	0.29689207650273225	0.12636612021857924	0.07653688524590165
O_Ast	4	0.44560688488167466	0.00352474678795836	24054.0	6800266.0	0.4109919348133367	0.20387461544857405	0.0018707907208780246
O_Endo	4	0.19809002808462065	0.0012761608958128368	8718.0	6822709.0	0.21461344345033265	0.03808212892865336	0.00034411562284927734
O_NPC	4	0.4788358943081775	0.003712319763893357	25333.0	6798702.0	0.4887695890735405	0.20037105751391465	0.11226463506098763
O_aNSC	4	0.4846445200561863	0.004641892704152092	31663.0	6789477.0	0.492688627104191	0.2052869279600796	0.11767678362757793
O_qNSC	4	0.4037007179607099	0.0037124622990829274	25333.0	6798440.0	0.35495203884261634	0.0004736904432953065	0.0017763391623573997
Y_Ast	4	0.39361727219373344	0.0038379695914153296	26187.0	6796952.0	0.3584221178447321	0.1813113376866384	0.1079161415969756
Y_Endo	4	0.24305818406661936	0.0016009444913674177	10935.0	6819408.0	0.2385916780978509	0.10242341106538637	nan
Y_NPC	4	0.4793270842930175	0.004911874799315584	33501.0	6786909.0	0.4806423688845109	0.1898450792513656	0.10235515357750516
Y_aNSC	4	0.5042181814593383	0.004057640330819703	27685.0	6795246.0	0.5008488351092649	0.22105833483836013	0.0013003431461080007
Y_qNSC	4	0.3785334217743249	0.003735242691854348	25488.0	6798165.0	0.34247489014438165	0.16635279347143753	0.0017655367231638418
O_Ast	5	0.4588828644273277	0.005372230508782396	27924.0	5169917.0	0.4381177481736141	0.1791648760922504	0.10281478298238077
O_Endo	5	0.22090275526506373	0.0015054216496110467	7839.0	5199340.0	0.2384232682740145	0.0020410766679423396	0.001148105625717566
O_NPC	5	0.4637102301154671	0.005368168234025588	27905.0	5170330.0	0.4526787314101415	0.1636982619602222	0.09571761333094428
O_aNSC	5	0.4505363571033397	0.00774240439181115	40203.0	5152370.0	0.4459119966171679	0.1599880605924931	0.08529214237743453
O_qNSC	5	0.42260797640226666	0.00561291979770246	29172.0	5168123.0	0.3825928972987797	0.15357191827780062	0.092280268750857
Y_Ast	5	0.4166721876006429	0.006011887131717064	31238.0	5164801.0	0.3965042576349318	0.1754273641078174	0.1022792752416928
Y_Endo	5	0.2652795097351448	0.00229982970770588	11971.0	5193197.0	0.2666443906106424	0.10191295631108513	0.003007267563277922
Y_NPC	5	0.4657555701198598	0.00743323203876279	38604.0	5154830.0	0.4603927054191276	0.15594238938970054	0.0870635167340172
Y_aNSC	5	0.4931597735759863	0.006251635848562764	32484.0	5163596.0	0.4949513606698682	0.17534786356360055	0.09669375692648688
Y_qNSC	5	0.3786649281361537	0.005733710167281639	29798.0	5167186.0	0.33915027854218405	0.1488690516142023	0.08789180481911538
O_Ast	6	0.4587931950081393	0.005328464873157407	38380.0	7164445.0	0.4423658155289213	0.19666492965085985	0.11912454403335072
O_Endo	6	0.22439000953230687	0.0015558434283216445	11226.0	7204153.0	0.22715125601282735	0.07803313735970069	0.03340459647247461
O_NPC	6	0.4852957465487924	0.004860783204988448	35021.0	7169785.0	0.4849661631592473	0.18380400331229835	0.10448016904143227
O_aNSC	6	0.4970218046591785	0.006140312259612413	44214.0	7156397.0	0.4940516578459312	0.19604650110824626	0.1048536662595558
O_qNSC	6	0.4342289822265458	0.00525662124089247	37864.0	7165242.0	0.40714134798225227	0.17282907246989224	0.10109866892034652
Y_Ast	6	0.4177562721947519	0.005410289797481382	38966.0	7163236.0	0.40550736539547294	0.17684648154801622	0.09885541241081967
Y_Endo	6	0.260963088915025	0.002175871710770295	15695.0	7197506.0	0.2436444727620261	0.0991398534565148	0.04071360305829882
Y_NPC	6	0.4901187827959517	0.006369479830443502	45860.0	7154100.0	0.4848451809856084	0.1825119930222416	0.10401221107719144
Y_aNSC	6	0.516397042113368	0.0053229068663443934	38342.0	7164865.0	0.5193521464712326	0.2076052370768348	0.1171300401648323
Y_qNSC	6	0.3968871899757665	0.005339212200676742	38457.0	7164291.0	0.3712978131419508	0.16795381855058894	0.0985256260238708
O_Ast	7	0.4463703593038915	0.004136809680292137	32047.0	7714744.0	0.4196648672262615	0.18610166318220114	0.09436140668393296
O_Endo	7	0.2135610816621017	0.0010545436639856002	8181.0	7749677.0	0.19471947194719472	0.048038137147048035	0.0187018701870187
O_NPC	7	0.4531975411175309	0.0046078498964169795	35690.0	7709788.0	0.4253572429251891	0.00011207621182404035	0.08498178761557859
O_aNSC	7	0.4963817829691845	0.005537042347027081	42869.0	7699351.0	0.5009680655018778	0.184468963586741	0.09039165830786816
O_qNSC	7	0.4128454390100082	0.004226685200637811	32742.0	7713754.0	0.3628672652861768	0.16113859874167735	0.08163826278174821
Y_Ast	7	0.42245146702727204	0.004231091025065229	32775.0	7713454.0	0.3962166285278413	0.1820289855072464	0.09617086193745232
Y_Endo	7	0.2486238815948653	0.001674672118591749	12988.0	7742560.0	0.22936556821681556	0.07668617185093933	0.03464736680012319
Y_NPC	7	0.4680996751542072	0.0061657918961257096	47724.0	7692401.0	0.4355670103092784	0.16670857430223787	0.08467437767161176
Y_aNSC	7	0.5010412915237656	0.00501252517916772	38817.0	7705184.0	0.4919236417033774	0.1937553133936162	0.10163072880438984
Y_qNSC	7	0.3900158290288029	0.003906337223837835	30265.0	7717402.0	0.3577730051214274	0.16018503221543035	0.07285643482570626
O_Ast	8	0.37839417097871697	0.0050815305993711524	24811.0	4857773.0	0.3268308411591633	0.11253073233646367	0.03845068719519568
O_Endo	8	0.16928979497564067	0.0014150726397821267	6921.0	4883994.0	0.14708857101574918	0.036555411067764776	0.011703511053315995
O_NPC	8	0.43591008564618666	0.005602108923204115	27348.0	4854385.0	0.409755740821998	0.14147286821705427	0.04186777826532104
O_aNSC	8	0.4514840727994896	0.007056043511619217	34423.0	4844090.0	0.4498445806582808	0.14269529093919764	0.05385933823315806
O_qNSC	8	0.3373706107296103	0.005455539493718233	26633.0	4855194.0	0.26410843690158825	0.08966319978973454	0.03818570945819097
Y_Ast	8	0.3454867772518461	0.005417604356534782	26447.0	4855231.0	0.3059326199568949	0.107687072257723	0.03743335728059893
Y_Endo	8	0.18290228560987612	0.0021707365163984387	10613.0	4878511.0	0.1521718646942429	0.029021011966456237	0.011872232168095732
Y_NPC	8	0.45924197123865	0.007205669257895191	35152.0	4843229.0	0.4512403277196176	0.1477014110150205	0.03538916704597178
Y_aNSC	8	0.4590111140990121	0.005992277467012458	29247.0	4851535.0	0.43197592915512706	0.1498957157999111	0.054159400964201465
Y_qNSC	8	0.3100267541135864	0.005461305256257936	26661.0	4855139.0	0.2455271745245865	0.09069427253291323	0.03375717339934736
O_Ast	9	0.4399255841485141	0.005721437580102921	29682.0	5158175.0	0.4228825550838892	0.1766053500437976	0.10316016440940637
O_Endo	9	0.2220144799316899	0.001640915888486981	8529.0	5189178.0	0.2275765036932817	0.08816977371321373	0.0021104467112205423
O_NPC	9	0.4530553989953434	0.005280520675294613	27402.0	5161859.0	0.4242756003211444	0.16349171593314354	0.10181738559229253
O_aNSC	9	0.4724035580677559	0.0068009225512089206	35267.0	5150353.0	0.4745512802336462	0.1750928630164176	0.10012192701392236
O_qNSC	9	0.4144646349758439	0.005543944102611151	28764.0	5159600.0	0.3744958976498401	0.14990960923376442	0.09355444305381727
Y_Ast	9	0.4074023375709497	0.005715484162229536	29650.0	5158012.0	0.38239460370994943	0.16688026981450252	0.1011804384485666
Y_Endo	9	0.2238704309146328	0.002576915281504127	13388.0	5181971.0	0.2061547654616074	0.089632506722438	0.03361219002091425
Y_NPC	9	0.4781505700917706	0.006766066080600801	35088.0	5150791.0	0.4724407204742362	0.15857273141814865	0.09003077975376197
Y_aNSC	9	0.4961077320035375	0.00576880857304822	29929.0	5158144.0	0.4884226001536971	0.19562965685455574	0.1129339436666778
Y_qNSC	9	0.3630583261710111	0.006011140495396168	31181.0	5156021.0	0.3291106763734325	0.13665373143901735	0.08312754562073058

celltype	narrow peaks	idr peaks
O_Astrocyte	68576	38414
O_Endo	20237	7288
O_NPC	69025	33119
O_aNSC	88383	47629
O_qNSC	69525	40389
Y_Astrocyte	71458	38000
Y_Endo	28280	13106
Y_NPC	91406	49465
Y_aNSC	74840	40636
Y_qNSC	70705	35342