In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np 
import glob
import os
from collections import OrderedDict
import pickle
import h5py
In [2]:
from matlas.performance_metrics.performance_metrics import plot_performances
# Directory holding the shared NSC ATAC peak data and model outputs.
root = "/mnt/lab_data/kundaje/users/msharmin/NSC_ATAC_PEAKS_to_share"

# Fetch the performance table for the classification model.
# NOTE(review): plot=False presumably skips drawing and only returns the
# metrics frame, and foldcounts=10 presumably means ten folds -- confirm
# against matlas.performance_metrics.
cdf = plot_performances(root=root,
                        foldcounts=10,
                        model_class="clb_basset_classification",
                        metric_name="auprc",
                        plot=False)

# Columns carried into the report, in display order.
report_columns = [
    'celltype', 'fold', 'auprc',
    'Imbalance ratio', 'num_positives', 'num_negatives',
    'recall_at_fdr_50', 'recall_at_fdr_20', 'recall_at_fdr_10',
]

# Move the index into a regular column named 'celltype', then keep only the
# report columns.
df = cdf.reset_index().rename(columns={'index': 'celltype'})[report_columns]
In [4]:
from matplotlib import pyplot as plt
import seaborn as sns

# Two side-by-side panels: auprc grouped by celltype (left) and by fold (right).
fig, axes = plt.subplots(1, 2, figsize=(16,6))
ax_cell = axes[0]
ax_fold = axes[1]

# Left panel: one box per celltype, built from that celltype's rows in df.
sns.boxplot(x="celltype", y='auprc', data=df, ax=ax_cell)
# Rotate the celltype labels so they do not overlap.
ax_cell.set_xticklabels(ax_cell.get_xticklabels(), rotation=90)
ax_cell.set_title("10-fold Model performances for each celltype")

# Right panel: one box per fold, built from that fold's rows in df.
sns.boxplot(x="fold", y='auprc', data=df, ax=ax_fold)
ax_fold.set_title("Model performances of celltypes across fold")

# Use plt.show() rather than fig.show(): under the notebook's inline (non-GUI)
# backend, Figure.show() only emits a "cannot show the figure" warning.
plt.show()
In [5]:
from matplotlib import pyplot as plt
# Scatter auprc against the imbalance ratio, one point per row of df.
imbalance_ratio = df['Imbalance ratio'].values
auprc_scores = df['auprc'].values

plt.scatter(imbalance_ratio, auprc_scores)
plt.title('Dependence of Model performance on Imbalance ratio')
plt.xlabel('Imbalance ratio')
plt.ylabel('auprc')
plt.show()
In [8]:
from vdom.helpers import (h1, p, li, img, div, b, br, ul, img, a, 
                          details, summary,
                          table, thead, th, tr, tbody, td, ol)

from IPython.display import display
from IPython.display import HTML

from matlas.reports import prepare_sorted_table
# Column-type map handed to prepare_sorted_table: the first column (celltype)
# is tagged 'string', every remaining column is tagged 'number'.
df_coltypes = {'celltype': 'string'}
keep = df.columns.values[1:]
df_coltypes.update({colname: 'number' for colname in keep})

# Stringify the non-celltype columns on a COPY. The previous version assigned
# the string columns back into `df`, which silently turned the shared frame's
# numeric columns into text and broke re-running the plotting cells.
table_df = df.astype({colname: str for colname in keep})

# prepare_sorted_table receives a single (frame, column-type map) tuple.
# NOTE(review): presumably returns an HTML string for a sortable table -- the
# result is concatenated into HTML below; confirm against matlas.reports.
html_str = prepare_sorted_table((table_df, df_coltypes))
metadata = HTML(html_str)

# Wrap the table in a collapsible <details> element with a bold summary line.
item = HTML("<details>" + summary(b("Click here for Metadata of Model and Performances")).to_html() + html_str + "</details>")

instructions = summary(b("Following link contains a sortable table of deep learning model information for each sample. "))
display(instructions, item)
Following link contains a sortable table of deep learning model information for each sample.
Click here for Metadata of Model and Performances
celltypefoldauprcImbalance rationum_positivesnum_negativesrecall_at_fdr_50recall_at_fdr_20recall_at_fdr_10
O_Ast00.39470565453073680.00473817737324183518482.03882174.00.35131479277134510.148468780435017850.09842008440644953
O_Endo00.207465580151348640.0014721188220796835751.03900863.00.204312293514171460.112849939141018960.07233524604416622
O_NPC00.44672147182385680.00440726230707814817195.03884320.00.42070369293399250.16562954347193950.10660075603373073
O_aNSC00.447583168266182160.0059007642574657923006.03875811.00.4440146048856820.159610536381813440.09441015387290272
O_qNSC00.35289514413717850.004725253624758154518432.03882311.00.29606119791666670.13210720486111110.08349609375
Y_Ast00.35248002667223450.005051617474016902619701.03880238.00.32262321709557890.136845845388558960.08908177249885793
Y_Endo00.21165070779502180.00184335619863084727200.03898719.00.18972222222222220.081666666666666680.035277777777777776
Y_NPC00.45412624776478710.005816103710641850522677.03876325.00.43749173171054370.154561890902676720.09851391277505844
Y_aNSC00.47520094720571480.00472382649685467118427.03882436.00.44923210506322240.17821674716448690.11526564280675095
Y_qNSC00.322307477411486850.00463511866965095818081.03882790.00.26812676290028210.121951219512195120.070681931309109
O_Ast10.45897679670271840.00592198746552112428765.04828557.00.43406918129671470.189014427255345060.12018077524769685
O_Endo10.19020031682515220.0016867207469323018209.04858631.00.1940553051528810.0545742477768303150.00048727006943598484
O_NPC10.47374794550521020.00606797015960280929474.04827834.00.46206826355431910.187351564090384750.11756124041528128
O_aNSC10.49378256931036440.00761869484037947336981.04817000.00.4914415510667640.203699196884886860.11811470755252695
O_qNSC10.425959170913866650.0061670349825635929952.04826839.00.38731971153846160.165197649572649540.11017628205128203
Y_Ast10.44409663311351480.00598565559214691529072.04827873.00.43106769400110070.192212438084755080.12878370941111722
Y_Endo10.241704249394793350.002458566545324489311961.04853069.00.22364350806788730.108017724270545940.057938299473288185
Y_NPC10.493487795121322070.00808849726382414339253.04813688.00.49117264922426310.198354265915980930.11395307365042162
Y_aNSC10.51828777104661660.00643121179325238631233.04825239.00.50593923094163230.224890340345147740.14350206512342714
Y_qNSC10.38699375999217250.00602745998438575829276.04827828.00.350594343489547770.155349091405929780.10206312337751057
O_Ast20.41547047939618650.00545294458694469417409.03175178.00.414613131139066060.154632661267160660.07082543511976562
O_Endo20.17113971592765820.0019394965877431456202.03191535.00.179297000967429870.049661399548532730.0008061915511125442
O_NPC20.47876475674633060.004357580828898274513920.03180513.00.50050287356321840.163793103448275860.003232758620689655
O_aNSC20.48389988639548620.00559516128021850717863.03174717.00.50305099927223870.189217936516822480.0015115042266136704
O_qNSC20.41584619474241840.00495374438440981315819.03177523.00.41045578102282070.00075858145268348190.0694102029205386
Y_Ast20.38350576088481440.00537529999379809317161.03175405.00.37457024648913230.00069925994988637020.0015733348872443331
Y_Endo20.256659052514966730.00186467051337697045963.03191921.00.245178601375146740.1012912963273520.030186147912124768
Y_NPC20.47986199970924760.00577034279989900918421.03173937.00.49964714184897680.0006514304326583790.059605884588241675
Y_aNSC20.50172572625376160.00494912468913359615805.03177689.00.52274596646630810.20221448908573240.08769376779500157
Y_qNSC20.37806384797201280.00530476978263817816937.03175850.00.36901458345633820.129184625376394880.06429710102143238
O_Ast30.4086791371446360.00496184516008151426747.05363788.00.372976408569185350.156279208883239260.09589860545107863
O_Endo30.20298522004011120.0014698335009800437936.05391315.00.199092741935483870.096270161290322580.04649697580645161
O_NPC30.45731981228759090.00484146210263506426101.05365039.00.44595992490709170.170414926631163560.09137580935596336
O_aNSC30.45896317120824730.00617874330411871133290.05354537.00.44770201261640130.169480324421748260.09543406428356864
O_qNSC30.38067343222924740.00510778536541202527532.05362671.00.32289699259044020.141943919802411750.09251053319773354
Y_Ast30.350218686682486960.00567987275125308530606.05357895.00.31317388747304450.13709730118277460.08468927661242892
Y_Endo30.21127908125028570.002084975632982039411254.05386411.00.191576328416563020.06984183401457260.026390616669628573
Y_NPC30.45653180386533520.00632957208150319334101.05353467.00.44426849652502860.161520190023752980.09501187648456057
Y_aNSC30.48328991403083710.005316033388986592528653.05361268.00.47269046871182770.187345129654835460.11569469165532405
Y_qNSC30.341488361623170.00543296540826471329280.05360042.00.296892076502732250.126366120218579240.07653688524590165
O_Ast40.445606884881674660.0035247467879583624054.06800266.00.41099193481333670.203874615448574050.0018707907208780246
O_Endo40.198090028084620650.00127616089581283688718.06822709.00.214613443450332650.038082128928653360.00034411562284927734
O_NPC40.47883589430817750.00371231976389335725333.06798702.00.48876958907354050.200371057513914650.11226463506098763
O_aNSC40.48464452005618630.00464189270415209231663.06789477.00.4926886271041910.20528692796007960.11767678362757793
O_qNSC40.40370071796070990.003712462299082927425333.06798440.00.354952038842616340.00047369044329530650.0017763391623573997
Y_Ast40.393617272193733440.003837969591415329626187.06796952.00.35842211784473210.18131133768663840.1079161415969756
Y_Endo40.243058184066619360.001600944491367417710935.06819408.00.23859167809785090.10242341106538637nan
Y_NPC40.47932708429301750.00491187479931558433501.06786909.00.48064236888451090.18984507925136560.10235515357750516
Y_aNSC40.50421818145933830.00405764033081970327685.06795246.00.50084883510926490.221058334838360130.0013003431461080007
Y_qNSC40.37853342177432490.00373524269185434825488.06798165.00.342474890144381650.166352793471437530.0017655367231638418
O_Ast50.45888286442732770.00537223050878239627924.05169917.00.43811774817361410.17916487609225040.10281478298238077
O_Endo50.220902755265063730.00150542164961104677839.05199340.00.23842326827401450.00204107666794233960.001148105625717566
O_NPC50.46371023011546710.00536816823402558827905.05170330.00.45267873141014150.16369826196022220.09571761333094428
O_aNSC50.45053635710333970.0077424043918111540203.05152370.00.44591199661716790.15998806059249310.08529214237743453
O_qNSC50.422607976402266660.0056129197977024629172.05168123.00.38259289729877970.153571918277800620.092280268750857
Y_Ast50.41667218760064290.00601188713171706431238.05164801.00.39650425763493180.17542736410781740.1022792752416928
Y_Endo50.26527950973514480.0022998297077058811971.05193197.00.26664439061064240.101912956311085130.003007267563277922
Y_NPC50.46575557011985980.0074332320387627938604.05154830.00.46039270541912760.155942389389700540.0870635167340172
Y_aNSC50.49315977357598630.00625163584856276432484.05163596.00.49495136066986820.175347863563600550.09669375692648688
Y_qNSC50.37866492813615370.00573371016728163929798.05167186.00.339150278542184050.14886905161420230.08789180481911538
O_Ast60.45879319500813930.00532846487315740738380.07164445.00.44236581552892130.196664929650859850.11912454403335072
O_Endo60.224390009532306870.001555843428321644511226.07204153.00.227151256012827350.078033137359700690.03340459647247461
O_NPC60.48529574654879240.00486078320498844835021.07169785.00.48496616315924730.183804003312298350.10448016904143227
O_aNSC60.49702180465917850.00614031225961241344214.07156397.00.49405165784593120.196046501108246260.1048536662595558
O_qNSC60.43422898222654580.0052566212408924737864.07165242.00.407141347982252270.172829072469892240.10109866892034652
Y_Ast60.41775627219475190.00541028979748138238966.07163236.00.405507365395472940.176846481548016220.09885541241081967
Y_Endo60.2609630889150250.00217587171077029515695.07197506.00.24364447276202610.09913985345651480.04071360305829882
Y_NPC60.49011878279595170.00636947983044350245860.07154100.00.48484518098560840.18251199302224160.10401221107719144
Y_aNSC60.5163970421133680.005322906866344393438342.07164865.00.51935214647123260.20760523707683480.1171300401648323
Y_qNSC60.39688718997576650.00533921220067674238457.07164291.00.37129781314195080.167953818550588940.0985256260238708
O_Ast70.44637035930389150.00413680968029213732047.07714744.00.41966486722626150.186101663182201140.09436140668393296
O_Endo70.21356108166210170.00105454366398560028181.07749677.00.194719471947194720.0480381371470480350.0187018701870187
O_NPC70.45319754111753090.004607849896416979535690.07709788.00.42535724292518910.000112076211824040350.08498178761557859
O_aNSC70.49638178296918450.00553704234702708142869.07699351.00.50096806550187780.1844689635867410.09039165830786816
O_qNSC70.41284543901000820.00422668520063781132742.07713754.00.36286726528617680.161138598741677350.08163826278174821
Y_Ast70.422451467027272040.00423109102506522932775.07713454.00.39621662852784130.18202898550724640.09617086193745232
Y_Endo70.24862388159486530.00167467211859174912988.07742560.00.229365568216815560.076686171850939330.03464736680012319
Y_NPC70.46809967515420720.006165791896125709647724.07692401.00.43556701030927840.166708574302237870.08467437767161176
Y_aNSC70.50104129152376560.0050125251791677238817.07705184.00.49192364170337740.19375531339361620.10163072880438984
Y_qNSC70.39001582902880290.00390633722383783530265.07717402.00.35777300512142740.160185032215430350.07285643482570626
O_Ast80.378394170978716970.005081530599371152424811.04857773.00.32683084115916330.112530732336463670.03845068719519568
O_Endo80.169289794975640670.00141507263978212676921.04883994.00.147088571015749180.0365554110677647760.011703511053315995
O_NPC80.435910085646186660.00560210892320411527348.04854385.00.4097557408219980.141472868217054270.04186777826532104
O_aNSC80.45148407279948960.00705604351161921734423.04844090.00.44984458065828080.142695290939197640.05385933823315806
O_qNSC80.33737061072961030.00545553949371823326633.04855194.00.264108436901588250.089663199789734540.03818570945819097
Y_Ast80.34548677725184610.00541760435653478226447.04855231.00.30593261995689490.1076870722577230.03743335728059893
Y_Endo80.182902285609876120.002170736516398438710613.04878511.00.15217186469424290.0290210119664562370.011872232168095732
Y_NPC80.459241971238650.00720566925789519135152.04843229.00.45124032771961760.14770141101502050.03538916704597178
Y_aNSC80.45901111409901210.00599227746701245829247.04851535.00.431975929155127060.14989571579991110.054159400964201465
Y_qNSC80.31002675411358640.00546130525625793626661.04855139.00.24552717452458650.090694272532913230.03375717339934736
O_Ast90.43992558414851410.00572143758010292129682.05158175.00.42288255508388920.17660535004379760.10316016440940637
O_Endo90.22201447993168990.0016409158884869818529.05189178.00.22757650369328170.088169773713213730.0021104467112205423
O_NPC90.45305539899534340.00528052067529461327402.05161859.00.42427560032114440.163491715933143540.10181738559229253
O_aNSC90.47240355806775590.006800922551208920635267.05150353.00.47455128023364620.17509286301641760.10012192701392236
O_qNSC90.41446463497584390.00554394410261115128764.05159600.00.37449589764984010.149909609233764420.09355444305381727
Y_Ast90.40740233757094970.00571548416222953629650.05158012.00.382394603709949430.166880269814502520.1011804384485666
Y_Endo90.22387043091463280.00257691528150412713388.05181971.00.20615476546160740.0896325067224380.03361219002091425
Y_NPC90.47815057009177060.00676606608060080135088.05150791.00.47244072047423620.158572731418148650.09003077975376197
Y_aNSC90.49610773200353750.0057688085730482229929.05158144.00.48842260015369710.195629656854555740.1129339436666778
Y_qNSC90.36305832617101110.00601114049539616831181.05156021.00.32911067637343250.136653731439017350.08312754562073058
In [9]:
# Load the tab-separated peak-count table; the file's first column becomes the index.
df = pd.read_csv("{}/gw_peaks/peak_counts.txt".format(root), index_col=0, sep="\t")
keep = df.columns.values

# Header: one <th> per column name.
header_cells = [th(colname) for colname in keep]

# Body: one <tr> per row, each cell holding the stringified value.
body_rows = [
    tr([td(str(row[colname])) for colname in keep])
    for i, row in df.iterrows()
]

tab = table(thead(header_cells), tbody(body_rows))

display(summary(b("Number of peaks per celltype")))
display(tab)
Number of peaks per celltype
celltypenarrow peaksidr peaks
O_Astrocyte6857638414
O_Endo202377288
O_NPC6902533119
O_aNSC8838347629
O_qNSC6952540389
Y_Astrocyte7145838000
Y_Endo2828013106
Y_NPC9140649465
Y_aNSC7484040636
Y_qNSC7070535342
In [3]:
from vdom.helpers import (b, summary, p, a, details)
from IPython.display import display
import numpy as np
import pandas as pd
from matlas.genome_data import *

# Directory holding the shared NSC ATAC peak data; the peak-count table is
# re-read here so this cell does not depend on the table-rendering cell.
root = "/mnt/lab_data/kundaje/users/msharmin/NSC_ATAC_PEAKS_to_share"
df = pd.read_csv("{}/gw_peaks/peak_counts.txt".format(root), index_col=0, sep="\t")

# One link per celltype listed in the peak-count table.
# MITRA_HTTP_PREFIX is not defined in this notebook; it presumably comes from
# the star import of matlas.genome_data -- TODO confirm.
items_with_report = []
for celltype in df['celltype'].values:
    mitra_report_dest = "{0}/report/nsc_reports/{1}.html".format(MITRA_HTTP_PREFIX, celltype)
    items_with_report.append(p(a(celltype, href=mitra_report_dest)))

# Additional reports, each shown with a note naming its reference condition.
# The two lists are positionally aligned and paired with zip rather than by
# manual indexing.
explanations = [' (differential regions w.r.t. aNSC_Young)', ' (differential regions w.r.t. aNSC_Old)',
                ' (differential regions w.r.t. qNSC_Young)', ' (differential regions w.r.t. qNSC_Old)',
                ' (differential regions w.r.t. qNSC_Young)', ' (differential regions w.r.t. aNSC_Young)']
differential_reports = ['aNSC_Old', 'aNSC_Young',
                        'qNSC_Old', 'qNSC_Young',
                        'aNSC_Young_Q_A', 'qNSC_Young_Q_A']
for celltype, explanation in zip(differential_reports, explanations):
    mitra_report_dest = "{0}/report/nsc_reports/{1}.html".format(MITRA_HTTP_PREFIX, celltype)
    items_with_report.append(p(a(celltype + explanation, href=mitra_report_dest)))

# A <details> element may hold only one <summary>, as its first child. The
# links are therefore passed as direct children instead of being wrapped in a
# second <summary>.
display(
    details(
        summary(b("Motif Reports")),
        *items_with_report,
        attributes={"open":"true"}
    )
)
In [ ]: