In [ ]:
 
In [ ]:
 

Total counts

In [ ]:
# One square log-log scatter per task: total counts of replicate 0 against
# replicate 1. Each figure is written out as both PDF and PNG.
for task in tasks:
    if task == 'Klf4':
        continue  # this task is not plotted
    fig, ax = plt.subplots(figsize=get_figsize(frac=0.25, aspect=1))

    # pos + neg tracks, summed along axis 1, plus a pseudocount of 1 so that
    # zero totals remain representable on the log axes.
    totals_rep0, totals_rep1 = (
        1 + sum([counts[f'/{task}/{rep}/pos'], counts[f'/{task}/{rep}/neg']]).sum(1)
        for rep in ('rep0', 'rep1')
    )

    # Same fixed window on both axes, with a faint y = x reference line.
    limits = [10, 1e4]
    ax.set_ylim(limits)
    ax.set_xlim(limits)
    ax.plot(limits, limits, c='grey', alpha=0.2)

    regression_eval(totals_rep0, totals_rep1, alpha=.1, task=task, ax=ax, loglog=True)

    # Major ticks at powers of ten only; minor ticks switched off.
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_locator(ticker.LogLocator(base=10.0, numticks=4))
    plt.minorticks_off()

    # save the figure
    out_dir = f"{figures}/scatter-replicates"
    os.makedirs(out_dir, exist_ok=True)
    for ext in ("pdf", "png"):
        fig.savefig(f"{out_dir}/{task}.{ext}")
In [ ]:
# Combined figure: one log-log panel per task (replicate 0 vs replicate 1 total
# counts) laid out in a single row with shared axes.
#
# 'Klf4' is filtered out *before* pairing tasks with axes. Zipping the full task
# list against the panels and skipping 'Klf4' inside the loop would leave its
# panel blank and silently drop the last task whenever 'Klf4' is not last.
plot_tasks = [t for t in tasks if t != 'Klf4']

# squeeze=False keeps `axes` an array even for a single panel; ravel() turns the
# (1, n) grid into the flat sequence the loop below expects.
fig, axes = plt.subplots(1, len(plot_tasks),
                         figsize=get_figsize(frac=.75, aspect=1/len(plot_tasks)),
                         sharex=True, sharey=True, squeeze=False)
axes = axes.ravel()

xrange = [10, 1e4]  # common axis window for every panel
for i, (task, ax) in enumerate(zip(plot_tasks, axes)):
    # pos + neg tracks, summed along axis 1, plus a pseudocount of 1
    rep0 = 1+sum([counts[f'/{task}/rep0/pos'], counts[f'/{task}/rep0/neg']]).sum(1)
    rep1 = 1+sum([counts[f'/{task}/rep1/pos'], counts[f'/{task}/rep1/neg']]).sum(1)

    ax.set_ylim(xrange)
    ax.set_xlim(xrange)
    ax.plot(xrange, xrange, c='grey', alpha=0.2)  # y = x reference line

    regression_eval(rep0,  rep1, alpha=.1, task=task, ax=ax, loglog=True)
    ax.xaxis.set_major_locator(ticker.LogLocator(base=10.0, numticks=4))
    ax.yaxis.set_major_locator(ticker.LogLocator(base=10.0, numticks=4))
    if i > 0:
        # Only the left-most panel keeps its y-axis label
        ax.set_ylabel("")

fig.subplots_adjust(wspace=0)
# Switch minor ticks off on every panel explicitly, after all panels are drawn,
# instead of relying on pyplot's notion of the current axes.
for panel in axes:
    panel.minorticks_off()

# Save the figure (create the directory here so the cell does not depend on an
# earlier cell having done so)
os.makedirs(f"{figures}/scatter-replicates", exist_ok=True)
fig.savefig(f"{figures}/scatter-replicates/all.pdf")
fig.savefig(f"{figures}/scatter-replicates/all.png")

Per-base counts log10(counts + 1)

In [ ]:
# HoloViews wrappers around datashader operations (shade, spread, ...)
import holoviews.operation.datashader as hd
# Colormap applied by subsequent hd.shade(...) calls: light blue -> dark blue
hd.shade.cmap=["lightblue", "darkblue"]
# Load both plotting backends. The matplotlib renderer is what the cells below
# use to export PDFs; bokeh is listed first (presumably making it the default
# for inline display -- confirm against the HoloViews docs).
hv.extension("bokeh", "matplotlib")
In [ ]:
import datashader as dsh
import datashader.transfer_functions as tf
In [ ]:
# Per-base replicate concordance for one task, shown as a datashaded density.
# NOTE: `task` is also read by the following predicted-vs-observed cell.
task = 'Sox2'

# pos + neg tracks, flattened to 1-D, then log10(1 + counts)
rep0 = np.log10(1+sum([counts[f'/{task}/rep0/pos'], counts[f'/{task}/rep0/neg']]).ravel())
rep1 = np.log10(1+sum([counts[f'/{task}/rep1/pos'], counts[f'/{task}/rep1/neg']]).ravel())

df = pd.DataFrame({"rep0": rep0, "rep1": rep1})
# Aggregate the points onto a 300x300 grid, shade it, and spread by 2 px
canvas = dsh.Canvas(plot_width=300, plot_height=300)
fig = hd.spread(hd.shade(hv.Image(canvas.points(df, 'rep0' ,'rep1'))), px=2)

# The renderer writes straight to "<basename>.pdf", so the target directory has
# to exist beforehand.
os.makedirs(f"{figures}/per-base-scatter-replicates", exist_ok=True)
hv.Store.renderers['matplotlib'].save(fig, f"{figures}/per-base-scatter-replicates/{task}", 'pdf')
fig
In [ ]:
# Predicted vs. observed per-base counts for the current `task` (set in the
# previous cell), as two side-by-side datashaded densities.
# Shape assumption (TODO confirm): profile arrays are (regions, positions, strands)
# and the counts head is (regions, strands).
ypc = y_pred[ds.task2idx(task, 'counts')]
ypp = softmax(y_pred[ds.task2idx(task, 'profile')])

# Observed per-base counts (last axis summed), log10(1 + x). Shared by both panels.
y_true_profile =  np.log10(1+y_true[f'profile/{task}'].sum(axis=-1).ravel())

# Panel a: predicted profile shape scaled by the predicted total counts.
# exp(ypc) - 1 presumably inverts a log(1 + x) transform on the counts head -- TODO confirm.
y_pred_profile =  np.log10(1+(ypp * (np.exp(ypc) - 1 )[:, np.newaxis]).sum(axis=-1).ravel())

df = pd.DataFrame({"Predicted": y_pred_profile, "Observed": y_true_profile})
a = hd.spread(hd.shade(hv.Image(dsh.Canvas(plot_width=600, plot_height=600).points(df, 'Predicted' ,'Observed'), label='Using predicted total counts')), px=2)

# Panel b: predicted profile shape scaled by the observed total counts
# (observed profile summed along axis 1).
y_pred_profile = np.log10(1+(ypp * y_true[f'profile/{task}'].sum(axis=1)[:, np.newaxis]).sum(axis=-1).ravel())

df = pd.DataFrame({"Predicted": y_pred_profile, "Observed": y_true_profile})
b = hd.spread(hd.shade(hv.Image(dsh.Canvas(plot_width=600, plot_height=600).points(df, 'Predicted' ,'Observed'), label='Using observed total counts')), px=2)
fig = a + b

# The renderer writes straight to "<basename>.pdf", so the target directory has
# to exist beforehand.
os.makedirs(f"{figures}/per-base-scatter", exist_ok=True)
hv.Store.renderers['matplotlib'].save(fig, f"{figures}/per-base-scatter/{task}", 'pdf')
fig
In [ ]:
# Per-base replicate concordance for one task, shown as a datashaded density.
# NOTE: `task` is also read by the following predicted-vs-observed cell.
task = 'Nanog'

# pos + neg tracks, flattened to 1-D, then log10(1 + counts)
rep0 = np.log10(1+sum([counts[f'/{task}/rep0/pos'], counts[f'/{task}/rep0/neg']]).ravel())
rep1 = np.log10(1+sum([counts[f'/{task}/rep1/pos'], counts[f'/{task}/rep1/neg']]).ravel())

df = pd.DataFrame({"rep0": rep0, "rep1": rep1})
# Aggregate the points onto a 300x300 grid, shade it, and spread by 2 px
canvas = dsh.Canvas(plot_width=300, plot_height=300)
fig = hd.spread(hd.shade(hv.Image(canvas.points(df, 'rep0' ,'rep1'))), px=2)

# The renderer writes straight to "<basename>.pdf", so the target directory has
# to exist beforehand.
os.makedirs(f"{figures}/per-base-scatter-replicates", exist_ok=True)
hv.Store.renderers['matplotlib'].save(fig, f"{figures}/per-base-scatter-replicates/{task}", 'pdf')
fig
In [ ]:
# Predicted vs. observed per-base counts for the current `task` (set in the
# previous cell), as two side-by-side datashaded densities.
# Shape assumption (TODO confirm): profile arrays are (regions, positions, strands)
# and the counts head is (regions, strands).
ypc = y_pred[ds.task2idx(task, 'counts')]
ypp = softmax(y_pred[ds.task2idx(task, 'profile')])

# Observed per-base counts (last axis summed), log10(1 + x). Shared by both panels.
y_true_profile =  np.log10(1+y_true[f'profile/{task}'].sum(axis=-1).ravel())

# Panel a: predicted profile shape scaled by the predicted total counts.
# exp(ypc) - 1 presumably inverts a log(1 + x) transform on the counts head -- TODO confirm.
y_pred_profile =  np.log10(1+(ypp * (np.exp(ypc) - 1 )[:, np.newaxis]).sum(axis=-1).ravel())

df = pd.DataFrame({"Predicted": y_pred_profile, "Observed": y_true_profile})
a = hd.spread(hd.shade(hv.Image(dsh.Canvas(plot_width=600, plot_height=600).points(df, 'Predicted' ,'Observed'), label='Predicted total counts')), px=2)

# Panel b: predicted profile shape scaled by the observed total counts
# (observed profile summed along axis 1).
y_pred_profile = np.log10(1+(ypp * y_true[f'profile/{task}'].sum(axis=1)[:, np.newaxis]).sum(axis=-1).ravel())

df = pd.DataFrame({"Predicted": y_pred_profile, "Observed": y_true_profile})
b = hd.spread(hd.shade(hv.Image(dsh.Canvas(plot_width=600, plot_height=600).points(df, 'Predicted' ,'Observed'), label='Observed total counts')), px=2)
fig = a + b

# The renderer writes straight to "<basename>.pdf", so the target directory has
# to exist beforehand.
os.makedirs(f"{figures}/per-base-scatter", exist_ok=True)
hv.Store.renderers['matplotlib'].save(fig, f"{figures}/per-base-scatter/{task}", 'pdf')
fig
In [ ]:
# Per-base replicate concordance for one task, shown as a datashaded density.
# NOTE: `task` is also read by the following predicted-vs-observed cell.
task = 'Oct4'

# pos + neg tracks, flattened to 1-D, then log10(1 + counts)
rep0 = np.log10(1+sum([counts[f'/{task}/rep0/pos'], counts[f'/{task}/rep0/neg']]).ravel())
rep1 = np.log10(1+sum([counts[f'/{task}/rep1/pos'], counts[f'/{task}/rep1/neg']]).ravel())

df = pd.DataFrame({"rep0": rep0, "rep1": rep1})
# Aggregate the points onto a 300x300 grid, shade it, and spread by 2 px
canvas = dsh.Canvas(plot_width=300, plot_height=300)
fig = hd.spread(hd.shade(hv.Image(canvas.points(df, 'rep0' ,'rep1'))), px=2)

# The renderer writes straight to "<basename>.pdf", so the target directory has
# to exist beforehand.
os.makedirs(f"{figures}/per-base-scatter-replicates", exist_ok=True)
hv.Store.renderers['matplotlib'].save(fig, f"{figures}/per-base-scatter-replicates/{task}", 'pdf')
fig
In [ ]:
# Predicted vs. observed per-base counts for the current `task` (set in the
# previous cell), as two side-by-side datashaded densities.
# Shape assumption (TODO confirm): profile arrays are (regions, positions, strands)
# and the counts head is (regions, strands).
ypc = y_pred[ds.task2idx(task, 'counts')]
ypp = softmax(y_pred[ds.task2idx(task, 'profile')])

# Observed per-base counts (last axis summed), log10(1 + x). Shared by both panels.
y_true_profile =  np.log10(1+y_true[f'profile/{task}'].sum(axis=-1).ravel())

# Panel a: predicted profile shape scaled by the predicted total counts.
# exp(ypc) - 1 presumably inverts a log(1 + x) transform on the counts head -- TODO confirm.
y_pred_profile =  np.log10(1+(ypp * (np.exp(ypc) - 1 )[:, np.newaxis]).sum(axis=-1).ravel())

df = pd.DataFrame({"Predicted": y_pred_profile, "Observed": y_true_profile})
a = hd.spread(hd.shade(hv.Image(dsh.Canvas(plot_width=600, plot_height=600).points(df, 'Predicted' ,'Observed'), label='Predicted total counts')), px=2)

# Panel b: predicted profile shape scaled by the observed total counts
# (observed profile summed along axis 1).
y_pred_profile = np.log10(1+(ypp * y_true[f'profile/{task}'].sum(axis=1)[:, np.newaxis]).sum(axis=-1).ravel())

df = pd.DataFrame({"Predicted": y_pred_profile, "Observed": y_true_profile})
b = hd.spread(hd.shade(hv.Image(dsh.Canvas(plot_width=600, plot_height=600).points(df, 'Predicted' ,'Observed'), label='Observed total counts')), px=2)
fig = a + b

# The renderer writes straight to "<basename>.pdf", so the target directory has
# to exist beforehand.
os.makedirs(f"{figures}/per-base-scatter", exist_ok=True)
hv.Store.renderers['matplotlib'].save(fig, f"{figures}/per-base-scatter/{task}", 'pdf')
fig