make a nicer Nanog plot where the periodicity can be seen
from basepair.imports import *
import matplotlib.ticker as ticker
from basepair.plot.profiles import extract_signal
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
modisco_dir = model_dir / "modisco/by_peak_tasks/weighted/Nanog"
from basepair.modisco.table import ModiscoData
ls {model_dir}
md = ModiscoData.load(modisco_dir, model_dir/ "grad.all.h5")
ps, t0, freq = compute_power_spectrum('metacluster_0/pattern_0', "Oct4", md)
tasks = md.get_tasks()
l = []
for pattern in tqdm(md.mr.patterns()):
if md.mr.n_seqlets(*pattern.split("/")) < 300:
continue
for task in md.get_tasks():
l.append(dict(task=task, pattern=pattern, frac=periodicity_10bp_frac(pattern, task, md)))
df = pd.DataFrame(l)
sns.heatmap(df.pivot("pattern", "task", "frac"))
sns.clustermap(df.pivot("pattern", "task", "frac"))
plt.savefig("fft-frac.png", dpi=300, bbox_inches="tight")
plt.savefig("fft-frac.pdf", bbox_inches="tight")
import seaborn as sns
sns.heatmap(df, annot=True)
md.mr.n_seqlets()
power_spectrum
assert t0[9] == 10.88888888888889
ps[0] / ps.sum()
np.where((t0 < 11) & (t0> 10))
plt.plot(t0, ps)
power_spectrum(md, 'metacluster_0/pattern_5', "Oct4")
power_spectrum(md, 'metacluster_2/pattern_0', "Nanog")
power_spectrum(md, 'metacluster_0/pattern_1', "Sox2")
power_spectrum(md, 'metacluster_0/pattern_4', "Oct4")
power_spectrum(md, 'metacluster_0/pattern_0', "Klf4")
p= md.grad_profile['metacluster_0/pattern_2']['Nanog']
seqlets = md.seqlets_per_task['metacluster_0/pattern_2']
power_spectrum(md, 'metacluster_0/pattern_0', "Sox2")
power_spectrum(md, 'metacluster_0/pattern_1')
pattern= 'metacluster_0/pattern_2'
wide_seqlets = [s.resize(md.footprint_width)
for s in seqlets
if s.center() > md.footprint_width //2 and \
s.center() < md.get_seqlen(pattern) - md.footprint_width //2
]
p = extract_signal(md.get_region_grad("Nanog", 'profile'), wide_seqlets)
seq = extract_signal(md.get_region_seq(), wide_seqlets)
from basepair.plot.tracks import plot_tracks
from basepair.plot.heatmaps import heatmap_importance_profile, normalize
np.fft.rfft
fig = plt.figure(figsize=(11, 2))
plt.plot(np.log(np.abs(p*seq).sum(axis=-1).sum(axis=0)));
heatmap_importance_profile(normalize(np.abs(p*seq).sum(axis=-1)[:500], pmin=50, pmax=99), figsize=(10, 20))
heatmap_importance_profile(normalize(np.abs(p).sum(axis=-1)[:500], pmin=50, pmax=99), figsize=(10, 20))
plt.savefig('nanog-imp-heatmap.png', dpi=300, tight_layout=True)
plt.savefig('nanog-imp-heatmap.pdf')
agg_profile = np.log(np.abs(p*seq).sum(axis=-1).sum(axis=0))
agg_profile = agg_profile - agg_profile.mean()
agg_profile = agg_profile / agg_profile.std()
def smooth(y, box_pts):
box = np.ones(box_pts)/box_pts
y_smooth = np.convolve(y, box, mode='same')
return y_smooth
smooth_part = smooth(agg_profile, 10)
oscilatory_part = agg_profile - smooth_part
fig, axes= plt.subplots(2, 1,figsize=(11, 4), sharex=True)
axes[0].plot(agg_profile, label='original')
axes[0].plot(smooth_part, label="smooth", alpha=0.5)
axes[0].legend()
axes[0].set_ylabel("Avg. importance");
axes[0].set_title("Average importance score")
#axes[0].set_xlabel("Position");
axes[1].plot(oscilatory_part);
axes[1].set_xlabel("Position");
axes[1].set_ylabel("original - smooth");
fig.subplots_adjust(hspace=0) # no space between plots
plt.savefig('nanog-agg-profile.png', dpi=300)
plt.savefig('nanog-agg-profile.pdf')
fig = plt.figure(figsize=(11, 2))
plt.plot(1/freq[:49], np.abs(np.fft.fft(oscilatory_part[102:])[:49])**2 + np.abs(np.fft.fft(oscilatory_part[:98])[:49])**2, "-o")
plt.xlim([0, 50])
plt.gca().xaxis.set_major_locator(ticker.MaxNLocator(25, integer=True))
plt.grid(alpha=0.3)
plt.xlabel("1/Frequency [bp]")
plt.ylabel("Power spectrum")
plt.title("Power spectrum")
plt.gcf().subplots_adjust(bottom=0.4)
plt.savefig('nanog-fft.png', dpi=300)
plt.savefig('nanog-fft.pdf')
plt.plot(agg_profile[99:])
plt.plot(agg_profile[98:0:-1])
plt.gca().xaxis.set_major_locator(ticker.MaxNLocator(13))
plt.grid()
plt.plot(agg_profile[100:])
plt.plot(agg_profile[97:0:-1])
plt.plot()
sp = np.abs(np.fft.fft(agg_profile))**2
freq = np.fft.fftfreq(agg_profile.shape[-1])
plt.plot(sp[2:25])
agg_profile = np.log(np.abs(p*seq).sum(axis=-1).sum(axis=0))
plt.plot(agg_profile)
fig = plt.figure(figsize=(11, 2))
plt.plot(np.log(np.abs(p*seq).sum(axis=-1).sum(axis=0)));