-
# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from basepair.imports import *
hv.extension('bokeh')
%matplotlib inline
paper_config()
# Common paths
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
modisco_dir = model_dir / f"modisco/all/deeplift/profile/"
# create_tf_session(0)
from basepair.extractors import StrandedBigWigExtractor
from basepair.datasets import run_extractors
new_dir = Path("/srv/www/kundaje/leepc12/chip-nexus-pipeline/cromwell-executions/chip_nexus/6c43259b-1e23-46b5-aa0a-6d5f0845fea3/call-count_signal_track_pooled/execution/")
new_dir = Path("/srv/www/kundaje/leepc12/chip-nexus-pipeline/cromwell-executions/chip_nexus/6c43259b-1e23-46b5-aa0a-6d5f0845fea3/call-count_signal_track_pooled/execution/")
new_dir = Path("/srv/www/kundaje/leepc12/chip-nexus-pipeline/cromwell-executions/chip_nexus/45e503fc-46da-46f2-a83b-9e8b7321fd22/call-count_signal_track_pooled/execution/")
!ls
from genomelake.extractors import BigwigExtractor
bw_extractors = {'Oct4_new': [BigwigExtractor(str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.pooled.positive.bigwig")),
BigwigExtractor(str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.pooled.negative.bigwig"))],
'Oct4_old': [BigwigExtractor("/users/avsec/workspace/basepair-workflow/data/Oct4/5prime.counts.pos.bw"),
BigwigExtractor("/users/avsec/workspace/basepair-workflow/data/Oct4/5prime.counts.neg.bw")],
}
bw_extractors = {'Oct4_new': [BigwigExtractor(str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.positive.bigwig")),
BigwigExtractor(str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.negative.bigwig"))],
'Oct4_old': [BigwigExtractor("/oak/stanford/groups/akundaje/avsec/basepair-workflow/data/raw/bw/mesc_oct4_nexus_1_positive.bw"),
BigwigExtractor("/oak/stanford/groups/akundaje/avsec/basepair-workflow/data/raw/bw/mesc_oct4_nexus_1_negative.bw")],
}
mr = ModiscoResult(modisco_dir / "modisco.h5")
mr.open()
from pybedtools import BedTool
from basepair.preproc import resize_interval
# intervals = [resize_interval(interval, 70) for interval in intervals]
bw_extractors
from basepair.cli.schemas import TaskSpec
ts = {"Oct4_new": TaskSpec(task='Oct4_new',
pos_counts=str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.pooled.positive.bigwig"),
neg_counts=str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.pooled.negative.bigwig")),
"Oct4_old": TaskSpec(task='Oct4_old',
pos_counts="/oak/stanford/groups/akundaje/avsec/basepair-workflow/data/raw/bw/mesc_oct4_nexus.pooled.positive.bw",
neg_counts="/oak/stanford/groups/akundaje/avsec/basepair-workflow/data/raw/bw/mesc_oct4_nexus.pooled.negative.bw"),
}
from basepair.modisco.pattern_instances import dfi2seqlets, annotate_profile
from basepair.modisco.results import Seqlet, resize_seqlets
from basepair.plot.profiles import extract_signal
from basepair.plot.profiles import plot_stranded_profile, multiple_plot_stranded_profile
from basepair.plot.heatmaps import heatmap_stranded_profile, multiple_heatmap_stranded_profile
%matplotlib inline
paper_config()
modisco_seqlets = mr._get_seqlets("metacluster_0/pattern_0", trim_frac=0.08)
pattern = 'metacluster_0/pattern_0'
tfc = 'Oct4_old'
intervals = mr.get_seqlet_intervals(pattern, trim_frac=0, as_df=True)
intervals = intervals[['chrom', 'start', 'end', 'pattern', 'pattern', 'strand']].drop_duplicates()
bt = BedTool.from_dataframe(intervals)
intervals = list(bt)
mr.get_pattern(pattern).trim_seq_ic(0.08).plot("seq_ic");
seqlet_profiles = {t: np.abs(v.load_counts(intervals)) for t,v in ts.items()}
plt.scatter(np.ravel(seqlet_profiles['Oct4_new']), np.ravel(seqlet_profiles['Oct4_old']), s=2, alpha=0.1)
plt.plot([0, 40], [0, 40], alpha=0.1)
plt.xlabel("New")
plt.ylabel("Old")
sort_idx = np.argsort(-seqlet_profiles[tfc].max(axis=(1,2)))
multiple_plot_stranded_profile({p:v for p,v in seqlet_profiles.items()}, figsize_tmpl=(2.55,2))
multiple_heatmap_stranded_profile(seqlet_profiles, sort_idx=sort_idx[:1000], figsize=(4,6));
plt.scatter(np.ravel(seqlet_profiles['Oct4_new']), np.ravel(seqlet_profiles['Oct4_old']), s=2, alpha=0.1)
plt.plot([0, 40], [0, 40], alpha=0.1)
plt.xlabel("New")
plt.ylabel("Old")
sort_idx = np.argsort(-seqlet_profiles[tfc].max(axis=(1,2)))
multiple_plot_stranded_profile({p:v for p,v in seqlet_profiles.items()}, figsize_tmpl=(2.55,2))
multiple_heatmap_stranded_profile(seqlet_profiles, sort_idx=sort_idx[:1000], figsize=(4,6));
seqlet_profiles['Oct4_old'].sum() / seqlet_profiles['Oct4_new'].sum()
import pyBigWig
bw = pyBigWig.open(str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.pooled.positive.bigwig"))
bw.stats('chr5', type='mean', exact=True)
bw_old = pyBigWig.open("/users/avsec/workspace/basepair-workflow/data/Oct4/5prime.counts.pos.bw")
bw_old.stats('chr5', type='mean', exact=True)