Goal

  • QC the new ChIP-nexus pipeline

Tasks

  • [ ]

Required files

-

In [1]:
# Imports
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from basepair.imports import *
hv.extension('bokeh')
Using TensorFlow backend.
In [98]:
%matplotlib inline
paper_config()
In [2]:
# Common paths
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
modisco_dir = model_dir / f"modisco/all/deeplift/profile/"
In [3]:
# create_tf_session(0)
In [4]:
from basepair.extractors import StrandedBigWigExtractor
In [5]:
from basepair.datasets import run_extractors
In [6]:
new_dir = Path("/srv/www/kundaje/leepc12/chip-nexus-pipeline/cromwell-executions/chip_nexus/6c43259b-1e23-46b5-aa0a-6d5f0845fea3/call-count_signal_track_pooled/execution/")
In [126]:
new_dir = Path("/srv/www/kundaje/leepc12/chip-nexus-pipeline/cromwell-executions/chip_nexus/6c43259b-1e23-46b5-aa0a-6d5f0845fea3/call-count_signal_track_pooled/execution/")
In [149]:
new_dir = Path("/srv/www/kundaje/leepc12/chip-nexus-pipeline/cromwell-executions/chip_nexus/45e503fc-46da-46f2-a83b-9e8b7321fd22/call-count_signal_track_pooled/execution/")
In [7]:
!ls 
04-align_instance_center.ipynb		    find-best-model.ipynb
2018-01-15-create-genome-wide-loader.ipynb  improve-count-model.ipynb
2019-01-04-debug-trim.ipynb		    imp-scores.ipynb
23-modisco-report.ipynb			    motif-scanning-comparison.ipynb
2-gdrive-upload.ipynb			    re-cluster-motifs.ipynb
30-test-modisco-table.ipynb		    stacked_seqlet_imp.ipynb
50-new-seqmodel.ipynb			    test-viz.ipynb
5-viz-few-imp-scores.ipynb		    Untitled1.ipynb
debug-modisco-table.ipynb		    Untitled2.ipynb
debug-motifs.ipynb			    Untitled.ipynb
deeplift-score-distribtion.ipynb
In [108]:
from genomelake.extractors import BigwigExtractor
In [9]:
bw_extractors = {'Oct4_new': [BigwigExtractor(str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.pooled.positive.bigwig")),
                              BigwigExtractor(str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.pooled.negative.bigwig"))],
                 'Oct4_old': [BigwigExtractor("/users/avsec/workspace/basepair-workflow/data/Oct4/5prime.counts.pos.bw"),
                              BigwigExtractor("/users/avsec/workspace/basepair-workflow/data/Oct4/5prime.counts.neg.bw")],
                
                }
In [86]:
bw_extractors = {'Oct4_new': [BigwigExtractor(str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.positive.bigwig")),
                              BigwigExtractor(str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.negative.bigwig"))],
                 'Oct4_old': [BigwigExtractor("/oak/stanford/groups/akundaje/avsec/basepair-workflow/data/raw/bw/mesc_oct4_nexus_1_positive.bw"),
                              BigwigExtractor("/oak/stanford/groups/akundaje/avsec/basepair-workflow/data/raw/bw/mesc_oct4_nexus_1_negative.bw")],
                
                }
In [87]:
mr = ModiscoResult(modisco_dir / "modisco.h5")
mr.open()
In [ ]:
 
In [88]:
from pybedtools import BedTool

from basepair.preproc import resize_interval
In [89]:
# intervals = [resize_interval(interval, 70) for interval in intervals]
In [90]:
bw_extractors
Out[90]:
{'Oct4_new': [<genomelake.extractors.BigwigExtractor at 0x7f266d86acf8>,
  <genomelake.extractors.BigwigExtractor at 0x7f266d86aa58>],
 'Oct4_old': [<genomelake.extractors.BigwigExtractor at 0x7f266d86a400>,
  <genomelake.extractors.BigwigExtractor at 0x7f266d86a470>]}
In [14]:
from basepair.cli.schemas import TaskSpec
In [150]:
ts = {"Oct4_new": TaskSpec(task='Oct4_new', 
                      pos_counts=str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.pooled.positive.bigwig"),
                      neg_counts=str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.pooled.negative.bigwig")),
      "Oct4_old": TaskSpec(task='Oct4_old', 
                      pos_counts="/oak/stanford/groups/akundaje/avsec/basepair-workflow/data/raw/bw/mesc_oct4_nexus.pooled.positive.bw",
                      neg_counts="/oak/stanford/groups/akundaje/avsec/basepair-workflow/data/raw/bw/mesc_oct4_nexus.pooled.negative.bw"),
     }
In [151]:
from basepair.modisco.pattern_instances import dfi2seqlets, annotate_profile
from basepair.modisco.results import Seqlet, resize_seqlets
from basepair.plot.profiles import extract_signal
from basepair.plot.profiles import  plot_stranded_profile, multiple_plot_stranded_profile
from basepair.plot.heatmaps import heatmap_stranded_profile, multiple_heatmap_stranded_profile
In [152]:
%matplotlib inline
paper_config()
In [153]:
modisco_seqlets = mr._get_seqlets("metacluster_0/pattern_0", trim_frac=0.08)
In [159]:
pattern = 'metacluster_0/pattern_0'
tfc = 'Oct4_old'
In [160]:
intervals = mr.get_seqlet_intervals(pattern, trim_frac=0, as_df=True)
intervals = intervals[['chrom', 'start', 'end', 'pattern', 'pattern', 'strand']].drop_duplicates()
bt = BedTool.from_dataframe(intervals)
intervals = list(bt)
In [161]:
mr.get_pattern(pattern).trim_seq_ic(0.08).plot("seq_ic");
seqlet_profiles = {t: np.abs(v.load_counts(intervals)) for t,v in ts.items()}
In [162]:
plt.scatter(np.ravel(seqlet_profiles['Oct4_new']), np.ravel(seqlet_profiles['Oct4_old']), s=2, alpha=0.1)
plt.plot([0, 40], [0, 40], alpha=0.1)
plt.xlabel("New")
plt.ylabel("Old")
Out[162]:
Text(0, 0.5, 'Old')
In [163]:
sort_idx = np.argsort(-seqlet_profiles[tfc].max(axis=(1,2)))
multiple_plot_stranded_profile({p:v for p,v in seqlet_profiles.items()}, figsize_tmpl=(2.55,2))
multiple_heatmap_stranded_profile(seqlet_profiles, sort_idx=sort_idx[:1000], figsize=(4,6));
In [106]:
plt.scatter(np.ravel(seqlet_profiles['Oct4_new']), np.ravel(seqlet_profiles['Oct4_old']), s=2, alpha=0.1)
plt.plot([0, 40], [0, 40], alpha=0.1)
plt.xlabel("New")
plt.ylabel("Old")
Out[106]:
Text(0, 0.5, 'Old')
In [104]:
sort_idx = np.argsort(-seqlet_profiles[tfc].max(axis=(1,2)))
multiple_plot_stranded_profile({p:v for p,v in seqlet_profiles.items()}, figsize_tmpl=(2.55,2))
multiple_heatmap_stranded_profile(seqlet_profiles, sort_idx=sort_idx[:1000], figsize=(4,6));
In [61]:
seqlet_profiles['Oct4_old'].sum() / seqlet_profiles['Oct4_new'].sum()
Out[61]:
1.4150938
In [27]:
import pyBigWig
In [28]:
bw = pyBigWig.open(str(new_dir / "mesc_oct4_nexus_1.trim.merged.nodup.pooled.positive.bigwig"))
In [45]:
bw.stats('chr5', type='mean', exact=True)
Out[45]:
[1.052221619828918]
In [30]:
bw_old = pyBigWig.open("/users/avsec/workspace/basepair-workflow/data/Oct4/5prime.counts.pos.bw")
In [46]:
bw_old.stats('chr5', type='mean', exact=True)
Out[46]:
[1.0676418915164991]