Goal

  • test the simple code for finding instances

Tasks

  • Design sketch

  • input

    • importance scores hdf5
    • modisco
  • output
    • hdf5 OR bed
In [1]:
import h5py
import numpy as np
from collections import OrderedDict
import modisco.util
import modisco.core
import modisco.metaclusterers
import modisco.coordproducers
import modisco.tfmodisco_workflow.seqlets_to_patterns
import modisco.tfmodisco_workflow
from modisco.tfmodisco_workflow import workflow
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Can not use cuDNN on context None: cannot compile with cuDNN. We got this error:
b'/tmp/try_flags_wdrkezfh.c:4:19: fatal error: cudnn.h: No such file or directory\ncompilation terminated.\n'
Mapped name None to device cuda: TITAN X (Pascal) (0000:02:00.0)
In [1]:
# Rebuild the track set that the saved TF-MoDISco results are attached to
# when they are loaded back from disk.
# NOTE(review): `tasks`, `task_to_scores`, `task_to_hyp_scores` and
# `onehot_data` are not defined in any visible cell of this notebook; they
# must already exist in the kernel before this cell runs -- confirm where
# they are supposed to come from.
track_set = workflow.prep_track_set(
    task_names=tasks,
    contrib_scores=task_to_scores,
    hypothetical_contribs=task_to_hyp_scores,
    one_hot=onehot_data)

# Open the results file read-only inside a context manager so the HDF5
# handle is released even if `from_hdf5` raises.
with h5py.File("results.hdf5", "r") as grp:
    loaded_tfmodisco_results = workflow.TfModiscoResults.from_hdf5(
        grp, track_set=track_set)
  File "<ipython-input-1-a88dc0792e88>", line 1
    mport h5py
             ^
SyntaxError: invalid syntax
In [4]:
from pathlib import Path
In [5]:
# Directory holding the trained model and its artifacts (see the listing
# produced by the `ls {mdir}` cell).
mdir = Path(
    "/srv/scratch/avsec/workspace/chipnexus/data/processed/chipnexus/exp/models/oct-sox-nanog-klf-dnase/models/n_dil_layers=9"
)
In [12]:
from kipoi.readers import HDF5Reader
In [13]:
# Read grad.valid.h5 from the model directory with kipoi's HDF5Reader; the
# returned object is indexed by key in later cells (e.g. d['metadata']).
d = HDF5Reader.load(mdir / "grad.valid.h5")
In [15]:
d['metadata'].keys()
Out[15]:
dict_keys(['interval_from_task', 'range'])
In [22]:
import pandas as pd
In [29]:
# Tabulate the entries stored under d['metadata']['range'] as a DataFrame,
# one column per key.
dfm = pd.DataFrame(d['metadata']['range'])
In [30]:
# Prefix every column with "example_". The frame is rebuilt from `d` rather
# than renamed in place so that re-running this cell cannot stack the prefix
# ("example_example_...").
dfm = pd.DataFrame(d['metadata']['range']).add_prefix("example_")
In [ ]:
dfm.columns
In [31]:
dfm.head()
Out[31]:
example_chr example_end example_id example_start example_strand
0 chr3 122146063 0 122145063 *
1 chr2 52072751 1 52071751 *
2 chr3 105428372 2 105427372 *
3 chr3 18624637 3 18623637 *
4 chr4 125535225 4 125534225 *
In [33]:
dfm.head().to_dict()
Out[33]:
{'example_chr': {0: 'chr3', 1: 'chr2', 2: 'chr3', 3: 'chr3', 4: 'chr4'},
 'example_end': {0: 122146063,
  1: 52072751,
  2: 105428372,
  3: 18624637,
  4: 125535225},
 'example_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4},
 'example_start': {0: 122145063,
  1: 52071751,
  2: 105427372,
  3: 18623637,
  4: 125534225},
 'example_strand': {0: '*', 1: '*', 2: '*', 3: '*', 4: '*'}}
In [16]:
# Look up the 'chr', 'start' and 'end' entries under d['metadata']['range'].
# Only the last expression of a cell is displayed, so this cell's output is
# the 'end' array; the 'chr' and 'start' lookups are evaluated but not shown.
d['metadata']['range']['chr']
d['metadata']['range']['start']
d['metadata']['range']['end']
Out[16]:
array([122146063,  52072751, 105428372, ...,  57300744,  71125230,
        29030856])
In [11]:
ls {mdir}
cometml.json   grad.test.h5   hparams.yaml  results.html
dataspec.yaml  grad.valid.h5  model.h5      results.ipynb
figures/       history.csv    modisco/
In [8]:
# Sub-directory of the model directory that is checked for modisco.h5 below.
modisco_dir = mdir.joinpath("modisco/valid/")
In [10]:
(modisco_dir / "modisco.h5").exists()
Out[10]:
True