Goal

  • implement interval overlap labelling: mark which 1 kb genome-wide bins
    overlap the Oct4/Sox2/Nanog/Klf4 ChIP-nexus summit peaks

Tasks

  • [ ] generate {data}/1kb.osnk.tsv.gz with one overlap label column per TF
  • [ ] benchmark genome-wide data loading (StratifiedRandomBatchSampler,
    DataLoader with multiple workers)

Required files

  • {ddir}/raw/annotation/mm10/mm10.genome.stride1000.w1000.no-blacklist.bed.gz
  • {data}/{Oct4,Sox2,Nanog,Klf4}-summits.200bp.bed.gz
  • {data}/dataspec.yml

In [1]:
# Imports
from basepair.imports import *
hv.extension('bokeh')
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
Using TensorFlow backend.
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/concise/utils/plot.py:115: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  min_coords = np.vstack(data.min(0) for data in polygons_data).min(0)
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/concise/utils/plot.py:116: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  max_coords = np.vstack(data.max(0) for data in polygons_data).max(0)
In [2]:
# Common paths
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
modisco_dir = model_dir / f"modisco/all/profile/"
output_dir = Path("/srv/www/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/all/profile")
In [3]:
# create_tf_session(0)
In [4]:
data = f"{ddir}/processed/chipnexus/exp/models/genomewide/oct-sox-nanog-klf"
In [3]:
ls {data}
dataspec.yml               Nanog-summits.200bp.bed.gz  Sox2-summits.bed.gz
Klf4-summits.200bp.bed.gz  Nanog-summits.bed.gz        {}-summits.200bp.bed.gz
Klf4-summits.bed.gz        Oct4-summits.200bp.bed.gz   tasks.tsv
label.bash*                Oct4-summits.bed.gz
label.bash~*               Sox2-summits.200bp.bed.gz
In [7]:
from pybedtools import BedTool
In [10]:
btg.head()
chr1	0	1000
 chr1	1000	2000
 chr1	2000	3000
 chr1	3000	4000
 chr1	4000	5000
 chr1	5000	6000
 chr1	6000	7000
 chr1	7000	8000
 chr1	8000	9000
 chr1	9000	10000
 
In [13]:
btg = BedTool(f"{ddir}/raw/annotation/mm10/mm10.genome.stride1000.w1000.no-blacklist.bed.gz")
dfg = btg.to_dataframe()
dfg['name'] = dfg.index
In [15]:
btg = BedTool.from_dataframe(dfg)
In [18]:
ls {data}
dataspec.yml               Nanog-summits.200bp.bed.gz  Sox2-summits.bed.gz
Klf4-summits.200bp.bed.gz  Nanog-summits.bed.gz        {}-summits.200bp.bed.gz
Klf4-summits.bed.gz        Oct4-summits.200bp.bed.gz   tasks.tsv
label.bash*                Oct4-summits.bed.gz
label.bash~*               Sox2-summits.200bp.bed.gz
In [19]:
b = BedTool(f"{data}/Sox2-summits.200bp.bed.gz")
In [20]:
b.head()
chr17	17408939	17409140
 chr5	110284720	110284921
 chr1	57780178	57780379
 chr6	94183999	94184200
 chr5	121537408	121537609
 chr8	67966590	67966791
 chr11	68348455	68348656
 chr1	9955321	9955522
 chr10	123087129	123087330
 chr2	18567798	18567999
 
In [25]:
feature = 'feature1'
In [53]:
import pybedtools
In [ ]:
pybedtools.
In [64]:
# Label every 1 kb genome-wide bin with one column per TF, marking whether
# the bin overlaps that TF's 200 bp summit peaks, and write the result to
# 1kb.osnk.tsv.gz. (Labels are presumably binary 0/1 — the later
# dfo.sum()/dfo.mean() outputs are consistent with that; confirm in label_bed.)
from basepair.config import get_data_dir
from basepair.preproc import label_bed
ddir = get_data_dir()
data = f"{ddir}/processed/chipnexus/exp/models/genomewide/oct-sox-nanog-klf"
dfo = label_bed(f"{ddir}/raw/annotation/mm10/mm10.genome.stride1000.w1000.no-blacklist.bed.gz",
         {t: f"{data}/{t}-summits.200bp.bed.gz" for t in ['Oct4', 'Sox2', 'Nanog', 'Klf4']})
dfo.to_csv(f"{data}/1kb.osnk.tsv.gz", compression='gzip', sep='\t', index=False)
CPU times: user 12 s, sys: 1.11 s, total: 13.1 s
Wall time: 23.9 s
In [52]:
for t in ['Oct4', 'Sox2', 'Nanog', 'Klf4']:
    print(t)
    !zcat {data}/{t}-summits.200bp.bed.gz | wc -l
Oct4
21841
Sox2
9396
Nanog
18017
Klf4
49174
In [49]:
dfo.set_index(['chrom', 'start', 'end'], inplace=True)
dfo.sum(axis=0)
Out[49]:
task/Oct4     25965
task/Sox2     10963
task/Nanog    21462
task/Klf4     57561
dtype: int64
In [50]:
dfo.mean(axis=0)
Out[50]:
task/Oct4     0.0095
task/Sox2     0.0040
task/Nanog    0.0079
task/Klf4     0.0211
dtype: float64
In [28]:
# Flag the genome-wide bins (dfg) that overlap at least one interval of `b`.
# u=True reports each bin at most once; the 'name' column holds the bin's
# integer id (it was set from dfg.index earlier), so it indexes dfg via .loc.
hit_bins = btg.intersect(b, wa=True, u=True).to_dataframe()['name']
dfg[feature] = 0
dfg.loc[hit_bins, feature] = 1
In [ ]:
 
In [23]:
dfi.head()
Out[23]:
chrom start end name
0 chr1 3062000 3063000 3062
1 chr1 3063000 3064000 3063
2 chr1 3483000 3484000 3483
3 chr1 4150000 4151000 4150
4 chr1 4151000 4152000 4151
In [16]:
dfg.head()
Out[16]:
chrom start end name
0 chr1 0 1000 0
1 chr1 1000 2000 1
2 chr1 2000 3000 2
3 chr1 3000 4000 3
4 chr1 4000 5000 4
In [2]:
from gin_train.samplers import StratifiedRandomBatchSampler,iterable_cycle
In [3]:
classes = np.concatenate([np.ones((int(1e7),)), np.zeros((int(1e7),))])
In [4]:
import ipython_memory_usage.ipython_memory_usage as imu
In [5]:
imu.start_watching_memory()
In [5] used 0.0000 MiB RAM in 0.16s, peaked 0.00 MiB above current, total RAM usage 653.64 MiB
In [25]:
sampler = StratifiedRandomBatchSampler(classes, [0.5, 0.5], 128)
In [25] used 171.6680 MiB RAM in 1.61s, peaked 159.31 MiB above current, total RAM usage 545.91 MiB
In [38]:
it = iter(sampler)
In [38] used 0.0000 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 471.67 MiB
In [45]:
for i in sampler:
    pass
In [45] used 0.0000 MiB RAM in 6.33s, peaked 0.00 MiB above current, total RAM usage 640.98 MiB
In [36]:
len(sampler)
Out[36]:
156250
In [36] used 0.0039 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 471.67 MiB
In [ ]:
iterable_cycle()
In [5]:
intervals_file = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/genomewide/oct-sox-nanog-klf/1kb.osnk.tsv.gz'
dataspec = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/dataspec.yml'
In [8]:
from basepair.datasets import get_gw_StrandedProfile_datasets
train, valid = get_gw_StrandedProfile_datasets(dataspec = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/dataspec.yml',
                                              intervals_file = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/genomewide/oct-sox-nanog-klf/1kb.osnk.tsv.gz',
                                              peak_width=1000,
                                              seq_width=3088,
                                              exclude_chr = ['chrX', 'chrY']
                                             )  # use the default train and valid chromosomes
In [9]:
ds = valid[0][1]
In [10]:
valid[0][0]
Out[10]:
'valid-genome-wide'
In [11]:
it = ds.batch_train_iter(64, num_workers=6)
In [119]:
%tqdm_restart
In [12]:
for i in tqdm(range(100)):
    # ds[i]
    batch = next(it)
100%|██████████| 100/100 [00:02<00:00, 49.41it/s]
In [13]:
it = ds.batch_train_iter(cycle=False, batch_size=64, num_workers=6)
In [19]:
for i in tqdm(range(100)):
    # ds[i]
    batch = next(it)
100%|██████████| 100/100 [00:02<00:00, 38.33it/s]
In [96]:
from tqdm import tqdm
In [96] used 0.0000 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 1174.79 MiB
In [68]:
%tqdm_restart
In [68] used -0.0078 MiB RAM in 0.11s, peaked 0.01 MiB above current, total RAM usage 944.02 MiB

In [8]:
from kipoi.data_utils import iterable_cycle
In [8] used 0.0000 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 654.44 MiB

TODO

  • there is a memory leak when iterating with multiprocessing workers
    (torch shares returned tensors between processes via file descriptors —
    see the traceback above); prevent it by having workers return batches as
    dictionaries of plain numpy arrays instead of torch tensors
In [5]:
train.tsv.df[0]
Out[5]:
195435     chr10
195436     chr10
195437     chr10
           ...  
2208235     chr7
2208236     chr7
2208237     chr7
Name: 0, Length: 1514227, dtype: category
Categories (13, object): [chr10, chr11, chr12, chr13, ..., chr19, chr5, chr6, chr7]
In [1]:
from basepair.data import Dataset
Using TensorFlow backend.
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/concise/utils/plot.py:115: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  min_coords = np.vstack(data.min(0) for data in polygons_data).min(0)
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/concise/utils/plot.py:116: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
  max_coords = np.vstack(data.max(0) for data in polygons_data).max(0)
In [2]:
from basepair.datasets import *
In [10]:
import torch
In [63]:
class StrandedProfile(Dataset):

    def __init__(self, ds,
                 peak_width=200,
                 seq_width=None,
                 incl_chromosomes=None,
                 excl_chromosomes=None,
                 intervals_file=None,
                 shuffle=True, target_transformer=None):
        """Dataset yielding the one-hot input sequence for each interval in
        a labelled tsv file (and, once re-enabled, the stranded bigWig cut
        profiles around each interval).

        Args:
          ds (basepair.src.schemas.DataSpec or str): data specification
            containing the fasta file, bed files and bigWig file paths,
            or a path to a yaml file it can be loaded from
          peak_width (int): width to which each interval is resized when
            extracting the target profile
          seq_width (int or None): width of the extracted input sequence;
            defaults to `peak_width` when None
          incl_chromosomes (list of str or None): restrict intervals to
            these chromosomes (forwarded to TsvReader)
          excl_chromosomes (list of str or None): drop intervals on these
            chromosomes (forwarded to TsvReader)
          intervals_file (str): tsv file of labelled intervals read by TsvReader
          shuffle (bool): if True, shuffle the interval rows once at init
          target_transformer: trained transformer for the targets
            # NOTE(review): stored but not used in __getitem__ currently
        """
        if isinstance(ds, str):
            self.ds = DataSpec.load(ds)
        else:
            self.ds = ds
        self.peak_width = peak_width
        if seq_width is None:
            self.seq_width = peak_width
        else:
            self.seq_width = seq_width
        self.shuffle = shuffle
        self.intervals_file = intervals_file
        self.incl_chromosomes = incl_chromosomes
        self.excl_chromosomes = excl_chromosomes
        self.target_transformer = target_transformer
        # Extractors are created lazily on first __getitem__ call so that
        # each DataLoader worker process opens its own file handles.
        self.fasta_extractor = None
        self.bw_extractors = None
        
        # Load chromosome lengths from the fasta index, then release the handle.
        fa = FastaFile(self.ds.fasta_file)
        self.chrom_lens = {name: l for name, l in zip(fa.references, fa.lengths)}
        del fa

        self.tsv = TsvReader(self.intervals_file,
                         num_chr=False,
                         label_dtype=int,
                         mask_ambigous=-1,
                         incl_chromosomes=incl_chromosomes,
                         excl_chromosomes=excl_chromosomes,
                         )
        self.dfm = self.tsv.df  # use the data-frame from tsv
            
        # self.dfmo = {"chrom": self.dfm}
        
        if self.shuffle:
            # Shuffle once; __getitem__ uses positional .iat, so the
            # permuted (non-reset) index is harmless.
            self.dfm = self.dfm.sample(frac=1)

    def __len__(self):
        # Number of intervals in the (possibly filtered) tsv.
        return len(self.dfm)
    
    def get_targets(self):
        """Return the label matrix parsed from `intervals_file`.

        Raises:
          AssertionError: if no intervals_file was provided.
        """
        assert self.intervals_file is not None
        return self.tsv.get_targets()

    def __getitem__(self, idx):
        if self.fasta_extractor is None:
            # First call in this process: open one pos/neg bigWig extractor
            # pair per task plus the fasta extractor.
            self.bw_extractors = {task: [BigwigExtractor(task_spec.pos_counts),
                                         BigwigExtractor(task_spec.neg_counts)]
                                  for task, task_spec in self.ds.task_specs.items()}
            self.fasta_extractor = FastaExtractor(self.ds.fasta_file)

            # Load the bias model if available
        interval = Interval(self.dfm.iat[idx, 0],  # chrom
                            self.dfm.iat[idx, 1],  # start
                            self.dfm.iat[idx, 2])  # end
        # Resize around the interval centre: one width for the targets,
        # one (typically wider) for the input sequence.
        target_interval = resize_interval(deepcopy(interval), self.peak_width)
        seq_interval = resize_interval(deepcopy(interval), self.seq_width)
        # task = self.dfm.iat[idx, 3]  # task
        # TODO - add data augmentation

        sequence = self.fasta_extractor([seq_interval])[0]
        # NOTE(review): profile extraction is temporarily disabled for
        # benchmarking — `cuts` and `task` below are placeholder values.
#         cuts = {f"profile/{task}": run_extractors(self.bw_extractors[task],
#                                                   [target_interval],
#                                                   ignore_strand=spec.ignore_strand)[0]
#                 for task, spec in self.ds.task_specs.items()}
        cuts = {}
        task = ''
        return {"inputs": sequence,
                "targets": cuts,
                "metadata": {"range": GenomicRanges(target_interval.chrom, 
                                                    target_interval.start, 
                                                    target_interval.stop,
                                                    idx),
                             "interval_from_task": task}}
In [64]:
class Ds(Dataset):

    def __init__(self, ds, seq_width=1000, intervals_file=None):
        """Minimal benchmarking dataset.

        Loads the interval tsv, but __getitem__ returns a constant tensor —
        used to isolate DataLoader / multiprocessing overhead from the actual
        fasta/bigWig extraction (the real implementation is kept below,
        commented out).

        Args:
          ds (basepair.src.schemas.DataSpec or str): data specification with
            the fasta/bigWig paths, or a path to a yaml file to load it from
          seq_width (int): width the intervals would be resized to once
            sequence extraction is re-enabled
          intervals_file (str): tsv file of labelled intervals read by TsvReader
        """
        if isinstance(ds, str):
            self.ds = DataSpec.load(ds)
        else:
            self.ds = ds
        # Bug fix: keep the path on the instance — get_targets() asserts on
        # self.intervals_file, which previously was never assigned and raised
        # AttributeError (cf. StrandedProfile.__init__, which does store it).
        self.intervals_file = intervals_file
        self.tsv = TsvReader(intervals_file,
                             num_chr=False,
                             label_dtype=int,
                             mask_ambigous=-1)
        self.seq_width = seq_width
        self.dfm = self.tsv.df  # use the data-frame from tsv
        # The chrom column may be categorical; cast to plain str for Interval().
        self.dfm[0] = self.dfm[0].astype(str)
        self.fasta_extractor = None  # created lazily per worker process

    def __len__(self):
        # Number of intervals in the tsv.
        return len(self.dfm)

    def get_targets(self):
        """Return the label matrix parsed from `intervals_file`.

        Raises:
          AssertionError: if no intervals_file was provided.
        """
        assert self.intervals_file is not None
        return self.tsv.get_targets()

    def __getitem__(self, idx):
        # Benchmarking stub: constant tensor, no file access.
        return torch.ones((10000, 10))
        # return np.ones((10000, 10))
#         if self.fasta_extractor is None:
#             self.fasta_extractor = FastaExtractor(self.ds.fasta_file)
#         interval = Interval(self.dfm.iat[idx, 0],  # chrom
#                             self.dfm.iat[idx, 1],  # start
#                             self.dfm.iat[idx, 2])  # end
#         seq_interval = resize_interval(deepcopy(interval), self.seq_width)
#         sequence = self.fasta_extractor([seq_interval])[0]
#         # sequence = ''
#         return {"input": sequence,
#                 "s2": sequence,
#                 "metadata": {"range": GenomicRanges(interval.chrom, 
#                                                     interval.start, 
#                                                     interval.stop,
#                                                     idx)}}
In [65]:
from torch.utils.data import DataLoader
In [66]:
%tqdm_restart

In [67]:
import torch
In [68]:
intervals_file = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/genomewide/oct-sox-nanog-klf/1kb.osnk.tsv.gz'
dataspec = '/users/avsec/workspace/basepair/data/processed/chipnexus/exp/models/oct-sox-nanog-klf/dataspec.yml'
In [76]:
train = StrandedProfile(dataspec, 10000, intervals_file=intervals_file)
it = train.batch_iter(batch_size=32, num_workers=12)
In [70]:
from kipoi.data_utils import numpy_collate
In [71]:
dl = DataLoader(train, num_workers=12, batch_size=32)
it = iter(dl)
In [75]:
next(it)
Out[75]:
{'inputs': tensor([[[1., 0., 0., 0.],
          [0., 1., 0., 0.],
          [0., 0., 0., 1.],
          ...,
          [1., 0., 0., 0.],
          [1., 0., 0., 0.],
          [0., 0., 1., 0.]],
 
         [[1., 0., 0., 0.],
          [0., 1., 0., 0.],
          [1., 0., 0., 0.],
          ...,
          [0., 0., 1., 0.],
          [0., 0., 1., 0.],
          [1., 0., 0., 0.]],
 
         [[1., 0., 0., 0.],
          [1., 0., 0., 0.],
          [0., 1., 0., 0.],
          ...,
          [0., 0., 1., 0.],
          [0., 1., 0., 0.],
          [0., 0., 0., 1.]],
 
         ...,
 
         [[0., 0., 0., 1.],
          [0., 0., 0., 1.],
          [0., 1., 0., 0.],
          ...,
          [0., 0., 0., 1.],
          [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
 
         [[0., 1., 0., 0.],
          [1., 0., 0., 0.],
          [1., 0., 0., 0.],
          ...,
          [0., 0., 0., 1.],
          [0., 0., 0., 1.],
          [1., 0., 0., 0.]],
 
         [[0., 1., 0., 0.],
          [1., 0., 0., 0.],
          [1., 0., 0., 0.],
          ...,
          [0., 0., 0., 1.],
          [1., 0., 0., 0.],
          [0., 0., 0., 1.]]]),
 'targets': {},
 'metadata': {'range': {'chr': ['chr17',
    'chr3',
    'chr6',
    'chr1',
    'chr10',
    'chr1',
    'chr10',
    'chr9',
    'chr7',
    'chr2',
    'chrY',
    'chr18',
    'chr9',
    'chrY',
    'chr6',
    'chrY',
    'chr11',
    'chr2',
    'chrY',
    'chr1',
    'chr9',
    'chr10',
    'chr10',
    'chrY',
    'chr10',
    'chr5',
    'chr1',
    'chr4',
    'chr10',
    'chr13',
    'chr13',
    'chrY'],
   'start': tensor([ 56325500, 128308500,  17817500,  94541500,  26313500, 160664500,
           103891500,  13377500,  29069500,  57283500,  50638500,  16560500,
            34656500,  47534500,  16556500,  29475500,  59287500, 156045500,
            18834500,  62269500, 113084500,   3397500, 117583500,  45514500,
            65664500,  69164500, 133235500,  34073500, 107823500,  99026500,
            79919500,  88448500]),
   'end': tensor([ 56335500, 128318500,  17827500,  94551500,  26323500, 160674500,
           103901500,  13387500,  29079500,  57293500,  50648500,  16570500,
            34666500,  47544500,  16566500,  29485500,  59297500, 156055500,
            18844500,  62279500, 113094500,   3407500, 117593500,  45524500,
            65674500,  69174500, 133245500,  34083500, 107833500,  99036500,
            79929500,  88458500]),
   'id': tensor([320032, 320033, 320034, 320035, 320036, 320037, 320038, 320039, 320040,
           320041, 320042, 320043, 320044, 320045, 320046, 320047, 320048, 320049,
           320050, 320051, 320052, 320053, 320054, 320055, 320056, 320057, 320058,
           320059, 320060, 320061, 320062, 320063]),
   'strand': ['*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*',
    '*']},
  'interval_from_task': ['',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '',
   '']}}
In [28]:
np.array("asd")
Out[28]:
array('asd', dtype='<U3')
In [73]:
from basepair.data import to_numpy
In [78]:
for i in tqdm(range(10000)):
    # if i % 1000 == 0:
    #     gc.collect()
    batch = next(it)
    # o = to_numpybatch)
    # a= batch['inputs'].numpy()
    # del batch
 51%|█████▏    | 5131/10000 [00:25<00:27, 179.65it/s]
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-78-15125a4bb5f9> in <module>
      2     # if i % 1000 == 0:
      3     #     gc.collect()
----> 4     batch = next(it)
      5     # o = to_numpybatch)
      6     # a= batch['inputs'].numpy()

~/workspace/basepair/basepair/data.py in <genexpr>(.0)
    104                                   drop_last=drop_last,
    105                                   **kwargs)
--> 106         return (to_numpy(batch) for batch in dl)
    107 
    108 

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
    629         while True:
    630             assert (not self.shutdown and self.batches_outstanding > 0)
--> 631             idx, batch = self._get_batch()
    632             self.batches_outstanding -= 1
    633             if idx != self.rcvd_idx:

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _get_batch(self)
    608             # need to call `.task_done()` because we don't use `.join()`.
    609         else:
--> 610             return self.data_queue.get()
    611 
    612     def __next__(self):

~/bin/anaconda3/envs/chipnexus/lib/python3.6/multiprocessing/queues.py in get(self, block, timeout)
    111                 self._rlock.release()
    112         # unserialize the data after having released the lock
--> 113         return _ForkingPickler.loads(res)
    114 
    115     def qsize(self):

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/torch/multiprocessing/reductions.py in rebuild_storage_fd(cls, df, size)
    254         fd = multiprocessing.reduction.rebuild_handle(df)
    255     else:
--> 256         fd = df.detach()
    257     try:
    258         storage = storage_from_cache(cls, fd_id(fd))

~/bin/anaconda3/envs/chipnexus/lib/python3.6/multiprocessing/resource_sharer.py in detach(self)
     55         def detach(self):
     56             '''Get the fd.  This should only be called once.'''
---> 57             with _resource_sharer.get_connection(self._id) as conn:
     58                 return reduction.recv_handle(conn)
     59 

~/bin/anaconda3/envs/chipnexus/lib/python3.6/multiprocessing/resource_sharer.py in get_connection(ident)
     85         from .connection import Client
     86         address, key = ident
---> 87         c = Client(address, authkey=process.current_process().authkey)
     88         c.send((key, os.getpid()))
     89         return c

~/bin/anaconda3/envs/chipnexus/lib/python3.6/multiprocessing/connection.py in Client(address, family, authkey)
    485         c = PipeClient(address)
    486     else:
--> 487         c = SocketClient(address)
    488 
    489     if authkey is not None and not isinstance(authkey, bytes):

~/bin/anaconda3/envs/chipnexus/lib/python3.6/multiprocessing/connection.py in SocketClient(address)
    610     '''
    611     family = address_type(address)
--> 612     with socket.socket( getattr(socket, family) ) as s:
    613         s.setblocking(True)
    614         s.connect(address)

~/bin/anaconda3/envs/chipnexus/lib/python3.6/socket.py in __init__(self, family, type, proto, fileno)
    142         # constructor of _socket.socket converts the given argument to an
    143         # integer automatically.
--> 144         _socket.socket.__init__(self, family, type, proto, fileno)
    145         self._io_refs = 0
    146         self._closed = False

KeyboardInterrupt: 
 51%|█████▏    | 5131/10000 [00:40<00:27, 179.65it/s]
In [94]:
del train
In [94] used 0.0000 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 946.27 MiB
In [97]:
it = train.batch_train_iter(batch_size=32, num_workers=6)
In [97] used 0.0000 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 1174.79 MiB
In [90]:
del it
In [90] used 0.0000 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 946.27 MiB
In [98]:
for i in tqdm(range(1000)):
    next(it)
100%|██████████| 1000/1000 [00:14<00:00, 68.37it/s]
In [98] used 0.0078 MiB RAM in 14.74s, peaked 0.00 MiB above current, total RAM usage 1174.80 MiB
In [84]:
import torch
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-84-eb42ca6e4af3> in <module>
----> 1 import torch

ModuleNotFoundError: No module named 'torch'
In [84] used -0.2500 MiB RAM in 0.13s, peaked 0.25 MiB above current, total RAM usage 946.27 MiB
In [ ]: