Signature:
get_gw_StrandedProfile_datasets(
['dataspec', 'intervals_file=None', 'peak_width=200', 'seq_width=None', 'shuffle=True', 'target_transformer=<basepair.preproc.AppendCounts object at 0x7f7fd3702c88>', 'include_metadata=False', "valid_chr=['chr2', 'chr3', 'chr4']", "test_chr=['chr1', 'chr8', 'chr9']", 'exclude_chr=[]', 'vmtouch=True'],
)
Docstring: <no docstring>
Source:
@gin.configurable
def get_gw_StrandedProfile_datasets(dataspec,
                                    intervals_file=None,
                                    peak_width=200,
                                    seq_width=None,
                                    shuffle=True,
                                    target_transformer=AppendCounts(),
                                    include_metadata=False,
                                    valid_chr=['chr2', 'chr3', 'chr4'],
                                    test_chr=['chr1', 'chr8', 'chr9'],
                                    exclude_chr=[],
                                    vmtouch=True):
    """Build the genome-wide training dataset and its evaluation datasets.

    Args:
      dataspec: anything accepted by `DataSpec.load` (presumably a path to a
        dataspec file -- confirm against `DataSpec.load`).
      intervals_file: forwarded as `intervals_file` to the training and the
        'valid-genome-wide' datasets. The '*-peaks' datasets always get
        `intervals_file=None` (presumably making `StrandedProfile` fall back
        to the peaks listed in the dataspec -- confirm).
      peak_width: forwarded positionally to every `StrandedProfile`.
      seq_width: forwarded to every `StrandedProfile`.
      shuffle: forwarded to every `StrandedProfile`, including the
        validation ones.
      target_transformer: forwarded to every `StrandedProfile`.
      include_metadata: forwarded to every `StrandedProfile`.
      valid_chr: chromosomes used for the validation datasets.
      test_chr: chromosomes held out for testing; only excluded from the
        training datasets here.
      exclude_chr: additional chromosomes excluded from the training
        datasets. Must not overlap `valid_chr` or `test_chr`.
      vmtouch: if true, call `dataspec.touch_all_files()` before building
        the datasets.

    Returns:
      A tuple `(train_dataset, valid_datasets)` where `valid_datasets` is a
      list of tuples: `('valid-genome-wide', dataset, metric)`,
      `('valid-peaks', dataset)` and `('train-peaks', dataset)`. The
      two-element entries carry no metric so the default one is used.

    Raises:
      ValueError: if a chromosome from `valid_chr` or `test_chr` is also
        listed in `exclude_chr`.
    """
    from basepair.metrics import BPNetMetric, pearson_spearman

    # Validation and test chromosomes must not also be listed as excluded.
    # Explicit raise instead of `assert` so the check survives `python -O`.
    for arg_name, chromosomes in (('valid_chr', valid_chr),
                                  ('test_chr', test_chr)):
        for chrom in chromosomes:
            if chrom in exclude_chr:
                raise ValueError(
                    "{} chromosome {!r} must not be in exclude_chr"
                    .format(arg_name, chrom))

    dataspec = DataSpec.load(dataspec)
    if vmtouch:
        # use vmtouch to load all files to memory
        dataspec.touch_all_files()

    tasks = list(dataspec.task_specs)
    # Genome-wide validation is scored on counts only (no profile metric).
    nonprofile_metric = BPNetMetric(tasks=tasks,
                                    count_metric=pearson_spearman,
                                    profile_metric=None)

    def _make_dataset(intervals, **chromosome_filter):
        # All datasets share every argument except the intervals file and
        # the chromosome include/exclude filter.
        return StrandedProfile(dataspec, peak_width,
                               seq_width=seq_width,
                               intervals_file=intervals,
                               include_metadata=include_metadata,
                               shuffle=shuffle,
                               target_transformer=target_transformer,
                               **chromosome_filter)

    # The exclusion list is rebuilt for each training dataset so that the
    # two datasets never share the same list object.
    train = _make_dataset(
        intervals_file,
        excl_chromosomes=valid_chr + test_chr + exclude_chr)
    valid_genome_wide = _make_dataset(intervals_file,
                                      incl_chromosomes=valid_chr)
    valid_peaks = _make_dataset(None, incl_chromosomes=valid_chr)
    train_peaks = _make_dataset(
        None,
        excl_chromosomes=valid_chr + test_chr + exclude_chr)

    return (train,
            [('valid-genome-wide', valid_genome_wide, nonprofile_metric),
             # use the default metric for the peak sets
             ('valid-peaks', valid_peaks),
             ('train-peaks', train_peaks),
             ])
File: ~/workspace/basepair/basepair/datasets.py
Type: function