#(c) 2016 by Authors
#This file is a part of ABruijn program.
#Released under the BSD license (see LICENSE file)

"""
Main logic of the package
"""

from __future__ import print_function
from __future__ import absolute_import
import sys
import os
import logging
import argparse
import json
import shutil
import subprocess

import flye.polishing.alignment as aln
import flye.polishing.polish as pol
import flye.polishing.consensus as cons
import flye.assembly.assemble as asm
import flye.assembly.repeat_graph as repeat
import flye.assembly.scaffolder as scf
from flye.__version__ import __version__
import flye.config.py_cfg as cfg
from flye.config.configurator import setup_params
from flye.utils.bytes2human import human2bytes, bytes2human
from flye.utils.sam_parser import AlignmentException
import flye.utils.fasta_parser as fp
import flye.short_plasmids.plasmids as plas
import flye.trestle.trestle as tres
import flye.trestle.graph_resolver as tres_graph
from flye.repeat_graph.repeat_graph import RepeatGraph
from flye.six.moves import range

logger = logging.getLogger()

class ResumeException(Exception):
    pass

class Job(object):
    """
    Describes an abstract list of jobs with persistent
    status that can be resumed
    """
    run_params = {"stage_name" : ""}

    def __init__(self):
        self.name = None
        self.args = None
        self.work_dir = None
        self.out_files = {}
        self.log_file = None

    def run(self):
        logger.info(">>>STAGE: %s", self.name)

    def save(self, save_file):
        Job.run_params["stage_name"] = self.name

        with open(save_file, "w") as f:
            json.dump(Job.run_params, f)

    def load(self, save_file):
        with open(save_file, "r") as f:
            data = json.load(f)
            if (not "pipeline_version" in data or
                    data["pipeline_version"] != cfg.vals["pipeline_version"]):
                raise ResumeException("Inconsistent pipeline version")

            Job.run_params = data

    def completed(self, save_file):
        with open(save_file, "r") as f:
            dummy_data = json.load(f)

            for file_path in self.out_files.values():
                if not os.path.exists(file_path):
                    return False

            return True


class JobConfigure(Job):
    def __init__(self, args, work_dir):
        super(JobConfigure, self).__init__()
        self.args = args
        self.work_dir = work_dir
        self.name = "configure"

    def run(self):
        super(JobConfigure, self).run()
        params = setup_params(self.args)
        Job.run_params = params


class JobAssembly(Job):
    def __init__(self, args, work_dir, log_file):
        super(JobAssembly, self).__init__()
        #self.out_assembly = out_assembly
        self.args = args
        self.work_dir = work_dir
        self.log_file = log_file

        self.name = "assembly"
        self.assembly_dir = os.path.join(self.work_dir, "00-assembly")
        self.assembly_filename = os.path.join(self.assembly_dir,
                                              "draft_assembly.fasta")
        self.out_files["assembly"] = self.assembly_filename

    def run(self):
        super(JobAssembly, self).run()
        if not os.path.isdir(self.assembly_dir):
            os.mkdir(self.assembly_dir)
        asm.assemble(self.args, Job.run_params, self.assembly_filename,
                     self.log_file, self.args.asm_config, )
        if os.path.getsize(self.assembly_filename) == 0:
            raise asm.AssembleException("No disjointigs were assembled - "
                                        "please check if the read type and genome "
                                        "size parameters are correct")
        asm_len, asm_n50 = scf.short_statistics(self.assembly_filename)
        logger.debug("Disjointigs length: %d, N50: %d", asm_len, asm_n50)


class JobShortPlasmidsAssembly(Job):
    def __init__(self, args, work_dir, contigs_file, repeat_graph,
                 graph_edges):
        super(JobShortPlasmidsAssembly, self).__init__()

        self.args = args
        self.work_dir = work_dir
        self.work_dir = os.path.join(work_dir, "22-plasmids")
        self.contigs_path = contigs_file
        self.repeat_graph = repeat_graph
        self.graph_edges = graph_edges

        self.name = "plasmids"
        self.out_files["repeat_graph"] = os.path.join(self.work_dir,
                                                      "repeat_graph_dump")
        self.out_files["repeat_graph_edges"] = \
            os.path.join(self.work_dir, "repeat_graph_edges.fasta")


    def run(self):
        super(JobShortPlasmidsAssembly, self).run()
        logger.info("Recovering short unassembled sequences")
        if not os.path.isdir(self.work_dir):
            os.mkdir(self.work_dir)
        plasmids = plas.assemble_short_plasmids(self.args, self.work_dir,
                                                self.contigs_path)

        #updating repeat graph
        repeat_graph = RepeatGraph(fp.read_sequence_dict(self.graph_edges))
        repeat_graph.load_from_file(self.repeat_graph)
        plas.update_graph(repeat_graph, plasmids)
        repeat_graph.dump_to_file(self.out_files["repeat_graph"])
        fp.write_fasta_dict(repeat_graph.edges_fasta,
                            self.out_files["repeat_graph_edges"])


class JobRepeat(Job):
    def __init__(self, args, work_dir, log_file, disjointigs):
        super(JobRepeat, self).__init__()

        self.args = args
        self.disjointigs = disjointigs
        self.log_file = log_file
        self.name = "repeat"

        self.work_dir = os.path.join(work_dir, "20-repeat")
        self.out_files["repeat_graph"] = os.path.join(self.work_dir,
                                                      "repeat_graph_dump")
        self.out_files["repeat_graph_edges"] = os.path.join(self.work_dir,
                                                        "repeat_graph_edges.fasta")
        self.out_files["reads_alignment"] = os.path.join(self.work_dir,
                                                         "read_alignment_dump")
        #self.out_files["repeats_dump"] = os.path.join(self.work_dir,
        #                                              "repeats_dump")

    def run(self):
        super(JobRepeat, self).run()
        if not os.path.isdir(self.work_dir):
            os.mkdir(self.work_dir)
        logger.info("Building and resolving repeat graph")
        repeat.analyse_repeats(self.args, Job.run_params, self.disjointigs,
                               self.work_dir, self.log_file,
                               self.args.asm_config)


class JobContigger(Job):
    def __init__(self, args, work_dir, log_file, repeat_graph_edges,
                 repeat_graph, reads_alignment):
        super(JobContigger, self).__init__()

        self.args = args
        self.repeat_graph_edges = repeat_graph_edges
        self.repeat_graph = repeat_graph
        self.reads_alignment = reads_alignment
        self.log_file = log_file
        self.name = "contigger"

        self.work_dir = os.path.join(work_dir, "30-contigger")
        self.out_files["contigs"] = os.path.join(self.work_dir,
                                                 "contigs.fasta")
        self.out_files["assembly_graph"] = os.path.join(self.work_dir,
                                                        "graph_final.gv")
        self.out_files["edges_sequences"] = os.path.join(self.work_dir,
                                                         "graph_final.fasta")
        self.out_files["gfa_graph"] = os.path.join(self.work_dir,
                                                   "graph_final.gfa")
        self.out_files["stats"] = os.path.join(self.work_dir, "contigs_stats.txt")
        self.out_files["scaffold_links"] = os.path.join(self.work_dir,
                                                        "scaffolds_links.txt")

    def run(self):
        super(JobContigger, self).run()
        if not os.path.isdir(self.work_dir):
            os.mkdir(self.work_dir)
        logger.info("Generating contigs")
        repeat.generate_contigs(self.args, Job.run_params, self.repeat_graph_edges,
                                self.work_dir, self.log_file, self.args.asm_config,
                                self.repeat_graph, self.reads_alignment)


def _list_files(startpath, maxlevel=1):
    for root, _, files in os.walk(startpath):
        level = root.replace(startpath, "").count(os.sep)
        if level > maxlevel:
            continue
        indent = " " * 4 * (level)
        logger.debug(indent + os.path.basename(root) + "/")
        subindent = " " * 4 * (level + 1)
        for f in files:
            fsize = bytes2human(os.path.getsize(os.path.join(root, f)))
            logger.debug("%s%-12s%s", subindent, fsize, f)


class JobFinalize(Job):
    def __init__(self, args, work_dir, log_file,
                 contigs_file, graph_file, repeat_stats,
                 polished_stats, polished_gfa, scaffold_links):
        super(JobFinalize, self).__init__()

        self.args = args
        self.log_file = log_file
        self.name = "finalize"
        self.contigs_file = contigs_file
        self.graph_file = graph_file
        self.repeat_stats = repeat_stats
        self.polished_stats = polished_stats
        self.scaffold_links = scaffold_links
        self.polished_gfa = polished_gfa
        self.work_dir = work_dir

        #self.out_files["contigs"] = os.path.join(work_dir, "contigs.fasta")
        #self.out_files["scaffolds"] = os.path.join(work_dir, "scaffolds.fasta")
        self.out_files["assembly"] = os.path.join(work_dir, "assembly.fasta")
        self.out_files["stats"] = os.path.join(work_dir, "assembly_info.txt")
        self.out_files["graph"] = os.path.join(work_dir, "assembly_graph.gv")
        self.out_files["gfa"] = os.path.join(work_dir, "assembly_graph.gfa")

    def run(self):
        super(JobFinalize, self).run()
        #shutil.copy2(self.contigs_file, self.out_files["contigs"])
        shutil.copy2(self.graph_file, self.out_files["graph"])
        shutil.copy2(self.polished_gfa, self.out_files["gfa"])

        scaffolds = scf.generate_scaffolds(self.contigs_file, self.scaffold_links,
                                           self.out_files["assembly"])

        #create the scaffolds.fasta symlink for backward compatability
        #try:
        #    if os.path.lexists(self.out_files["scaffolds"]):
        #        os.remove(self.out_files["scaffolds"])
        #    relative_link = os.path.relpath(self.out_files["assembly"],
        #                                    self.work_dir)
        #    os.symlink(relative_link, self.out_files["scaffolds"])
        #except OSError as e:
        #    logger.debug(e)

        logger.debug("---Output dir contents:----")
        _list_files(os.path.abspath(self.args.out_dir))
        logger.debug("--------------------------")
        scf.generate_stats(self.repeat_stats, self.polished_stats, scaffolds,
                           self.out_files["stats"])
        logger.info("Final assembly: %s", self.out_files["assembly"])


class JobConsensus(Job):
    def __init__(self, args, work_dir, in_contigs):
        super(JobConsensus, self).__init__()

        self.args = args
        self.in_contigs = in_contigs
        self.consensus_dir = os.path.join(work_dir, "10-consensus")
        self.out_consensus = os.path.join(self.consensus_dir, "consensus.fasta")
        self.name = "consensus"
        self.out_files["consensus"] = self.out_consensus

    def run(self):
        super(JobConsensus, self).run()
        if not os.path.isdir(self.consensus_dir):
            os.mkdir(self.consensus_dir)

        #split into 1Mb chunks to reduce RAM usage
        CHUNK_SIZE = 1000000
        chunks_file = os.path.join(self.consensus_dir, "chunks.fasta")
        chunks = aln.split_into_chunks(fp.read_sequence_dict(self.in_contigs),
                                       CHUNK_SIZE)
        fp.write_fasta_dict(chunks, chunks_file)

        logger.info("Running Minimap2")
        out_alignment = os.path.join(self.consensus_dir, "minimap.sam")
        aln.make_alignment(chunks_file, self.args.reads, self.args.threads,
                           self.consensus_dir, self.args.platform, out_alignment,
                           reference_mode=True, sam_output=True)

        contigs_info = aln.get_contigs_info(chunks_file)
        logger.info("Computing consensus")
        consensus_fasta = cons.get_consensus(out_alignment, chunks_file,
                                             contigs_info, self.args.threads,
                                             self.args.platform)

        #merge chunks back into single sequences
        merged_fasta = aln.merge_chunks(consensus_fasta)
        fp.write_fasta_dict(merged_fasta, self.out_consensus)
        os.remove(chunks_file)
        os.remove(out_alignment)


class JobPolishing(Job):
    def __init__(self, args, work_dir, log_file, in_contigs, in_graph_edges,
                 in_graph_gfa):
        super(JobPolishing, self).__init__()

        self.args = args
        self.log_file = log_file
        self.in_contigs = in_contigs
        self.in_graph_edges = in_graph_edges
        self.in_graph_gfa = in_graph_gfa
        self.polishing_dir = os.path.join(work_dir, "40-polishing")

        self.name = "polishing"
        final_contigs = os.path.join(self.polishing_dir,
                                     "filtered_contigs.fasta")
        self.out_files["contigs"] = final_contigs
        self.out_files["stats"] = os.path.join(self.polishing_dir,
                                               "filtered_stats.txt")
        self.out_files["polished_gfa"] = os.path.join(self.polishing_dir,
                                                      "polished_edges.gfa")

    def run(self):
        super(JobPolishing, self).run()
        if not os.path.isdir(self.polishing_dir):
            os.mkdir(self.polishing_dir)

        contigs, stats = \
            pol.polish(self.in_contigs, self.args.reads, self.polishing_dir,
                       self.args.num_iters, self.args.threads, self.args.platform,
                       output_progress=True)
        #contigs = os.path.join(self.polishing_dir, "polished_1.fasta")
        #stats = os.path.join(self.polishing_dir, "contigs_stats.txt")
        pol.filter_by_coverage(self.args, stats, contigs,
                               self.out_files["stats"], self.out_files["contigs"])
        pol.generate_polished_edges(self.in_graph_edges, self.in_graph_gfa,
                                    self.out_files["contigs"],
                                    self.polishing_dir, self.args.platform,
                                    self.args.threads)
        os.remove(contigs)


class JobTrestle(Job):
    def __init__(self, args, work_dir, log_file, repeat_graph,
                 graph_edges, reads_alignment_file):
        super(JobTrestle, self).__init__()

        self.args = args
        self.work_dir = os.path.join(work_dir, "21-trestle")
        self.log_file = log_file
        #self.repeats_dump = repeats_dump
        self.graph_edges = graph_edges
        self.repeat_graph = repeat_graph
        self.reads_alignment_file = reads_alignment_file

        self.name = "trestle"
        self.out_files["repeat_graph"] = os.path.join(self.work_dir,
                                                      "repeat_graph_dump")
        self.out_files["repeat_graph_edges"] = \
            os.path.join(self.work_dir, "repeat_graph_edges.fasta")

    def run(self):
        super(JobTrestle, self).run()

        if not os.path.isdir(self.work_dir):
            os.mkdir(self.work_dir)

        summary_file = os.path.join(self.work_dir, "trestle_summary.txt")
        resolved_repeats_seqs = os.path.join(self.work_dir,
                                             "resolved_copies.fasta")
        repeat_graph = RepeatGraph(fp.read_sequence_dict(self.graph_edges))
        repeat_graph.load_from_file(self.repeat_graph)

        try:
            repeats_info = tres_graph \
                .get_simple_repeats(repeat_graph, self.reads_alignment_file,
                                    fp.read_sequence_dict(self.graph_edges))
            tres_graph.dump_repeats(repeats_info,
                                    os.path.join(self.work_dir, "repeats_dump"))

            tres.resolve_repeats(self.args, self.work_dir, repeats_info,
                                 summary_file, resolved_repeats_seqs)
            tres_graph.apply_changes(repeat_graph, summary_file,
                                     fp.read_sequence_dict(resolved_repeats_seqs))
        except KeyboardInterrupt as e:
            raise
        except Exception as e:
            logger.warning("Caught unhandled exception: " + str(e))
            logger.warning("Continuing to the next pipeline stage. "
                           "Please submit a bug report along with the full log file")

        repeat_graph.dump_to_file(self.out_files["repeat_graph"])
        fp.write_fasta_dict(repeat_graph.edges_fasta,
                            self.out_files["repeat_graph_edges"])


def _create_job_list(args, work_dir, log_file):
    """
    Build pipeline as a list of consecutive jobs
    """
    jobs = []

    #Run configuration
    jobs.append(JobConfigure(args, work_dir))

    #Assembly job
    jobs.append(JobAssembly(args, work_dir, log_file))
    disjointigs = jobs[-1].out_files["assembly"]

    #Consensus
    if args.read_type != "subasm":
        jobs.append(JobConsensus(args, work_dir, disjointigs))
        disjointigs = jobs[-1].out_files["consensus"]

    #Repeat analysis
    jobs.append(JobRepeat(args, work_dir, log_file, disjointigs))
    #repeats_dump = jobs[-1].out_files["repeats_dump"]
    repeat_graph_edges = jobs[-1].out_files["repeat_graph_edges"]
    repeat_graph = jobs[-1].out_files["repeat_graph"]
    reads_alignment = jobs[-1].out_files["reads_alignment"]

    #Trestle: Resolve Unbridged Repeats
    if not args.no_trestle and not args.meta and args.read_type == "raw":
        jobs.append(JobTrestle(args, work_dir, log_file,
                    repeat_graph, repeat_graph_edges,
                    reads_alignment))
        repeat_graph_edges = jobs[-1].out_files["repeat_graph_edges"]
        repeat_graph = jobs[-1].out_files["repeat_graph"]

    #Short plasmids
    if args.plasmids:
        jobs.append(JobShortPlasmidsAssembly(args, work_dir, disjointigs,
                                             repeat_graph, repeat_graph_edges))
        repeat_graph_edges = jobs[-1].out_files["repeat_graph_edges"]
        repeat_graph = jobs[-1].out_files["repeat_graph"]

    #Contigger
    jobs.append(JobContigger(args, work_dir, log_file, repeat_graph_edges,
                             repeat_graph, reads_alignment))
    raw_contigs = jobs[-1].out_files["contigs"]
    scaffold_links = jobs[-1].out_files["scaffold_links"]
    graph_file = jobs[-1].out_files["assembly_graph"]
    gfa_file = jobs[-1].out_files["gfa_graph"]
    final_graph_edges = jobs[-1].out_files["edges_sequences"]
    repeat_stats = jobs[-1].out_files["stats"]

    #Polishing
    contigs_file = raw_contigs
    polished_stats = None
    polished_gfa = gfa_file
    if args.num_iters > 0:
        jobs.append(JobPolishing(args, work_dir, log_file, raw_contigs,
                                 final_graph_edges, gfa_file))
        contigs_file = jobs[-1].out_files["contigs"]
        polished_stats = jobs[-1].out_files["stats"]
        polished_gfa = jobs[-1].out_files["polished_gfa"]

    #Report results
    jobs.append(JobFinalize(args, work_dir, log_file, contigs_file,
                            graph_file, repeat_stats, polished_stats,
                            polished_gfa, scaffold_links))

    return jobs


def _set_genome_size(args):
    """
    Select k-mer size based on the target genome size
    """
    if args.genome_size.isdigit():
        args.genome_size = int(args.genome_size)
    else:
        args.genome_size = human2bytes(args.genome_size.upper())


def _run_polisher_only(args):
    """
    Runs standalone polisher
    """
    logger.info("Running Flye polisher")
    logger.debug("Cmd: %s", " ".join(sys.argv))

    pol.polish(args.polish_target, args.reads, args.out_dir,
               args.num_iters, args.threads, args.platform,
               output_progress=True)


def _run(args):
    """
    Runs the pipeline
    """
    logger.info("Starting Flye " + _version())
    logger.debug("Cmd: %s", " ".join(sys.argv))
    logger.debug("Python version: " + sys.version)

    for read_file in args.reads:
        if not os.path.exists(read_file):
            raise ResumeException("Can't open " + read_file)

    save_file = os.path.join(args.out_dir, "params.json")
    jobs = _create_job_list(args, args.out_dir, args.log_file)

    if args.stop_after and not args.stop_after in [j.name for j in jobs]:
        raise ResumeException("Stop after: unkown stage '{0}'"
                                .format(args.stop_after))

    current_job = 0
    if args.resume or args.resume_from:
        if not os.path.exists(save_file):
            raise ResumeException("Can't find save file")

        logger.info("Resuming previous run")
        if args.resume_from:
            job_to_resume = args.resume_from
        else:
            job_to_resume = json.load(open(save_file, "r"))["stage_name"]

        can_resume = False
        for i in range(len(jobs)):
            if jobs[i].name == job_to_resume:
                jobs[i].load(save_file)
                current_job = i
                if not jobs[i - 1].completed(save_file):
                    raise ResumeException("Can't resume: stage '{0}' incomplete"
                                          .format(jobs[i - 1].name))
                can_resume = True
                break

        if not can_resume:
            raise ResumeException("Can't resume: stage {0} does not exist"
                                  .format(job_to_resume))

    for i in range(current_job, len(jobs)):
        jobs[i].save(save_file)
        jobs[i].run()
        if args.stop_after == jobs[i].name:
            if i + 1 < len(jobs):
                jobs[i + 1].save(save_file)
            logger.info("Pipeline stopped as requested by --stop-after")
            break


def _enable_logging(log_file, debug, overwrite):
    """
    Turns on logging, sets debug levels and assigns a log file
    """
    log_formatter = logging.Formatter("[%(asctime)s] %(name)s: %(levelname)s: "
                                      "%(message)s", "%Y-%m-%d %H:%M:%S")
    console_formatter = logging.Formatter("[%(asctime)s] %(levelname)s: "
                                          "%(message)s", "%Y-%m-%d %H:%M:%S")
    console_log = logging.StreamHandler()
    console_log.setFormatter(console_formatter)
    if not debug:
        console_log.setLevel(logging.INFO)

    if overwrite:
        open(log_file, "w").close()
    file_handler = logging.FileHandler(log_file, mode="a")
    file_handler.setFormatter(log_formatter)

    logger.setLevel(logging.DEBUG)
    logger.addHandler(console_log)
    logger.addHandler(file_handler)


def _usage():
    return ("flye (--pacbio-raw | --pacbio-corr | --nano-raw |\n"
            "\t     --nano-corr | --subassemblies) file1 [file_2 ...]\n"
            "\t     --genome-size SIZE --out-dir PATH\n"
            "\t     [--threads int] [--iterations int] [--min-overlap int]\n"
            "\t     [--meta] [--plasmids] [--no-trestle] [--polish-target]\n"
            "\t     [--debug] [--version] [--help] [--resume] \n"
            "\t     [--resume-from] [--stop-after]")


def _epilog():
    return ("Input reads can be in FASTA or FASTQ format, uncompressed\n"
            "or compressed with gz. Currently, raw and corrected reads\n"
            "from PacBio and ONT are supported. Expected error rates are\n"
            "<30% for raw and <2% for corrected reads. Additionally, the\n"
            "--subassemblies option performs a consensus assembly of multiple\n"
            "sets of high-quality contigs. You may specify multiple\n"
            "files with reads (separated by spaces). Mixing different read\n"
            "types is not yet supported. The --meta option enables the mode\n"
            "for metagenome/uneven coverage assembly.\n\n"
            "You must provide an estimate of the genome size as input,\n"
            "which is used for solid k-mers selection. Standard size\n"
            "modifiers are supported (e.g. 5m or 2.6g). In the case\n"
            "of metagenome assembly, the expected total assembly size\n"
            "should be provided.\n\n"
            "To reduce memory consumption for large genome assemblies,\n"
            "you can use a subset of the longest reads for initial disjointig\n"
            "assembly by specifying --asm-coverage option. Typically,\n"
            "30x coverage is enough to produce good disjointigs.\n\n"
            "You can separately run Flye polisher on a target sequence \n"
            "using --polish-target option.")


def _version():
    repo_root = os.path.dirname((os.path.dirname(__file__)))
    try:
        git_label = subprocess.check_output(["git", "-C", repo_root, "describe"],
                                            stderr=open(os.devnull, "w")).decode()
        commit_id = git_label.strip("\n").rsplit("-", 1)[-1]
        return __version__ + "-" + commit_id
    except (subprocess.CalledProcessError, OSError):
        pass
    return __version__ + "-release"


def main():
    def check_int_range(value, min_val, max_val, require_odd=False):
        ival = int(value)
        if ival < min_val or ival > max_val:
            raise argparse.ArgumentTypeError("value should be in the "
                            "range [{0}, {1}]".format(min_val, max_val))
        if require_odd and ival % 2 == 0:
            raise argparse.ArgumentTypeError("should be an odd number")
        return ival

    parser = argparse.ArgumentParser \
        (description="Assembly of long and error-prone reads",
         formatter_class=argparse.RawDescriptionHelpFormatter,
         usage=_usage(), epilog=_epilog())

    read_group = parser.add_mutually_exclusive_group(required=True)
    read_group.add_argument("--pacbio-raw", dest="pacbio_raw",
                        default=None, metavar="path", nargs="+",
                        help="PacBio raw reads")
    read_group.add_argument("--pacbio-corr", dest="pacbio_corrected",
                        default=None, metavar="path", nargs="+",
                        help="PacBio corrected reads")
    read_group.add_argument("--nano-raw", dest="nano_raw", nargs="+",
                        default=None, metavar="path",
                        help="ONT raw reads")
    read_group.add_argument("--nano-corr", dest="nano_corrected", nargs="+",
                        default=None, metavar="path",
                        help="ONT corrected reads")
    read_group.add_argument("--subassemblies", dest="subassemblies", nargs="+",
                        default=None, metavar="path",
                        help="high-quality contigs input")
    parser.add_argument("-g", "--genome-size", dest="genome_size",
                        metavar="size", required=False,
                        help="estimated genome size (for example, 5m or 2.6g)")
    parser.add_argument("-o", "--out-dir", dest="out_dir",
                        default=None, required=True,
                        metavar="path", help="Output directory")
    parser.add_argument("-t", "--threads", dest="threads",
                        type=lambda v: check_int_range(v, 1, 128),
                        default=1, metavar="int", help="number of parallel threads [1]")
    parser.add_argument("-i", "--iterations", dest="num_iters",
                        type=lambda v: check_int_range(v, 0, 10),
                        default=1, help="number of polishing iterations [1]",
                        metavar="int")
    parser.add_argument("-m", "--min-overlap", dest="min_overlap", metavar="int",
                        type=lambda v: check_int_range(v, 1000, 10000),
                        default=None, help="minimum overlap between reads [auto]")
    parser.add_argument("--asm-coverage", dest="asm_coverage", metavar="int",
                        default=None, help="reduced coverage for initial "
                        "disjointig assembly [not set]", type=int)
    parser.add_argument("--plasmids", action="store_true",
                        dest="plasmids", default=False,
                        help="rescue short unassembled plasmids")
    parser.add_argument("--meta", action="store_true",
                        dest="meta", default=False,
                        help="metagenome / uneven coverage mode")
    parser.add_argument("--no-trestle", action="store_true",
                        dest="no_trestle", default=False,
                        help="skip Trestle stage")
    parser.add_argument("--polish-target", dest="polish_target",
                        metavar="path", required=False,
                        help="run polisher on the target sequence")
    parser.add_argument("--resume", action="store_true",
                        dest="resume", default=False,
                        help="resume from the last completed stage")
    parser.add_argument("--resume-from", dest="resume_from", metavar="stage_name",
                        default=None, help="resume from a custom stage")
    parser.add_argument("--stop-after", dest="stop_after", metavar="stage_name",
                        default=None, help="stop after the specified stage completed")
    #parser.add_argument("--kmer-size", dest="kmer_size",
    #                    type=lambda v: check_int_range(v, 11, 31, require_odd=True),
    #                    default=None, help="kmer size (default: auto)")
    parser.add_argument("--debug", action="store_true",
                        dest="debug", default=False,
                        help="enable debug output")
    parser.add_argument("-v", "--version", action="version", version=_version())
    args = parser.parse_args()

    if not args.genome_size and not args.polish_target:
        parser.error("Genome size argument (-g/--genome-size) "
                     "is required for assembly")

    if args.pacbio_raw:
        args.reads = args.pacbio_raw
        args.platform = "pacbio"
        args.read_type = "raw"
    if args.pacbio_corrected:
        args.reads = args.pacbio_corrected
        args.platform = "pacbio"
        args.read_type = "corrected"
    if args.nano_raw:
        args.reads = args.nano_raw
        args.platform = "nano"
        args.read_type = "raw"
    if args.nano_corrected:
        args.reads = args.nano_corrected
        args.platform = "nano"
        args.read_type = "corrected"
    if args.subassemblies:
        args.reads = args.subassemblies
        args.platform = "pacbio"    #arbitrary
        args.read_type = "subasm"

    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.abspath(args.out_dir)

    args.log_file = os.path.join(args.out_dir, "flye.log")
    _enable_logging(args.log_file, args.debug,
                    overwrite=False)

    args.asm_config = os.path.join(cfg.vals["pkg_root"],
                                   cfg.vals["bin_cfg"][args.read_type])

    try:
        aln.check_binaries()
        pol.check_binaries()
        asm.check_binaries()
        repeat.check_binaries()

        if not args.polish_target:
            _set_genome_size(args)
            _run(args)
        else:
            _run_polisher_only(args)

    except (AlignmentException, pol.PolishException,
            asm.AssembleException, repeat.RepeatException,
            ResumeException, fp.FastaError) as e:
        logger.error(e)
        return 1

    return 0