ó
¤>dc           @   sl   d  Z  d d l Z d d l m Z d d l m Z d d l m Z d d l	 m
 Z
 d e f d „  ƒ  YZ d S(	   s8    Helper class for working with alignment type analyses.
iÿÿÿÿN(   t   BaseTool(   t	   Fast5File(   t   SegmentationTools(   t	   Fast5Readt   AlignmentToolsc           B   sS   e  Z d  Z d d d d d „ Z d „  Z d „  Z d „  Z d d „ Z d „  Z	 RS(	   s=    Provides helper methods specific to alignment analyses.
    t   rc         C   s<  t  | t ƒ r$ | |  _ t |  _ n9 t  | t ƒ rQ t | | ƒ |  _ t |  _ n t d ƒ ‚ | d k r™ |  j j
 d ƒ } | d k r™ t d ƒ ‚ q™ n  | |  _ |  j j | ƒ } | d k r| d k rÕ i  } n  |  j j d | | | ƒ |  j j | ƒ } n  d | k r8| d d	 k r8|  j ƒ  t d ƒ ‚ n  d S(
   s6   Create a new alignment tools object.
        
        :param source: Either an open Fast5File object, or a filename
            of a fast5 file.
        :param mode: The open mode (r or r+). Only if a filename is used
            for the source argument.
        :param group_name: The specific alignment analysis instance
            you are interested in.
        :param meta: Metadata for a new alignment analysis.
        :param config: Configuration data for a new alignment analysis.
        
        To create a new alignment analysis, provide a group name that
        does not already exist, and an optional dictionary with the metadata.
        The following fields are recommended, as a minimum:
            
            * name - The name of the basecall software used.
            * time_stamp - The time at which the analysis was performed.
        
        If the group name already exists, the "meta" parameter is ignored. If
        the specified group has a "component" attribute, and its value is not
        "alignment", an exception will be thrown.
        s(   Unrecognized type for argument "source".t	   Alignments*   No Alignment analysis group found in file.t	   alignmentt	   componentt   calibration_strands6   Analysis does not appear to be an alignment component.N(   R   R	   (   t
   isinstanceR   t   handlet   Falset   close_handle_when_donet   strR   t   Truet	   Exceptiont   Nonet   get_latest_analysist
   group_namet   get_analysis_attributest   add_analysist   close(   t   selft   sourcet   modeR   t   metat   configt   attrs(    (    sO   /tmp/pip-install-m8tpmx/ont-fast5-api/ont_fast5_api/analysis_tools/alignment.pyt   __init__   s.    				
c         C   s»   |  j  j |  j ƒ } i i d d 6d 6i d d 6d 6i d d 6d 6} d | k rk |  j | d ƒ | d <n  d | k r‘ |  j | d ƒ | d <n  d | k r· |  j | d ƒ | d <n  | S(	   së   Get details about the alignments that have been performed.

        :return: A dict of dicts.

        The keys of the top level are 'template', 'complement' and '2d'.
        Each of these dicts contains the following fields:

            * status: Can be 'no data', 'no match found', or 'match found'.
            * direction: Can be 'forward', 'reverse'.
            * ref_name: Name of reference.
            * ref_span: Section of reference aligned to, as a tuple (start, end).
            * seq_span: Section of the called sequence that aligned, as a tuple (start, end).
            * seq_len: Total length of the called sequence.
            * num_aligned: Number of bases that aligned to bases in the reference.
            * num_correct: Number of aligned bases that match the reference.
            * num_deletions: Number of bases in the aligned section of the
                reference that are not aligned to bases in the called sequence.
            * num_insertions: Number of bases in the aligned section of the called
                sequence that are not aligned to bases in the reference.
            * identity: The fraction of aligned bases that are correct (num_correct /
                num_aligned).
            * accuracy: The overall basecall accuracy, according to the alignment.
                (num_correct / (num_aligned + num_deletions + num_insertions)).
        
        Note that if the status field is not 'match found', then all the other
        fields will be absent.
        s   no datat   statust   templatet
   complementt   2dt   genome_mapping_templatet   genome_mapping_complementt   genome_mapping_2d(   R   t   get_summary_dataR   t   _get_results(   R   t   summaryt   results(    (    sO   /tmp/pip-install-m8tpmx/ont-fast5-api/ont_fast5_api/analysis_tools/alignment.pyt   get_results?   s    c         C   sx   d j  |  j | ƒ } |  j j | d ƒ } |  j j | d ƒ } | d k sW | d k r[ d S| j d ƒ d } | | f S(   s>   Get the alignment SAM and Fasta, if present.
        
        :param section: Can be 'template', 'complement', or '2d'.
        :return: A tuple containing the SAM and the section of the reference
            aligned to (both as strings). Returns None if no alignment is
            present for that section.
        s   {}/Aligned_{}t   SAMt   Fastas   
i   N(   t   formatR   R   t   get_analysis_datasetR   t   split(   R   t   sectiont   subgroupt   samt   fastat   sequence(    (    sO   /tmp/pip-install-m8tpmx/ont-fast5-api/ont_fast5_api/analysis_tools/alignment.pyt   get_alignment_datag   s    c         C   sÎ   d j  | ƒ } | |  j j d j  |  j ƒ k rJ |  j j |  j | ƒ n  t j | d t ƒ} |  j j d j  |  j | ƒ d | ƒ t j d j  | | ƒ d t ƒ} |  j j d j  |  j | ƒ d | ƒ d S(	   s1   Add the SAM and Fasta alignment data for a section.
        
        :param section: Can be 'template', 'complement', or '2d'.
        :param sam: A string containing the SAM contents.
        :param sequence: A string containing the section of the
            reference the basecall aligned to.
        s
   Aligned_{}s   Analyses/{}t   dtypes   {}/{}R*   s   >{}
{}
R+   N(   R,   R   R   t   add_analysis_subgroupt   npt   arrayR   t   add_analysis_dataset(   R   R/   R1   R3   R0   t   sam_arrt	   fasta_arr(    (    sO   /tmp/pip-install-m8tpmx/ont-fast5-api/ont_fast5_api/analysis_tools/alignment.pyt   add_alignment_dataw   s    "%!c         C   sm  d } | r |  j  | ƒ } n |  j ƒ  | } | d d k rB d S| d } | d | d } | d } | d | d } | d }	 |  j j ƒ  d	 }
 |  j j |  j ƒ } | d k	 rÐ t | ƒ j d
 ƒ } n d } d } | d k	 r7t	 |  j d | ƒ5 } | j ƒ  } | d k	 r.| d j
 | ƒ } n  Wd QXn  | d k rGd S| | t |	 ƒ } |
 | | } | S(   sÙ   Calculate speed using alignment information.

        :param section: The section (template or complement) we're calculating
            speed for.
        :param alignment_results: Optional dictionary of the alignment summary,
            so that speed can be calculated without having to write the summary
            out to the fast5 file first.
        :return: Speed in bases per second or zero if the speed could not be
            calculated.

        The only reliable way we have of finding out how many bases have gone through the pore is by
        looking at how much of the reference the sequence aligned to. This takes that information and
        uses it to calculate speed in reference-bases-per-second.
        g        R   s   match foundt   ref_spani   i    t   seq_spant   seq_lent   sampling_ratet   segmentationR   s   duration_{}N(   R&   R)   R   t   get_channel_infot	   get_chainR   R   t   dictt   getR   R,   t   float(   R   R/   t   alignment_resultst   speedR(   R=   t   ref_lenR>   R?   t	   total_lent   sample_ratet   chaint   segmentation_groupt   durationt   segR'   t   normalized_duration(    (    sO   /tmp/pip-install-m8tpmx/ont-fast5-api/ont_fast5_api/analysis_tools/alignment.pyt   calculate_speed‡   s6    


c            sæ   i d d 6} ˆ  d } | d k r1 d | d <| Sd | d <d | d <| j  d	 ƒ rk | d
  } d | d <n  | | d <ˆ  d ˆ  d f | d <ˆ  d ˆ  d f | d <ˆ  d | d <| j ‡  f d †  d d d d d d g Dƒ ƒ | S(   Ns   no dataR   t   genomet   no_matchs   no match founds   match foundt   forwardt	   directiont   _rciýÿÿÿt   reverset   ref_namet   genome_startt
   genome_endR=   t   strand_startt
   strand_endR>   t
   num_eventsR?   c            s   i  |  ] } ˆ  | | “ q S(    (    (   t   .0t   key(   R'   (    sO   /tmp/pip-install-m8tpmx/ont-fast5-api/ont_fast5_api/analysis_tools/alignment.pys
   <dictcomp>Í   s   	 t   num_alignedt   num_correctt   num_insertionst   num_deletionst   identityt   accuracy(   t   endswitht   update(   R   R'   R(   RX   (    (   R'   sO   /tmp/pip-install-m8tpmx/ont-fast5-api/ont_fast5_api/analysis_tools/alignment.pyR&   ¾   s"    





N(
   t   __name__t
   __module__t   __doc__R   R   R)   R4   R<   RQ   R&   (    (    (    sO   /tmp/pip-install-m8tpmx/ont-fast5-api/ont_fast5_api/analysis_tools/alignment.pyR      s   0	(		7(   Rj   t   numpyR7   t&   ont_fast5_api.analysis_tools.base_toolR    t   ont_fast5_api.fast5_fileR   t)   ont_fast5_api.analysis_tools.segmentationR   t   ont_fast5_api.fast5_readR   R   (    (    (    sO   /tmp/pip-install-m8tpmx/ont-fast5-api/ont_fast5_api/analysis_tools/alignment.pyt   <module>   s   