ó
ƒå˜[c           @` sÀ   d  d l  m Z m Z m Z m Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l	 Z	 d  d l
 Z
 d  d l Z d  d l Z d  d l Z d  d l Z e j e ƒ Z d d d „  ƒ  YZ d S(   i    (   t   absolute_importt   divisiont   print_functiont   unicode_literalsNt	   Node_pathc           B` sA   e  Z d  Z d „  Z d „  Z d „  Z d „  Z e d „  ƒ Z RS(   uü   
    Object representation of the connected set of Node objects that represent the reconstructed isoforms graph traversal

    Instance members:

        transcript_name : (str)  name of the isoform

        node_obj_list : (list) of Node objects

    c         C` sÖ   | |  _  t ƒ  |  _ t j d | ƒ } t } t ƒ  } x™ | D]‘ } | j d ƒ \ }	 }
 |
 j d ƒ \ } } t | ƒ } t | ƒ } | sž t } |	 d 7}	 n  | j	 | |	 | | | d !ƒ } |  j j
 | ƒ q= Wd S(   uV   
        constructor, instantiates Node_path and builds vertices in the graph
        u   \d+:\d+\-\d+u   :u   -u   fsti   N(   t   transcript_namet   listt   node_obj_listt   ret   findallt   Falset   splitt   intt   Truet   get_nodet   append(   t   selft   tgraphR   t   path_stringt   sequencet   node_descr_listt   first_kmer_flagt   obj_node_listt
   node_descrt   loc_node_idt   node_coord_ranget   lendt   rendt   node_obj(    (    sy   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_path.pyt   __init__!   s     			c         C` s   |  j  S(   N(   R   (   R   (    (    sy   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_path.pyt   get_transcript_name?   s    c         C` s   |  j  S(   N(   R   (   R   (    (    sy   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_path.pyt   get_pathB   s    c         C` sC   t  ƒ  } x$ |  j D] } | j t | ƒ ƒ q Wd j | ƒ } | S(   Nu   --(   R   R   R   t   strt   join(   R   t   node_str_listt   nodet   path_str(    (    sy   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_path.pyt   __repr__F   s
    	c         C` sÙ  t  ƒ  } |  j ƒ  } xu | D]m } | j ƒ  } t j d | ƒ r t j d d | ƒ } |  j | ƒ } | d k	 r‰ | j | | f ƒ q‰ q q W| s¤ t	 j
 d ƒ | St	 j
 d j | ƒ ƒ t ƒ  } t  ƒ  }	 xB| D]:\ }
 } |
 j ƒ  } | j ƒ  } | d d d … } | d d d … } t j | | ƒ sJt d j | | ƒ ƒ ‚ n  t | ƒ t | ƒ } | d k r´| | k s„t d	 ƒ ‚ | j |
 j ƒ  ƒ | g | |
 <|	 j |
 ƒ qÓ | d | !} t	 j
 d
 j | | | ƒ ƒ | j |
 j ƒ  ƒ |
 | g | |
 <|
 j | ƒ qÓ Wx£ | D]› } | j ƒ  } | d } | | k r| | } t | ƒ d k rm| d | d <q³t | ƒ d k r¤| d | d <| j d | d ƒ q³t d ƒ ‚ qqWx |	 D] } |  j | ƒ q¾W| S(   uX  
        fst nodes will have an extra 5' sequence as compared to the corresponding non-fst nodes.

        If both the fst and non-fst version of the node exist, must modify the fst nodes so that
        they are separated from their 5' extension, and the core of the node (suffix) is shared.

        input: TGraph obj, list of node_path objects.

        The node_path objects are modified in-place as needed.
        A fst-node will be truncated to the unique prefix and the non-fst node will be integrated into the path.

        returns the node_path_list with any required adjustments

        u   fstu    u   no FST nodes to adjustu   Adjusting FST nodes: {}Niÿÿÿÿu8   Error, core_node_seq:
{}
is not a suffix of fst seq:
{}
i    uG   Error, prefix starts at first position but sequences are not equivalentu<   FST-SEQ-EXTRACTION

FSTseq:
{}

COREseq:
{}

PREFIXseq:
{}

i   i   u   shouldn't get here(   R   t   get_all_nodest
   get_loc_idR   t   searcht   subt   retrieve_nodet   NoneR   t   loggert   debugt   formatt   dictt   get_seqt   matcht   RuntimeErrort   lent   AssertionErrort   add_transcriptst   get_transcriptst   set_seqR   t   insertt
   prune_node(   R   t   node_path_listt   fst_nodes_require_adjt   nodesR#   t   node_idt   core_node_idt	   core_nodet   old_fst_node_to_new_fst_nodest   fst_nodes_to_deletet   fst_nodet   fst_node_seqt   core_node_seqt   fst_node_seq_revt   core_node_seq_revt   prefix_endptt   prefix_stringt	   node_patht
   first_nodet   replacement_node_list(    (    sy   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_path.pyt   adjust_for_fst_nodesP   s^    			

(	   t   __name__t
   __module__t   __doc__R   R   R   R%   t   staticmethodRL   (    (    (    sy   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_path.pyR      s   
				
(    (   t
   __future__R    R   R   R   t   ost   sysR   t   loggingt   argparset   collectionst   numpyt   timet   TNodet   Trinity_utilt	   getLoggerRM   R,   R   (    (    (    sy   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_path.pyt   <module>   s   "$