σ
ε[c           @` sΐ   d  d l  m Z m Z m Z m Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l	 Z	 d  d l
 Z
 d  d l Z d  d l Z d  d l Z d  d l Z e j e  Z d d d     YZ d S(   i    (   t   absolute_importt   divisiont   print_functiont   unicode_literalsNt   Node_alignmentc           B` sώ   e  Z d  Z d Z d   Z d   Z d   Z d   Z d   Z	 e
 d    Z e
 d    Z e
 d    Z e
 d	    Z d
   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z e d  Z RS(   uλ  
    Object has two members:

        transcript_names = [ transA,
                             transB,
                             transC,
                             ...
                             ]

        aligned_nodes = [ [transA_node_1, transA_node_2, ... ],
                          [transB_node_1, transB_node_2, ... ],
                          [ None,         transC_node_1, ... ],  
                        ]

    Note, can have None at node positions to include gaps.

    c         C` s   | |  _  | |  _ | |  _ d  S(   N(   t   gene_idt   transcript_namest   aligned_nodes(   t   selfR   t   transcript_name_listt   node_obj_matrix(    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   __init__)   s    		c         C` s   |  j  S(   N(   R   (   R   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   get_gene_id.   s    c         C` s   | |  _  d  S(   N(   R   (   R   R   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   set_gene_id1   s    c         C` s   |  j  S(   N(   R   (   R   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   get_transcript_names4   s    c         C` s   |  j  S(   N(   R   (   R   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   get_aligned_nodes8   s    c         C` sU   t    } x! |  j   D] } | j |  q W|  j   } t | | g | g  } | S(   uΘ   
        Factory method:
           constructs a Node_alignment object from a Node_path object

           mostly just reshaping the info for use with the multiple alignment methods.
        
        (   t   listt   get_patht   appendt   get_transcript_nameR   (   t   path_objt	   node_listt   node_objt   transcript_nameR   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   get_single_seq_node_alignment<   s    
	c         C` sR   t  j |   } t  j |  } t  j |  } t  j |  } t j | |  } | S(   uT   
        given to Node_alignment objects, counts the number of shared nodes
        (   R   t   get_node_sett   get_node_loc_idst   sett   intersection(   t   align_At   align_Bt
   node_set_at
   node_set_bt   common_nodes(    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   compute_number_common_nodesP   s    c         C` s7   t    } x' |  D] } | j   } | j |  q W| S(   ub   
        private static method
        gets the list of loc_id among all nodes in the set
        (   R   t
   get_loc_idt   add(   t   node_sett   loc_ids_sett   nodet   loc_id(    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyR   a   s
    	c         C` s   t  |   } |  j   } t   } x^ t d |  D]M } xD t d |  D]3 } |  j | | } | d k	 rG | j |  qG qG Wq1 W| S(   uW   
        extracts a list of unique Node objects from the Node_alignment object
        i    N(   t   lent   widthR   t   rangeR   t   NoneR$   (   t	   align_objt	   num_transt   alignment_widthR%   t	   align_numt	   align_posR   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyR   p   s    	c         C` sZ   t    } xJ t d t |    D]3 } |  j | | } | d k	 r | j |  q q W| S(   u\   
        At a given column of the Node_alignment, extracts the list of unique nodes
        i    N(   R   R+   R)   R   R,   R$   (   R   t   col_post	   node_objst   iR   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   get_node_set_at_column_pos   s    		c         C` s   t  |  j |   } | d S(   Ni    (   R   R5   (   R   R2   R   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   get_representative_column_node   s    c         C` sK   t    } x; t d t |    D]$ } |  j | | } | j |  q W| S(   Ni    (   R   R+   R)   R   R   (   R   R2   R3   R4   R   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   get_node_LIST_at_column_pos   s
    	c         C` sV   |  j  |  } t   } x7 | D]/ } | d  k rA | j t  q | j t  q W| S(   N(   R7   R   R,   R   t   Falset   True(   R   R2   R   t   occupancy_listR'   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt    get_node_occupancy_at_column_pos₯   s    	c         C` s8   x1 t  d t |    D] } |  j | j |  q Wd  S(   Ni    (   R+   R)   R   R   (   R   R   R4   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   append_node_to_each_entry³   s    c         C` s_   xX t  d t |    D]A } | | t k rC |  j | j |  q |  j | j d   q Wd  S(   Ni    (   R+   R)   R9   R   R   R,   (   R   R   t   occupancy_patternR4   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt*   append_node_according_to_occupancy_patternΈ   s    c         C` sc   t  |   } t  |  | k r- t d   n  x/ t d |  D] } |  j | j | |  q= Wd  S(   Nu.   Error, column size differs from num_alignmentsi    (   R)   t   RuntimeErrorR+   R   R   (   R   t   column_node_listt   num_alignmentsR4   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt
   add_columnΓ   s
    c         C` s   t  |  j  S(   uD   
        number of transcripts represented in the alignment
        (   R)   R   (   R   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   __len__Μ   s    c         C` s   t  |  j d  S(   u<   
        width of the alignment (number of columns)
        i    (   R)   R   (   R   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyR*   Σ   s    c         C` s=  t  |  j  } d j | d j |  j   } |  j   } d } xΊ t d | |  D]¦ } x t d |  D] } |  j | } |  j | } | d j |  7} xB t | | |  D]- }	 |	 | k rΕ Pn  | d j | |	  7} q― W| d 7} qh W| d 7} qR Wx: t d |  D]) } |  j |  }
 | |
 j   d 7} qW| S(   Nu/   
# Alignment obj contains: {} transcripts: {}

u   ,i
   i    u   {}u   	{}u   
(	   R)   R   t   formatt   joinR*   R+   R   R6   t   toString(   R   t   num_transcriptst   ret_textt   align_widtht   NODES_PER_LINER4   t   jR   t   aligned_nodes_entryt   xt	   repr_node(    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   __repr__Ϊ   s&    c         C` s«  t  |   } |  j   } t   } x$ t d |  D] } | j g   q1 Wt |  j   |  j   |  } g  } |  j d  } xH t d |  D]7 } |  j |  } | | k rΌ | j |  n  | } q W| j |  t	 j
 d j |   t   }	 x² t d | d  D] } | | k r~d }
 t  |	  d k rEt j j |	  }
 n
 |	 d }
 |  j | d  } | j |
 |  t   }	 n  | | k  r|	 j |  j |   qqW| S(   u:   
        merge unbranched nodes into single nodes
        i    i   u   Block_breakpoints: {}N(   R)   R*   R   R+   R   R   R   R   R;   t   loggert   debugRD   R,   t   TNodet   merge_nodesR>   R6   (   R   RG   R*   R
   R4   t   squeezed_alignmentt   block_breakpointst   prev_col_node_sett   node_column_sett   blocked_nodest   node_to_addt   blocked_node_occupancy(    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   squeeze   s8    	
	
c         C` sΓ  |  j    } d } t   } t   } t   } t   } x9 | D]1 } d | | <d | | <t   | | <d | | <q= WxΝt d |  j    D]Ά}	 |  j |	  }
 |
 j   } t |  d k rΤ t d t	 |
    n  |
 j
   } |  j |	  } t |  d } | |
 j   7} t |  } x t d t |   D]	} | | } | | t k r
| | c d j | d d t	 |  t	 |  d d	 d d
 j | |  g	  7<| | c | 7<t | |  } | } | t |  d } | | c | 7<| | j d j | | |   q1x- t d t |   D] } | | c d 7<q Wq1Wq Wd j | j    } d } xS | D]K } | | } | | } d j |  } | d j | t |  | |  7} qdW| | | | f S(   Nu    i    u%   Error, node seq of length zero: node=i   u   	u   Trinity_geneu   exonu   .u   +u!   gene_id "{}"; transcript_id "{}"
u   {}:{}-{}u   
u    u   >{} len={} path=[{}]
{}
(   R   t   dictR   R+   R*   R6   t   get_seqR)   R?   t   strR#   R;   R9   RE   RD   R   t   values(   R   t	   gene_nameR   t   gene_seqt   transcript_to_gtf_linest   transcript_to_malignt   transcript_to_Trinity_fa_seqt   transcript_to_Trinity_fa_pathR   R4   R   t   node_seqt   node_idt   node_occupancyt	   pos_startt   pos_endRK   t   cdna_seq_lent   rel_node_startt   rel_node_endRM   t   gene_gtft   trinity_fasta_textt   transcript_seqt	   path_listt   path_list_text(    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   to_gene_fasta_and_gtf4  sZ    				


#

c         C` sF   x? t  d |  j    D]( } |  j |  } | j t |   q Wd  S(   Ni    (   R+   R*   R6   t
   set_loc_idR^   (   R   R4   RN   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt$   reassign_node_loc_ids_by_align_order  s    c         C` sz  |  j    } |  j   } t j |  } t   } x t d |  D] } |  j |  } t j d j | j	     | j
   }	 | j   }
 | r’ d t |  }
 n  | j |	 |
 | j    } | j |  q@ Wxt | D]l } d  } x] t d |  D]L } | | d  k rρ | d  k r0| j | g | | g  n  | | } qρ qρ WqΥ Wt j d  x! | D] } t j | j	    qYW| S(   Ni    u   repr node: {}u   loc_u   New graph node listing:(   R   R*   t   TGraphR   R+   R6   RP   RQ   RD   RF   t   get_transcriptsR#   R^   t   get_nodeR]   R   R,   t	   add_edges(   R   R`   t   reset_node_idsR   R*   t   refined_tgrapht   new_node_listR4   RN   t   transcriptsR(   t   new_nodet   iso_node_alignmentt   prevR'   (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   to_splice_graph  s0    	N(   t   __name__t
   __module__t   __doc__R,   t   GAPR   R   R   R   R   t   staticmethodR   R"   R   R   R5   R6   R7   R;   R<   R>   RB   RC   R*   RO   R[   Rs   Ru   R8   R   (    (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyR      s2   																	&	4	M	(    (   t
   __future__R    R   R   R   t   ost   syst   ret   loggingt   argparset   collectionst   numpyt   timeRR   Rv   t	   getLoggerR   RP   R   (    (    (    s~   /oak/stanford/groups/akundaje/marinovg/programs/trinityrnaseq-Trinity-v2.8.4/Analysis/SuperTranscripts/pylib/Node_alignment.pyt   <module>   s   "$