ó
ZIc           @   s  d  Z  d d l m Z d d l m Z d d l m Z d d l m Z d d l	 m
 Z
 d d „ Z d e f d	 „  ƒ  YZ e d
 k rd d l m Z d GHHd GHe d ƒ Z xF e e ƒ D]8 Z e GHx* e D]" Z e e j ƒ Ge j Ge j GHqÊ Wq¸ Wd GHHd GHe d ƒ Z xF e e ƒ D]8 Z e GHx* e D]" Z e e j ƒ Ge j Ge j GHq*WqWd GHHd GHd e e e e ƒ  ƒ ƒ ƒ k s†t ‚ d GHHd GHe ƒ  Z e e ƒ j e g ƒ e j d ƒ e j ƒ  GHe ƒ  Z y/ e e ƒ j e e g ƒ e sÿt d ƒ ‚ Wqe k
 rqXn  d S(   sj  
Bio.AlignIO support for the "nexus" file format.

You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).

See also the Bio.Nexus module (which this code calls internally),
as this offers more than just accessing the alignment or its
sequences as SeqRecord objects.
iÿÿÿÿ(   t   Nexus(   t	   Alignment(   t	   SeqRecord(   t   AlignmentWriter(   t   Alphabetc      
   c   s
  t  j  |  ƒ } | j s! t ‚ n  t | j ƒ } t | j ƒ t | j ƒ k sT t ‚ | r‘ | t | j ƒ k r‘ t	 d t | j ƒ | f ƒ ‚ n  xm t
 | j | j ƒ D]V \ } } | j | ƒ sÈ t ‚ | j | } | j j t | d | d | d d ƒƒ q§ W| Vd S(   sF  Returns SeqRecord objects from a Nexus file.

    Thus uses the Bio.Nexus module to do the hard work.

    You are expected to call this function via Bio.SeqIO or Bio.AlignIO
    (and not use it directly).

    NOTE - We only expect ONE alignment matrix per Nexus file,
    meaning this iterator will only yield one Alignment.s$   Found %i sequences, but seq_count=%it   idt   namet   descriptiont    N(   R    t   matrixt   StopIterationR   t   alphabett   lent   unaltered_taxlabelst	   taxlabelst   AssertionErrort
   ValueErrort   zipt
   startswitht   _recordst   appendR   (   t   handlet	   seq_countt   nt	   alignmentt   old_namet   new_namet   seq(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/NexusIO.pyt   NexusIterator   s     
		$"t   NexusWriterc           B   s)   e  Z d  Z d „  Z d „  Z d „  Z RS(   sÔ   Nexus alignment writer.

    Note that Nexus files are only expected to hold ONE alignment
    matrix.

    You are expected to call this class via the Bio.AlignIO.write() or
    Bio.SeqIO.write() functions.
    c         C   sœ   t  | ƒ } y | j ƒ  } Wn t k
 r5 d } n X| d k rF d Sy | j ƒ  } Wn t k
 ro d } n X| d k	 r‹ t d ƒ ‚ n  |  j | ƒ d S(   sÒ   Use this to write an entire file containing the given alignments.

        alignments - A list or iterator returning Alignment objects.
                     This should hold ONE and only one Alignment.
        Ns0   We can only write one Alignment to a Nexus file.i   (   t   itert   nextR
   t   NoneR   t   write_alignment(   t   selft
   alignmentst
   align_itert   first_alignmentt   second_alignment(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/NexusIO.pyt
   write_fileH   s    

c         C   s¾   t  | j ƒ  ƒ d k r' t d ƒ ‚ n  | j ƒ  d k rH t d ƒ ‚ n  d d |  j | j ƒ } t j | ƒ } | j | _ x* | D]" } | j | j	 | j
 j ƒ  ƒ q„ W| j |  j ƒ d  S(   Ni    s   Must have at least one sequences    Non-empty sequences are requireds.   #NEXUS
begin data; dimensions ntax=0 nchar=0; s   format datatype=%s; end;(   R   t   get_all_seqsR   t   get_alignment_lengtht   _classify_alphabet_for_nexust	   _alphabetR    R   t   add_sequenceR   R   t   tostringt   write_nexus_dataR   (   R"   R   t   minimal_recordR   t   record(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/NexusIO.pyR!   c   s     c         C   s‚   t  j | ƒ } t | t  j  ƒ s0 t d ƒ ‚ nN t | t  j ƒ rF d St | t  j ƒ r\ d St | t  j ƒ rr d St d ƒ ‚ d S(   sv   Returns 'protein', 'dna', 'rna' based on the alphabet (PRIVATE).

        Raises an exception if this is not possible.s   Invalid alphabett   proteint   dnat   rnas#   Need a DNA, RNA or Protein alphabetN(   R   t   _get_base_alphabett
   isinstancet	   TypeErrort   ProteinAlphabett   DNAAlphabett   RNAAlphabetR   (   R"   R   t   a(    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/NexusIO.pyR*   s   s    (   t   __name__t
   __module__t   __doc__R'   R!   R*   (    (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/NexusIO.pyR   ?   s   		t   __main__(   t   StringIOs   Quick self tests#   Repeated names without a TAXA blocksù  #NEXUS
    [TITLE: NoName]

    begin data;
    dimensions ntax=4 nchar=50;
    format interleave datatype=protein   gap=- symbols="FSTNKEYVQMCLAWPHDRIG";

    matrix
    CYS1_DICDI          -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---- 
    ALEU_HORVU          MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG 
    CATH_HUMAN          ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK----
    CYS1_DICDI          -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X
    ;
    end; 
    t   Dones    Repeated names with a TAXA blocksO  #NEXUS
    [TITLE: NoName]

    begin taxa
    CYS1_DICDI
    ALEU_HORVU
    CATH_HUMAN
    CYS1_DICDI;
    end;

    begin data;
    dimensions ntax=4 nchar=50;
    format interleave datatype=protein   gap=- symbols="FSTNKEYVQMCLAWPHDRIG";

    matrix
    CYS1_DICDI          -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---- 
    ALEU_HORVU          MAHARVLLLA LAVLATAAVA VASSSSFADS NPIRPVTDRA ASTLESAVLG 
    CATH_HUMAN          ------MWAT LPLLCAGAWL LGV------- -PVCGAAELS VNSLEK----
    CYS1_DICDI          -----MKVIL LFVLAVFTVF VSS------- --------RG IPPEEQ---X
    ;
    end; 
    s   Reading an empty filei    s
   Writing...s-   Should have rejected more than one alignment!N(   R=   t	   Bio.NexusR    t   Bio.Align.GenericR   t   Bio.SeqRecordR   t
   InterfacesR   t   BioR   R    R   R   R;   R?   R   R:   t   rt   reprR   R   R   R   t   listR   R'   t   seekt   readt   FalseR   (    (    (    s†   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/AlignIO/NexusIO.pyt   <module>   sX   $H	$	$'		