
;Wc           @  s  d  d l  m Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l	 m
 Z
 e j d d d d g  Z d   Z d	   Z d
   Z d   Z d   Z d   Z d   Z d e f d     YZ d e f d     YZ d e f d     YZ d e f d     YZ d e f d     YZ d   Z d   Z d   Z e   e   e   f Z e d  Z e d  Z  d   Z! d   Z" e# d  k re"   n  d S(!   i(   t   with_statementN(   t	   quoteattrt	   LabColourt   lt   at   bc         C  s!  t  |   t  |  k r	x t j |  |  D] \ } } | j   r| | j   ru | | k  r_ d S| | k  r+ d Sq+ q d Sn | j   r d S| j   r | j   r | | k  r d S| | k  r+ d Sq+ q d Sn | j   r d S| | k  r d S| | k  r+ d Sq+ Wd St  |  t  |   Sd S(   s&   Compares 2 strings of alphabet symbolsii   i    N(   t   lent	   itertoolst   izipt   isalphat   isdigit(   t   xt   yR   R   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   alphabetSymbolCompare	   s8    c   
      C  s8  |  d ?d @d } |  d ?d @d } |  d @d } t  | | |  } t | | |  } | | } | | d } d } d }	 | | k r+| d k r | d | | }	 n | | | }	 | | k r | | | } | | k  r| d 7} qn3 | | k r| | | d } n | | | d } | d :} n  | |	 | f S(	   s   Create HSL from an RGB integeri   i   i   i   i    g      ?i   i   (   t   mint   max(
   t   rgbt   rt   gR   t   min_ct   max_ct   delta_cR   t   ht   s(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _rgb2hsl/   s,    
c         C  s  d   } |  d } |  d } |  d } | d k rH | } | } | } n | d k  re | d | } n | | | | } d | | }	 | |	 | | d d  } | |	 | |  } | |	 | | d d  } t  t | d   d	 >t  t | d   d
 >Bt  t | d   B}
 |
 S(   s&   Create an RGB integer from a HSL tuplec         S  s   | d k  r | d 7} n | d k r2 | d 8} n  | d d k  rV |  | |  d | S| d k  rf | S| d d k  r |  | |  d d | d S|  Sd  S(   Ni    i   g      ?g      @g      ?g       @g      @(    (   t   pt   qt   t(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _hueL   s    i    i   i   g      ?g      ?g      @i   i   i   (   t   intt   round(   t   hslR   R   R   R   R   R   R   R   R   R   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _hsl2rgbJ   s"    	


	Fc         C  sE   t  |   } | d | d | d d | d d d f } t |  S(   s$   Make a lighter version of the colouri    i   i   g      ?i   (   R   R    (   R   R   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _lightenm   s    /c         C  s  | } | } | } | d k r1|  d } t  j |  } | | } | d | }	 | d | | }
 | d | d | } | d k r | } | } |	 } q1| d k r |
 } | } |	 } q1| d k r |	 } | } | } q1| d k r |	 } |
 } | } q1| d k r| } |	 } | } q1| } |	 } |
 } n  t t | d   } t t | d   } t t | d   } | d	 >| d
 >B| B} | S(   s   Create an RGB integer from HSVi    i<   g      ?i   i   i   i   i   i   i   (   t   matht   floorR   R   (   t   huet   satt   valueR   R   R   R   t   it   fR   R   R   t   redt   greent   blueR   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _hsv2rgbs   sL    

						c         C  s  d   } d   } | |  d ?d @ } | |  d ?d @ } | |  d @ } | d | d | d } | d	 | d
 | d } | d | d | d } | | d  } | | d  } | | d  } d | d }	 d | | }
 d | | } t  |	 |
 |  S(   s#   Convert an RGB integer to Lab tuplec         S  sQ   |  d } | d k r9 | d d } t  j | d  } n
 | d :} | d 9} | S(   s.   Helper function for XYZ colourspace conversioni   gbX9Ȧ?g)\(?gzG?g333333@gףp=
)@id   (   R"   t   pow(   R&   t   c(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt	   xyzHelper   s    


c         S  sA   |  } | d k r+ t  j | d d  } n d | d d } | S(   s.   Helper function for Lab colourspace conversiong2#?g      ?g      @gS%@g      0@g      ]@(   R"   R-   (   R&   R.   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt	   labHelper   s
    i   i   i   g7d?g,C?gv?gz6?g,C?g]m{?gN@aÓ?g2%䃾?g"~j?g^IW@g      Y@gn8[@g      ]@g     @@g      i@(   R   (   R   R/   R0   t   c1t   c2t   c3R   R   t   zR   R   R   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _rgb2lab   s    	
		c         C  s  t  j |  j |  j |  j |  j  } t  j | j | j | j | j  } | | } |  j | j } |  j | j } |  j | j } d } | | | | | | }	 |	 d k r t  j |	  } n  | }
 | d d | } | d d | } t  j |
 |
 | | | |  S(   s;   Calculate the distance between 2 colours in Lab colourspacei    g      ?g
ףp=
?gQ?(   R"   t   sqrtR   R   R   (   t   lab1t   lab2R1   R2   t   dct   dlt   dat   dbt   dht
   dh_squaredt   firstt   secondt   third(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt	   _lab_dist   s    ''
t   AlphabetParseErrorc           B  s    e  Z d  Z d   Z d   Z RS(   s%   An error in the alphabet being parsedc         C  s   | |  _  d  S(   N(   R&   (   t   selfR&   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   __init__   s    c         C  s   t  |  j  S(   N(   t   reprR&   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   __str__   s    (   t   __name__t
   __module__t   __doc__RE   RG   (    (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyRC      s   	t   AlphabetReaderSymbolc           B  s8   e  Z d  Z d d d  Z d   Z d   Z d   Z RS(   s4   Temporary information storage for an alphabet readerc         C  sC   | |  _  | |  _ | |  _ | |  _ | |  _ g  |  _ t |  _ d  S(   N(   t   symbolt   namet   colourt
   complementt   compriset   aliasest   Falset   alias(   RD   RL   RM   RN   RO   RP   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyRE      s    						c         C  s   | d  k r d St | t  s# d S|  j d  k sG t |  j  d k r | j d  k sk t | j  d k r~ t |  j | j  Sd Sn; | j d  k s t | j  d k r d St |  j | j  Sd  S(   Nii   (   t   Nonet
   isinstanceRK   RP   R   R   RL   (   RD   t   obj(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   __cmp__   s    $$$c         C  s   t  |  j  S(   N(   RF   RL   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyRG      s    c         C  s"   |  j  d  k o! t |  j   d k S(   Ni   (   RP   RT   R   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   isAmbiguous   s    N(   RH   RI   RJ   RT   RE   RW   RG   RX   (    (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyRK      s
   			t   AlphabetReaderc           B  s   e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z	 d d d  Z d d d d d	  Z d
   Z d   Z d   Z RS(   sE   Reads an alphabet and validates it before creating an alphabet objectc         C  s  d } d } d } d | d | d | d } t  j d | d	  |  _ t  j d
 | d  |  _ t  j d
 | d | d  |  _ t  j d
 | d | d  |  _ t |  _ t |  _ t |  _	 t |  _
 t |  _ t |  _ t |  _ d  |  _ d  |  _ i  |  _ g  |  _ g  |  _ d  S(   Ns   [A-Za-z0-9?.*-]s   [0-9a-fA-F]{6}s0   ("(?:[^\\"]+|\\["\\/bfnrt]|\\u[0-9A-Fa-f]{4})*")t   (s   )(?:\s+s	   )?(?:\s+(s   ))?s   ^\s*ALPHABET(?:\s+v1)?(?:\s+s$   )?(?:\s+(DNA|RNA|PROTEIN)-LIKE)?\s*$s   ^\s*s   \s*$s   \s*~\s*s   \s*=\s*(s   *)\s*$(   t   ret   compilet	   header_ret   core_single_ret   core_pair_ret   ambig_reRR   t   parsedt   seen_headert   seen_symbolt
   seen_ambigt   seen_lct   seen_uct   fully_complementableRT   RM   t   liket
   sym_lookupt	   core_symst
   ambig_syms(   RD   t   qr_symt	   qr_colourt   qr_namet   qr_core(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyRE      s(    ""											c         C  s   t  } x |  j D] } | j d k r1 t } q n  |  j | j } | d k rr t d | j d | j d   n  | j | j k r t d   q q W| |  _	 d S(   s8   Ensure all referenced complement symbols actually exist.s   core symbol s    has complement s    which has not been defineds+   Alphabet symbol complement is not symmetricN(
   t   TrueRj   RO   RT   RR   Ri   RC   RL   t   RuntimeErrorRg   (   RD   Rg   t   sym_objt   complement_sym_obj(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _checkComplements  s    %c         C  s  i  } xG |  j  D]< } | j | k r< | | j j |  q | g | | j <q Wx| j   D] \ } } t |  d k r |  j | } x | D]" } t | _ | j j | j	  q Wq] t |  d k r] d } x> | D]6 } | d k st | j	 | j	  d k  r | } q q Wx< | D]1 } | | k	 rt | _ | j j | j	  qqWq] q] Wg  |  j  D] } | j s^| ^ q^|  _  |  j j   |  j  j   d S(   si   Check every ambiguity character to see if it has the same comprising
        characters as something elsei   i    N(   Rk   RP   t   appendt	   iteritemsR   Ri   Rp   RS   RQ   RL   RT   R   Rj   t   sort(   RD   t   comprise_lookupt   symRP   t   symst   core_symt   prime_ambig(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _mergeAliases'  s.    	'	!(c      	   C  s   t  |  j  d k s: t  |  j d j  t  |  j  k r t d d d d d j g  |  j D] } | j ^ qY  } |  j j d |  | |  j	 d <n  d S(   s<   Tries to find a wildcard but if none exists then creates onei    t   ?RP   t    N(
   R   Rk   RP   Rj   RK   RT   t   joinRL   t   insertRi   (   RD   Ry   t   wildcard(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _createWildcardIfMissingH  s    ::c         C  sZ  d } t    } x= |  j D]2 } | j d k rA | j | j  q | d 7} q Wx3 |  j D]( } | j d k rY | j | j  qY qY W| t |  7} g  } x! | D] } | j t |   q Wd } d } d | } g  }	 xC t	 |  D]5 }
 t
 | |
 | |  } |	 j | t |  f  q Wx | rd } d } d } x t	 t |   D]k }
 xb t	 t |	   D]N } t | |
 |	 | d  } | d k s| | k  rh| } |
 } | } qhqhWqOW| d k rt d   n  | | =|	 | =q$Wx9 |  j D]. } | j d k r|	 j d  d | _ qqWx, |  j D]! } | j d k r1d | _ q1q1Wd S(   s7   Assigns colours to any core symbols that don't have onei    i   g      ?g?ih  s   Somehow we ran out of colours?!N(   t   setRj   RN   RT   t   addRk   R   Ru   R5   t   xrangeR,   RB   Rq   t   pop(   RD   t   ncolourst   unique_coloursRr   t   uniquesR   R%   R&   t   stept   coloursR'   t	   best_distt   best_it   best_jt   jt   dist(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _setMissingColoursP  sR    	
	c         C  s   i  } x |  j  D] } | | | j <q Wx |  j  D] } | j } g  } xx | D]6 } |  j | } | j d k rs Pn  | j | j  qM Wd j t | d t  } | | k r1 | | j	 | _ q1 q1 Wd S(   s8   Tries to find a complement for every ambiguous characterR   t   cmpN(
   Rk   RP   Ri   RO   RT   Ru   R   t   sortedR   RL   (   RD   Rx   Rr   RP   t   complement_listRy   t   component_objRO   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _setMissingComplements  s    	c         C  s  |  j  d k rd } d } |  j  d k r3 d } n6 |  j  d k rQ d } d } n |  j  d k ri d } n  | d k rxt |  D]\ } } | |  j k r t d |  j  d	 |   n  |  j | } | j d k r t d |  j  d
 | d   n  d } | d k r2| | |  j k r2|  j | | } n  d } | j d k rZ|  j | j } n  | | k	 r t d |  j  d
 | d   q q Wqn  d S(   sJ   Confirms that an alphabet is an extension of a specified standard alphabett   RNAt   ACGUt   DNAt   ACGTt   TGCAt   PROTEINt   ACDEFGHIKLMNPQRSTVWYs   alphabet is not s   -like; missing symbol s   -like; symbol s    is ambiguouss    complement rules are incorrectN(   Rh   RT   t	   enumerateRi   RC   RP   RO   (   RD   t   required_symbolst   required_complementsR'   Ry   Rr   t	   comp1_objt	   comp2_obj(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   _checkLikeStandards  s2    			"c         C  s   |  j  r t d   n  |  j r0 t d   n  |  j rH t d   n  | d  k r | d k r | d k r | d k r t d   n  | |  _ | |  _ t |  _ d  S(   Ns   Parsing is already done!s   repeated headers   header after symbolR   R   R   s2   If defined then "like" must be DNA, RNA or PROTEIN(	   Ra   Rq   Rb   RC   Rc   RT   RM   Rh   Rp   (   RD   RM   Rh   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   parseHeader  s    			0		c         C  s  |  j  r t d   n  t | t  o6 t |  d k sH t d   n  | d  k rs t | t  rs t d   n  | d  k r t | t t f  r t d   n  | d  k r t | t  o t |  d k r t d   n  | d  k rt | t  rt d   n  | d  k r3| d  k r3t d   n  |  j pB|  j	 sTt
 d   n  | |  j k rt
 d	 t |  d
   n  | d  k rXt | d t } d j g  t |  D]2 \ } } | d k s| | d | k r| ^ q } x | D]^ }	 |  j j |	  }
 |
 d  k r.t
 d |	 d   q|
 j   rt
 d |	 d   qqWn |  j rpt
 d   n  | d k r| d  k s| d j t g  |  j D] }	 |	 j ^ qd t  k rt
 d   qn  | j   rt |  _ n | j   rt |  _ n  t | | | | |  } | d  k r;|  j j |  n/ |  j sQ|  j   n  |  j j |  t |  _ | |  j | <t |  _ d  S(   Ns   Parsing is already done!i   s(   Expected symbol to be a single characters   Expected name to be a strings.   Expected colour to be a unsigned 24 bit numbers3   Expected complement symbol to be a single characters-   Expected only comprise or complement not boths    expected header but found symbols   symbol s    is already usedR   R   i    s   referenced symbol s    is unknowns    is ambiguouss2   unexpected core symbol (as ambiguous symbols seen)R~   s'   symbol ? is reserved for wildcards only(    Ra   Rq   RU   t
   basestringR   RT   R   t   longRb   Rc   RC   Ri   t   strR   R   R   R   t   getRX   Rd   Rj   RL   t   islowerRp   Re   t   isupperRf   RK   Ru   Rt   Rk   t   seen_symbols(   RD   RL   RM   RN   RO   RP   t   compriseListR'   t   eRy   t   comprise_objt
   symbol_obj(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   parseSymbol  sZ    	!".N	F		c         C  s  d   } d   } d   } | |  } t  |  d k sE | j   rI d  S|  j j |  } | d  k r |  j | | j d   | j d   d  S|  j j |  } | d  k rG|  j | j d  | | j d   | | j d   d | j d	  |  j | j d	  | | j d
   | | j d   d | j d  d  S|  j	 j |  } | d  k r|  j | j d  | | j d   | | j d    d  S|  j
 j |  } | d  k r|  j | j d  | | j d   | | j d   d | j d	  d  St d   d  S(   Nc         S  s%  d } x4 | t  |   k  r< |  | j   s/ Pn  | d 7} q	 W| t  |   k rS d S|  | d k rg d S| d 7} x | t  |   k  r |  | } |  | d k r |  d | !S|  | d k r| d 7} xK | t  |   k  r|  | d k r|  | d d k rPn  | d 7} q Wn  | d 7} qt W|  S(   Ni    i   R   t   #t   "s   \(   R   t   isspace(   t   lineR'   R.   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   removeComments  s,    


$c         S  s   |  d  k r t j |   Sd  S(   N(   RT   t   jsont   loads(   RM   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt
   decodeName  s    c         S  s   |  d  k r t |  d  Sd  S(   Ni   (   RT   R   (   RN   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   decodeColour  s    i    i   i   i   RO   i   i   i   RP   s   unrecognised pattern(   R   R   R]   t   matchRT   R   t   groupR_   R   R^   R`   RC   (   RD   R   R   R   R   R   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt	   parseLine  s0    			(II:Ic         C  sU   |  j  s |  j   n  |  j   |  j   |  j   |  j   |  j   t |  _ d  S(   N(	   Rd   Rt   R}   R   R   R   R   Rp   Ra   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt	   parseDone;  s    	




c         C  sM   t  |  + } x! | D] } |  j | j    q WWd  QX|  j   t |   S(   N(   t   openR   t   stripR   t   Alphabet(   RD   t   filenamet   fhR   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt	   parseFileE  s
    
N(   RH   RI   RJ   RE   Rt   R}   R   R   R   R   RT   R   R   R   R   R   (    (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyRY      s   			!		3		<	@	
t   AlphabetSymbolc           B  s;   e  Z d  Z d   Z d   Z d   Z d   Z d   Z RS(   s   A symbol of the alphabetc         C  sR   | |  _  | |  _ t |  |  _ | |  _ | |  _ d  |  _ d  |  _ d  |  _	 d  S(   N(
   t   indexRL   t   listRQ   RM   RN   RT   RO   RP   t   pair(   RD   R   RL   RQ   RM   RN   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyRE   N  s    						c         C  s   t  | t  r |  j | j k r% t S|  j | j k r; t S|  j | j k rQ t S|  j | j k rg t S|  j | j k r} t St St	 S(   N(
   RU   R   R   RR   RL   RQ   RO   RP   Rp   t   NotImplemented(   RD   t   other(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   __eq__[  s    c         C  s$   |  j  |  } | t k r | S| S(   N(   R   R   (   RD   R   t   result(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   __ne__j  s    c         C  s   |  j  d k r |  j  d k s< |  j  d k rC |  j  d k rC |  j  S|  j  d k rl |  j  d k rl d |  j  Sd j t |  j    Sd	 S(
   s)   Provide an XML compatible ID for a symbolt   At   ZR   R4   t   0t   9t   ns   x{:02X}N(   RL   t   formatt   ord(   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   xmlidp  s
    <c         C  s|   |  j  g } |  j d  k rD | j d  | j t j |  j   n  |  j d k ro | j d j |  j   n  d j |  S(   Nt    i    s    {:06X}R   (	   RL   RM   RT   Ru   R   t   dumpsRN   R   R   (   RD   t   out(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   asTexty  s    (   RH   RI   RJ   RE   R   R   R   R   (    (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyR   L  s   					R   c           B  s@  e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z	 d   Z
 d	   Z d
   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z  e! d  Z" d    Z# d! d" d#  Z$ RS($   s   Biological alphabet class.
    This defines the set of symbols from which various objects can be built, e.g. sequences and motifs.
    c         C  sf  | j  s t d   n  t | j  |  _ | j | j k |  _ g  |  _ i  |  _	 i  |  _
 i  |  _ | j |  _ | j |  _ | j |  _ d } x t j | j | j  D] } t | | j | j | j | j  } |  j j |  x t j | j | j  D]e } | |  j	 | <|  j r | d k r-| d k sE| d k r^| d k r^| |  j	 | j   <q^q q W| d 7} q Wx^ t j | j | j  D]D } | j d  k r|  j	 | j } |  j	 | j } | j | _ qqWx0 t |  j  D] } t | g  |  j | _ qWxa | j D]V } |  j	 | j } g  }	 x( | j D] }
 |	 j |  j	 |
 j  q4Wt |	  | _ qWx! |  j D] } | |  j
 | j <qrWt g  |  j D]! } | j |  j k  r| j ^ q |  _ |  j rt g  |  j D]! } | j |  j k  r| j ^ q |  _  n	 d  |  _  |  j rQx0 |  j D]" } |  j! | j  |  j | j <q(Wn  g  t |  j d  D] } g  ^ qe} x+ |  j D]  } | t | j  j |  qWx |  j |  j D] } x t d t | j   D] } xi | | D]] } | j | j k  sqn  | j | j } | |  j
 k r| j |  j
 | j f | _" PqqW| j" d  k rPqqWqWd  S(   NsH   The reader must finish parsing before an alphabet can be created from iti    R   R   R   R4   i   (#   Ra   Rq   R   Rj   t   ncoreRe   Rf   t   ignore_caset   symbolst   lookupt   findt   comp_symbolsRM   Rh   Rg   R   t   chainRk   R   RL   RQ   RN   Ru   t   swapcaseRO   RT   R   R   t	   frozensetRP   t   tuplet   coret   complementst   getComplementR   (   RD   t   readerR   Rr   t   alph_symRy   t
   alph_sym_1t
   alph_sym_2R'   t   comprise_listt   comprise_symbolt   _t   symbols_by_countR   t   partt   remain(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyRE     sr    					$	0:	=		#&c         C  s;   |  j  d  k r+ t |  j   d k r+ |  j  Sd j |  j  S(   Ni    R   (   RM   RT   R   R   R   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   __repr__  s    $c         C  sY   t  | t  rU |  j | j k r% t S|  j | j k r; t S|  j | j k rQ t St St S(   N(   RU   R   R   RR   R   R   Rp   R   (   RD   R   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyR     s    c         C  s$   |  j  |  } | t k r | S| S(   N(   R   R   (   RD   R   R   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyR     s    c         C  s   t  | t t f  rO | t |  j  k  r8 |  j | j St d | d   n1 | |  j k rl |  j | j St d | d   d S(   s   Get the colour for this symbols   Symbol index s    does not exist in alphabets   Symbol N(   RU   R   R   R   R   RN   Rq   R   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt	   getColour  s    c         C  s   t  | t t f  r[ | t |  j  k  r> t |  j | j  St d t |  d   n7 | |  j	 k r~ t |  j	 | j  St d | d   d S(   s$   Get the muted colour for this symbols   Symbol index s    does not exist in alphabets   Symbol N(
   RU   R   R   R   R   R!   RN   Rq   R   R   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getMutedColour  s    c         C  s1   t  |  j  |  j k r) |  j |  j j Sd Sd S(   s   Get the wildcard symbolN(   R   R   R   RL   RT   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getWildcard  s    c         C  s   |  j  S(   sF   Retrieve a tuple with all core symbols, immutable membership and order(   R   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt
   getSymbols  s    c         C  s   |  j  S(   sD   Retrieve a tuple with all core symbol complement indicies, immutable(   R   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getComplements  s    c         C  s   | |  j  k S(   s+   Check if the symbol is a member of alphabet(   R   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   isValidSymbol  s    c         C  s*   | |  j  k r& |  j  | j |  j k  St S(   s0   Check if the symbol is a core member of alphabet(   R   R   R   RR   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   isCoreSymbol  s    c         C  s*   | |  j  k r& |  j  | j |  j k St S(   s3   Check if the symbol is the wildcard of the alphabet(   R   R   R   RR   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   isWildcardSymbol  s    c         C  s<   | |  j  k r8 |  j  | j j   | j   k r8 t Sn  t S(   sY   Check if the symbol is a member of the alphabet and the normal way to identify the letter(   R   RL   t   lowerRp   RR   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   isPrimeSymbol  s    "c         C  s!   | |  j  k r |  j  | j Sd S(   s,   Retrieve the index of the symbol (immutable)N(   R   R   RT   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getIndex"  s    c         C  s\   t  | t t f  r; | t |  j  k  rX |  j | j Sn | |  j k rX |  j | j Sd S(   s?   Retrieve the indexes of the comprising core symbols (immutable)N(   RU   R   R   R   R   RP   R   RT   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getComprisingIndexes(  s    c         C  s\   t  | t t f  r; | t |  j  k  rX |  j | j Sn | |  j k rX |  j | j Sd S(   s?   Retrieve the indexes of the comprising core symbols (immutable)N(   RU   R   R   R   R   R   R   RT   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getPairIndexes2  s    c         C  s'   | t  |  j  k  r# |  j | j Sd S(   s$   Return the symbol at the given indexN(   R   R   RL   RT   (   RD   R   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt	   getSymbol=  s    c         C  s*   | t  |  j  k  r& |  j | j   Sd  S(   N(   R   R   R   RT   (   RD   R   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getXmlIdC  s    c         C  s{   |  j  | } g  } xX t j | j | j  D]> } | j |  |  j r, | j   r, | j | j    q, q, Wd j	 |  S(   s/   Returns all symbols variants at the given indexR   (
   R   R   R   RL   RQ   Ru   R   R	   R   R   (   RD   R   R   Rz   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt
   getAliasesH  s    c         C  sn   t    } x5 | D]- } | |  j k r) d S| |  j | j O} q Wt |  } | |  j k rj |  j | j Sd S(   s   Find a symbol that matches the string of comprising symbols.
        Duplicates and aliases are allowed in the string but all
        included symbols must be known.N(   R   R   RT   RP   R   R   RL   (   RD   RP   t   symi_setRy   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt
   findSymbolS  s    	c         C  s   |  j  S(   s$   Do all core symbols have complements(   Rg   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   isComplementablea  s    c         C  s\   t  | t t f  r; | t |  j  k  rX |  j | j Sn | |  j k rX |  j | j Sd S(   s2   Retrieve the index of the complement of the symbolN(   RU   R   R   R   R   RO   R   RT   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getComplementIndexe  s    c         C  s-   |  j  |  } | d k r) |  j | j Sd S(   s%   Retrieve the complement of the symbolN(   R  RT   R   RL   (   RD   Ry   t   complementi(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyR   p  s    c         C  s   |  j  | S(   s,   Retrieve the complement of the symbol faster(   R   (   RD   Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getComplementFastw  s    c         C  s(   x! | D] } |  j  |  s t Sq Wt S(   sE   Check if the string contains only symbols that belong to the alphabet(   R   RR   Rp   (   RD   t   symstrRy   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   isValidString{  s    c         C  s   |  j  S(   s3   Retrieve the number of core symbols in the alphabet(   R   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getLen  s    c         C  s   t  t |  j  |  j d  S(   sE   Retrieve the number of core symbols plus the wildcard in the alphabeti   (   R   R   R   R   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt
   getWildLen  s    c         C  s   t  |  j  S(   sE   Retrieve the full count of core and ambiguous symbols in the alphabet(   R   R   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt
   getFullLen  s    c         C  s    g  | D] } |  j  |  ^ q S(   s,   Encode the symstr as indexes of the alphabet(   R   (   RD   R  Ry   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   encodeString  s    c         C  s?   |  j  d k r+ t |  j   d k r+ |  j  Sd j |  j  Sd S(   s   Get the alphabet namei    R   N(   RM   RT   R   R   R   (   RD   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getName  s    $c         C  s  g  } g  } x |  j  D] } | j |  j k s4 | r= | j n	 |  j   } |  j r | j j   r | j | j j    | j |  n  xc | j	 D]X } | j |  | j |  |  j r | j   r | j | j    | j |  q q Wq Wt
 j d j |  d j |   S(   s   Create a translation table that will convert sequences to the prime symbol.
        Optionally convert ambiguous symbols to the wildcard symbol.R   (   R   R   R   RL   R   R   R	   Ru   R   RQ   t   stringt	   maketransR   (   RD   t   allow_ambigt   srct   destR   t   dsymRy   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt
   translator  s    -c   	      C  s  g  } x t  |  j  D] } |  j | } | j d k r> q n  |  j | j } | j | k  rc q n  | j | j    | j d  | j | j    | j d  q WxY t  |  j  D]H } |  j | } | j d k r q n  | j | j    | j d  q Wx t  |  j t |  j   D] } |  j | } d j	 t
 g  | j D] } |  j | j ^ qEd t  } | j | j    | j d  | j |  | j d  | j d k rxH | j D]: } | j |  | j d  | j |  | j d  qWqqWx t  |  j  D]p } |  j | } | j d k rxK | j D]= } | j |  | j d  | j | j  | j d  qAWqqWd j	 |  S(   s.   Create the text representation of the alphabets    ~ s   
R   R   s    = N(   R   R   R   RO   RT   R   Ru   R   R   R   R   RP   RL   R   RQ   (	   RD   R   R'   t   sym1t   sym2Ry   t   symit   comprise_strRS   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyR     sP    ";s       s     c           s  g  } | j  d j | t   j        j d k r\ | j  d j   j j     n  | j  d  x  j D]} | j  d j | | | j   t | j	    t
 | j  d k r | j  d j t d j | j     n  t
 | j  d k r@| j d k rx  j | j } | j  d	 j t | j	    qxn8 | j  d
 j t d j   f d   | j D     | j d k rt
 | j  d k r| j  d j t | j    n  | j d k r| j  d j | j   n  | j  d  qs W| j  d j |   d j |  S(   s-   Create the XML representation of the alphabets   {:s}<alphabet name={:s}s    like="{:s}"s   >
s%   {:s}{:s}<letter id="{:s}" symbol={:s}i    s    aliases={:s}R   i   s    complement={:s}s    equals={:s}c         3  s   |  ] }   j  | j Vq d  S(   N(   R   RL   (   t   .0R'   (   RD   (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pys	   <genexpr>  s    s
    name={:s}s    colour="{:06X}"s   />
s   {:s}</alphabet>
N(   Ru   R   R   R  Rh   RT   R   R   R   RL   R   RQ   R   RP   RO   RM   RN   (   RD   t   padt   indentR   Ry   t   csym(    (   RD   sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   asXML  s*    %".+%8$"(%   RH   RI   RJ   RE   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R  R  R  R  R   R  R	  R
  R  R  R  R  Rp   R  R   R  (    (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyR     sD   	F														
																0c          C  s  t    }  |  j d d d |  j d d d d d |  j d d	 d
 d d |  j d d d d d |  j d d d d d |  j d d d d |  j d d d d |  j d d d d |  j d d d d |  j d d d d |  j d d  d d! |  j d" d# d d$ |  j d% d& d d' |  j d( d) d d* |  j d+ d, d d- |  j d. d/ d d0 |  j d1 d2 d d3 |  j d4 d2 d d3 |  j d5 d2 d d3 |  j   t |   S(6   s   Create a DNA alphabetR   Rh   R   t   Adeninei   RO   t   Tt   Ct   Cytosinei   t   Gt   Guaninei  t   Thyminei   t   Ut   UracilRP   t   Wt   Weakt   ATt   St   Strongt   CGt   Mt   Aminot   ACt   Kt   Ketot   GTt   Rt   Purinet   AGt   Yt
   Pyrimidinet   CTt   Bs   Not At   CGTt   Ds   Not Ct   AGTt   Hs   Not Gt   ACTt   Vs   Not Tt   ACGt   Ns   Any baseR   t   Xt   .(   RY   R   R   R   R   (   t   factory(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   dna  s,    	
c          C  s  t    }  |  j d d d |  j d d d  |  j d d d  |  j d	 d
 d  |  j d d d  |  j d d d d |  j d d d d |  j d d d d |  j d d d d |  j d d d d |  j d d d d  |  j d! d" d d# |  j d$ d% d d& |  j d' d( d d) |  j d* d+ d d, |  j d- d. d d/ |  j d0 d1 d d2 |  j d3 d1 d d2 |  j d4 d1 d d2 |  j   t |   S(5   s   Create a RNA alphabetR   Rh   R   R  i   R!  R"  i   R#  R$  i  R&  R'  i   R   R%  RP   R(  R)  t   AUR+  R,  R-  R.  R/  R0  R1  R2  t   GUR4  R5  R6  R7  R8  t   CUR:  s   Not At   CGUR<  s   Not Ct   AGUR>  s   Not Gt   ACUR@  s   Not URA  RB  s   Any baseR   RC  RD  (   RY   R   R   R   R   (   RE  (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   rna  s,    	
c          C  s0  t    }  |  j d d d |  j d d d  |  j d d d	  |  j d
 d d  |  j d d d  |  j d d d  |  j d d d  |  j d d d  |  j d d d  |  j d d d  |  j d d d  |  j d d d  |  j d  d! d	  |  j d" d# d  |  j d$ d% d  |  j d& d' d(  |  j d) d* d  |  j d+ d, d  |  j d- d. d  |  j d/ d0 d1  |  j d2 d3 d  |  j d4 d5 d6 d7 |  j d8 d9 d6 d: |  j d; d< d6 d= |  j d> d? d6 d@ |  j dA d? d6 d@ |  j dB d? d6 d@ |  j   t |   S(C   s   Create a protein alphabett   ProteinRh   R   R   t   Alaninei   R4  t   Argininei   RB  t
   Asparaginei   R<  s   Aspartic acidi  R!  t   Cysteinet   Es   Glutamic acidt   Qt	   GlutamineR#  t   Glycinei  R>  t	   Histidinei t   It
   Isoleucinet   Lt   LeucineR1  t   LysineR.  t
   Methioninet   Ft   Phenylalaninet   Pt   Prolinei  R+  t   SerineR   t	   ThreonineR(  t
   TryptophanR7  t   Tyrosinei3 R@  t   ValineR:  s   Asparagine or Aspartic acidRP   t   NDR   s   Glutamine or Glutamic acidt   QEt   Js   Leucine or Isoleucinet   LIRC  s   Any amino acidt   ARNDCEQGHILKMFPSTWYVt   *RD  (   RY   R   R   R   R   (   RE  (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   protein'  s<    	
c         C  s(   x! | D] } | j  |  k r | Sq Wd S(   sj  Retrieve a pre-defined alphabet by name.
    Currently, "Protein", "DNA" and "RNA" are available.
    Example:
    >>> alpha = sequence.getAlphabet('Protein')
    >>> alpha.getSymbols()
    will retrieve the 20 amino acid alphabet and output the tuple:
    ('A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y')
    N(   RM   RT   (   RM   t	   alphabetst   alphabet(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt	   getByNameJ  s    	c         C  s  i  } x7 |  D]/ } | | k r2 | | c d 7<q d | | <q Wg  } x? | D]7 } x. | D] } | j  |  sZ PqZ qZ W| j |  qM Wt |  d k r d St |  d k r | d Sd } d } g  } x| D]} t   }	 t   }
 d } d } d } x | j   D] \ } } | j |  rx|	 j | j |   | j	 |  rk|
 j | j |   | | 7} q| | 7} q| j
 |  r| j	 |  r| | 7} q| | 7} q| | 7} qW| | | } | | | t |	  | j   } | | t |
  | j   } | | k s0| | k rH| | k rH| } | } | g } q | | k r | | k r | j |  q q Wt |  d k r| d S| d S(   s?   Retrieve a pre-defined alphabet by looking at symbol frequency.i   i    N(   R   Ru   R   RT   R   Rv   R   R   R   R   R   R
  (   t   seqRn  t   countsRy   t   valid_alphabetsRo  t   best_normal_scoret   best_prime_scoret   best_alphabetst   seent
   prime_seent   primet   altR   t   countt   totalt   normal_scoret   prime_score(    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   getBySeqX  s`    		"$c         C  s   t    } | j |   S(   N(   RY   R   (   R   R   (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   loadFromFile  s    	c          C  s/   t  t j  d k r+ t t j d  }  n  d  S(   Ni   (   R   t   syst   argvR  (   Ro  (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   main  s    t   __main__($   t
   __future__R    t   collectionsR   R   R"   R[   R  R  t   xml.sax.saxutilsR   t
   namedtupleR   R   R   R    R!   R,   R5   RB   t	   ExceptionRC   t   objectRK   RY   R   R   RF  RM  Rm  t   predefRp  R  R  R  RH   (    (    (    sD   /woldlab/castor/home/georgi/programs/meme_4.11.2/scripts/alphabet.pyt   <module>   s6   T	&		#		+	$	" P8 r			!:		 