ó
jëIc           @   sh   d  Z  d d l m Z d d l m Z d d l m Z d d l Z d d l Z d e	 f d „  ƒ  YZ
 d S(   s€  
Implementation of sequence motifs.

Changes:
10.2007 - BW added matrix (vertical, horizontal) input, jaspar, transfac-like output
26.08.2007 - added a background attribute  (Bartek Wilczynski)
26.08.2007 - added a DPQ measure   (Bartek Wilczynski)
9.2007 (BW) : added the .to_fasta() and .weblogo() methods allowing to use the Berkeley weblogo server at http://weblogo.berkeley.edu/
iÿÿÿÿ(   t   Seq(   t	   FreqTable(   t   IUPACNt   Motifc           B   sÁ  e  Z d  Z e j d „ Z d „  Z d „  Z d „  Z d „  Z	 e
 d „ Z e
 d „ Z d „  Z e d	 „ Z d
 „  Z d d d „ Z d d d e
 d „ Z d d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z e d „ Z d „  Z d „  Z d „  Z d „  Z e d „ Z d- e d „ Z! d- e d „ Z" d „  Z# d „  Z$ d  „  Z% d! „  Z& d" „  Z' d# „  Z( d$ „  Z) d% „  Z* d& d' „ Z+ d( „  Z, d- d) „ Z- d- e
 d* „ Z. d+ „  Z/ d, „  Z0 RS(.   s/   
    A class representing sequence motifs.
    c            s©   g  ˆ  _  t ˆ  _ i  ˆ  _ t ˆ  _ g  ˆ  _ t ˆ  _ g  ˆ  _ t ˆ  _ g  ˆ  _	 | ˆ  _
 d  ˆ  _ t t ‡  f d †  ˆ  j
 j ƒ ƒ ˆ  _ d ˆ  _ d  ˆ  _ d ˆ  _ d  S(   Nc            s   |  d t  ˆ  j j ƒ f S(   Ng      ð?(   t   lent   alphabett   letters(   t   n(   t   self(    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   <lambda>#   s    g      ð?t    (   t	   instancest   Falset   has_instancest   countst
   has_countst   maskt   _pwm_is_currentt   _pwmt   _log_odds_is_currentt	   _log_oddsR   t   Nonet   lengtht   dictt   mapR   t
   backgroundt   betat   infot   name(   R   R   (    (   R   sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   __init__   s    											'		c         C   sP   |  j  d  k r | |  _  n1 |  j  | k rL d G|  j  G|  j GHt d ƒ ‚ n  d  S(   NR   s(   You can't change the length of the motif(   R   R   R   t
   ValueError(   R   R   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _check_length(   s
    c         C   s=   |  j  d  k r | |  _  n |  j  | k r9 t d ƒ ‚ n  d  S(   Ns   Wrong Alphabet(   R   R   R   (   R   R   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _check_alphabet/   s    c         C   s¯   |  j  | j ƒ |  j t | ƒ ƒ |  j rj x; t |  j ƒ D]' } | | } |  j | | c d 7<q< Wn  |  j s} |  j r™ |  j	 j
 | ƒ t |  _ n  t |  _ t |  _ d S(   s0   
        adds new instance to the motif
        i   N(   R    R   R   R   R   t   rangeR   R   R   R   t   appendt   TrueR   R   R   (   R   t   instancet   it   let(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   add_instance5   s    	
	c         C   s   |  j  t | ƒ ƒ g  |  _ x\ | D]T } | d k rH |  j j d ƒ q# | d k rg |  j j d ƒ q# t d | ƒ ‚ q# Wd S(   sª   
        sets the mask for the motif

        The mask should be a string containing asterisks in the position of significant columns and spaces in other columns
        t   *i   t    i    s2   Mask should contain only '*' or ' ' and not a '%s'N(   R   R   R   R"   R   (   R   R   t   char(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   set_maskH   s    	c         C   s"  |  j  r |  j Sg  |  _ xö t |  j ƒ D]å } i  } x? |  j j D]1 } | ri |  j |  j | | | <qB d | | <qB W|  j rµ xf |  j j D]! } | | c |  j	 | | 7<q Wn4 |  j
 ré x( |  j D] } | | | c d 7<qÈ Wn  |  j j t j | t j |  j ƒ ƒ q) Wd |  _  |  j S(   sÁ   
        returns the PWM computed for the set of instances

        if laplace=True (default), pseudocounts equal to self.background multiplied by self.beta are added to all positions.
        g        i   (   R   R   t   xrangeR   R   R   R   R   R   R   R   R   R"   R   t   COUNT(   R   t   laplaceR%   R   t   lettert   seq(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   pwmX   s$    			"	)	c         C   s¨   |  j  r |  j Sg  |  _ |  j | ƒ } xm t |  j ƒ D]\ } i  } x= |  j j D]/ } t j | | | |  j	 | d ƒ | | <qQ W|  j j
 | ƒ q8 Wd |  _  |  j S(   sP   
        returns the logg odds matrix computed for the set of instances
        i   i   (   R   R   R1   R,   R   R   R   t   matht   logR   R"   (   R   R.   R1   R%   t   dt   a(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   log_oddsx   s    		-	c         C   s’   d } |  j  ƒ  } xy t |  j ƒ D]h } | d 7} xU |  j j D]G } | | | d k r? | | | | t j | | | d ƒ 7} q? q? Wq" W| S(   s=   Method returning the information content of a motif.
        i    i   (   R1   R!   R   R   R   R2   R3   (   R   t   resR1   R%   R5   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   icŠ   s    
5c   	      C   s/  d } d } |  j  ƒ  } xó t |  j ƒ D]â } d } d } x± |  j j D]£ } | | | d k rG | | | | t j | | | d ƒ t j |  j | d ƒ 7} | | | | t j | | | d ƒ t j |  j | d ƒ d 7} qG qG W| | 7} | | | d 7} q( W| r'| t j | ƒ f S| Sd S(   sX   
        Computes expected score of motif's instance and its standard deviation
        g        i    i   N(	   R1   R!   R   R   R   R2   R3   R   t   sqrt(	   R   t   st_devt   exst   varR1   R%   t   ex1t   ex2R5   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt	   exp_score–   s    AL
c         c   s   |  j  s t d ƒ ‚ n  xq t d t | ƒ |  j d ƒ D]O } xF |  j D]; } | j ƒ  | | | |  j !j ƒ  k rI | | f VPqI qI Wq9 Wd S(   so   
        a generator function, returning found positions of instances of the motif in a given sequence
        s   This motif has no instancesi    i   N(   R   R   R,   R   R   R   t   tostring(   R   t   sequencet   posR$   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   search_instances«   s    	'&i    c   	      C   s³   |  j  ƒ  } d } x\ t |  j ƒ D]K } | | | } | sJ |  j | r" y | | | | 7} Wqm qm Xq" q" W| r¯ | s | |  j :} q¯ | t t d „  |  j ƒ ƒ :} n  | S(   s9   
        give the pwm score for a given position
        g        c         S   s   |  S(   N(    (   t   x(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyR	   È   s    (   R6   R,   R   R   R   t   filter(	   R   RA   t   positiont
   normalizedt   maskedt   lot   scoreRB   R5   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt	   score_hit·   s    "g        c   
      c   sÄ   | r |  j  ƒ  } n  | j ƒ  j ƒ  } x– t d t | ƒ |  j d ƒ D]t } |  j | | | | ƒ } | | k r€ | | f Vn  | rH | j | | | | ƒ }	 |	 | k r¼ | |	 f Vq¼ qH qH Wd S(   s}   
        a generator function, returning found hits in a given sequence with the pwm score higher than the threshold
        i    i   N(   t   reverse_complementR@   t   upperR,   R   R   RK   (
   R   RA   RG   RH   t	   thresholdt   botht   rcRB   RJ   t	   rev_score(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt
   search_pwmË   s    'c         C   sª   |  j  | j  k r! t d ƒ ‚ n  d } xr t |  j d | j ƒ D]V } | d k  rj |  j | | ƒ } n | j |  | ƒ } | | k  rB | } | } qB qB Wd | | f S(   s·   
        return the similarity score based on pearson correlation for the given motif against self.

        We use the Pearson's correlation of the respective probabilities.
        s.   Cannot compare motifs with different alphabetsiþÿÿÿi   i    (   R   R   R!   R   t   dist_pearson_at(   R   t   motifRH   t   max_pt   offsett   pt   max_o(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   dist_pearsonÜ   s    !c         C   s=  d } d } d } d } d } t  |  j | | j ƒ } xœ t t  |  j | | j ƒ ƒ D]{ }	 xr |  j j D]d }
 |  |	 |
 } | |	 | |
 } | | } | | } | | | } | | | } | | | } qj WqW W| t |  j j ƒ 9} | | | d | } | | | | d | | | | d } | t j | ƒ S(   Ni    g      ð?(   t   maxR   R!   R   R   R   R2   R9   (   R   RT   RV   t   sxxt   sxyt   sxt   syt   syyt   normRB   t   lt   xit   yit   s1t   s2(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyRS   ò   s&    &

*c         C   s™   d } xr t  |  j d | j ƒ D]V } | d k  rI |  j | | ƒ } n | j |  | ƒ } | | k  r! | } | } q! q! Wd | |  j |  d ƒ | f S(   s   
        A similarity measure taking into account a product probability of generating overlaping instances of two motifs
        g        i   i    (   R!   R   t   dist_product_at(   R   t   otherRU   RV   RW   RX   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   dist_product	  s    !c   	      C   sŠ   d } xy t  t |  j | | j ƒ ƒ D]X } |  | } | | | } x7 |  j j ƒ  D]& \ } } | | | | | | 7} qT Wq& W| | S(   Ni    (   R!   RZ   R   R   t   items(	   R   Rg   RV   t   sR%   t   f1t   f2R   t   b(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyRf     s    &
"c   	      C   s  t  d ƒ } d } g  } xñ t |  j d | j ƒ D]Õ } | d k  rh |  j | | ƒ } |  j | } n | j |  | ƒ } | j | } t |  j | j | ƒ } |  j | j d | } | | | d | | d | } | j | | f ƒ | | k r3 | } | } q3 q3 W| | f S(   sÒ  Calculates the DPQ distance measure between motifs.

        It is calculated as a maximal value of DPQ formula (shown using LaTeX
        markup, familiar to mathematicians):
        
        \sqrt{\sum_{i=1}^{alignment.len()} \sum_{k=1}^alphabet.len() \
        \{ m1[i].freq(alphabet[k])*log_2(m1[i].freq(alphabet[k])/m2[i].freq(alphabet[k])) +
           m2[i].freq(alphabet[k])*log_2(m2[i].freq(alphabet[k])/m1[i].freq(alphabet[k]))
        }
        
        over possible non-spaced alignemts of two motifs.  See this reference:

        D. M Endres and J. E Schindelin, "A new metric for probability
        distributions", IEEE transactions on Information Theory 49, no. 7
        (July 2003): 1858-1860.
        t   infiÿÿÿÿi   i    i   (   t   floatR!   R   t   dist_dpq_att   minR"   (	   R   Rg   t   min_dt   min_ot   d_sRV   R4   t   overlapt   out(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   dist_dpq!  s"    !"c         C   sn   d „  } d } xX t  t |  j | | j ƒ ƒ D]7 } |  | } | | | } | | | | |  j ƒ 7} q/ W| S(   s|   
        calculates the dist_dpq measure with a given offset.

        offset should satisfy 0<=offset<=self.length
        c         S   sƒ   d } xm | j  D]b } |  | | | d } | |  | t j |  | | d ƒ | | t j | | | d ƒ 7} q Wt j | ƒ S(   Ni    i   (   R   R2   R3   R9   (   Rk   Rl   t   alphaRj   R   t   avg(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   dpqP  s
    Ji    (   R!   RZ   R   R   (   R   Rg   RV   Rz   Rj   R%   Rk   Rl   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyRp   J  s    	&
c         C   s\   xU | j  ƒ  } d | k r5 |  j | j d ƒ ƒ Pn  |  j t | j ƒ  |  j ƒ ƒ q Wd S(   sÂ   Reads the motif from the stream (in AlignAce format).

        the self.alphabet variable must be set beforehand.
        If the last line contains asterisks it is used for setting mask
        R(   s   
\cN(   t   readlineR+   t   stripR'   R    R   (   R   t   streamt   ln(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _read^  s    c         C   sƒ   d } x% |  j  D] } | | j ƒ  d } q W| r x; t |  j ƒ D]* } |  j | rd | d } qD | d } qD W| d } n  | S(   s+    string representation of a motif.
        R
   s   
R(   R)   (   R   R@   R,   R   R   (   R   RH   t   strt   instR%   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   __str__l  s    c         C   s   |  j  d k r d S|  j  Sd S(   s%   return the length of a motif
        i    N(   R   R   (   R   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   __len__|  s    c         C   s   | j  |  j ƒ  ƒ d S(   s0   
        writes the motif to the stream
        N(   t   writeR‚   (   R   R}   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _write„  s    c         C   s\   |  j  s |  j ƒ  n  d } x9 t |  j ƒ D]( \ } } | d | | j ƒ  d } q, W| S(   s/   
        FASTA representation of motif
        R
   s   > instance %d
s   
(   R   t   make_instances_from_countst	   enumerateR   R@   (   R   R€   R%   R   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt	   _to_fasta  s    	 c         C   sõ   t  ƒ  } |  j r< xÐ |  j D] } | j | j ƒ  ƒ q Wn© t | _ |  j d | j d <|  j d | j d <|  j d | j d <|  j d | j d <| j d j ƒ  | j d j ƒ  | j d j ƒ  | j d j ƒ  |  j	 | _	 |  j
 | _
 | S(   s;   
        Gives the reverse complement of the motif
        t   Tt   At   Ct   G(   R   R   R   R'   RL   R#   R   R   t   reverseR   R   (   R   R7   R%   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyRL   ™  s     			c         C   s   |  j  | d d d | ƒS(   sq   
        reads the motif from Jaspar .pfm file

        The instances are fake, but the pwm is accurate.
        R   t   ACGTt   make_instances(   t   _from_horiz_matrix(   R   R}   R   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _from_jaspar_pfm°  s    c   	      C   sú   i  |  _  t |  _ | d k r- |  j j } n  d |  _ x | D] } g  |  j  | <q= Wxr | j ƒ  D]d } t t	 | j
 ƒ  j ƒ  ƒ } x1 t | | ƒ D]  \ } } |  j  | j | ƒ q’ W|  j d 7_ qa W|  j d |  j ƒ | t k rö |  j ƒ  n  |  S(   sJ   reads a vertical count matrix from stream and fill in the counts.
        i    i   R(   N(   R   R#   R   R   R   R   R   t	   readlinesR   Ro   R|   t   splitt   zipR"   R+   R†   (	   R   R}   R   R   R%   R~   t   rect   kt   v(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _from_vert_matrix¸  s     			c            s#  | d k r ˆ  j j } n  i  ˆ  _ t ˆ  _ x‡ | D] } | j ƒ  j ƒ  j ƒ  } | d | k ro | d } n  y t	 t
 | ƒ ˆ  j | <Wq4 t k
 r² t	 t | ƒ ˆ  j | <q4 Xq4 Wt t	 ‡  f d †  | ƒ ƒ } t ˆ  j | d ƒ } | ˆ  _ ˆ  j d | ƒ | t k rˆ  j ƒ  n  ˆ  S(   sL   reads a horizontal count matrix from stream and fill in the counts.
        i    i   c            s   ˆ  j  |  d S(   Ni    (   R   (   t   nuc(   R   (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyR	   á  s    R(   N(   R   R   R   R   R#   R   R{   R|   R“   R   t   intR   Ro   t   sumR   R   R+   R†   (   R   R}   R   R   R%   R~   Rj   Ra   (    (   R   sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyR   Í  s&    			c      	      su  d j  ˆ  j j ƒ } g  } t ˆ  _ g  ˆ  _ t t ‡  f d †  ˆ  j j ƒ ƒ } x² t ˆ  j	 ƒ D]¡ } | j
 d ƒ x5 ˆ  j j D]' } | | | ˆ  j | | | | <q Wt | | ƒ | k  ra d Gt | | ƒ G| GH| | c | | | t | | ƒ  7<qa qa Wxe t | ƒ D]W } d } x) t ˆ  j	 ƒ D] } | | | | 7} q/Wt | ˆ  j ƒ } ˆ  j | ƒ qWˆ  j S(   sÄ   Creates "fake" instances for a motif created from a count matrix.

        In case the sums of counts are different for different columnes, the shorter columns are padded with background.
        R
   c            s   ˆ  j  |  d S(   Ni    (   R   (   R™   (   R   (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyR	   õ  s    s   WARNING, column too short(   t   joinR   R   R#   R   R   R›   R   R!   R   R"   R   R   R    R'   (   R   Rx   t   colRj   R%   R   R   t   j(    (   R   sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyR†   ë  s&    		$%-c         C   sØ   i  } x |  j  j D] } g  | | <q Wt |  _ t |  j ƒ } x‰ t |  j ƒ D]x } t t	 d „  |  j  j ƒ ƒ } x% |  j D] } | | | c d 7<q} Wx) |  j  j D] } | | j
 | | ƒ q¨ WqO W| |  _ | S(   s>   Creates the count matrix for a motif with instances.

        c         S   s
   |  d f S(   Ni    (    (   R5   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyR	     s    i   (   R   R   R#   R   R   R   R!   R   R   R   R"   R   (   R   R   R5   Rj   R%   t   ciR   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   make_counts_from_instances  s    		c         C   s  xé t  rë | j ƒ  } | d k s1 | d d k r5 Pn  | j ƒ  j ƒ  } d } x( | | | | j ƒ  k rw | d 7} qP Wd } xH | t | ƒ k  rÈ | | | | j ƒ  k rÈ | | | 7} | d 7} q Wt | |  j ƒ } |  j | ƒ q W|  j	 d t | ƒ ƒ |  S(   s`   
        reads the motif from Jaspar .sites file

        The instances and pwm are OK.
        R
   i    t   >i   R(   (
   R#   R{   R|   t   lowerR   RM   R    R   R'   R+   (   R   R}   R~   R%   R   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _from_jaspar_sites  s     	/c         C   s.   | t  |  j ƒ k r# |  j ƒ  | S|  j Sd S(   sË   Returns the probability distribution over symbols at a given position, padding with background.

        If the requested index is out of bounds, the returned distribution comes from background.
        N(   R!   R   R1   R   (   R   t   index(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   __getitem__7  s    c         C   sŒ   d } xs t  |  j ƒ D]b } d } d } xC |  | j ƒ  D]1 } |  | | | k r9 |  | | } | } q9 q9 W| | 7} q Wt | |  j ƒ S(   s3   Returns the consensus sequence of a motif.
        R
   i    t   X(   R!   R   t   keysR    R   (   R   R7   R%   t   max_ft   max_nR   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt	   consensusA  s    c         C   sŒ   d } xs t  |  j ƒ D]b } d } d } xC |  | j ƒ  D]1 } |  | | | k  r9 |  | | } | } q9 q9 W| | 7} q Wt | |  j ƒ S(   sL   returns the least probable pattern to be generated from this motif.
        R
   g      $@R¦   (   R!   R   R§   R    R   (   R   R7   R%   t   min_ft   min_nR   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   anticonsensusO  s    c         C   s   |  j  |  j ƒ  d ƒ S(   so   Maximal possible score for this motif.

        returns the score computed for the consensus sequence.
        i    (   RK   Rª   (   R   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt	   max_score]  s    c         C   s   |  j  |  j ƒ  d ƒ S(   ss   Minimal possible score for this motif.

        returns the score computed for the anticonsensus sequence.
        i    (   RK   R­   (   R   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt	   min_scored  s    t   PNGc         K   sœ  d d l  } d d l } |  j ƒ  } d } i | d 6| d 6d d 6d d	 6d
 d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d  6d d! 6d d" 6d# d$ 6d d% 6d d& 6d' d( 6d) d* 6d+ d, 6d- d. 6d/ d0 6d1 d2 6d3 d4 6d/ d* 6} x* | j ƒ  D] \ }	 }
 t |
 ƒ | |	 <qW| j | ƒ } | j | | ƒ } | j | ƒ } t | d5 ƒ } | j	 ƒ  } | j
 | ƒ | j ƒ  d S(6   sÜ   
        uses the Berkeley weblogo service to download and save a weblogo of itself
        
        requires an internet connection.
        The parameters from **kwds are passed directly to the weblogo server.
        iÿÿÿÿNs$   http://weblogo.berkeley.edu/logo.cgiRA   t   formatt   18t	   logowidtht   5t
   logoheightt   cmt	   logounitst   AUTOt   kindt   1t   firstnums   Create Logot   commandt   ont   smallsamplecorrectioni    t   symbolsperlinet   96R7   t   ppit	   res_unitst	   antialiasR
   t   titlet   barbitst   xaxist   xaxis_labelt   yaxist   yaxis_labelt   showendss   0.5t   shrinkt	   fineprintt   ticbitst   DEFAULTt   colorschemet   greent   color1t   bluet   color2t   redt   color3t   blackt   color4t   purplet   color5t   oranget   color6t   w(   t   urllibt   urllib2Rˆ   Ri   R€   t	   urlencodet   Requestt   urlopent   opent   readR„   t   close(   R   t   fnameR±   t   kwdsRÝ   RÞ   t   alt   urlt   valuesR–   R—   t   datat   reqt   responset   ft   im(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   weblogok  sX    

c         C   s	  d } y | d |  j  7} Wn n X| d 7} x" |  j j D] } | d | 7} q< W| d 7} |  j st |  j ƒ  n  x„ t |  j ƒ D]s } | d k  r« | d | d 7} n | d	 | d 7} x- |  j j D] } | d
 |  j | | 7} qÊ W| d 7} q„ W| d 7} | S(   s?   Write the representation of a motif in TRANSFAC format
        s   XX
TY Motif
s   ID %s
s   BF undef
P0s    %ss   
i	   s   0%di   s   %ds    %ds   XX
(   R   R   R   R   R    R!   R   R   (   R   R7   R5   R%   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _to_transfac£  s(    

	
c         C   s˜   | d k r |  j j } n  t |  _ |  j d t ƒ } d } xU t |  j ƒ D]D } | d j g  | D] } t	 | | | ƒ ^ qb ƒ 7} | d 7} qL W| S(   sI   Return string representation of the motif as  a matrix.
        
        R.   R
   s   	s   
N(
   R   R   R   R   R   R1   R!   R   Rœ   R€   (   R   R   R1   R7   R%   R5   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _to_vertical_matrix¼  s    	4c         C   s  | d k r |  j j } n  d } | r t |  _ |  j d t ƒ } xÏ | D]M } | d j g  t |  j ƒ D] } t	 | | | ƒ ^ qh ƒ 7} | d 7} qI Wnw |  j
 s³ |  j ƒ  n  |  j } xU | D]M } | d j g  t |  j ƒ D] } t	 | | | ƒ ^ qâ ƒ 7} | d 7} qÃ W| S(   sI   Return string representation of the motif as  a matrix.
        
        R
   R.   s   	s   
N(   R   R   R   R   R   R1   Rœ   R!   R   R€   R   R    R   (   R   R   RG   R7   t   matR5   R%   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _to_horizontal_matrixÊ  s     	=		=c         C   s   |  j  d t d d ƒ S(   s4   Returns the pfm representation of the motif
        RG   R   RŽ   (   Ró   R   (   R   (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   _to_jaspar_pfmà  s    c         C   sW   i |  j  d 6|  j d 6|  j d 6} y | | ƒ  SWn t k
 rR t d ƒ ‚ n Xd S(   sý   Returns a string representation of the Motif in a given format

        Currently supported fromats:
         - jaspar-pfm : JASPAR Position Frequency Matrix
         - transfac : TRANSFAC like files
         - fasta : FASTA file with instances
        s
   jaspar-pfmt   transfact   fastas   Wrong format typeN(   Rô   Rð   Rˆ   t   KeyErrorR   (   R   R±   t
   formatters(    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyR±   å  s    	

N(1   t   __name__t
   __module__t   __doc__R   t   unambiguous_dnaR   R   R    R'   R+   R#   R1   R6   R8   R   R?   RC   RK   RR   RY   RS   Rh   Rf   Rw   Rp   R   R‚   Rƒ   R…   Rˆ   RL   R‘   R   R˜   R   R†   R    R£   R¥   Rª   R­   R®   R¯   Rï   Rð   Rñ   Ró   Rô   R±   (    (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyR      sT   				 							)											
				8		(   Rû   t   Bio.SeqR    t   Bio.SubsMatR   t   Bio.AlphabetR   R2   t   randomt   objectR   (    (    (    sƒ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/Motif/_Motif.pyt   <module>   s
   