
ξ
Ec           @   sd   d  Z  d d l Z d d l Z d d l Z d d l m Z d f  d     YZ d f  d     YZ d S(   s,   Deal with representations of Markov Models.
iN(   t
   MutableSeqt   MarkovModelBuilderc           B   s   e  Z d  Z d Z d   Z d   Z d   Z d   Z d   Z d   Z	 d   Z
 d d d	  Z d
   Z d   Z d   Z d   Z d   Z RS(   si  Interface to build up a Markov Model.

    This class is designed to try to separate the task of specifying the
    Markov Model from the actual model itself. This is in hopes of making
    the actual Markov Model classes smaller.

    So, this builder class should be used to create Markov models instead
    of trying to initiate a Markov Model directly.
    i   c         C   sR   | |  _  | |  _ i  |  _ |  j | |  |  _ i  |  _ |  j | |  |  _ d S(   s;  Initialize a builder to create Markov Models.

        Arguments:

        o state_alphabet -- An alphabet containing all of the letters that
        can appear in the states
       
        o emission_alphabet -- An alphabet containing all of the letters for
        states that can be emitted by the HMM.
        N(   t   _state_alphabett   _emission_alphabett   transition_probt
   _all_blankt   emission_probt   transition_pseudot   _all_pseudot   emission_pseudo(   t   selft   state_alphabett   emission_alphabet(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   __init__   s    						c         C   sB   i  } x5 | j  D]* } x! | j  D] } d | | | f <q  Wq W| S(   s!  Return a dictionary with all counts set to zero.

        This uses the letters in the first and second alphabet to create
        a dictionary with keys of two tuples organized as
        (letter of first alphabet, letter of second alphabet). The values
        are all set to 0.
        i    (   t   letters(   R
   t   first_alphabett   second_alphabett	   all_blankt   first_statet   second_state(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyR   1   s
    c         C   sE   i  } x8 | j  D]- } x$ | j  D] } |  j | | | f <q  Wq W| S(   sc  Return a dictionary with all counts set to a default value.

        This takes the letters in first alphabet and second alphabet and
        creates a dictionary with keys of two tuples organized as:
        (letter of first alphabet, letter of second alphabet). The values
        are all set to the value of the class attribute DEFAULT_PSEUDO.
        (   R   t   DEFAULT_PSEUDO(   R
   R   R   t
   all_countsR   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyR   @   s
    c         C   s[   t  j |  j  } t  j |  j  } t  j |  j  } t  j |  j  } t | | | |  S(   s   Return the markov model corresponding with the current parameters.

        Each markov model returned by a call to this function is unique
        (ie. they don't influence each other).
        (   t   copyt   deepcopyR   R   R   R	   t   HiddenMarkovModel(   R
   R   R   R   R	   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   get_markov_modelO   s    	c         C   s   t  d  t  t |  j j     } x$ |  j j   D] } | |  j | <q5 Wt  d  t  t |  j j     } x$ |  j j   D] } | |  j | <q Wd S(   s  Reset all probabilities to be an average value.

        This resets the values of all allowed transitions and all allowed
        emissions to be equal to 1 divided by the number of possible elements.

        This is useful if you just want to initialize a Markov Model to
        starting values (ie. if you have no prior notions of what the
        probabilities should be -- or if you are just feeling too lazy
        to calculate them :-).

        Warning 1 -- this will reset all currently set probabilities.

        Warning 2 -- This just sets all probabilities for transitions and
        emissions to total up to 1, so it doesn't ensure that the sum of
        each set of transitions adds up to 1.
        i   N(   t   floatt   lenR   t   keysR   (   R
   t   new_trans_probt   keyt   new_emission_prob(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   set_equal_probabilities]   s    %%c         C   s^   x* |  j  j   D] } t j   |  j  | <q Wx* |  j j   D] } t j   |  j | <q= Wd S(   s  Set all probabilities to randomly generated numbers.

        This will reset the value of all allowed transitions and emissions
        to random values.

        Warning 1 -- This will reset any currently set probabibilities.

        Warning 2 -- This does not check to ensure that the sum of
        all of the probabilities is less then 1. It just randomly assigns
        a probability to each
        N(   R   R   t   randomR   (   R
   R   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   set_random_probabilitiesy   s    c         C   s   |  j  |  j |  j  } |  j |  j |  j  } x( |  j j   D] } |  j | | | <q@ Wx( |  j j   D] } |  j | | | <qk W| |  _ | |  _ d S(   s   A convenience function to create transitions between all states.

        By default all transitions within the alphabet are disallowed; this
        is a way to change this to allow all possible transitions.
        N(   R   R   R   R   R   R   (   R
   t	   all_probst
   all_pseudot   set_key(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   allow_all_transitions   s    	c         C   s   x3 | | g D]% } | |  j  k s t d |   q W| | f |  j j   k r | | f |  j j   k r | d k r d } n  | |  j | | f <| d k r |  j } n  | |  j | | f <n t d | | f   d S(   s  Set a transition as being possible between the two states.

        probability and pseudocount are optional arguments
        specifying the probabilities and pseudo counts for the transition.
        If these are not supplied, then the values are set to the
        default values.

        Raises:
        KeyError -- if the two states already have an allowed transition.
        s/   State %s was not found in the sequence alphabeti    s+   Transtion from %s to %s is already allowed.N(   R   t   AssertionErrorR   R   R   t   NoneR   t   KeyError(   R
   t
   from_statet   to_statet   probabilityt   pseudocountt   statet
   pseudcount(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   allow_transition   s    	c         C   sR   y$ |  j  | | f =|  j | | f =Wn' t k
 rM t d | | f   n Xd S(   s   Restrict transitions between the two states.

        Raises:
        KeyError if the transition is not currently allowed.
        s/   Transition from %s to %s is already disallowed.N(   R   R   R)   (   R
   R*   R+   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   destroy_transition   s    c         C   sH   |  j  j | | f  r. | |  j  | | f <n t d | | f   d S(   s   Set the probability of a transition between two states.

        Raises:
        KeyError if the transition is not allowed.
        s(   Transition from %s to %s is not allowed.N(   R   t   has_keyR)   (   R
   R*   R+   R,   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   set_transition_score   s    c         C   sH   |  j  j | | f  r. | |  j  | | f <n t d | | f   d S(   s  Set the default pseudocount for a transition.

        To avoid computational problems, it is helpful to be able to
        set a 'default' pseudocount to start with for estimating
        transition and emission probabilities (see p62 in Durbin et al
        for more discussion on this. By default, all transitions have
        a pseudocount of 1.

        Raises:
        KeyError if the transition is not allowed.
        s(   Transition from %s to %s is not allowed.N(   R   R2   R)   (   R
   R*   R+   t   count(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   set_transition_pseudocount   s    c         C   sH   |  j  j | | f  r. | |  j  | | f <n t d | | f   d S(   s   Set the probability of a emission from a particular state.

        Raises:
        KeyError if the emission from the given state is not allowed.
        s&   Emission of %s from %s is not allowed.N(   R   R2   R)   (   R
   t	   seq_statet   emission_stateR,   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   set_emission_score   s    c         C   sH   |  j  j | | f  r. | |  j  | | f <n t d | | f   d S(   s  Set the default pseudocount for an emission.

        To avoid computational problems, it is helpful to be able to
        set a 'default' pseudocount to start with for estimating
        transition and emission probabilities (see p62 in Durbin et al
        for more discussion on this. By default, all emissions have
        a pseudocount of 1.

        Raises:
        KeyError if the emission from the given state is not allowed.
        s&   Emission of %s from %s is not allowed.N(   R	   R2   R)   (   R
   R6   R7   R4   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   set_emission_pseudocount  s    N(   t   __name__t
   __module__t   __doc__R   R   R   R   R   R    R"   R&   R(   R0   R1   R3   R5   R8   R9   (    (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyR      s    								 				R   c           B   sM   e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z	 RS(   sK   Represent a hidden markov model that can be used for state estimation.
    c         C   s=   | |  _  | |  _ | |  _ | |  _ |  j |  j  |  _ d S(   s  Initialize a Markov Model.

        Note: You should use the MarkovModelBuilder class instead of
        initiating this class directly.

        Arguments:

        o transition_prob -- A dictionary of transition probabilities for all
        possible transitions in the sequence.

        o emission_prob -- A dictionary of emissions probabilities for all
        possible emissions from the sequence states.

        o transition_pseudo -- Pseudo-counts to be used for the transitions,
        when counting for purposes of estimating transition probabilities.

        o emission_pseduo -- Pseudo-counts fo tbe used for the emissions,
        when counting for purposes of estimating emission probabilities.
        N(   t   _transition_pseudot   _emission_pseudoR   R   t   _calculate_from_transitionst   _transitions_from(   R
   R   R   R   R	   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyR     s
    				c         C   sy   i  } xl | j    D]^ } y | | d j | d  Wq t k
 rp g  | | d <| | d j | d  q Xq W| S(   sf  Calculate which 'from transitions' are allowed for each letter.

        This looks through all of the trans_probs, and uses this dictionary
        to determine allowed transitions. It converts this information into
        a dictionary, whose keys are the transition letters and whose
        values are a list of allowed letters to transition to.
        i    i   (   R   t   appendR)   (   R
   t   trans_probst   from_transitionst	   trans_key(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyR?   7  s    !c         C   s   |  j  S(   s,  Get the default transitions for the model.

        Returns a dictionary of all of the default transitions between any
        two letters in the sequence alphabet. The dictionary is structured
        with keys as (letter1, letter2) and values as the starting number
        of transitions.
        (   R=   (   R
   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   get_blank_transitionsN  s    c         C   s   |  j  S(   s'  Get the starting default emmissions for each sequence.
        
        This returns a dictionary of the default emmissions for each
        letter. The dictionary is structured with keys as
        (seq_letter, emmission_letter) and values as the starting number
        of emmissions.
        (   R>   (   R
   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   get_blank_emissionsX  s    c         C   s(   y |  j  | SWn t k
 r# g  SXd S(   s   Get all transitions which can happen from the given state.

        This returns all letters which the given state_letter is allowed
        to transition to. An empty list is returned if no letters are possible.
        N(   R@   R)   (   R
   t   state_letter(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   transitions_fromb  s    c         C   s  |  j  |  j  } |  j  |  j  } i  } i  } | j } d | | d d f <x" | d D] } d | | d f <qX Wx t d t |   D] }	 x | D] }
 | |
 | |	 f } i  } xO |  j |
  D]> } | | |
 f } | | |	 d f } | | } | | | <q Wt | j    } | | | |
 |	 f <x< | j	   D]. } | | | k r:| | |	 d |
 f <Pq:q:Wq Wq Wi  } xJ | D]B } | | t |  d f } | | | d f } | | | | <qWt | j    } d } x- | j	   D] } | | | k r| } qqW| d k s't
 d   t d |  } t d t |   } | j   | } x/ | D]' }	 | j |  | |	 d | f } qbW| j   | j   | f S(   s   Calculate the most probable state path using the Viterbi algorithm.

        This implements the Viterbi algorithm (see pgs 55-57 in Durbin et
        al for a full explanation -- this is where I took my implementation
        ideas from), to allow decoding of the state path, given a sequence
        of emissions.

        Arguments:

        o sequence -- A Seq object with the emission sequence that we
        want to decode.

        o state_alphabet -- The alphabet of the possible state sequences
        that can be generated.
        i   i    it    s)   Didn't find the last state to trace from!(   t   _log_transformR   R   R   t   rangeR   RH   t   maxt   valuesR   R'   R    t   reverseRA   t   toseq(   R
   t   sequenceR   t	   log_transt   log_emissiont   viterbi_probst   pred_state_seqt   state_lettersRG   t   it
   main_statet   emission_partt   possible_state_probst	   cur_statet
   trans_partt   viterbi_partt   cur_probt   max_probR.   R#   t   transition_partt   state_path_probt
   last_statet   traceback_seqt   loop_seq(    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   viterbim  sV    	


c         C   sA   t  j  |  } x+ | j   D] } t j | |  | | <q W| S(   s  Return log transform of the given probability dictionary.

        When calculating the Viterbi equation, we need to deal with things
        as sums of logs instead of products of probabilities, so that we
        don't get underflow errors.. This copies the given probability
        dictionary and returns the same dictionary with everything
        transformed with a log.
        (   R   R   t   matht   log(   R
   R,   t   log_probR   (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyRJ     s    	(
   R:   R;   R<   R   R?   RE   RF   RH   Rd   RJ   (    (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyR     s   	!		
	
		h(   R<   R   Re   R!   t   Bio.SeqR    R   R   (    (    (    s   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/HMM/MarkovModel.pyt   <module>   s    	