ó
”wâCc           @   s{   d  d l  Z  d  d l Z d  d l Z d  d l m Z d  d l m Z d  d l m Z d  d l m	 Z	 d d d „  ƒ  YZ
 d S(	   iÿÿÿÿN(   t   kd(   t   Seq(   t   IUPAC(   t	   IUPACDatat   ProteinAnalysisc           B   s}   e  Z d  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 d „ Z d „  Z d „  Z RS(   s’  
	This class contains methods for protein analysis.  The class init method takes
	only one argument, the protein sequence as a string and build a sequence
	object using the Bio.Seq module. This is done just to make sure the sequence
	is a protein sequence and not anything else.
	
	methods:
	
	count_amino_acids:
	
	Simply counts the number times an amino acid is repeated in the protein
	sequence. Returns a dictionary {AminoAcid:Number} and also stores the
	dictionary in self.amino_acids_content.
	
	get_amino_acids_percent:
	
	The same as count_amino_acids only returns the Number in percentage of entire
	sequence. Returns a dictionary and stores the dictionary in
	self.amino_acids_content_percent.
	
	molecular_weight:
	Calculates the molecular weight of a protein.
	
	aromaticity:
	
	Calculates the aromaticity value of a protein according to Lobry, 1994. It is
	simply the relative frequency of Phe+Trp+Tyr.
	
	
	instability_index:
	
	Implementation of the method of Guruprasad et al. (Protein Engineering
	4:155-161,1990). This method tests a protein for stability. Any value above 40
	means the protein is unstable (=has a short half life). 
	
	flexibility:
	Implementation of the flexibility method of Vihinen et al. (Proteins. 1994 Jun;19(2):141-9).
	
	isoelectric_point:
	This method uses the module IsoelectricPoint to calculate the pI of a protein.
	
	secondary_structure_fraction:
	This methods returns a list of the fraction of amino acids which tend to be in Helix, Turn or Sheet.
	Amino acids in helix: V, I, Y, F, W, L.
	Amino acids in Turn: N, P, G, S.
	Amino acids in sheet: E, M, A, L.
	The list contains 3 values: [Helix, Turn, Sheet].
	
	
	protein_scale(Scale, WindwonSize, Edge):
	
	An amino acid scale is defined by a numerical value assigned to each type of
	amino acid. The most frequently used scales are the hydrophobicity or
	hydrophilicity scales and the secondary structure conformational parameters
	scales, but many other scales exist which are based on different chemical and
	physical properties of the amino acids.  You can set several  parameters that
	control the computation  of a scale profile, such as the window size and the
	window edge relative weight value.  WindowSize: The window size is the length
	of the interval to use for the profile computation. For a window size n, we
	use the i- ( n-1)/2 neighboring residues on each side of residue it compute
	the score for residue i. The score for residue is  the sum of the scale values
	for these amino acids,  optionally weighted according to their position in the
	window.  Edge: The central amino acid of the window always has a weight of 1.
	By default, the amino acids at the remaining window positions have the same
	weight, but  you can make the residue at the center of the window  have a
	larger weight than the others by setting the edge value for the  residues at
	the beginning and end of the interval to a value between 0 and 1. For
	instance, for Edge=0.4 and a window size of 5 the weights will be: 0.4, 0.7,
	1.0, 0.7, 0.4.  The method returns a list of values which can be plotted to
	view the change along a protein sequence.  Many scales exist. Just add your
	favorites to the ProtParamData modules.
	c         C   sg   | j  ƒ  r* t | j ƒ  t j ƒ |  _ n t | t j ƒ |  _ d  |  _ d  |  _ t	 |  j ƒ |  _
 d  S(   N(   t   islowerR   t   upperR   t   proteint   sequencet   Nonet   amino_acids_contentt   amino_acids_percentt   lent   length(   t   selft   ProtSequence(    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   __init__U   s    		c         C   sb   t  g  t j D] } | d f ^ q ƒ } x* | j ƒ  D] } |  j j | ƒ | | <q5 W| |  _ | S(   Ni    (   t   dictR   t   protein_letterst   keysR   t   countR
   (   R   t   kt   ProtDict   i(    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   count_amino_acids^   s
    (	c         C   s   |  j  s |  j ƒ  n  i  } xU |  j  j ƒ  D]D } |  j  | d k rf |  j  | t |  j ƒ | | <q, d | | <q, W| |  _ | S(   Ni    (   R
   R   R   t   floatR   R   (   R   t	   PercentAAR   (    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   get_amino_acids_percenth   s    	!	c         C   sa   i  } x, t  j j ƒ  D] } t  j | d | | <q Wd } x |  j D] } | | | 7} qE W| S(   Ng…ëQ¸2@(   R   t   protein_weightsR   R   (   R   t   MwDictR   t   MW(    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   molecular_weightw   s    c         C   s=   |  j  s |  j ƒ  n  |  j  d |  j  d |  j  d } | S(   Nt   Yt   Wt   F(   R   R   (   R   t   Arom(    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   aromaticity„   s    	#c         C   sl   t  j j ƒ  } d } xE t |  j d ƒ D]0 } | |  j | |  j | d } | | 7} q) Wd |  j | S(   Ng        i   g      $@(   t   ProtParamDatat   DIWVt   copyt   rangeR   R   (   R   R&   t   scoreR   t	   DiPeptide(    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   instability_index   s     c   	      C   sà   t  j j ƒ  } d } d d d d d g } g  } x© t |  j | ƒ D]” } |  j | | | !} d } xF t | d ƒ D]4 } | | | | | | | | d | | 7} qu W| | | | d d 7} | j | d	 ƒ qD W| S(
   Ni	   g      Ð?g      Ü?g      ä?g      ê?i   g        i   g      @(   R%   t   FlexR'   R(   R   R   t   append(	   R   R,   t   Windowt   Weightst   ListR   t   SubSeqR)   t   j(    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   flexibility™   s    2c         C   s3   d } x |  j  D] } | t | 7} q W| |  j S(   Ng        (   R   R    R   (   R   t	   ProtGravyR   (    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   gravy¨   s    c         C   sX   d | | d d } d g | d } x* t  | d ƒ D] } | | | | | <q8 W| S(   Ng      ð?i   i   g        (   R(   (   R   t   windowt   edget   unitt   listR   (    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   _weight_list³   s
    g      ð?c      	   C   st  |  j  | | ƒ } g  } d } x | D] } | | 7} q% W| d d } x&t |  j | d ƒ D]} |  j | | | !} d }	 x t | d ƒ D]~ }
 y: |	 | |
 | | |
 | |
 | | | |
 d 7}	 Wq t k
 rt j j d | |
 | | |
 d f ƒ q Xq W| | d | k r?|	 | | | d 7}	 n t j j d | | d ƒ | j |	 | ƒ q_ W| S(   Ng        i   i   s0   warning: %s or %s is not a standard amino acid.
s+   warning: %s  is not a standard amino acid.
(	   R:   R(   R   R   t   KeyErrort   syst   stderrt   writeR-   (   R   t	   ParamDictR.   t   Edget   weightR9   t   sum_of_weightsR   t   subsequenceR)   R2   (    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   protein_scaleÀ   s(     :&c         C   s8   |  j  s |  j ƒ  n  t j |  j |  j  ƒ } | j ƒ  S(   N(   R
   R   t   IsoelectricPointR   t   pi(   R   t   X(    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   isoelectric_pointà   s    	c         C   sÃ   |  j  s |  j ƒ  n  |  j  d |  j  d |  j  d |  j  d |  j  d |  j  d } |  j  d |  j  d |  j  d	 |  j  d
 } |  j  d |  j  d |  j  d |  j  d } | | | f S(   Nt   Vt   IR    R"   R!   t   Lt   Nt   Pt   Gt   St   Et   Mt   A(   R   R   (   R   t   Helixt   Turnt   Sheet(    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   secondary_structure_fractionç   s    	D..(   t   __name__t
   __module__t   __doc__R   R   R   R   R$   R+   R3   R5   R:   RD   RH   RV   (    (    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyR      s   H			
								 	(    (   R<   R%   RE   R    t   Bio.SeqR   t   Bio.AlphabetR   t   Bio.DataR   R   (    (    (    s‰   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/ProtParam.pyt   <module>   s   ï