ó
LIc           @   sb  d  d l  Z  d  d l m Z d  d l m Z i@ d d 6d d 6d d 6d d 6d d	 6d d
 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d 6d d  6d d! 6d d" 6d d# 6d d$ 6d d% 6d d& 6d d' 6d d( 6d d) 6d d* 6d d+ 6d d, 6d d- 6d d. 6d d/ 6d d0 6d d1 6d d2 6d d3 6d d4 6d d5 6d d6 6d d7 6d d8 6d d9 6d d: 6d d; 6d d< 6d d= 6d d> 6d d? 6d d@ 6d dA 6d dB 6d dC 6d dD 6Z i d5 d6 g dE 6d! d" g dF 6d% d( d' d& d> d= g dG 6d d g dH 6d g dI 6d d g dJ 6d) d, d+ d* g dK 6d  d g dL 6d d7 d g dM 6d. d/ d0 d- g dN 6d d g dO 6d3 d2 d4 d1 g dP 6dA dD dC dB g dQ 6d d d g dR 6d d d
 d	 d d g dS 6d d g dT 6d; d: d< d9 d@ d? g dU 6d8 g dV 6d d d d g dW 6d$ d# g dX 6d d g dY 6Z dZ d\ d[ „  ƒ  YZ d S(]   iÿÿÿÿN(   t   SharpEcoliIndex(   t   SeqIOi    t   TTTt   TTCt   TTAt   TTGt   CTTt   CTCt   CTAt   CTGt   ATTt   ATCt   ATAt   ATGt   GTTt   GTCt   GTAt   GTGt   TATt   TACt   TAAt   TAGt   CATt   CACt   CAAt   CAGt   AATt   AACt   AAAt   AAGt   GATt   GACt   GAAt   GAGt   TCTt   TCCt   TCAt   TCGt   CCTt   CCCt   CCAt   CCGt   ACTt   ACCt   ACAt   ACGt   GCTt   GCCt   GCAt   GCGt   TGTt   TGCt   TGAt   TGGt   CGTt   CGCt   CGAt   CGGt   AGTt   AGCt   AGAt   AGGt   GGTt   GGCt   GGAt   GGGt   CYSt   ASPt   SERt   GLNt   METt   ASNt   PROt   LYSt   STOPt   THRt   PHEt   ALAt   GLYt   ILEt   LEUt   HISt   ARGt   TRPt   VALt   GLUt   TYRt   CodonAdaptationIndexc           B   sD   e  Z d  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   s·  A codon adaptaion index (CAI) implementation.
    
    This class implements the codon adaptaion index (CAI) described by Sharp and
    Li (Nucleic Acids Res. 1987 Feb 11;15(3):1281-95).

    methods:

    set_cai_index(Index):

    This method sets-up an index to be used when calculating CAI for a gene.
    Just pass a dictionary similar to the SharpEcoliIndex in CodonUsageIndices
    module.

    generate_index(FastaFile):

    This method takes a location of a FastaFile and generates an index. This
    index can later be used to calculate CAI of a gene.

    cai_for_gene(DNAsequence):

    This method uses the Index (either the one you set or the one you generated)
    and returns the CAI for the DNA sequence.

    print_index():
    This method prints out the index you used.

    NOTE - This implementation does not currently cope with alternative genetic
    codes, only the synonymous codons in the standard table are considered.
    c         C   s   i  |  _  i  |  _ d  S(   N(   t   indext   codon_count(   t   self(    (    sŠ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/CodonUsage.pyt   __init__?   s    	c         C   s   | |  _  d  S(   N(   RX   (   RZ   t   Index(    (    sŠ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/CodonUsage.pyt   set_cai_indexD   s    c         C   s  |  j  i  k s |  j i  k r- t d ƒ ‚ n  |  j | ƒ xÏ t j ƒ  D]Á } d } g  } x# t | D] } | |  j | 7} qd Wx< t | D]0 } | j |  j | d t t | ƒ | ƒ qŠ Wt | ƒ } x; t	 t t | ƒ ƒ D]# } | | | |  j  t | | <qá WqG Wd S(   s;  Generate a codon usage index from a FASTA file of CDS sequences.
        
        This method takes a location of a Fasta file containing CDS sequences
        (which must all have a whole number of codons) and generates a codon
        usage index. This index can later be used to calculate CAI of a gene.
        sV   an index has already been set or a codon count has been done. cannot overwrite either.g        g      ð?N(
   RX   RY   t
   ValueErrort   _count_codonst   SynonymousCodonst   keyst   appendt   lent   maxt   range(   RZ   t	   FastaFilet   AAt   Sumt   RCSUt   codont   RCSUmaxt   i(    (    sŠ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/CodonUsage.pyt   generate_indexG   s    .c         C   s  d } d } |  j  i  k r+ |  j t ƒ n  | j ƒ  rF | j ƒ  } n  xª t d t | ƒ d ƒ D] } | | | d !} | |  j  k rÇ | d k rï | d k rï | t j |  j  | ƒ 7} | d 7} qï q_ | d k r_ t	 d	 | |  j  f ƒ ‚ q_ q_ Wt j
 | d
 | d ƒ S(   sÚ   Calculate the CAI (float) for the provided DNA sequence (string).
        
        This method uses the Index (either the one you set or the one you generated)
        and returns the CAI for the DNA sequence.
        i    i   R   R5   i   R4   R   R   s!   illegal codon in sequence: %s.
%sg      ð?(   R4   R   R   (   RX   R]   R    t   islowert   upperRe   Rc   t   matht   logt	   TypeErrort   exp(   RZ   t   DNAsequencet   caiValuet   LengthForCaiRl   Rj   (    (    sŠ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/CodonUsage.pyt   cai_for_genee   s     c         C   s÷   t  | d ƒ } t j ƒ  |  _ xÈ t j | d ƒ D]´ } t | j ƒ j ƒ  rd t | j ƒ j	 ƒ  } n t | j ƒ } xo t
 d t | ƒ d ƒ D]U } | | | d !} | |  j k rÈ |  j | c d 7<qŒ t d | | j f ƒ ‚ qŒ Wq1 W| j ƒ  d  S(   Nt   rt   fastai    i   i   s   illegal codon %s in gene: %s(   t   opent
   CodonsDictt   copyRY   R   t   parset   strt   seqRn   Ro   Re   Rc   Rr   t   idt   close(   RZ   Rf   t   handlet
   cur_recordRt   Rl   Rj   (    (    sŠ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/CodonUsage.pyR_   }   s    !c         C   sD   |  j  j ƒ  } | j ƒ  x$ | D] } d | |  j  | f GHq  Wd S(   s*   This method prints out the index you used.s   %s	%.3fN(   RX   Ra   t   sort(   RZ   t   XRl   (    (    sŠ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/CodonUsage.pyt   print_index“   s    
(	   t   __name__t
   __module__t   __doc__R[   R]   Rm   Rw   R_   R†   (    (    (    sŠ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/CodonUsage.pyRW   !   s   					(    (   Rp   t   CodonUsageIndicesR    t   BioR   R{   R`   RW   (    (    (    sŠ   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/SeqUtils/CodonUsage.pyt   <module>   s4   &###########$ # #&#0