Ñò
Xs*Jc           @   sÃ   d  Z  d d k Z d d k Z d d k Z d d k Z d d k Z d d k Td d k Te i d ƒ Z	 e i d ƒ Z
 d d k Z d Z d d d „  ƒ  YZ d	 d d
 „  ƒ  YZ d d d „  ƒ  YZ d S(   sE   
This module contains the classes to deal with UCSC
known gene files
iÿÿÿÿN(   t   *s   \+s   \-s   Error in UCSC classt   UCSCc           B   s)   e  Z d  Z d „  Z d „  Z d „  Z RS(   s¼   
    Class for keeping known gene information.  This might be too much
    information to carry around when the most usefull parts are really
    the chrom, strand, txStart and txEnd.
    c         C   s^   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ |	 |  _ |
 |  _	 d  S(   N(
   t   namet   chromt   strandt   txStartt   txEndt   cdsStartt   cdsEndt	   exonCountt
   exonStartst   exonEnds(   t   selfR   R   R   R   R   R   R   R	   R
   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   __init__,   s    									c         C   s^   | |  _  | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ | |  _ |	 |  _ |
 |  _	 d  S(   N(
   R   R   R   R   R   R   R   R	   R
   R   (   R   R   R   R   R   R   R   R   R	   R
   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   __set__9   s    									c         C   sÁ   |  i  d |  i d |  i d t |  i ƒ d t |  i ƒ d t |  i ƒ d t |  i ƒ d t |  i ƒ d t |  i	 ƒ d t |  i
 ƒ } y | SWn t i i d |  ƒ d SXd  S(   Nt    s&   No UCSC known gene information for %s
t    (   R   R   R   t   strR   R   R   R   R	   R
   R   t   syst   stderrt   write(   R   t	   outstring(    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   getAllF   s    –(   t   __name__t
   __module__t   __doc__R   R   R   (    (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyR   %   s   		t	   UCSC_litec           B   s)   e  Z d  Z d „  Z d „  Z d „  Z RS(   sh   
    Class for keeping known gene information.  Only partial (most
    usefull) information stored.
    c         C   s1   | |  _  | |  _ | |  _ | |  _ | |  _ d  S(   N(   R   R   R   R   R   (   R   R   R   R   R   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyR   [   s
    				c         C   s1   | |  _  | |  _ | |  _ | |  _ | |  _ d  S(   N(   R   R   R   R   R   (   R   R   R   R   R   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyR   a   s
    				c         C   sl   |  i  d |  i d |  i d t |  i ƒ d t |  i ƒ } y | SWn t i i d |  ƒ d SXd  S(   NR   s&   No UCSC known gene information for %s
R   (	   R   R   R   R   R   R   R   R   R   (   R   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyR   h   s    A(   R   R   R   R   R   R   (    (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyR   V   s   		t
   KnownGenesc           B   st   e  Z d  Z d d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d „  Z d	 „  Z d
 „  Z d „  Z RS(   sE   
    Class to read in UCSC known gene files and get all the info
    c         C   s  h  |  _  t | ƒ } xñ | D]é } t i d | ƒ pÐ | i ƒ  } | i ƒ  } | d |  i  i ƒ  j o g  |  i  | d <n t | d | d | d t | d ƒ t | d ƒ t | d ƒ t | d ƒ t | d	 ƒ | d
 | d ƒ
 } |  i  | d i	 | ƒ q q Wd S(   sÆ   
        Reads in a UCSC known gene file and builds a dictionary
        with chromosomes as keys and lists of bed vals representing
        the genes on each chromosome as values.
        
        t   #i   i    i   i   i   i   i   i   i   i	   N(
   t   gene_coordst   opent   ret   matcht   stript   splitt   keysR   t   atoit   append(   R   t   filet   infilet   linet   slinet   coord(    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyR   {   s    	 %'!c   	      C   s2  h  |  _  x|  i i ƒ  D]} |  i | } xø | D]ð } t i | i ƒ o | i | } | i | } n2 t i | i ƒ o | i | } | i | } n | d j o" t	 | i
 | | i | | ƒ } n t	 | i
 | | i d | ƒ } | |  i  i ƒ  j o g  |  i  | <n |  i  | i | ƒ q3 Wq W|  i  S(   sé   
        Return the promoter coordinates defined by given upstream and
        downstream distances in a bed dictionary

        NOTICE:  if gene is on negative strand, the promoter start
        and end are still sequential
        i    (   t   prom_coordsR   R#   t   plusR    R   R   t   minusR   R   R   R%   (	   R   t   upstreamt
   downstreamR   t   chrom_coordst   gt
   prom_startt   prom_endt   ucsc(    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   getPromoters‘   s*    	  c         C   s   |  i  i ƒ  S(   sV   
        Return a list of the keys - duplicating the function of a dictionary
        (   R   R#   (   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyR#   ¯   s    c         C   s;   d } x. |  i  i ƒ  D] } | t |  i  | ƒ 7} q W| S(   s,   
        Return the number of genes
        i    (   R   R#   t   len(   R   t   numt   c(    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   getNumGenesµ   s
     c         C   s   | |  i  | <d S(   s'   
        Sets a new gene coord
        N(   R   (   R   R   t   bedcoord(    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   __setitem__¿   s    c         C   s)   |  i  i | ƒ o |  i  | St ‚ d S(   sY   
        Returns a bed_val indexed by its name or None if no such bed_val exists
        N(   R   t   has_keyt	   UCSCError(   R   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   __getitem__Æ   s    c         C   s   |  i  i ƒ  d S(   s!   
        Delete, delete;
        N(   R   t   clear(   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   __del__Ï   s    c         C   s   |  i  i | ƒ S(   s+   
        Returns  mapping iterator
        (   R   R<   (   R   t   item(    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   __contains__Õ   s    c         C   s   |  i  i ƒ  S(   s*   
        Returns mapping iterator
        (   R   t   iterkeys(   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   __iter__Û   s    c         C   s   t  |  i ƒ S(   s/   
        Returns number of gene_coords
        (   R6   R   (   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   __len__á   s    c         C   s+   |  i  i | ƒ o |  i  | =n t ‚ d S(   sŽ   
        removes a chrom if its name exists in the dictionary
        -- I guess this could possibly be useful at some point
        
        N(   R   R<   R=   (   R   R   (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   __delitem__ç   s    N(   R   R   R   t   NoneR   R5   R#   R9   R;   R>   R@   RB   RD   RE   RF   (    (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyR   v   s   			
							(    (    (    (   R   R   t   ost   shutilt   timeR   t   matht   stringt   compileR,   R-   t   BEDR=   R   R   R   (    (    (    s@   /woldlab/castor/data00/home/georgi/SICER_v1.01/SICER/lib/UCSC.pyt   <module>   s   <

1 