
?Vc           @   s\  d  Z  d d l m Z d d l Z d d l Z d d l Z d d l Z d d l m Z d d l	 Z	 d d l
 Z d d l j Z d d l Z d d l Z d d l Z d d  Z d d  Z d d	  Z d
   Z e d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z  d   Z! d   Z" d   Z# d   Z$ d   Z% d S(   s:   
Module containing important auxilary screening functions
i(   t   divisionN(   t   defaultdicti   c         C   sO   |  d k r d St  t j t j t |      } | d | } t |  |  S(   sd   
    Function rounds number in a reasonable manner. Default is to three
    significant digits.
    i    i   (   t   intt   matht   floort   log10t   abst   round(   t   xt   numt   order_of_magnitudet   digits(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   sigDig   s
    $t   GenRefc         C   sX  t  j j |  d  } t  j j |  d  } t  j j |  d  } t d    } t d    } t d    } t d    } yZ t | d  E } t j | d	 d
 }	 x& |	 D] }
 |
 d j   | |
 d <q WWd QXWn t k
 r d d GHn Xy t | d  y } t j | d	 d } | j	   xP | D]H }
 |
 d j   | |
 d <|
 d | |
 d <|
 d | |
 d j   <q-WWd QXWn t k
 rd d GHn Xy t | d  y } t j | d	 d } | j	   xP | D]H }
 |
 d j   | |
 d <|
 d | |
 d <|
 d | |
 d j   <qWWd QXWn t k
 rGd d GHn X| | | | f S(   s   
    Retrieves gene info for the screen type. Location of reference
    files can be changed, defaults to nearby GenRef folder.
    s   Homo_sapiens.gene_infos   Mus_musculus.gene_infos
   ensRef.csvc           S   s   d S(   Ns   N/A(    (    (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   <lambda>=   s    c           S   s   d S(   Ns   N/A(    (    (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyR   >   s    c           S   s   d S(   Ns   N/A(    (    (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyR   ?   s    c           S   s   d S(   Ns   N/A(    (    (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyR   @   s    t   rt	   delimitert   ,i    i   Ns$   Ensembl information file not found.
s   Use -r to change file locations   	i   i   s"   Mouse information file not found.
s"   Human information file not found.
(
   t   ost   patht   joinR   t   opent   csvt   readert   uppert   IOErrort   next(   t   ref_baset   org_file_humant   org_file_mouset   ens_filet   geneID2Namet   geneID2Infot   geneName2IDt   geneEns2Namet   ens_opent   ens_csvt   linet   org_opent   org_csv(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   retrieveInfo/   sL    &

&

&
c      
   C   s`  t  j j |  d  } t t  } t t  } t t  } t  j j |  s] d GH| | | f St | d   } t j | d d } x | D] } t	 |  d k r | d d k r q n  | d	 d
 k r | | d c | d d 7<qI| d	 d k r| | d c | d d 7<qI| d	 d k rI| | d c | d d 7<qIq q WWd QX| | | f S(   sE   
    Returns GO component, process, and function data by geneID.
    t   gene2gos;   GO reference file not found; use -r to change file locationR   R   s   	i   i   t   NOTi   t	   Componenti   i   t   |t   Processt   FunctionN(
   R   R   R   R   t   strt   isfileR   R   R   t   len(   R   t   go_filet   geneID2Compt   geneID2Proct
   geneID2Funt   go_opent   go_csvR%   (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt
   retrieveGOy   s(    ,c      
      sJ  t    f d    } t    f d    } |  s: | | f S|  \ } } t | d  l } t j | d d } xM | D]E } t | d    k r t | d  | | d <qt   | | d <qt WWd  QXt | d  l }	 t j |	 d d }
 xM |
 D]E } t | d    k r(t | d  | | d <q   | | d <q WWd  QX| | f S(   Nc              s     S(   N(    (    (   t   thresh(    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyR      s    c              s     S(   N(    (    (   R9   (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyR      s    R   R   s   	i   i    (   R   R   R   R   R   (   t
   zero_filesR9   t   zero_untt   zero_trtt   zero_unt_filet   zero_trt_filet   zero_unt_opent   zero_unt_csvR%   t   zero_trt_opent   zero_trt_csv(    (   R9   s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   timeZero   s$    
c      
   C   s  t  | |  \ } } i  } i  } t |  d   }	 t j |	 d d }
 xf |
 D]^ } | sa qO n  | s t | d  | | d <qO | | d k rO t | d  | | d <qO qO WWd QXt | d   } t j | d d } xf | D]^ } | s q n  | st | d  | | d <q | | d k r t | d  | | d <q q WWd QXd \ } } d } i  } i  } i  } i  } x | D] } d } d } | | | k  rd } | d 7} n  | | k s| | | k  rd } | d 7} n  | r| r| d 7} q~n  | r| | | <n | | | | <| r2| | | <n | | | | <| | | | <| | | | <q~Wxx | D]p } | | k rg| | | k  r| d 7} q| | | | <| | | <| d 7} | | | | <| | | | <qgqgW| | | f } | | f } | | | | f S(   s^   
    Takes untreated and treated count files and filters them according
    to threshold.
    R   R   s   	i   i    N(   i    i    (   RC   R   R   R   R   (   t   unt_filet   trt_fileR9   R:   t   excludet   zero_unt_rawt   zero_trt_rawt   untreated_rawt   treated_rawt   unt_opent   unt_csvR%   t   trt_opent   trt_csvt   belowUntt   belowTrtt   removedt   treatedt	   untreatedR;   R<   t   entryt   unt   trt   statst	   time_zero(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   filterCounts   sv    %%


c
         C   s   t  |   | }
 t  |  | } t  |  | } t  |  | } t j |
 | d  t j | | d  } | | } | |	 } | S(   s/   
    Function calculates enrichment values
    i   (   t   floatR   t   log(   t   count1t   sum1t   zero1t	   sum_zero1t   count2t   sum2t   zero2t	   sum_zero2t   shiftt   normt   prop1t   prop2t
   prop_zero1t
   prop_zero2t
   log_enricht   shift_enricht   norm_enrich(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   enrichW  s    *

c         C   s  | \ } } t  | j    } t  | j    }	 t  |  j    }
 t  | j    } g  } xk |  D]c } | j |  d j |  ra | j t | | | | | |	 |  | |
 | | | d d 
  qa qa Wt j |  } i  } xI | D]A } t | | | | | |	 |  | |
 | | | | | 
 | | <q Wt t	  } t t	  } g  } g  } x | D] } | j |  d j |  r| j | |  qT| j |  d j
   } | | c | | g 7<| | c | | | f g 7<| j | |  qTW| | | | | f S(   s:   
    Auxilary function to calculate enrichment values
    i    i   (   t   sumt   valuest   splitt
   startswitht   appendRm   t   npt   medianR   t   listR   (   RS   RR   t   neg_namet
   split_markt   KRX   R;   R<   t   total_zero_untt   total_zero_trtt	   total_untt	   total_trtt   neg_rawRT   t	   neg_shiftt
   entry_rhost	   gene_rhost   gene_reft   neg_rhost   tar_rhost   gene(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt
   enrich_allr  sB    





c         C   sU  d } d | | }	 |	 d k  s, |	 d k r< t  j d  n  | t |  d }
 | d k  r.xt | | |   D] \ } } } | | k  r | t j | | | |  |
 |	 | | |  7} qo | d k r | t j | | |
 |	 | | |  7} qo | t j | | |
 |	 | | |  7} qo Wn#| d k rxt | | |   D] \ } } } | | k r| t j | | | |  |
 |	 | | |  7} qM| d k  r| t j | | |
 |	 | | |  7} qM| t j | | |
 |	 | | |  7} qMWnE xB t | | |   D]. \ } } } | t j |	 | | |  7} qW| S(   se   
    Takes precomputed likelihoods and free parameter values and returns the
    log likelihood.
    i    i   g{Gzg)\(?s!   Error: Impossible off target rate(   t   syst   exitR   t   zipR   R[   (   t   rhost   It   hit_ratet   hit_liket
   back_likest	   back_distt	   off_likest   off_ratet   liket	   back_ratet   hit_normt	   back_liket   off_liket   rho(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   likeEB  s8    "	/	%	)"	/	%	)"#c         C   s  d } d t  |	 }
 |
 d k  s, |
 d k r< t j d  n  | d k  r xvt | | |   D]z \ }	 } } | d k r | | k r | t j | d t |  |
 |	 | |  7} q[ | t j |
 |	 | |  7} q[ Wn | d k r|x t | | |   D]z \ }	 } } | d k rV| | k rV| t j | d t |  |
 |	 | |  7} q | t j |
 |	 | |  7} q WnE xB t | | |   D]. \ }	 } } | t j |
 |	 | |  7} qW| S(   se   
    Takes precomputed likelihoods and free parameter values and returns the
    log likelihood.
    i    i   g{Gzg)\(?s   Impossible off target rateg      ?(   t   on_rateR   R   R   R   R[   R   (   R   R   R   R   R   R   R   R   R   R   R   R   R   (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt	   likeshRNA  s&    "	+&"	+&"#c         C   s   d } d | | }	 |	 d k  s, |	 d k r< t  j d  n  xT t | | |   D]@ \ }
 } } | t j | | | |  |	 |
 | |  7} qO W| S(   se   
    Takes precomputed likelihoods and free parameter values and returns the
    log likelihood.
    i    i   g{Gzg)\(?s   Impossible off target rate(   R   R   R   R   R[   (   R   R   R   R   R   R   R   R   R   R   R   R   R   (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   casLike4  s    "	,c         C   sB  t  t |    } g  } x; |  D]3 } | d k  rA t j d  n  | j | |  q W| | d } | } | } x | | k r Pn  | d k s |  | d }	 n d }	 | t |   d k s |  | d }
 n d }
 |	 |
 k r | |	 7} | d 8} n | |
 7} | d 7} |
 d k rs |	 d k rs t j d  qs qs | | | f S(   Ni    s   Negative likelihood errori   s   Interval calculation failure(   RZ   Rn   R   R   Rr   R1   (   t   datat   targett   startt   totalt   new_datat   datumt   total_weightt   min_indt   max_indt   leftt   right(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   findIntervalO  s4    


c   "      C   s`  | d  } i  } i  }	 i  }
 i  } x)|  D]!} |  | } | |  d } | |  d } | | \ } } t  j d d d  } d } d } | | | | | | | | |  } d g } | g } i  } | g | d <x | | k  r| | 7} | j |  g  } x< | D]4 } | | | | | | | | |  } | j |  qW| | t | d  <t j j |  t j t	 |   } | j |  q Wd } x | | k rE| | 8} | j
 d |  g  } x< | D]4 } | | | | | | | | |  } | j |  qW| | t | d  <t j j |  t j t	 |   } | j
 d |  qW| t j j |  } t  j |  } t |  d k st |  d k rt |  GHt j d	  n  t | d | j    \ } } }  t |  }! | | | <| | j |!  | | <d |! | |	 | <| | | |  | f |
 | <| s+ d | | <d |	 | <d |
 | <q+ q+ W| |	 |
 | f S(   s)   
    Function that runs subprocesses
    i    gHz>g?g?i	   i   gffffff?g?s   Error: Unstable computationi   (   i    i    i   (   t   numpyt   linspaceRr   R   t   scipyt   misct	   logsumexpR   R[   R1   t   insertt   expRn   R   R   R   t   argmaxt   maxt   index("   R   R   t   off_distR   t   like_funt	   gene_spant   I_stepR   t   geneIt   geneLt   geneIntervalt   geneDistR   R   R   R   t   min_It   max_It   pos_hit_rateR   R   t   like0t   pos_It   marg_dist_logt   all_distt   distR   t   marg_logt   norm_marg_dist_logt   norm_marg_distR   R   R   t   maxL(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   trial  sv    
		
%
%$	


c   "   
   C   st  | d k r t  } n t } t j |  }	 t j |  }
 y | sQ d d  l } n  Wn t k
 rp d GHt  } n X| s | j |  } | j | t | t	 f d	  } n  | r t |  |	 |
 | | | |  \ } } } } n  | sdi  } i  } i  } i  } g  } |  j
   } t t |  |  d } g  t d t |  |  D] } | | | | !^ q=} x@ | D]8 } i  } x | D] } |  | | | <qtW| j |  qaWg  } x6 | D]. } | j | j | |	 |
 | | | |   qWx | D]z } |   } y | \ } } }  }! Wn t k
 r(t j d  n X| j |  | j |  | j |   | j |!  qWn  | | | | f S(
   Ni   is?   Parallel Python package pp not found. Defaulting to single coreR   R   s
   scipy.misci    s   Subprocess failed(   s   numpys   maths
   scipy.misc(   t   Truet   Falset   stt   gaussian_kdet   ppt   ImportErrort   Servert   TemplateR   R   t   keysR   R1   t   rangeRr   t   submitt	   TypeErrorR   R   t   update("   R   t	   back_rhost   off_rhosR   R   t   numsR   R   t   singleR   R   R   t
   job_servert   fnR   R   R   R   t   GeneRhosSplitR   t   nt   it	   keysSplitt   keyListt	   dictChunkt   keyt   jobst   splitst   jobt   valt   geneIit   geneLit   geneIntervalit	   geneDisti(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   retrieveLikelihoods  s`    	
	6	c         C   s   i  } i  } x |  D] } t  j j j t j j d |  | d t  t j j d | d t   d | | <t  j j j t j j d |  | d t  t j j d | d t   d | | <q W| | f S(   NR   t   maski   (	   R   RW   t   mstatst
   ks_twosampR   t   mat   arrayR   t   mannwhitneyu(   R   R   t   rhoMWt   rhoKSR   (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   calculatePvalsZ  s    "(c         C   s  | d k r t  } n t } y | s3 d d  l } n  Wn t k
 rR d GHt  } n X| s | j |  } | j | t d d	  } n  | r t |  |  \ } } n  | si  } i  } g  }	 |  j   }
 t t	 |
  |  d } g  t
 d t	 |
  |  D] } |
 | | | !^ q } x@ | D]8 } i  } x | D] } |  | | | <q/W|	 j |  qWg  } x' |	 D] } | j | j | |   qeWxe | D]Z } |   } y | \ } } Wn t k
 rt j d  n X| j |  | j |  qWn  | | f S(
   Ni   is?   Parallel Python package pp not found. Defaulting to single coreR   s   scipy.stats.mstatsi    s   Subprocess failed(    (   s   numpys   scipy.stats.mstats(   R   R   R   R   R   R   R   R   R   R1   R   Rr   R   R   R   R   R   (   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   R   t   rhoMWit   rhoKSi(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   retrievePvalso  sN    	
6	c   "      C   sp  t  g  t |  D] } | t j | |   f ^ q  } i  } xX | j   D]J \ } } d t | d g  } d t | d g  } | | f | | <qJ Wt | | | | | | | |	  \ } } } } |
 r d GHt j	 |  n  t
 | d  U } t j | d d d d } x0 | D]( } | j | | | | | | g  qWWd  QXg  } t
 | d	  J } t j | d d d d } x% | D] } | j t | d
   qWWd  QX| j   } g  | D] } | d ^ q} g  | D] } | d
 ^ q} t j t |  | d  } t  t | |   }  i  }! x4 |  D], } d
 |  | d
 t t |   |! | <q*W|! t t |   f S(   Ni   i    s   Previous reference removedt   aR   R   t   lineterminators   
R   i   R   (   t   dictR   t   randomt   samplet   itemsR   t   minR   R   t   removeR   R   t   writert   writerowR   Rr   RZ   R   t   searchsortedt   sortedR   R1   R/   ("   t   draw_numt   perm_numR   R   R   R   R   t   ref_filet   gene2ratR   t   eraseR   t	   perm_rhosR   R   R   R   R   t   permIt   permLt   permIntervalR   t   ref_opent   ref_csvt   numbert   all_perm_ratR%   t
   genes_ratsR   t   genest   ratst   rat_rankt	   gene_rankt   geneP(    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   retrievePerm  s<    7		,!*c   #      C   s  |  \ } } } } | \ } }	 }
 } i  } i  } i  } x|| D]t} | | k r | | | | <| | | | <| | | | <q= n  | | k r | | | | <|	 | | | <|
 | | | <q= n  | | } | | } | d d | d d } | | \ } } | g } d g } d } x | | k  r| | 7} | j  |  g  } t } xz | d d g | t | d  D]V } xM | d d g | t | d  D]) } | rt } qn  | j  | |  qWqfWt j j |  t j t	 |   } | j  |  qWd } x | | k r| | 8} | j
 d |  g  } t } xz | d d g | t | d  D]V } xM | d d g | t | d  D]) } | rt } q}n  | j  | |  q}WqTWt j j |  t j t	 |   } | j
 d |  qW| t j j |  } t j |  } t |  d k s0t |  d k r@t j d  n  t | d | j    \ } }  }! t |  }" | | j |"  | | <d |" | | | <| |  | |! | f | | <q= W| | | f S(   s/   
    Function that calculates combo scores
    i    i   gffffff?g?s   Error: Unstable computation(   Rr   R   R   R   R   R   R   R   R[   R1   R   R   R   Rn   R   R   R   R   R   R   (#   t   data1t   data2R   R   t   geneI1t   geneL1t   geneInterval1t	   geneDist1t   geneI2t   geneL2t   geneInterval2t	   geneDist2R   R   R   R   t   dist1t   dist2R   R   R   R   R   R   R   t   skipt   E_liket   T_likeR   R   R   R   R   R   R   (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   retrieveCombo  sz    

		
))%
))%$	c         C   se  t    \ } } } } i  } i  } i  } x2|  j   D]$\ }	 }
 |	 | k r_ |	 } | | } nN |	 | k r~ |	 } | |	 } n/ |	 | k r | |	 } | | } n |	 } |	 } |	 | k r | |	 } n8 | | k r | | } n | | k r | | } n g  } d t |
 | d g  } d t |
 | d g  } | | f | | <|
 | | <| | | <q4 Wx | j   D] \ }	 } |	 | k r|	 } | | } nN |	 | k r|	 } | |	 } n/ |	 | k r| |	 } | | } n |	 } |	 } | | k rqin g  }
 d t |
 | d g  } d t |
 | d g  } | | f | | <|
 | | <| | | <qiW| | | f S(   Ni   i    (   R(   R   R   R   (   t
   gene_rhos1t
   gene_rhos2R   R    R!   R"   R   t   add_gene_rhos1t   add_gene_rhos2R   t   rhos1t   geneIDt   namet   rhos2R   R   (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt	   comboSpanl  sb    



c   (      C   s  t  g  t |  D] } | t j | |   f ^ q  } t  g  t |  D] } | t j | |  f ^ qG  } t | |  \ } } } t | | | | |	 | | |  } t | | | | |
 | | |  } t | | | |  \ } } } | rd GHt j |  n  t	 | d  U } t
 j | d d d d } x0 | D]( } | j | | | | | | g  q5WWd  QXg  } t	 | d  J } t
 j | d d d d } x% | D] } | j t | d   qWWd  QX| j   }  g  |  D] }! |! d	 ^ q}" g  |  D] }! |! d ^ q}# t j t |  |# d
  }$ t  t |" |$   }% i  }& x4 |% D], }' d |% |' d t t |   |& |' <qKW|& t t |   f S(   Ns   Previous reference removedR   R   R   R   s   
R   i   i    R   (   R   R   R   R   R-  R   R$  R   R   R   R   R   R   R   Rr   RZ   R   R   R   R   R   R1   R/   ((   t	   draw_num1t	   draw_num2R  t
   back_rhos1t
   back_rhos2t	   tar_rhos1t	   tar_rhos2t	   off_rate1t	   off_rate2t	   like_fun1t	   like_fun2R   R  R  R   R  R   t
   perm_rhos1t
   perm_rhos2t   add_perm_rhos1t   add_perm_rhos2t	   perm_spanR  R  R  R  R	  R
  R  R  R  R%   R  R   R  R  R  R  R  R   (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt	   comboPerm  sB    77		,!*(&   t   __doc__t
   __future__R    R   Rs   R   R   t   collectionsR   R   t
   scipy.miscR   t   scipy.statsRW   R   R   R   t   reR   R(   R8   RC   R   RY   Rm   R   R   R   R   R   R   R   R   R   R  R$  R-  R=  (    (    (    s9   /mnt/lab_data/bassik/dmorgens/castle/Scripts/screenFun.pyt   <module>	   s>   J.	.		I	K	.		7	|	X		K	?	s	[