
N>%Lc           @   sp  d  d l  Z  d  d l Z d  d l Z d  d l m Z d  d l m Z d Z d Z d  d l	 Z	 d  d l
 Z
 d  d l Z d  d l m Z e j d  r e j d  Z n d Z e e	 _ d	   Z d
 d e e e d d d e e e d d  Z d e e d e d d  Z i  e g  e d d  Z d d e i  e e d e e d 	 Z i  d e d  d  d  Z d d d     YZ d S(   iN(   t   strftime(   t   arrayg      @g?(   t   environt   CISTEMATIC_TEMPs   /tmpc         C   sp   y t  |   } Wn  t k
 r2 t  |  d  } n Xt  |  d  } | j d t d  | | f  | j   d S(   sZ    create a log file to write a message from a messenger or append to an existing file.
    t   wt   as   %s: [%s] %s
s   %Y-%m-%d %H:%M:%SN(   t   opent   IOErrort
   writelinesR    t   close(   t   logFilet	   messengert   messaget   logfile(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   writeLog   s     i  i    i   i   c   -      C   s$  i  } t  |   } | j   } d } d } d | k  oG t |  k  n r'g  } x | D] } | d d k r d | k r d } n  d | k rY d } qY qY n  | j   j d  } | d d k r qY n  t | | j    } | j |  qY W| j   d | } | | } | | k r'| } q'n  d } t |  } d } x`| D]X} | d d k rd | k rqd } n  d | k rFd } qFqFn  | j   j d  } | d k ry t | | j    } Wn
 qFn X| | k  rqFqn  |	 rK| | j d	  \ } } | s| d
 } n  | j d  \ } } t |  } t |  } n | d k rt	 j
 | |  d  } | r| | } n | | d
 } t | | d  | } t | | d  | } n[ | d } | r| | } n | | d
 } t | | d  | } t | | d  | } t | |  }  | ret | d | |  }! t | d | |  }" n  | | k r~g  | | <n  t }# |
 rt | |  d k rx/t t | |   D]}$ | r| r| | |$ \ }% }& }' }( }) }* n` | r| | |$ \ }% }& }' }( n= | r5| | |$ \ }& }' }( }) }* n | | |$ \ }& }' }( |& |' d }+ |& | k oq|' k n s|& | k o|' k n s| |& k o| k n s| |' k o| k n r| |& k  r| }& n  |' | k  r| }' n  t |' |&  }( | r(|" |* k r(|" }* |! }) q(n  | rW| rW| |& |' |( |) |* f | | |$ <n` | rz| |& |' |( f | | |$ <n= | r|& |' |( |) |* f | | |$ <n |& |' |( f | | |$ <| d 7} t }# PqqWn  |# s| r
| r
| | j | | | |  |! |" f  ni | r0| | j | | | |  f  nC | rY| | j | | |  |! |" f  n | | j | | |  f  | d 7} n  | rF| d d k rF| GHqFqFW| j   d }, xP | D]H } |, t | |  7}, | r| | j d d    q| | j   qW| r d | GHd |, GHn  | S(   s\   returns a list of merged overlapping regions; 
    can optionally filter regions that have a scoreField fewer than minHits.
    Can also optionally return the label of each region, as well as the 
    peak, if supplied (peakPos and peakHeight should be the last 2 fields).
    Can return the top regions based on score if higher than minHits.
    i    t   #t   pvaluei   t	   readShifts   	t   chrit   :i   t   -i   ii t   cmpc         S   s   t  |  d | d  S(   Ni   (   R   (   t   xt   y(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   <lambda>   t    s   merged %d timess   returning %d regions(   R   t	   readlinest   lent   stript   splitt   floatt   appendt   sortt   intt   stringt   joint   abst   Falset   ranget   TrueR	   (-   t   regionfilenamet   maxDistt   minHitst   verboset	   keepLabelt	   fullChromt
   chromFieldt
   scoreFieldt   padt   compactt   doMerget   keepPeakt	   returnTopt   regionst   infilet   linest	   hasPvaluet   hasShiftt   scorest   linet   fieldst   hitst   minScoret
   mergeCountt   countt   chromt   post   frontt   backt   startt   stopt   labelt   lengtht   peakPost
   peakHeightt   mergedt   indext   rlabelt   rstartt   rstopt   rlent   rpeakPost   rpeakHeightt   midpointt   regionCount(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getMergedRegions#   s    "		


			
# p		# 
& #
	i   iK   c	         C   s[  t  d d g |  }	 t  d d g |  }
 d } d } g  } | d k rd } d } x t | d  D]} t  d d g |  } x |  D] } | d | } | d d k r | | 7} n
 | | 8} | d k  s | | k r q n  | r | d } n d	 } | d d k r| | c | 7<q | | c | 8<q Wd } x | D] } | t |  7} q@W| | k  rg | } | } qg qg W| } n  x+|  D]#} | d | } | d d k r| | 7} n
 | | 8} | d | k  s| | k rqn  d } | r| d } n d	 } | | 7} | r%| j |  n  x$ | d k  rK| d 7} | d 7} q(Wx@ | | k  r| | k  r|	 | c | 7<| d 7} | d 7} qOW| d d k r| | 7} qqWxi t d | d  D]T } |	 | d d |	 | d d
 |	 | d |	 | d |	 | d d |
 | <qWd } g  } xZ t |  D]L } | |
 | k  ra|
 | } | g } q5| |
 | k r5| j |  q5q5W| r.d } | d } x^ | D]V } | r| d } n d	 } | d | } | | k r| d d k r| | 7} qqW| r| | |
 | | | f S| | |
 | | f Sn) | rG| | |
 | | f S| | |
 | f Sd S(   s   find the peak in a list of reads (hitlist) in a region 
    of a given length and absolute start point. returns a 
    list of peaks, the number of hits, a triangular-smoothed 
    version of hitlist, and the number of reads that are 
    forward (plus) sense.
    If doWeight is True, weight the reads accordingly.
    If leftPlus is True, return the number of plus reads left of 
    the peak, taken to be the first TopPos position.
    t   fg        t   autoi    I    i   t   +i   g      ?i   g      "@N(   R   t   xrangeR$   R   R&   (   t   hitListRE   RH   t   readlent   doWeightt   leftPlust   shiftt   returnShiftt   maxshiftt   seqArrayt   smoothArrayt   numHitst   numPlust   regionArrayt	   bestShiftt   lowestScoret	   testShiftt
   shiftArrayt   readt
   currentpost   weightt   currentScoret   scoret   hitIndexRB   t   topNucleotidet   topPost   numLeftPlust   maxPost
   currentPos(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   findPeak   s    
	



R

ic   !      C   sX  |  j    } t } t |  d k r- t } n  t |  d k rg  } x | D] }	 x | |	 D] \ }
 } } } |
 | k r | j |
  n  |
 | k r g  | |
 <d } n | |
 d d } | |
 j d |	 | | | f  q] WqL Wx( | D] } | | j d d    q Wn  i  } x| D]} | r:| | k r:qn  | | } g  } t } xk| D]c\ } }	 } } } | d k rt } n  | | | f | k rWt } g  } xf | D]^ \ } } } | | k r| | k rt } n  | | k  r| | k r| j | | f  qqWt |  d k rg  } t } xc | D][ \ } } } | | f | k r0| j | | | f  | | k r| | k rt } qq0q0W| } n  | r| j | | | f  qqWqWW| r| rqn  |	 | k rg  | |	 <n  x7 | D]/ \ } } } | |	 j | | | | | f  qWqWx | D] }	 | |	 j   q.W| rPi  } d } x | D] }	 g  | |	 <t | |	  } | d k r_d } x{ t |  D]m } | |	 | d }  | d 7} | |  k  r| |	 j | |  d	 t |  d
 d	 f  n  | |	 | d } qW| }  | |	 j | |  d	 t |  d
 d	 f  q_q_W| | f S| Sd S(   s   return a dictionary of cistematic gene features. Requires
    cistematic, obviously. Can filter-out pseudogenes. Will use
    additional regions dict to supplement gene models, if available. 
    Can restrict output to a list of GIDs.
    If regionComplement is set to true, returns the regions *outside* of the
    calculated boundaries, which is useful for retrieving intronic and 
    intergenic regions. maxStop is simply used to define the uppermost 
    boundary of the complement region.
    i    RX   it   customR   c         S   s   t  |  d | d  S(   Ni   (   R   (   R   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR   7  R   t   PSEUDOi   t   nonExont   FN(   t   getallGeneFeaturesR%   R   R'   R   R    R&   t   str(!   t   genomeObjectt   additionalRegionsDictt   ignorePseudot   restrictListt   regionComplementt   maxStopt   featuresDictt   restrictGIDt   sortListRA   RG   RE   RF   RH   t   senset   gidt   featuresByChromDictt   featureListt   newFeatureListt   isPseudot   ftypet   notContainedt   containedListt   fstartt   fstopt   ftype2t   newFListt   complementByChromDictt   complementIndext
   listLengtht   currentStartRL   t   currentStop(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getFeaturesByChromDict  s    
	
	(
			 (

-1
c
         C   s  i  }
 | d k r. | d k r. | r. d GH|
 S| d k rV | d k rV | rV d GH|
 S| d k rr | rr d GH|
 S| r | d k r d GH|
 S| d k r | d k r d GH|
 S| d k  r | d k r d GH|
 S|  j  } |  j   } t |  d k rg  } x | D] } x | | D] \ } } } } | | k rB| j |  n  | | k rag  | | <d } n | | d d	 } | | j d
 | | | | f  qWqWx( | D] } | | j d d    qWn  x| D]} | | } g  } t } xE | D]= \ } } } } } | d k rt } n  | j | | f  qW| rI| d k rIqn  | j   | d d	 } | d d } | d	 d } t | |  } | d k rg| r| d k r| r| | | d k r| | d } qq| } n | r| d k r| } n  | d k r| r| d } n | } |	 rf|  j | | f | d  } | | d k  rf| d } qfn  | d k  r{d } n  | | 8} n  | d k r| } |	 r|  j	 | | f | d  } | | d k  r| d } qn  | d k  rd } n  | | 7} n  | d k r.| | k  r.| d d | } q.n  | d k  r2t |  | k  rd| d	 d | } qdq2n| r| d k r| r| | | d k  r| | d } qq| } n | r| d k r| } n  | d k rV| r| d } n | } |	 r4|  j	 | | f | d  } | | d k  r4| d } q4n  | d k  rId } n  | | 7} n  | d k r| } |	 r|  j | | f | d  } | | d k  r| d } qn  | d k  rd } n  | | 8} n  | d k r| | k  r| d	 d	 | } qn  | d k  r2t |  | k  r2| d d | } q2n  t | |  } | |
 k r[g  |
 | <n  | r|
 | j | | | | | f  q|
 | j | | | | f  qWx |
 D] } |
 | j   qW|
 S(   s   return a dictionary of gene loci. Can be used to retrieve additional 
    sequence upstream or downstream of gene, up to the next gene. Requires
    cistematic, obviously. 
    Can filter-out pseudogenes and use additional regions outside of existing 
    gene models. Use upstreamSpanTSS to overlap half of the upstream region 
    over the TSS.
    If lengthCDS > 0 bp, e.g. X, return only the starting X bp from CDS. If 
    lengthCDS < 0bp, return only the last X bp from CDS.
    i    sA   getLocusByChromDict: asked for no sequence - returning empty dictsR   getLocusByChromDict: asked for only upstream and downstream - returning empty dictsP   getLocusByChromDict: asked for partial CDS but not useCDS - returning empty dictsR   getLocusByChromDict: asked for TSS spanning and partial CDS - returning empty dictsi   getLocusByChromDict: asked for discontinuous partial CDS from start and downstream - returning empty dictsf   getLocusByChromDict: asked for discontinuous partial CDS from stop and upstream - returning empty dictRX   iRv   R   c         S   s   t  |  d | d  S(   Ni   (   R   (   R   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR     R   Rw   i   Ry   i   (
   t   genomeRz   R   R   R    R%   R'   R$   t   leftGeneDistancet   rightGeneDistance(   R|   t   upstreamt
   downstreamt   useCDSR}   R~   t   upstreamSpanTSSt	   lengthCDSt	   keepSenset   adjustToNeighbort   locusByChromDictR   R   R   RA   RG   RE   RF   RH   R   R   R   R   R   R   t   gstartt   gstopt   glent   distancet   nextGene(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getLocusByChromDictu  s    
	
	(
	
								#!g      ?c	   "      C   s  d }	 i  }
 i  } | r3 d } d } d } d } n d } d } d } d } d } d GHt  |  d k r xY | D] } d g | |
 | <qo Wn7 x4 |  D], } x# |  | D] } d g | |
 | <q Wq Wx| D]} | |  k r q n  x$ |  | D] } | | | | | <q Wd | GHd } x| | D]w\ } } } |	 d 7}	 |	 d	 d k rXd
 |	 Gn  | | } | d k  rwd } n  x!|  | | D]} | | } | | } | | } | | } y | | } Wn d } n X| | k r| d 7} qn  | | k r| d 8} Pn  | | k o| k n r| d k  r=| | } n | } | | } | d k r| | }  | d k r| | k  rd }  n | d k rd }  n  |  | k r| d }  n  y |
 | |  c | | 7<Wqd | t |   f GHqXn | | }! |! | }  | d k r%|! | k  r%d }  n | d k r:d }  n  |  | k rS| d }  n  y |
 | |  c | | 7<Wn d | t |   f GHn X| } qqWq$Wq W|
 | f S(   s    returns 2 dictionaries of bin counts and region lengths, given a dictionary of predefined regions,
        a dictionary of reads, a number of bins, the length of reads, and optionally a list of regions 
        or a different weight / tag.
    i    i   i   i   i   s   entering computeRegionBinsg        s   
i s   read %d Ry   i
   s   %s %s(   R   R{   ("   t   regionsByChromDictt   hitDictt   binsR[   t
   regionListt   normalizedTagt   defaultRegionFormatt   fixedFirstBint	   binLengthRL   t   regionsBinst
   regionsLent   regionIDFieldt
   startFieldt	   stopFieldt   lengthFieldt
   senseFieldt   readIDRA   t   regionTuplet   startRegiont   tagStartR   Rl   t	   stopPointRE   RF   t   regionIDRP   t   rsenset   regionBinLengtht	   startdistt   binIDt   rdist(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   computeRegionBins  s    		

	








		

		t   readDatasetc           B   s  e  Z d  Z e d e e e d  Z d   Z d   Z d   Z d   Z	 d   Z
 d   Z d	   Z d
 d d  Z d d  Z d   Z d d  Z d   Z d   Z d d  Z d   Z d   Z d e d  Z e e e e d  Z e e e e d d e e e e e e e e e e d d d d e e d d  Z e e e d d e e e e e e e e e e d d d d  Z d d d e e e e d d  Z d d d d e d  Z d d d d d  Z d d d d d  Z e e e e d d   Z d4 e e d!  Z  d d e e d" e d d#  Z! d$   Z" d d%  Z# d&   Z$ d'   Z% d(   Z& e e e d d)  Z' e e e d*  Z( e e e d+  Z) d,   Z* d- d.  Z+ e d/  Z, e e d0  Z- d d1  Z. d2   Z/ d e d3  Z0 RS(5   sF   Class for storing reads from experiments. Assumes that custom scripts
    will translate incoming data into a format that can be inserted into the 
    class using the insert* methods. Default class subtype ('DNA') includes 
    tables for unique and multireads, whereas 'RNA' subtype also includes a 
    splices table.
    R   c         C   s  d |  _  d |  _ d |  _ d |  _ t |  _ d |  _ d |  _ d |  _ | ru | r\ d GHn  |  j	 |  |  j } n | } t
 j |  |  _  t
 j |  j  _ |  j  j d  | r | d k r d |  _ n	 | |  _ |  j |  j   n |  j d  } | d |  _ y  |  j d  } | d |  _ Wn9 y |  j d t f g  Wq^d GHd	 |  _ q^Xn X| r| rvd
 | GHn	 d | GH|  j   } d GH| j   }	 |	 j   x# |	 D] }
 d |
 d | |
 GHqW| r|  j   } |  j   } |  j d k r;| r;y d t |  t |  f GHWqd | | f GHqXq|  j d k r| r|  j   } y( d t |  t |  t |  f GHWqd | | | f GHqXqn  d |  j   GH|  j   rd GHqd GHn  d S(   s    creates an rds datafile if initialize is set to true, otherwise 
        will append to existing tables. datasetType can be either 'DNA' or 'RNA'.
        R   s   1.1s   caching ....s   PRAGMA temp_store = MEMORYt   DNAt   dataTypet
   rdsVersions&   could not add rdsVersion - read-only ?s   pre-1.0s   INITIALIZED dataset %ss
   dataset %ss	   metadata:s   	s"   
%d unique reads and %d multireadss"   
%s unique reads and %s multireadst   RNAs4   
%d unique reads, %d spliced reads and %d multireadss4   
%s unique reads, %s spliced reads and %s multireadss   default cache size is %d pagess   found indexs   not indexedN(   t   dbcont   memconR   R   R%   t	   memBackedt   memChromt	   memCursort   cachedDBFilet   cacheDBt   sqlitet   connectt   Rowt   row_factoryt   executet   initializeTablest   getMetadatat   insertMetadatat   currentRDSversiont   keysR    t   getUniqsCountt   getMultiCountR!   t   getSplicesCountt   getDefaultCacheSizet   hasIndex(   t   selft   datafilet
   initializet   datasetTypeR+   t   cachet   reportCountt   dbfilet   metadatat	   pnameListt   pnamet   ucountt   mcountt   scount(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   __init__r  sz    										
(c         C   s   y |  j    } Wn d } n Xy | |  j   7} Wn n X|  j d k rn y | |  j   7} Wqn qn Xn  y t |  } Wn d } n X| S(   s;    return the number of usable reads in the dataset.
        i    R   (   R   R   R   R   R!   (   R   t   total(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   __len__  s$    

c         C   s    |  j  d k r |  j   n  d S(   s3    cleanup copy in local cache, if present. 
        R   N(   R   t	   uncacheDB(   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   __del__  s    c         C   s*   t  j   d |  _ t j | |  j  d S(   s,    copy geneinfoDB to a local cache. 
        s   .dbN(   t   tempfilet   mktempR   t   shutilt   copyfile(   R   t   filename(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR     s    c         C   s   t  j |  j |  d S(   s,    copy geneinfoDB to a local cache. 
        N(   R   R   R   (   R   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   saveCacheDB  s    c         C   sI   |  j  d k rE y t j |  j   Wn d |  j  GHn Xd |  _ n  d S(   s.    delete geneinfoDB from local cache. 
        R   s   could not delete %sN(   R   t   ost   removet   cachedDB(   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR     s    c         C   s!   d | | f } |  j  |  d S(   s:    attach another database file to the readDataset.
        s   attach '%s' as %sN(   R   (   R   R   t   asnamet   stmt(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   attachDB  s    c         C   s   d | } |  j  |  d S(   s4    detach a database file to the readDataset.
        s	   detach %sN(   R   (   R   R   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   detachDB  s    
t   *c         C   s0   d | | | | | f } |  j  | d t d S(   s    import into current RDS the table (with columns destcolumns, 
            with default all columns) from the database file asname,
            using the column specification of ascolumns (default all). 
        s&   insert into %s %s select %s from %s.%st   forceCommitN(   R   R'   (   R   R   t   tablet	   ascolumnst   destcolumnsR   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   importFromDB  s    c         C   s   g  } |  j  r! |  j j   } n |  j j   } | d k rI | d 7} n  d | } | j |  | j   } x | D] } | j | d  qs W| S(   sB    get a list of table names in a particular database file.
        R   t   .s3   select name from %ssqlite_master where type='table't   name(   R   R   t   cursorR   R   t   fetchallR   (   R   R   t
   resultListt   sqlR   t   resultst   row(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt	   getTables   s    	
c         C   s=   d } t  |  j | d t d d  } | d k r9 t St S(   s=    check whether the RDS file has at least one index. 
        s5   select count(*) from sqlite_master where type='index't   returnResultsi    (   R!   R   R'   R%   (   R   R   R@   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR     s
    #i c         C   sy   | j  d |  | j  d  | j  d |  j  | j  d  | j  d  |  j d k rk | j  d  n  | j   d S(	   s    creates table schema in database connection acon, which is 
        typically a database file or an in-memory database.
        s   PRAGMA DEFAULT_CACHE_SIZE = %ds3   create table metadata (name varchar, value varchar)s,   insert into metadata values("dataType","%s")s   create table uniqs (ID INTEGER PRIMARY KEY, readID varchar, chrom varchar, start int, stop int, sense varchar, weight real, flag varchar, mismatch varchar)s   create table multi (ID INTEGER PRIMARY KEY, readID varchar, chrom varchar, start int, stop int, sense varchar, weight real, flag varchar, mismatch varchar)R   s   create table splices (ID INTEGER PRIMARY KEY, readID varchar, chrom varchar, startL int, stopL int, startR int, stopR int, sense varchar, weight real, flag varchar, mismatch varchar)N(   R   R   t   commit(   R   t   aconR   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR     s    c         C   s   |  j  j   S(   s\    returns a cursor to file database for low-level (SQL) 
        access to the data.
        (   R   R  (   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getFileCursor,  s    c         C   s   |  j  j   S(   s^    returns a cursor to memory database for low-level (SQL) 
        access to the data.
        (   R   R  (   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getMemCursor2  s    c         C   s
  d } g  } i  } | d k r+ d | } n  |  j  rF |  j j   } n |  j j   } | j d |  | j   } x | D] } | d } | d } | | k r | | | <qy t }	 d }
 xG |	 r| d t |
  } | | k r | | | <t }	 n  |
 d 7}
 q Wqy W| S(	   s+    returns a dictionary of metadata.
        R   s    where name = "%s" s    select name, value from metadataR  t   valuei   R   i   (	   R   R   R  R   R   R  R'   R{   R%   (   R   t	   valueNamet   whereClauseR
  t   resultsDictR	  R  R   R   t   tryingRL   t   newName(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR   8  s0    	

	
	c         C   s@   |  j    } d | k r! d GHd St | d j d  d  Sd S(   s2    returns readsize if defined in metadata.
        t   readsizes+   no readsize parameter defined - returning 0i    t    N(   R   R!   R   (   R   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getReadSizeZ  s
    c         C   s!   t  |  j d d t d d  S(   s)    returns the default cache size.
        s   PRAGMA DEFAULT_CACHE_SIZER  i    (   R!   R   R'   (   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR   d  s    t   uniqsc         C   s   d | } |  j  r% |  j j   } n |  j j   } | j |  g  } x | D] } t | d d j    d k  rz qN n  | r | d | k r | j | d  q qN | d d | k rN | j | d d  qN qN W| j   | S(   s:    returns a list of distinct chromosomes in table.
        s   select distinct chrom from %sRA   i   i   (	   R   R   R  R   R   R   R   R   R    (   R   R   R-   t	   statementR	  R
  R  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getChromosomesj  s     
	 
c   
      C   sE  d } |  j  r! |  j j   } n |  j j   } | r{ y/ | j d |  t | j   d d  } Wq{ d | GHq{ Xn  | r | j d |  y3 t | j   d d  } | | k r | } n  Wq q Xn  | r)| j d |  y3 t | j   d d  }	 |	 | k r|	 } n  Wq)q)Xn  | rAd | | f GHn  | S(   sI    returns the maximum coordinate for reads on a given chromosome.
        i    s/   select max(start) from uniqs where chrom = "%s"s,   couldn't retrieve coordMax for chromosome %ss2   select max(startR) from splices where chrom = "%s"s/   select max(start) from multi where chrom = "%s"s   %s maxCoord: %d(   R   R   R  R   R   R!   R  (
   R   RA   R+   t   doUniqst   doMultit	   doSplicest   maxCoordR	  t	   spliceMaxt   multiMax(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getMaxCoordinate  s:    	ic   (      C   s(  d } g  } i  } d } | d k r< | |  j  k r< d } n  | d k rh | |  j  k rh | d | 7} n  | d k r | d k r | d 7} n d } | r | d 7} | d | 7} | d 7} q | d | 7} n  | d	 k r| d k r | d 7} n d } | d
 | 7} n  | d	 k rG| d k r0| d 7} n d } | d | 7} n  t |  d k r| d k rr| d 7} n d } | d | d 7} n  | r| d k r| d 7} n d } | d 7} n  | d k s| d k r| d k r| d 7} n d } | d | 7} n  d } | r"d } d } ne d } | r;| d 7} n  | sN| d 7} n  | ra| d 7} n  | rt| d 7} n  |	 r| d 7} n  | d k r| d | 7} n  | r| d | | } | r| d | d | | 7} qn | d | } | rE|  j rd/ |  j _ |  j j   } n d/ |  j _ |  j j   } | d 7} nq | r|  j rf|  j j   } n |  j j   } | d  7} n4 |  j r|  j j   } n |  j j   } | d! 7} | j |  | r4g  | D]- } t	 | d  | d" t
 | d#  g ^ q| | <|  j r"t j |  j _ q$t j |  j _ nd }  d }! d }" x| D]} | d$ }# | rp| d% } n | d% d& } | r| |  k rg  | | <| }  | }$ n | r,|# }% d' |# k r|# j d'  \ }% }& n  d( |% k r| r|# j d(  \ }% }" n  |% |! k r,g  | |% <|% }! |% }$ q,n  t	 | d)  g }' | r_|' j t	 | d*   n  | sy|' j | d+  n  | r|' j t
 | d,   n  | r|' j | d-  n  |	 r|' j | d.  n  |
 r|' j |#  n  | r|' j |  n  | r|' j |"  n  | |$ j |'  qMW| S(0   s    returns a dictionary of reads in a variety of formats 
        and which can be restricted by chromosome or custom-flag.
        Returns unique reads by default, but can return multireads 
        with doMulti set to True.
        R   s    where s    chrom = "%s" s    and s    flag LIKE "%s   %ss   %"s    flag = "%s" is    start > %d s    stop < %d i    s    readID LIKE  's   %' s    mismatch != '' RX   R   s    sense = '%s' s!   select start, sense, sum(weight) s    GROUP BY start, sense s   select ID, chrom, start, readIDs   ,stops   ,senses   ,weights   ,flags	   ,mismatchs
    LIMIT %d s    from uniqs s    UNION ALL s    from multi s    order by starts    order by readID, start s    order by chrom, starti   i   R   RA   i   s   ::t   /RE   RF   R   Rl   t   flagt   mismatchN(   R   R   R   t   NoneR   R   R  R   R   R!   R   R   R   R   R   ((   R   R+   t   bothEndst   noSenseR-   RA   R&  t
   withWeightt   withFlagt   withMismatcht   withIDt	   withChromt
   withPairIDR  R  t   findallOptimizet
   readIDDictt   readLikeRE   RF   t   limitt   hasMismatcht   flagLiket   strandR  R
  R  t   groupByt   selectClauseR   R	  R  t   currentChromt   currentReadIDt   pairIDR   t   dictKeyt	   theReadIDt   multiplicityt   newrow(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getReadsDict  s    	
	 			
>	

	
c   !      C   s  d } g  } i  } t  } | d k r3 | |  j k sE | d k sE | rN d } n  | d k r | |  j k r | d | 7} t } n  | d k r | r | d 7} n  | r | d 7} | d | 7} | d 7} n | d | 7} t } n  | r| r | d 7} n  | d	 7} n  | d k rC| d k r,| d 7} n d } | d
 | 7} n  | d k r| d k rh| d 7} n d } | d | 7} n  | d k r| d k r| d 7} n d } | d | 7} n  d } | s| d 7} n  | r| d 7} n  | r| d 7} n  | r| d 7} n  |  j r(|  j j   } n |  j j   } | d k r| rd | } | j |  x\ | D]1 } | r| d } n | d d } g  | | <qhWn  | d k r| rg  | | <n  | d | d } | j |  d } x| D]} d } | d } | r| d } n | d d } | r{d | k rM| j d  \ } } n | } | | k rg  | | <| } | } qn | } t	 | d  g }  |  j
 t	 | d   |  j
 t	 | d   |  j
 t	 | d   | s|  j
 | d  n  | r|  j
 t | d    n  | r-|  j
 | d!  n  | rG|  j
 | d"  n  |	 r]|  j
 |  n  |
 rs|  j
 |  n  | r|  j
 |  n  | r| | j
 |  d#  |  d$  | | j
 |  d#  q| | j
 |   qW| S(%   s    returns a dictionary of spliced reads in a variety of 
        formats and which can be restricted by chromosome or custom-flag.
        Returns unique spliced reads for now.
        R   s    where s    chrom = "%s" s    and s    flag LIKE "%s   %ss   %"s    flag = "%s" s    mismatch != '' s    sense = '%s' is    startL > %d s    stopR < %d s6   select ID, chrom, startL, stopL, startR, stopR, readIDs   , senses   , weights   , flags
   , mismatchs#   select distinct chrom from splices RA   i   s    from splices s    order by chrom, startLi    R   R%  t   startLt   stopLt   startRt   stopRR   Rl   R&  R'  i   i   (   R%   R   R'   R   R   R  R   R   R   R!   R   R   (!   R   R+   R*  R-   RA   R&  R+  R,  R-  R.  R/  R0  R2  t	   splitReadR5  R6  RE   RF   R7  R  R
  R  t
   conditionsR9  R	  R   R  R;  R<  R   R>  R=  R@  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getSplicesDictI  s    -		
		


t   bothc	         C   s  d }	 d }
 d } d } | d k r1 d | } n  | rl y" t  |  j | | | |   }	 Wql d }	 ql Xn  | r y" t  |  j | | | |   }
 Wq d }
 q Xn  | r y" t  |  j | | | |   } Wq d } q Xn  | r |	 |
 | } | S|	 |
 | f Sd S(   s0    return read counts for a given region.
        i    R   RX   R   s    sense ='%s' N(   RX   R   (   R   R   R   R   (   R   RA   t   rmint   rmaxR  t   multit   splicest   reportCombinedR   R   R   R   t   restrictR   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt	   getCounts  s2    """c   
      C   s  d } d } | d k sK | d k r3 | |  j  k sK | d k sK | d k rT d } n  | d k r | |  j  k r | d | d 7} n  | d k r t |  d k r | d 7} n  | d t |  7} n  | d k rt |  d k r | d 7} n  | d	 t |  7} n  | d k r@t |  d k r3| d 7} n  | | 7} n  |  j r[|  j j   } n |  j j   } | r| j d
 |  n | j d |  | j   }	 y t	 |	 d  } Wn d } n X| S(   s7    returns the number of row in the uniqs table.
        R   i    s    where s   chrom="s   " i   s    and s   start >= %ss   start <= %ss3   select count(distinct chrom+start+sense) from uniqss   select sum(weight) from uniqs(
   R   R   R{   R   R   R  R   R   t   fetchoneR!   (
   R   RA   RJ  RK  RO  t   distinctt   whereclauseR@   R	  t   result(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR     s<    ?		
c   	      C   s  d } d } | d k sK | d k r3 | |  j  k sK | d k sK | d k rT d } n  | d k r | |  j  k r | d | d 7} n  | d k r t |  d k r | d 7} n  | d t |  7} n  | d k rt |  d k r | d 7} n  | d	 t |  7} n  | d k r@t |  d k r3| d 7} n  | | 7} n  |  j r[|  j j   } n |  j j   } |  j r|  j j   } n |  j j   } | j d
 |  | j   } y t	 | d  } Wn d } n X| S(   s9    returns the number of row in the splices table.
        R   i    s    where s   chrom="s   " i   s    and s   startL >= %ss   startL <= %ss   select sum(weight) from splices(
   R   R   R{   R   R   R  R   R   RQ  R!   (	   R   RA   RJ  RK  RO  RS  R@   R	  RT  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR     s>    ?			
c   	      C   s  d } d } | d k sK | d k r3 | |  j  k sK | d k sK | d k rT d } n  | d k r | |  j  k r | d | d 7} n  | d k r t |  d k r | d 7} n  | d t |  7} n  | d k rt |  d k r | d 7} n  | d	 t |  7} n  | d k r@t |  d k r3| d 7} n  | | 7} n  |  j r[|  j j   } n |  j j   } | j d
 |  | j   } y t	 | d  } Wn d } n X| S(   sD    returns the number of distinct readIDs in the multi table.
        R   i    s    where s   chrom="s   " i   s    and s   start >= %ss   start <= %ss   select sum(weight) from multi(
   R   R   R{   R   R   R  R   R   RQ  R!   (	   R   RA   RJ  RK  RO  RS  R@   R	  RT  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR   5  s8    ?		
c         C   s:  d } d } | d k r% d | } n  | r8 | d 7} n  | rj t  |  d k r] | d 7} n  | d 7} n  | r t  |  d k r | d 7} n  | d 7} n  | d | 7} |  j r |  j j   } n |  j j   } | j |  | j   }	 | rg  |	 D] }
 |
 j d	  d d ^ q Sg  |	 D] }
 |
 d ^ q"Sd
 S(   s    get readID's.
        R   i    s
    LIMIT %d s   select readID from uniqs s    union s   select readID from multi s   select readID from splices s    group by readID R%  N(   R   R   R   R  R   R   R  R   (   R   R  RL  RM  t   pairedR4  R   t	   limitPartR	  RT  R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt
   getReadIDsY  s.    	(c         C   s  i d d 6d d 6d d 6d d 6d d 6} |  j    } | rG | g } n |  j   } | j   i  } x<| D]4} | r d | GHn  g  | | <|  j d t d | d	 t d
 t d t  }	 | ry|  j d k ry|  j d t d | d	 t d t d t  }
 |
 j   } xu| D]j} |
 | d \ } } } } } } | j	 d  } x2| D]*} d | k r\qDn  t
 |  } | d k r| d } | | d } t | d | d ! } nN | d k r| | d } | | | d } | t | d | d ! d } n  t |  t |  d } d } t |  t |  k r=| | d } n | | } | | } | | j | | | | g  qDWqWn  | |	 k rqj n  x|	 | D]\ } } } | j	 d  } x | D] } d | k rqn  t
 |  } | d k r| d } | | d } t | d | d ! } nN | d k rk| | d } | | | d } | t | d | d ! d } n  | | d } | | j | | | | g  qWqWqj W| S(   sB    returns the uniq and spliced mismatches in a dictionary.
        t   Tt   At   Ct   Gt   Ns%   getting mismatches from chromosome %sR-   RA   R-  R1  R5  R   R2  i    t   ,RX   i   R   (   R  R  R    RA  R'   R%   R   RH  R   R   R   R!   R   (   R   t   mischromR+   t
   useSplicest   revcompR[   t   hitChromListt   snpDictt   achromR   t
   spliceDictt   spliceIDListt   kt   startpost   lefthalft
   rightstartt   endsposR   t
   mismatchest   spMismatchListR'  t
   change_lent   change_fromt   change_baset
   change_post	   firsthalft
   secondhalft	   change_atRE   t   mismatchList(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getMismatchesx  sp    )

** 
"

(
")g      ?c	         C   s_  |  j    }	 t |	 d j d  d  }
 |	 d } t |
  } d | } | d k rw |  j | d | d | |
 } n | | |
 } t d d	 g |  } |  j d
 t d | d t d | d | d | d t  } | d k  r d } n  x | | D] \ } } } x | D]y } | | | } | s:| d k rQ| d k rQ| | c | | 7<q| d k r| d k r| | c | | 8<qqWq W~ | r[| d k r[| d k r|  j d
 t d | d t d | d |  } n |  j d
 t d | d t  } | | k rUxR| | D]C\ } } } } } } | | | k  rx t t	 | |   D]q } | | | } | s}| d k r| d k r| | c | 7<qJ| d k rJ| d k rJ| | c | 8<qJqJWx t t	 | |   D]q } | | | } | s	| d k r| d k r| | c | 7<q| d k r| d k r| | c | 8<qqWqqWn  ~ n  | S(   s   return a profile of the chromosome as an array of per-base read coverage....
            keepStrand = 'both', 'plusOnly', or 'minusOnly'
        R  R  i    R   g      ?R  R   RV   g        R-   RA   R+  RE   RF   R1  RX   t	   minusOnlyR   t   plusOnlyR   R.  (
   R   R!   R   R&   R$  R   RA  R'   RH  R$   (   R   t
   chromosomet   cstartt   cstopt   useMultiR_  t   normalizationFactort   trackStrandt
   keepStrandR   R[   R   t   readlenRanget   scalet   lastNTt
   chromModelR   t   hstartR   Rl   RL   Rk   Rd  t   Lstartt   Lstopt   Rstartt   RstopR   t   readName(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   getChromProfile  sR    

"6	-#!c         C   s$   |  j  j d |  |  j  j   d S(   sL    inserts a list of (pname, pvalue) into the metadata
        table.
        s.   insert into metadata(name, value) values (?,?)N(   R   t   executemanyR  (   R   t
   valuesList(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR     s    c         C   sZ   d t  |  | f } | d k r9 | d t  |  7} n  |  j j |  |  j j   d S(   sM    update a metadata field given the original value and the new value.
        s.   update metadata set value='%s' where name='%s'R   s    and value='%s' N(   R{   R   R   R  (   R   R   t   newValuet   originalValueR   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   updateMetadata  s
    c         C   s$   |  j  j d |  |  j  j   d S(   su    inserts a list of (readID, chrom, start, stop, sense, weight, flag, mismatch)
        into the uniqs table.
        sn   insert into uniqs(ID, readID, chrom, start, stop, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?)N(   R   R  R  (   R   R  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   insertUniqs  s    c         C   s$   |  j  j d |  |  j  j   d S(   su    inserts a list of (readID, chrom, start, stop, sense, weight, flag, mismatch)
        into the multi table.
        sn   insert into multi(ID, readID, chrom, start, stop, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?)N(   R   R  R  (   R   R  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   insertMulti  s    c         C   s$   |  j  j d |  |  j  j   d S(   s    inserts a list of (readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch)
        into the splices table.
        s   insert into splices(ID, readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?,?,?)N(   R   R  R  (   R   R  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   insertSplices  s    c         C   s   d } | d k r d } n  | r; |  j  j d | |  n  | r[ |  j  j d | |  n  |  j d k r | r |  j  j d | |  |  j  j d | |  n  |  j  j   d	 S(
   s7   update reads on file database in a list region of regions for a chromosome to have a new flag.
            regionsList must have 4 fields per region of the form (flag, chrom, start, stop) or, with 
            sense set to '+' or '-', 5 fields per region of the form (flag, chrom, start, stop, sense).
        R   RI  s    and sense = ? sG   UPDATE uniqs SET flag = ? where chrom = ? and start >= ? and start < ? sG   UPDATE multi SET flag = ? where chrom = ? and start >= ? and start < ? R   s\   UPDATE splices SET flag = flag || ' L:' || ? where chrom = ? and startL >= ? and startL < ? s\   UPDATE splices SET flag = flag || ' R:' || ? where chrom = ? and startR >= ? and startR < ? N(   R   R  R   R  (   R   t   regionsListR  RL  RM  R   RO  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt	   flagReads  s    	c         C   sw   | r |  j  j d |  n  | r: |  j  j d |  n  |  j d k rf | rf |  j  j d |  n  |  j  j   d S(   sl    set the flag fields in the entire dataset to clear. Useful for rerunning an analysis from scratch.
        s   UPDATE uniqs SET flag = '%s's   UPDATE multi SET flag = '%s'R   s   UPDATE splices SET flag = '%s'N(   R   R   R   R  (   R   R&  R  RL  RM  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   setFlags&  s    c         C   sk   | r |  j  j d  n  | r2 |  j  j d  n  |  j d k rZ | rZ |  j  j d  n  |  j  j   d S(   sn    reset the flag fields in the entire dataset to clear. Useful for rerunning an analysis from scratch.
        s   UPDATE uniqs SET flag = ''s   UPDATE multi SET flag = ''R   s   UPDATE splices SET flag = ''N(   R   R   R   R  (   R   R  RL  RM  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt
   resetFlags3  s    c         C   s   |  j  j d |  d  S(   NsI   UPDATE multi SET weight = ? where chrom = ? and start = ? and readID = ? (   R   R  (   R   t   readList(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   reweighMultireads@  s    t   ONc         C   s/   y |  j  j d |  Wn d | GHn Xd  S(   Ns   PRAGMA SYNCHRONOUS = %ss-   warning: couldn't set PRAGMA SYNCHRONOUS = %s(   R   R   (   R   R  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   setSynchronousPragmaC  s    c         C   s5   |  j  j d |  | r1 |  j  j d |  n  d  S(   Ns   PRAGMA CACHE_SIZE = %ds   PRAGMA DEFAULT_CACHE_SIZE = %d(   R   R   (   R   R   t   default(    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt
   setDBcacheI  s    c         C   s   |  j  r |  j j   } n |  j j   } | j |  | rM | j   } | S| r| |  j  rl |  j j   q| |  j j   n  d  S(   N(   R   R   R  R   R   R  R  (   R   R  R  R   R	  RT  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR   N  s    		c         C   s   | |  j    k r" |  j |  n  |  j d  |  j j d  d GH|  j j d  d GH|  j j d  d GH|  j j d  d	 GH|  j d
 k r |  j j d  d GH|  j j d  d GH|  j j d  d GHn  |  j j   |  j d  d S(   s    Builds the file indeces for the main tables.
            Cache is the number of 1.5 kb pages to keep in memory. 
            100000 pages translates into 150MB of RAM, which is our default.
        t   OFFs-   CREATE INDEX uPosIndex on uniqs(chrom, start)s   built uPosIndexs(   CREATE INDEX uChromIndex on uniqs(chrom)s   built uChromIndexs-   CREATE INDEX mPosIndex on multi(chrom, start)s   built mPosIndexs(   CREATE INDEX mChromIndex on multi(chrom)s   built mChromIndexR   s0   CREATE INDEX sPosIndex on splices(chrom, startL)s   built sPosIndexs1   CREATE INDEX sPosIndex2 on splices(chrom, startR)s   built sPosIndex2s*   CREATE INDEX sChromIndex on splices(chrom)s   built sChromIndexR  N(   R   R  R  R   R   R   R  (   R   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt
   buildIndex]  s(    c         C   s   y |  j  d  |  j j d  |  j j d  |  j j d  |  j j d  |  j d k r |  j j d  y |  j j d  Wn n X|  j j d	  n  |  j j   Wn d
 GHn X|  j  d  d S(   s5    drops the file indices for the main tables.
        R  s   DROP INDEX uPosIndexs   DROP INDEX uChromIndexs   DROP INDEX mPosIndexs   DROP INDEX mChromIndexR   s   DROP INDEX sPosIndexs   DROP INDEX sPosIndex2s   DROP INDEX sChromIndexs   problem dropping indexR  N(   R  R   R   R   R  (   R   (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt	   dropIndexy  s"    	c         C   s  d |  _  t j d  |  _  |  j |  j   |  j j   } d } | d k rk d | GHd | } | |  _ n	 d |  _ |  j  j d  |  j  j d  |  j  j d  | j d  } g  } x) | D]! } | j | d	 | d
 f  q W|  j  j	 d |  | j d |  } g  } x_ | D]W } | j | d | d t
 | d  t
 | d  | d | d | d | d f  qW|  j  j	 d |  | j d |  } g  } x_ | D]W } | j | d | d t
 | d  t
 | d  | d | d | d | d f  qW|  j  j	 d |  |  j d k r| j d |  } g  } xy | D]q } | j | d | d t
 | d  t
 | d  t
 | d  t
 | d  | d | d | d | d f
  qCW|  j  j	 d |  n  | r| d k r5|  j  j d  |  j  j d   |  j d k r|  j  j d!  |  j  j d"  qq|  j  j d#  |  j  j d$  |  j d k r|  j  j d%  |  j  j d&  qn  t |  _ t j |  j  _ |  j  j   d' S((   s    makes a copy of the dataset into memory for faster access.
        Can be restricted to a "full" chromosome. Can also build the 
        memory indices.
        R   s   :memory:s
   memSync %ss    where chrom = "%s" s   PRAGMA temp_store = MEMORYs   PRAGMA CACHE_SIZE = 1000000s   delete from metadatas    select name, value from metadataR  R  s.   insert into metadata(name, value) values (?,?)sK   select chrom, start, stop, sense, weight, flag, mismatch, readID from uniqsR   RA   RE   RF   R   Rl   R&  R'  sn   insert into uniqs(ID, readID, chrom, start, stop, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?)sK   select chrom, start, stop, sense, weight, flag, mismatch, readID from multisn   insert into multi(ID, readID, chrom, start, stop, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?)R   s^   select chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch, readID from splicesRB  RC  RD  RE  s   insert into splices(ID, readID, chrom, startL, stopL, startR, stopR, weight, sense, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?,?,?)s&   CREATE INDEX uPosIndex on uniqs(start)s&   CREATE INDEX mPosIndex on multi(start)s*   CREATE INDEX sPosLIndex on splices(startL)s*   CREATE INDEX sPosRIndex on splices(startR)s-   CREATE INDEX uPosIndex on uniqs(chrom, start)s-   CREATE INDEX mPosIndex on multi(chrom, start)s1   CREATE INDEX sPosLIndex on splices(chrom, startL)s1   CREATE INDEX sPosRIndex on splices(chrom, startR)N(   R   R   R   R   R   R  R   R   R   R  R!   R   R'   R   R   R   R  (   R   RA   RL   R  RS  R
  t   results2R  (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   memSync  sb    		
	UUo	N(1   t   __name__t
   __module__t   __doc__R%   R'   R   R   R   R   R   R   R   R   R  R  R   R   R  R  R   R  R   R  R$  RA  RH  RP  R   R   R   RW  R(  Ru  R  R   R  R  R  R  R  R  R  R  R  R  R   R  R  R  (    (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyR   j  sX   H								
		"	
	'N?}! '($A5							(    (   t   randomR"   t   sqlite3R   t   timeR    R   t   commoncodeVersionR   R   R   R   R   t   gett   cisTempt   tempdirR   R%   R'   RU   Ru   R   R   R   R   (    (    (    s9   /oak/stanford/groups/akundaje/marinovg/code/commoncode.pyt   <module>   s$   $		-jY$^