ó
æZËIc           @   sœ   d  Z  d d l Z d d l Z d d l m Z d d l m Z d d l m Z d e j f d „  ƒ  YZ	 e
 d k r˜ e j d	 d
 ƒ Z e j d e ƒ n  d S(   s2   Test for the SwissProt parser on SwissProt files.
iÿÿÿÿN(   t   SeqIO(   t	   SwissProt(   t	   SeqRecordt   TestSwissProtc           B   s   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z RS(   c         C   sq  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d d d	 g ƒ |  j
 | j d
 d d d d d d d d d d g ƒ |  j
 | j d4 ƒ |  j
 t | j ƒ d ƒ |  j
 | j d d5 ƒ |  j
 | j d d6 ƒ |  j
 | j d" d7 ƒ |  j
 | j d% d8 ƒ |  j
 | j d( d9 ƒ |  j
 | j d+ d: ƒ |  j
 t | j ƒ d ƒ |  j
 | j d j d/ ƒ |  j
 | j d j d0 ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d d; ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d3 S(<   s   Parsing SwissProt file sp001t   sp001R   t   swisst   Q13454t	   N33_HUMANs   N33 PROTEIN.sV   Seq('MGARGAPSRRRQAGRRLRYLPTGSFPFLLLLLLLCIQLGGGQKKKENLLAEKVE...DFE', ProteinAlphabet())t   Q14911t   Q14912t	   Eukaryotat   Metazoat   Chordatat   Craniatat
   Vertebratat   Mammaliat   Eutheriat   Primatest
   Catarrhinit	   Hominidaet   Homoi\  iüš  t   75818910i   i    t   TRANSMEMi   i(   s
   POTENTIAL.t    i   iÅ   iÙ   i   iÞ   iò   i   i  i)  i   i9  iM  i   t   VARSPLICiX  s   DLDFE -> FLIK (IN FORM 2).s<   MACGROGAN D., LEVY A., BOVA G.S., ISAACS W.B., BOOKSTEIN R.;s   "Structure and methylation-associated silencing of a gene within a
homozygously deleted region of human chromosome band 8p22.";t   MEDLINEt   96299740N(   i\  iüš  R   (   R   i   i(   s
   POTENTIAL.R   (   R   iÅ   iÙ   s
   POTENTIAL.R   (   R   iÞ   iò   s
   POTENTIAL.R   (   R   i  i)  s
   POTENTIAL.R   (   R   i9  iM  s
   POTENTIAL.R   (   R   iX  i\  s   DLDFE -> FLIK (IN FORM 2).R   (   R   R   (   t   ost   patht   joint   openR    t   readt   closet   assert_t
   isinstanceR   t   assertEqualt   idt   namet   descriptiont   reprt   seqR   t
   entry_namet
   accessionst   organism_classificationt   seqinfot   lent   featurest
   referencest   authorst   titlet   tostringt   sequencet   listt   parset   Record(   t   selft   filenamet   datafilet   test_handlet
   seq_recordt   recordt   records(    (    s   test_SwissProt.pyt
   test_sp001   sh    

4 
&
c         C   sØ  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d g ƒ |  j
 | j d d	 d
 d d d d d d d d d g ƒ |  j
 | j d, ƒ |  j
 t | j ƒ d ƒ |  j
 | j d d- ƒ |  j
 | j d d. ƒ |  j
 t | j ƒ d" ƒ |  j
 | j d j d# ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d$ ƒ |  j
 | j d j d% ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d d/ ƒ |  j
 | j d j d( ƒ |  j
 | j d j d) ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d d0 ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d+ S(1   s   Parsing SwissProt file sp002t   sp002R   R   t   P54101t	   CSP_MOUSEs   CYSTEINE STRING PROTEIN (CSP).sV   Seq('MADQRQRSLSTSGESLYHVLGLDKNATSDDIKKSYRKLALKYHPDKNPDNPEAA...GFN', ProteinAlphabet())R
   R   R   R   R   R   R   t   Rodentiat   Sciurognathit   Muridaet   Murinaet   MusiÆ   iTV  t   9DF0142Bi   i    t   DOMAINi   iR   s
   DNAJ-LIKE.R   i   iv   i€   s	   POLY-CYS.i   s   QIN N., LIN T., BIRNBAUMER L.;s!   MASTROGIACOMO A., GUNDERSEN C.B.;sS   "The nucleotide and deduced amino acid sequence of a rat cysteine
string protein.";R   t   95223109s   BRAUN J.E., SCHELLER R.H.;s[   "Cysteine string protein, a DnaJ family member, is present on diverse
secretory vesicles.";t   96188189N(   iÆ   iTV  RG   (   RH   i   iR   s
   DNAJ-LIKE.R   (   RH   iv   i€   s	   POLY-CYS.R   (   R   RI   (   R   RJ   (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp002a   sp    

7   
&
c         C   s7	  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d d d	 g ƒ |  j
 | j d
 d d d d d d d d d g
 ƒ |  j
 | j dT ƒ |  j
 t | j ƒ d ƒ |  j
 | j d dU ƒ |  j
 | j d dV ƒ |  j
 | j d  dW ƒ |  j
 | j d# dX ƒ |  j
 | j d& dY ƒ |  j
 t | j ƒ d) ƒ |  j
 | j d j d* ƒ |  j
 | j d j d+ ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d dZ ƒ |  j
 | j d j d. ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d  j d/ ƒ |  j
 | j d  j d0 ƒ |  j
 t | j d  j ƒ d ƒ |  j
 | j d  j d d[ ƒ |  j
 | j d# j d2 ƒ |  j
 | j d# j d3 ƒ |  j
 t | j d# j ƒ d ƒ |  j
 | j d# j d d\ ƒ |  j
 | j d& j d5 ƒ |  j
 | j d& j d6 ƒ |  j
 t | j d& j ƒ d ƒ |  j
 | j d j d7 ƒ |  j
 | j d j d8 ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d d] ƒ |  j
 | j d: j d; ƒ |  j
 | j d: j d< ƒ |  j
 t | j d: j ƒ d ƒ |  j
 | j d: j d d^ ƒ |  j
 | j d> j d? ƒ |  j
 | j d> j d@ ƒ |  j
 t | j d> j ƒ d ƒ |  j
 | j d> j d d_ ƒ |  j
 | j dB j dC ƒ |  j
 | j dB j dD ƒ |  j
 t | j dB j ƒ d ƒ |  j
 | j dB j d d` ƒ |  j
 | j dF j dG ƒ |  j
 | j dF j d ƒ |  j
 t | j dF j ƒ d ƒ |  j
 | j dH j dI ƒ |  j
 | j dH j dJ ƒ |  j
 t | j dH j ƒ d ƒ |  j
 | j dK j dL ƒ |  j
 | j dK j dM ƒ |  j
 t | j dK j ƒ d ƒ |  j
 | j dK j d da ƒ |  j
 | j dO j dP ƒ |  j
 | j dO j dQ ƒ |  j
 t | j dO j ƒ d ƒ |  j
 | j dO j d db ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ dS S(c   s   Parsing SwissProt file sp003t   sp003R   R   t   P42655t
   143E_HUMANs…   14-3-3 PROTEIN EPSILON (MITOCHONDRIAL IMPORT STIMULATION FACTOR L
SUBUNIT) (PROTEIN KINASE C INHIBITOR PROTEIN-1) (KCIP-1) (14-3-3E).sV   Seq('MDDREDLVYQAKLAEQAERYDEMVESMKKVAGMDVELTVEERNLLSVAYKNVIG...ENQ', ProteinAlphabet())t   P29360t   Q63631t	   EUKARYOTAt   METAZOAt   CHORDATAt
   VERTEBRATAt   MAMMALIAt   EUTHERIAt   PRIMATESt
   CATARRHINIt	   HOMINIDAEt   HOMOiÿ   iöq  t   40A43E62i   i    t   MOD_RESi   s   ACETYLATION.R   t   CONFLICTiI   s   K -> T (IN REF. 8).i   ix   s   F -> S (IN REF. 8).i   i{   s   K -> Y (IN REF. 8).i   i   s   H -> Y (IN REF. 13).i   s'   CONKLIN D.S., GALAKTIONOV K., BEACH D.;s5   "14-3-3 proteins associate with cdc25 phosphatases.";R   t   95372385s"   LUK S.C.W., LEE C.Y., WAYE M.M.Y.;s+   JIN D.Y., LYU M.S., KOZAK C.A., JEANG K.T.;s   "Function of 14-3-3 proteins.";t   96300316s6   CHONG S.S., TANIGAMI A., ROSCHKE A.V., LEDBETTER D.H.;sŽ   "14-3-3 epsilon has no homology to LIS1 and lies telomeric to it on
chromosome 17p13.3 outside the Miller-Dieker syndrome chromosome
region.";t   97011338s(   TANIGAMI A., CHONG S.S., LEDBETTER D.H.;s#   "14-3-3 epsilon genomic sequence.";si   ROSEBOOM P.H., WELLER J.L., BABILA T., AITKEN A., SELLERS L.A.,
MOFFET J.R., NAMBOODIRI M.A., KLEIN D.C.;sX   "Cloning and characterization of the epsilon and zeta isoforms of the
14-3-3 proteins.";t   94296566i   s`   ALAM R., HACHIYA N., SAKAGUCHI M., SHUN-ICHIRO K., IWANAGA S.,
KITAJIMA M., MIHARA K., OMURA T.;sv   "cDNA cloning and characterization of mitochondrial import
stimulation factor (MSF) purified from rat liver cytosol.";t   95122474i   s+   GAO L., GU X.B., YU D.S., YU R.K., ZENG G.;sS   "Association of a 14-3-3 protein with CMP-NeuAc:GM1 alpha 2,3-
sialyltransferase.";t   96280718i   s*   MCCONNELL J.E., ARMSTRONG J.F., BARD J.B.;s‰   "The mouse 14-3-3 epsilon isoform, a kinase regulator whose
expression pattern is modulated in mesenchyme and neuronal
differentiation.";t   95269876i	   sF   TAKIHARA Y., IRIE K., NOMURA M., MOTALEB M., MATSUMOTO K.,
SHIMADA K.;i
   sQ   JONES J.M., NIIKURA T., PINKE R.M., GUO W., MOLDAY L., LEYKAM J.,
MCCONNELL D.G.;sB   "Expression of 14-3-3 proteins in bovine retinal photoreceptors.";i   sA   TOKER A., SELLERS L.A., AMESS B., PATEL Y., HARRIS A., AITKEN A.;sƒ   "Multiple isoforms of a protein kinase C inhibitor (KCIP-1/14-3-3)
from sheep brain. Amino acid sequence of phosphorylated forms.";t   92283271i   s.   TOKER A., ELLIS C.A., SELLERS L.A., AITKEN A.;s€   "Protein kinase C inhibitor proteins. Purification from sheep brain
and sequence similarity to lipocortins and 14-3-3 protein.";t   90345949N(   iÿ   iöq  R[   (   R\   i   i   s   ACETYLATION.R   (   R]   iI   iI   s   K -> T (IN REF. 8).R   (   R]   ix   ix   s   F -> S (IN REF. 8).R   (   R]   i{   i{   s   K -> Y (IN REF. 8).R   (   R]   i   i   s   H -> Y (IN REF. 13).R   (   R   R^   (   R   R_   (   R   R`   (   R   Ra   (   R   Rb   (   R   Rc   (   R   Rd   (   R   Re   (   R   Rf   (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp003¸   sÜ    

1             
&
c         C   s!  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d d d	 g ƒ |  j
 | j d
 d d d d g ƒ |  j
 | j d@ ƒ |  j
 t | j ƒ d ƒ |  j
 | j d dA ƒ |  j
 | j d dB ƒ |  j
 | j d dC ƒ |  j
 | j d dD ƒ |  j
 | j d dE ƒ |  j
 | j d  dF ƒ |  j
 | j d# dG ƒ |  j
 | j d& dH ƒ |  j
 | j d) dI ƒ |  j
 | j d, dJ ƒ |  j
 | j d/ dK ƒ |  j
 | j d2 dL ƒ |  j
 t | j ƒ d ƒ |  j
 | j d j d4 ƒ |  j
 | j d j d5 ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d dM ƒ |  j
 | j d j d8 ƒ |  j
 | j d j d9 ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d dN ƒ |  j
 | j d j d; ƒ |  j
 | j d j d< ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d dO ƒ |  j
 | j d j d> ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d? S(P   s   Parsing SwissProt file sp004t   sp004R   R   t   P23082t
   NDOA_PSEPUs8   NAPHTHALENE 1,2-DIOXYGENASE SYSTEM FERREDOXIN COMPONENT.sV   Seq('TVKWIEAVALSDILEGDVLGVTVEGKELALYEVEGEIYATDNLCTHGSARMSDG...DLS', ProteinAlphabet())t   Q52123t   O07829t   Bacteriat   Proteobacterias   gamma subdivisions   Pseudomonas groupt   Pseudomonasig   i3,  t   9F91B3C8i   i    t   INIT_METR   i   t   METALi,   s!   IRON-SULFUR (2FE-2S) (POTENTIAL).i   i.   i   i?   i   iB   i   t   VARIANTs   V -> E (IN STRAIN G7).i   i   s   L -> P (IN STRAIN G7).i   i0   s   S -> A (IN STRAIN G7).i   iL   s   K -> R (IN STRAIN G7).i	   iT   s   Q -> E (IN STRAIN G7).i
   iZ   s   P -> A (IN STRAIN G7).i   s   S -> GEF (IN STRAIN G7).s4   KURKELA S., LEHVAESLAIHO H., PALVA E.T., TEERI T.H.;s…   "Cloning, nucleotide sequence and characterization of genes encoding
naphthalene dioxygenase of Pseudomonas putida strain NCIB9816.";R   t   89211973s€   SIMON M.J., OSSLUND T.D., SAUNDERS R., ENSLEY B.D., SUGGS S.,
HARCOURT A.A., SUEN W.-C., CRUDEN D.L., GIBSON D.T., ZYLSTRA G.J.;sh   "Sequences of genes encoding naphthalene dioxygenase in Pseudomonas
putida strains G7 and NCIB 9816-4.";t   93252277s2   DENOME S.A., STANLEY D.C., OLSON E.S., YOUNG K.D.;sŠ   "Metabolism of dibenzothiophene and naphthalene in Pseudomonas
strains: complete DNA sequence of an upper naphthalene catabolic
pathway.";t   94042852s
   HAMANN C.;N(   ig   i3,  Rp   (   Rq   i    i    R   R   (   Rr   i,   i,   s!   IRON-SULFUR (2FE-2S) (POTENTIAL).R   (   Rr   i.   i.   s!   IRON-SULFUR (2FE-2S) (POTENTIAL).R   (   Rr   i?   i?   s!   IRON-SULFUR (2FE-2S) (POTENTIAL).R   (   Rr   iB   iB   s!   IRON-SULFUR (2FE-2S) (POTENTIAL).R   (   Rs   i   i   s   V -> E (IN STRAIN G7).R   (   Rs   i   i   s   L -> P (IN STRAIN G7).R   (   Rs   i0   i0   s   S -> A (IN STRAIN G7).R   (   Rs   iL   iL   s   K -> R (IN STRAIN G7).R   (   Rs   iT   iT   s   Q -> E (IN STRAIN G7).R   (   Rs   iZ   iZ   s   P -> A (IN STRAIN G7).R   (   Rs   ig   ig   s   S -> GEF (IN STRAIN G7).R   (   R   Rt   (   R   Ru   (   R   Rv   (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp004R  s    

"    
&
c         C   sV  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d g ƒ |  j
 | j d d	 d
 d d d d d d d d d g ƒ |  j
 | j d" ƒ |  j
 t | j ƒ d ƒ |  j
 t | j ƒ d ƒ |  j
 | j d j d ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d d# ƒ |  j
 | j d j d ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d d$ ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d! S(%   s   Parsing SwissProt file sp005t   sp005R   R   t   P24973t
   NU3M_BALPHs4   NADH-UBIQUINONE OXIDOREDUCTASE CHAIN 3 (EC 1.6.5.3).sV   Seq('MNLLLTLLTNTTLALLLVFIAFWLPQLNVYAEKTSPYECGFDPMGSARLPFSMK...WAE', ProteinAlphabet())R
   R   R   R   R   R   R   t   Cetartiodactylat   Cetaceat	   Mysticetit   Balaenopteridaet   Balaenopterais   iÞ2  t   ACF02965i    i   s%   ARNASON U., GULLBERG A., WIDEGREN B.;se   "The complete nucleotide sequence of the mitochondrial DNA of the fin
whale, Balaenoptera physalus.";i   R   t   92139449s   ARNASON U., GULLBERG A.;s{   "Comparison between the complete mtDNA sequences of the blue and the
fin whale, two species that can hybridize in nature.";t   94141932N(   is   iÞ2  R€   (   R   R   (   R   R‚   (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp005¼  sf    

7  
&
c      
   C   sØ  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d g ƒ |  j
 | j d d	 d
 d d d d d g ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ |  j
 t | j ƒ d ƒ |  j
 | j d j d ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d d ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d S(   s   Parsing SwissProt file sp006t   sp006R   R   t   P39896t
   TCMO_STRGAsM   TETRACENOMYCIN POLYKETIDE SYNTHESIS 8-O-METHYL TRANSFERASE TCMO
(EC 2.1.1.-).sV   Seq('MTPHTHVRGPGDILQLTMAFYGSRALISAVELDLFTLLAGKPLPLGELCERAGI...KPR', ProteinAlphabet())t   BACTERIAt
   FIRMICUTESt   ACTINOBACTERIAt   ACTINOBACTERIDAEt   ACTINOMYCETALESt   STREPTOMYCINEAEt   STREPTOMYCETACEAEt   STREPTOMYCESiS  i«  t   848B7337i    i   s@   SUMMERS R.G., WENDT-PIENKOWSKI E., MOTAMEDI H., HUTCHINSON C.R.;sâ   "Nucleotide sequence of the tcmII-tcmIV region of the tetracenomycin
C biosynthetic gene cluster of Streptomyces glaucescens and evidence
that the tcmN gene encodes a multifunctional
cyclase-dehydratase-O-methyl transferase.";R   t   92193265N(   iS  i«  R   (   R   R   (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp006  s^    

+ 
&
c         C   s¿  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d g ƒ |  j
 | j d d	 d
 d d d d d d d d g ƒ |  j
 | j d4 ƒ |  j
 t | j ƒ d ƒ |  j
 | j d d5 ƒ |  j
 | j d d6 ƒ |  j
 | j d  d7 ƒ |  j
 | j d# d8 ƒ |  j
 | j d& d9 ƒ |  j
 | j d* d: ƒ |  j
 t | j ƒ d  ƒ |  j
 | j d j d- ƒ |  j
 | j d j d. ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d d; ƒ |  j
 | j d j d1 ƒ |  j
 | j d j d2 ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d3 S(<   s   Parsing SwissProt file sp007t   sp007R   R   t   O95832t
   CLD1_HUMANs>   CLAUDIN-1 (SENESCENCE-ASSOCIATED EPITHELIAL MEMBRANE PROTEIN).sV   Seq('MANAGLQLLGFILAFLGWIGAIVSTALPQWRIYSYAGDNIVTAQAMYEGLWMSC...DYV', ProteinAlphabet())R
   R   R   R   R   R   R   R   R   R   R   iÓ   iØX  t   07269000E6C214F0i   i    R   i   i   s
   POTENTIAL.R   i   iR   if   i   it   iˆ   i   i¤   i¸   i   R]   i>   s   I -> V (IN REF. 2).i   i‡   s   V -> A (IN REF. 2).sL   Swisshelm K.L., Machl A., Planitzer S., Robertson R., Kubbies M.,
Hosier S.;s   "SEMP1, a senescence-associated cDNA isolated from human mammary
epithelial cells, is a member of an epithelial membrane protein
superfamily.";R   t   99132301s   Mitic L.M., Anderson J.M.;s-   "Human claudin-1 isolated from Caco-2 mRNA.";N(   iÓ   iØX  R•   (   R   i   i   s
   POTENTIAL.R   (   R   iR   if   s
   POTENTIAL.R   (   R   it   iˆ   s
   POTENTIAL.R   (   R   i¤   i¸   s
   POTENTIAL.R   (   R]   i>   i>   s   I -> V (IN REF. 2).R   (   R]   i‡   i‡   s   V -> A (IN REF. 2).R   (   R   R–   (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp007`  sr    

4  
&
c         C   sš  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d d d	 d
 d d d d d d d g ƒ |  j
 | j d d d d d d d d d d d g ƒ |  j
 | j dDƒ |  j
 t | j ƒ d  ƒ |  j
 | j d! dEƒ |  j
 | j d# dFƒ |  j
 | j d) dGƒ |  j
 | j d- dHƒ |  j
 | j d1 dIƒ |  j
 | j d5 dJƒ |  j
 | j d9 dKƒ |  j
 | j d= dLƒ |  j
 | j d@ dMƒ |  j
 | j dC dNƒ |  j
 | j dG dOƒ |  j
 | j dJ dPƒ |  j
 | j dN dQƒ |  j
 | j dQ dRƒ |  j
 | j dU dSƒ |  j
 | j dX dTƒ |  j
 | j d[ dUƒ |  j
 | j d] dVƒ |  j
 | j da dWƒ |  j
 | j dd dXƒ |  j
 | j dg dYƒ |  j
 | j di dZƒ |  j
 | j dk d[ƒ |  j
 | j dn d\ƒ |  j
 | j d$ d]ƒ |  j
 | j d' d^ƒ |  j
 | j du d_ƒ |  j
 | j dL d`ƒ |  j
 | j dz daƒ |  j
 | j d} dbƒ |  j
 | j d dcƒ |  j
 | j d‚ ddƒ |  j
 | j d… deƒ |  j
 | j dˆ dfƒ |  j
 | j dŠ dgƒ |  j
 | j d dhƒ |  j
 | j dM diƒ |  j
 | j d‘ djƒ |  j
 | j d“ dkƒ |  j
 | j d” dlƒ |  j
 | j d— dmƒ |  j
 | j dš dnƒ |  j
 | j d doƒ |  j
 | j d  dpƒ |  j
 | j d£ dqƒ |  j
 | j dO drƒ |  j
 | j d¨ dsƒ |  j
 | j d« dtƒ |  j
 | j d® duƒ |  j
 | j d± dvƒ |  j
 | j d´ dwƒ |  j
 | j d· dxƒ |  j
 | j dP dyƒ |  j
 | j dS dzƒ |  j
 | j dT d{ƒ |  j
 | j dV d|ƒ |  j
 | j dÃ d}ƒ |  j
 | j dÇ d~ƒ |  j
 | j dË dƒ |  j
 | j dÐ d€ƒ |  j
 | j dÔ dƒ |  j
 | j dW d‚ƒ |  j
 | j dY dƒƒ |  j
 | j dZ d„ƒ |  j
 | j dà d…ƒ |  j
 | j dä d†ƒ |  j
 | j dç d‡ƒ |  j
 | j dÄ dˆƒ |  j
 | j dí d‰ƒ |  j
 | j dð dŠƒ |  j
 | j d\ d‹ƒ |  j
 t | j ƒ dL ƒ |  j
 | j d! j dö ƒ |  j
 | j d! j d÷ ƒ |  j
 t | j d! j ƒ d# ƒ |  j
 | j d! j d! dŒƒ |  j
 | j d# j dú ƒ |  j
 | j d# j dû ƒ |  j
 t | j d# j ƒ d# ƒ |  j
 | j d# j d! dƒ |  j
 | j d) j dý ƒ |  j
 | j d) j dþ ƒ |  j
 t | j d) j ƒ d# ƒ |  j
 | j d) j d! dŽƒ |  j
 | j d- j d ƒ |  j
 | j d- j dƒ |  j
 t | j d- j ƒ d# ƒ |  j
 | j d- j d! dƒ |  j
 | j d1 j dƒ |  j
 | j d1 j dƒ |  j
 t | j d1 j ƒ d# ƒ |  j
 | j d1 j d! dƒ |  j
 | j d5 j dƒ |  j
 | j d5 j dƒ |  j
 t | j d5 j ƒ d# ƒ |  j
 | j d5 j d! d‘ƒ |  j
 | j d9 j d	ƒ |  j
 | j d9 j d
ƒ |  j
 t | j d9 j ƒ d# ƒ |  j
 | j d9 j d! d’ƒ |  j
 | j d= j dƒ |  j
 | j d= j dƒ |  j
 t | j d= j ƒ d# ƒ |  j
 | j d= j d! d“ƒ |  j
 | j d@ j dƒ |  j
 | j d@ j dƒ |  j
 t | j d@ j ƒ d# ƒ |  j
 | j d@ j d! d”ƒ |  j
 | j dC j dƒ |  j
 | j dC j d% ƒ |  j
 t | j dC j ƒ d! ƒ |  j
 | j dG j dƒ |  j
 | j dG j dƒ |  j
 t | j dG j ƒ d# ƒ |  j
 | j dG j d! d•ƒ |  j
 | j dJ j dƒ |  j
 | j dJ j dƒ |  j
 t | j dJ j ƒ d# ƒ |  j
 | j dJ j d! d–ƒ |  j
 | j dN j dƒ |  j
 | j dN j dƒ |  j
 t | j dN j ƒ d# ƒ |  j
 | j dN j d! d—ƒ |  j
 | j dQ j dƒ |  j
 | j dQ j dƒ |  j
 t | j dQ j ƒ d# ƒ |  j
 | j dQ j d! d˜ƒ |  j
 | j dU j dƒ |  j
 | j dU j d ƒ |  j
 t | j dU j ƒ d# ƒ |  j
 | j dU j d! d™ƒ |  j
 | j dX j d"ƒ |  j
 | j dX j d#ƒ |  j
 t | j dX j ƒ d# ƒ |  j
 | j dX j d! dšƒ |  j
 | j d[ j d%ƒ |  j
 | j d[ j d&ƒ |  j
 t | j d[ j ƒ d# ƒ |  j
 | j d[ j d! d›ƒ |  j
 | j d] j d(ƒ |  j
 | j d] j d)ƒ |  j
 t | j d] j ƒ d# ƒ |  j
 | j d] j d! dœƒ |  j
 | j da j dƒ |  j
 | j da j d+ƒ |  j
 t | j da j ƒ d# ƒ |  j
 | j da j d! dƒ |  j
 | j dd j d-ƒ |  j
 | j dd j d.ƒ |  j
 t | j dd j ƒ d# ƒ |  j
 | j dd j d! džƒ |  j
 | j dg j d0ƒ |  j
 | j dg j d1ƒ |  j
 t | j dg j ƒ d# ƒ |  j
 | j dg j d! dŸƒ |  j
 | j di j d3ƒ |  j
 | j di j d4ƒ |  j
 t | j di j ƒ d# ƒ |  j
 | j di j d! d ƒ |  j
 | j dk j d6ƒ |  j
 | j dk j d7ƒ |  j
 t | j dk j ƒ d! ƒ |  j
 | j dn j d8ƒ |  j
 | j dn j d9ƒ |  j
 t | j dn j ƒ d# ƒ |  j
 | j dn j d! d¡ƒ |  j
 | j d$ j d;ƒ |  j
 | j d$ j d<ƒ |  j
 t | j d$ j ƒ d! ƒ |  j
 | j d' j d=ƒ |  j
 | j d' j d>ƒ |  j
 t | j d' j ƒ d# ƒ |  j
 | j d' j d! d¢ƒ |  j
 | j du j d@ƒ |  j
 | j du j dAƒ |  j
 t | j du j ƒ d# ƒ |  j
 | j du j d! d£ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d# ƒ |  j t | d! t	 ƒ ƒ |  j
 | d! j j ƒ  | j j ƒ  ƒ |  j
 | d! j | j ƒ |  j
 | d! j | j ƒ |  j
 | d! j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d# ƒ |  j t | d! t j ƒ ƒ |  j
 | d! j | j ƒ |  j
 | d! j | j ƒ |  j
 | d! j | j ƒ |  j
 | d! j | j ƒ dCS(¤  s   Parsing SwissProt file sp008t   sp008R   R   t   P01892t
   1A02_HUMANsB   HLA CLASS I HISTOCOMPATIBILITY ANTIGEN, A-2 ALPHA CHAIN PRECURSOR.sV   Seq('MAVMAPRTLVLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDD...CKV', ProteinAlphabet())t   P06338t   P30514t   P30444t   P30445t   P30446t   Q29680t   Q29899t   Q95352t   Q29837t   Q95380R
   R   R   R   R   R   R   R   R   R   R   im  iÚŸ  t   B54A97B24B337C08iG   i    t   SIGNALi   i   R   t   CHAINi   s8   HLA CLASS I HISTOCOMPATIBILITY ANTIGEN, A-2 ALPHA CHAIN.i   RH   ir   s   EXTRACELLULAR ALPHA-1.i   is   iÎ   s   EXTRACELLULAR ALPHA-2.i   iÏ   i*  s   EXTRACELLULAR ALPHA-3.i   i+  i4  s   CONNECTING PEPTIDE.i   R   i5  iL  i   iM  s   CYTOPLASMIC TAIL.i   t   CARBOHYDin   i	   t   DISULFIDi}   i¼   i
   iã   i  i   t   STRANDi   i$   i   i-   i4   i   t   TURNi5   i6   i   i7   i=   i   i>   i?   i   iF   i   t   HELIXiJ   iL   i   iM   iN   i   iQ   il   i   im   i   iq   i   iv   i   i   i   i‚   i…   iŽ   i   i   i   i‘   i–   i˜   i™   i   i   iŸ   i   i£   i   i¤   i­   i   i®   i¯   i    i°   i¹   i!   iº   i"   i»   iÆ   i#   iÇ   iÈ   iË   i%   iÌ   i&   i'   iÒ   iÛ   i(   iÜ   iÝ   i)   iÞ   ié   i*   iî   ió   i+   iô   iõ   i,   iö   i÷   iù   iû   i.   iý   iþ   i/   i  i  i0   i	  i  i1   i  i  i2   i  i  i3   i  i  i   i!  i&  i)  Rs   sH   F -> Y (IN A*0205, A*0206, A*0208, A*0210 AND A*0221). /FTId=VAR_004334.t
   VAR_004334s%   D -> N (IN A*0221). /FTId=VAR_004335.t
   VAR_004335i8   iC   s9   Q -> R (IN A*0202, A*0205, AND A*0208). /FTId=VAR_004336.t
   VAR_004336i9   iZ   s0   K -> N (IN A*0208 AND A*0220). /FTId=VAR_004337.t
   VAR_004337i:   ia   ib   s'   TH -> ID (IN A*0211). /FTId=VAR_004338.t
   VAR_004338i;   iw   s@   V -> L (IN A*0202, A*0205, A*0208 AND A*0217). /FTId=VAR_004339.t
   VAR_004339i<   iy   s0   R -> M (IN A*0204 AND A*0217). /FTId=VAR_004340.t
   VAR_004340i{   s0   Y -> C (IN A*0207 AND A*0218). /FTId=VAR_004341.t
   VAR_004341s0   Y -> F (IN A*0210 AND A*0217). /FTId=VAR_004342.t
   VAR_004342iƒ   s%   W -> G (IN A*0210). /FTId=VAR_004343.t
   VAR_004343i@   i¢   s%   M -> K (IN A*0218). /FTId=VAR_004344.t
   VAR_004344iA   s%   A -> T (IN A*0203). /FTId=VAR_004345.t
   VAR_004345iB   s0   V -> E (IN A*0203 AND A*0213). /FTId=VAR_004346.t
   VAR_004346i´   s@   L -> W (IN A*0202, A*0203, A*0205 AND A*0208). /FTId=VAR_004347.t
   VAR_004347iD   s0   L -> Q (IN A*0212 AND A*0213). /FTId=VAR_004348.t
   VAR_004348iE   s%   T -> E (IN A*0216). /FTId=VAR_004349.t
   VAR_004349i  s%   A -> E (IN A*0209). /FTId=VAR_004350.t
   VAR_004350s   Koller B.H., Orr H.T.;sj   "Cloning and complete sequence of an HLA-A2 gene: analysis of two
HLA-A alleles at the nucleotide level.";R   t   85132727so   Cianetti L., Testa U., Scotto L., la Valle R., Simeone A.,
Boccoli G., Giannella G., Peschle C., Boncinelli E.;s^   "Three new class I HLA alleles: structure of mRNAs and alternative
mechanisms of processing.";t   89122144s/   Ennis P.D., Zemmour J., Salter R.D., Parham P.;s‚   "Rapid cloning of HLA-A,B cDNA by using the polymerase chain
reaction: frequency and nature of errors produced in amplification.";t   90207291sl   Belich M.P., Madrigal J.A., Hildebrand W.H., Zemmour J.,
Williams R.C., Luz R., Petzl-Erler M.L., Parham P.;s<   "Unusual HLA-B alleles in two tribes of Brazilian Indians.";t   92269955s   Krangel M.S.;sh   "Unusual RNA splicing generates a secreted form of HLA-A2 in a
mutagenized B lymphoblastoid cell line.";t   85230571sH   Orr H.T., Lopez de Castro J.A., Parham P., Ploegh H.L.,
Strominger J.L.;sŒ   "Comparison of amino acid sequences of two human histocompatibility
antigens, HLA-A2 and HLA-B7: location of putative alloantigenic
sites.";t   80056745s=   Lopez de Castro J.A., Strominger J.L., Strong D.M., Orr H.T.;s‘   "Structure of crossreactive human histocompatibility antigens HLA-A28
and HLA-A2: possible implications for the generation of HLA
polymorphism.";t   82247941sP   Mattson D.H., Handy D.E., Bradley D.A., Coligan J.E., Cowan E.P.,
Biddison W.E.;sU   "DNA sequences of the genes that encode the CTL-defined HLA-A2
variants M7 and DK1.";t   87306734s6   Holmes N., Ennis P., Wan A.M., Denney D.W., Parham P.;st   "Multiple genetic mechanisms have contributed to the generation of
the HLA-A2/A28 family of class I MHC molecules.";t   87252273s   Domena J.D.;s#   Castano A.R., Lopez de Castro J.A.;s{   "Structure of the HLA-A*0204 antigen, found in South American
Indians. Spatial clustering of HLA-A2 subtype polymorphism.";t   92039809sž   Watkins D.I., McAdam S.N., Liu X., Stang C.R., Milford E.L.,
Levine C.G., Garber T.L., Dogon A.L., Lord C.I., Ghim S.H.,
Troup G.M., Hughes A.L., Letvin N.L.;sw   "New recombinant HLA-B alleles in a tribe of South American
Amerindians indicate rapid evolution of MHC class I loci.";t   92269956s/   Parham P., Lawlor D.A., Lomen C.E., Ennis P.D.;s6   "Diversity and diversification of HLA-A,B,C alleles.";t   89235215s\   Ezquerra A., Domenech N., van der Poel J., Strominger J.L., Vega M.A.,
Lopez de Castro J.A.;s]   "Molecular analysis of an HLA-A2 functional variant CLA defined by
cytolytic T lymphocytes.";t   86305811s;   Domenech N., Ezquerra A., Castano R., Lopez de Castro J.A.;s{   "Structural analysis of HLA-A2.4 functional variant KNE. Implications
for the mapping of HLA-A2-specific T-cell epitopes.";t   88113844s9   Domenech N., Castano R., Goulmy E., Lopez de Castro J.A.;s€   "Molecular analysis of HLA-A2.4 functional variant KLO: close
structural and evolutionary relatedness to the HLA-A2.2 subtype.";t   88314183s;   Castano R., Ezquerra A., Domenech N., Lopez de Castro J.A.;sS   "An HLA-A2 population variant with structural polymorphism in the
alpha 3 region.";t   88186100s"   Epstein H., Kennedy L., Holmes N.;sY   "An Oriental HLA-A2 subtype is closely related to a subset of
Caucasoid HLA-A2 alleles.";t   89122133sx   "Structure of the HLA-A*0211 (A2.5) subtype: further evidence for
selection-driven diversification of HLA-A2 antigens.";t   92218010s2   Barber D.F., Fernandez J.M., Lopez de Castro J.A.;s9   "Primary structure of a new HLA-A2 subtype: HLA-A*0213.";t   94222455s@   Barouch D., Krausa P., Bodmer J., Browning M.J., McMichael A.J.;s8   "Identification of a novel HLA-A2 subtype, HLA-A*0216.";t   95278976sK   Selvakumar A., Granja C.B., Salazar M., Alosco S.M., Yunis E.J.,
Dupont B.;s\   "A novel subtype of A2 (A*0217) isolated from the South American
Indian B-cell line AMALA.";t   95381236sa   Kashiwase K., Tokunaga K., Ishikawa Y., Oohashi H., Hashimoto M.,
Akaza T., Tadokoro K., Juji T.;s+   "A new A2 sequence HLA-A2K from Japanese.";sL   Fleischhauer K., Zino E., Mazzi B., Severini G.M., Benazzi E.,
Bordignon C.;s]   "HLA-A*02 subtype distribution in Caucasians from northern Italy:
identification of A*0220.";t   97161038s   Szmania S., Baxter-Lowe L.A.;s.   "Nucleotide sequence of a novel HLA-A2 gene.";sR   Bjorkman P.J., Saper M.A., Samraoui B., Bennett W.S.,
Strominger J.L., Wiley D.C.;sE   "Structure of the human class I histocompatibility antigen, HLA-A2.";t   88014204s&   Saper M.A., Bjorkman P.J., Wiley D.C.;sX   "Refined structure of the human histocompatibility antigen HLA-A2 at
2.6-A resolution.";t   91245570N(   im  iÚŸ  R¥   (   R¦   i   i   R   R   (   R§   i   im  s8   HLA CLASS I HISTOCOMPATIBILITY ANTIGEN, A-2 ALPHA CHAIN.R   (   RH   i   ir   s   EXTRACELLULAR ALPHA-1.R   (   RH   is   iÎ   s   EXTRACELLULAR ALPHA-2.R   (   RH   iÏ   i*  s   EXTRACELLULAR ALPHA-3.R   (   RH   i+  i4  s   CONNECTING PEPTIDE.R   (   R   i5  iL  R   R   (   RH   iM  im  s   CYTOPLASMIC TAIL.R   (   R¨   in   in   R   R   (   R©   i}   i¼   R   R   (   R©   iã   i  R   R   (   Rª   i   i$   R   R   (   Rª   i-   i4   R   R   (   R«   i5   i6   R   R   (   Rª   i7   i=   R   R   (   R«   i>   i?   R   R   (   Rª   iF   iG   R   R   (   R¬   iJ   iL   R   R   (   R«   iM   iN   R   R   (   R¬   iQ   il   R   R   (   R«   im   in   R   R   (   R«   iq   ir   R   R   (   Rª   iv   i   R   R   (   R«   i   i‚   R   R   (   Rª   i…   iŽ   R   R   (   R«   i   i   R   R   (   Rª   i‘   i–   R   R   (   R«   i˜   i™   R   R   (   Rª   i   iŸ   R   R   (   R«   i£   i£   R   R   (   R¬   i¤   i­   R   R   (   R«   i®   i¯   R   R   (   R¬   i°   i¹   R   R   (   R«   iº   iº   R   R   (   R¬   i»   iÆ   R   R   (   R«   iÇ   iÇ   R   R   (   R¬   iÈ   iË   R   R   (   R«   iÌ   iÌ   R   R   (   Rª   iÏ   iÏ   R   R   (   Rª   iÒ   iÛ   R   R   (   R«   iÜ   iÝ   R   R   (   Rª   iÞ   ié   R   R   (   Rª   iî   ió   R   R   (   R«   iô   iõ   R   R   (   Rª   iö   i÷   R   R   (   R¬   iù   iû   R   R   (   Rª   iý   iþ   R   R   (   Rª   i  i  R   R   (   Rª   i	  i  R   R   (   R«   i  i  R   R   (   R¬   i  i  R   R   (   Rª   i  i  R   R   (   R«   i   i!  R   R   (   Rª   i&  i)  R   R   (   Rs   i!   i!   sH   F -> Y (IN A*0205, A*0206, A*0208, A*0210 AND A*0221). /FTId=VAR_004334.R­   (   Rs   i6   i6   s%   D -> N (IN A*0221). /FTId=VAR_004335.R®   (   Rs   iC   iC   s9   Q -> R (IN A*0202, A*0205, AND A*0208). /FTId=VAR_004336.R¯   (   Rs   iZ   iZ   s0   K -> N (IN A*0208 AND A*0220). /FTId=VAR_004337.R°   (   Rs   ia   ib   s'   TH -> ID (IN A*0211). /FTId=VAR_004338.R±   (   Rs   iw   iw   s@   V -> L (IN A*0202, A*0205, A*0208 AND A*0217). /FTId=VAR_004339.R²   (   Rs   iy   iy   s0   R -> M (IN A*0204 AND A*0217). /FTId=VAR_004340.R³   (   Rs   i{   i{   s0   Y -> C (IN A*0207 AND A*0218). /FTId=VAR_004341.R´   (   Rs   i{   i{   s0   Y -> F (IN A*0210 AND A*0217). /FTId=VAR_004342.Rµ   (   Rs   iƒ   iƒ   s%   W -> G (IN A*0210). /FTId=VAR_004343.R¶   (   Rs   i¢   i¢   s%   M -> K (IN A*0218). /FTId=VAR_004344.R·   (   Rs   i­   i­   s%   A -> T (IN A*0203). /FTId=VAR_004345.R¸   (   Rs   i°   i°   s0   V -> E (IN A*0203 AND A*0213). /FTId=VAR_004346.R¹   (   Rs   i´   i´   s@   L -> W (IN A*0202, A*0203, A*0205 AND A*0208). /FTId=VAR_004347.Rº   (   Rs   i´   i´   s0   L -> Q (IN A*0212 AND A*0213). /FTId=VAR_004348.R»   (   Rs   i»   i»   s%   T -> E (IN A*0216). /FTId=VAR_004349.R¼   (   Rs   i  i  s%   A -> E (IN A*0209). /FTId=VAR_004350.R½   (   R   R¾   (   R   R¿   (   R   RÀ   (   R   RÁ   (   R   RÂ   (   R   RÃ   (   R   RÄ   (   R   RÅ   (   R   RÆ   (   R   RÇ   (   R   RÈ   (   R   RÉ   (   R   RÊ   (   R   RË   (   R   RÌ   (   R   RÍ   (   R   RÎ   (   R   RÏ   (   R   RÐ   (   R   RÑ   (   R   RÒ   (   R   RÓ   (   R   RÔ   (   R   RÕ   (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp008º  sú   

44                           
&
c         C   s×  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d g ƒ |  j
 | j d d	 d
 d d d d d d d g
 ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ |  j
 | j d d ƒ |  j
 t | j ƒ d ƒ |  j
 | j d j d ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d S(    s   Parsing SwissProt file sp009t   sp009R   R   t   O23729t
   CHS3_BROFIsC   CHALCONE SYNTHASE 3 (EC 2.3.1.74) (NARINGENIN-CHALCONE SYNTHASE 3).sV   Seq('MAPAMEEIRQAQRAEGPAAVLAIGTSTPPNALYQADYPDYYFRITKSEHLTELK...GAE', ProteinAlphabet())R
   t   Viridiplantaet   Embryophytat   Tracheophytat   Spermatophytat   Magnoliophytat
   Liliopsidat   Asparagalest   Orchidaceaet
   BromheadiaiŠ  i½§  t   2F8D14AF4870BBB2i   i    t   ACT_SITEi¥   s   BY SIMILARITY.R   s(   Liew C.F., Lim S.H., Loh C.S., Goh C.J.;sb   "Molecular cloning and sequence analysis of chalcone synthase cDNAs of
Bromheadia finlaysoniana.";N(   iŠ  i½§  Rã   (   Rä   i¥   i¥   s   BY SIMILARITY.R   (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp009ô  s\    

1 
&
c         C   s#  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d d d	 d
 d d d g ƒ |  j
 | j d d d d d d d d d d d d g ƒ |  j
 | j d€ ƒ |  j
 t | j ƒ d ƒ |  j
 | j d d ƒ |  j
 | j d  d‚ ƒ |  j
 | j d( dƒ ƒ |  j
 | j d, d„ ƒ |  j
 | j d0 d… ƒ |  j
 | j d3 d† ƒ |  j
 | j d7 d‡ ƒ |  j
 | j d: dˆ ƒ |  j
 | j d> d‰ ƒ |  j
 | j dA dŠ ƒ |  j
 | j dE d‹ ƒ |  j
 | j dH dŒ ƒ |  j
 | j dL d ƒ |  j
 | j dO dŽ ƒ |  j
 | j dS d ƒ |  j
 | j dU d ƒ |  j
 | j dX d‘ ƒ |  j
 | j d\ d’ ƒ |  j
 | j d` d“ ƒ |  j
 | j d! d” ƒ |  j
 | j d% d• ƒ |  j
 | j dg d– ƒ |  j
 | j dj d— ƒ |  j
 t | j ƒ d7 ƒ |  j
 | j d j dl ƒ |  j
 | j d j dm ƒ |  j
 t | j d j ƒ d  ƒ |  j
 | j d j d d˜ ƒ |  j
 | j d  j dp ƒ |  j
 | j d  j dq ƒ |  j
 t | j d  j ƒ d ƒ |  j
 | j d( j dr ƒ |  j
 | j d( j ds ƒ |  j
 t | j d( j ƒ d  ƒ |  j
 | j d( j d d™ ƒ |  j
 | j d, j du ƒ |  j
 | j d, j dv ƒ |  j
 t | j d, j ƒ d ƒ |  j
 | j d0 j dw ƒ |  j
 | j d0 j dx ƒ |  j
 t | j d0 j ƒ d  ƒ |  j
 | j d0 j d dš ƒ |  j
 | j d3 j dz ƒ |  j
 | j d3 j d{ ƒ |  j
 t | j d3 j ƒ d( ƒ |  j
 | j d3 j d d› ƒ |  j
 | j d3 j d  dœ ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d  ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d  ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d S(   s   Parsing SwissProt file sp010t   sp010R   R   t   Q13639t	   5H4_HUMANsE   5-HYDROXYTRYPTAMINE 4 RECEPTOR (5-HT-4) (SEROTONIN RECEPTOR) (5-HT4).sV   Seq('MDKLDANVSSEEGFGSVEKVVLLTFLSTVILMAILGNLLVMVAVCWDRQLRKIK...SDT', ProteinAlphabet())t   Q9UBM6t   Q9UQR6t   Q9UE22t   Q9UE23t   Q9UBT4t   Q9NY73R
   R   R   R   R   t   EuteleostomiR   R   R   R   R   R   i„  iñª  t   7FCFEC60E7BDF560i   i    RH   i   i   s   EXTRACELLULAR (POTENTIAL).R   R   i   i(   s   1 (POTENTIAL).i   i)   i:   s   CYTOPLASMIC (POTENTIAL).i   i;   iO   s   2 (POTENTIAL).i   iP   i]   i   i^   it   s   3 (POTENTIAL).i   iu   i‰   i   iŠ   iž   s   4 (POTENTIAL).i   iŸ   iÀ   i	   iÁ   iÕ   s   5 (POTENTIAL).i
   iÖ   i  i   i  i  s   6 (POTENTIAL).i   i  i&  i   i'  i;  s   7 (POTENTIAL).i   i<  i   R¨   s!   N-LINKED (GLCNAC...) (POTENTIAL).i   R©   i¸   s   BY SIMILARITY.i   t   LIPIDiI  s   PALMITATE (BY SIMILARITY).i   R   i©   s,   L -> LERSLNQGLGQDFHA (IN ISOFORM 5- HT4(F)).ig  sM   RDAVECGGQWESQCHPPATSPLVAAQPSDT -> SGCSPVSSFLLLFCNRPVPV (IN ISOFORM 5-HT4(E)).sO   RDAVECGGQWESQCHPPATSPLVAAQPSDT -> SSGTETDRRNFGIRKRRLTKPS (IN ISOFORM 5-HT4(D)).i   ih  s9   DAVECGGQWESQCHPPATSPLVAAQPSDT -> F (IN ISOFORM 5-HT4(C)).i   sU   DAVECGGQWESQCHPPATSPLVAAQPSDT -> YTVLHRGHHQELEKLPIHNDPESLESCF (IN ISOFORM 5- HT4(A)).sD   Blondel O., Gastineau M., Dahmoune Y., Langlois M., Fischmeister R.;s˜   "Cloning, expression, and pharmacology of four human 5-
hydroxytryptamine receptor isoforms produced by alternative splicing
in the carboxyl terminus.";t   PubMedt   9603189sg   Van den Wyngaert I., Gommeren W., Jurzak M., Verhasselt P., Gordon R.,
Leysen J., Luyten W., Bender E.;sH   "Cloning and expression of 5-HT4 receptor species and splice
variants.";sD   Claeysen S., Faye P., Sebben M., Lemaire S., Bockaert J., Dumuis A.;sv   "Cloning and expression of human 5-HT4S receptors. Effect of receptor
density on their coupling to adenylyl cyclase.";t   9351641s;   Claeysen S., Sebben M., Becamel C., Bockaert J., Dumuis A.;s‡   "Novel brain-specific 5-HT4 receptors splice variants show marked
constitutive activity: role of the c-terminal intracellular domain.";sk   Bender E., Pindon A., van Oers I., Zhang Y.B., Gommeren W.,
Verhasselt P., Jurzak M., Leysen J., Luyten W.;sd   "Structure of the human serotonin 5-HT4 receptor gene and cloning of a
novel 5-HT4 splice variant.";t   10646498s0   Ullmer C., Schmuck K., Kalkman H.O., Lubbert H.;s;   "Expression of serotonin receptor mRNAs in blood vessels.";R   t   95385798t   7656980N(   i„  iñª  Rð   (   RH   i   i   s   EXTRACELLULAR (POTENTIAL).R   (   R   i   i(   s   1 (POTENTIAL).R   (   RH   i)   i:   s   CYTOPLASMIC (POTENTIAL).R   (   R   i;   iO   s   2 (POTENTIAL).R   (   RH   iP   i]   s   EXTRACELLULAR (POTENTIAL).R   (   R   i^   it   s   3 (POTENTIAL).R   (   RH   iu   i‰   s   CYTOPLASMIC (POTENTIAL).R   (   R   iŠ   iž   s   4 (POTENTIAL).R   (   RH   iŸ   iÀ   s   EXTRACELLULAR (POTENTIAL).R   (   R   iÁ   iÕ   s   5 (POTENTIAL).R   (   RH   iÖ   i  s   CYTOPLASMIC (POTENTIAL).R   (   R   i  i  s   6 (POTENTIAL).R   (   RH   i  i&  s   EXTRACELLULAR (POTENTIAL).R   (   R   i'  i;  s   7 (POTENTIAL).R   (   RH   i<  i„  s   CYTOPLASMIC (POTENTIAL).R   (   R¨   i   i   s!   N-LINKED (GLCNAC...) (POTENTIAL).R   (   R©   i]   i¸   s   BY SIMILARITY.R   (   Rñ   iI  iI  s   PALMITATE (BY SIMILARITY).R   (   R   i©   i©   s,   L -> LERSLNQGLGQDFHA (IN ISOFORM 5- HT4(F)).R   (   R   ig  i„  sM   RDAVECGGQWESQCHPPATSPLVAAQPSDT -> SGCSPVSSFLLLFCNRPVPV (IN ISOFORM 5-HT4(E)).R   (   R   ig  i„  sO   RDAVECGGQWESQCHPPATSPLVAAQPSDT -> SSGTETDRRNFGIRKRRLTKPS (IN ISOFORM 5-HT4(D)).R   (   R   ih  i„  s9   DAVECGGQWESQCHPPATSPLVAAQPSDT -> F (IN ISOFORM 5-HT4(C)).R   (   R   ih  i„  sU   DAVECGGQWESQCHPPATSPLVAAQPSDT -> YTVLHRGHHQELEKLPIHNDPESLESCF (IN ISOFORM 5- HT4(A)).R   (   Rò   Ró   (   Rò   Rô   (   Rò   Rõ   (   R   Rö   (   Rò   R÷   (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp010A  s¾    

(7      
&
c         C   s­  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d d d	 d
 d g ƒ |  j
 | j d d d d d d d d d d d d d g ƒ |  j
 | j dî ƒ |  j
 t | j ƒ d ƒ |  j
 | j d dï ƒ |  j
 | j d dð ƒ |  j
 | j d% dñ ƒ |  j
 | j d) dò ƒ |  j
 | j d. dó ƒ |  j
 | j d2 dô ƒ |  j
 | j d6 dõ ƒ |  j
 | j d9 dö ƒ |  j
 | j d= d÷ ƒ |  j
 | j d@ dø ƒ |  j
 | j dD dù ƒ |  j
 | j dG dú ƒ |  j
 | j dK dû ƒ |  j
 | j dN dü ƒ |  j
 | j dR dý ƒ |  j
 | j dU dþ ƒ |  j
 | j dY dÿ ƒ |  j
 | j d[ d ƒ |  j
 | j d_ dƒ |  j
 | j dc dƒ |  j
 | j dg dƒ |  j
 | j dk dƒ |  j
 | j do dƒ |  j
 | j ds dƒ |  j
 | j dw dƒ |  j
 | j d{ dƒ |  j
 | j d  d	ƒ |  j
 | j d# d
ƒ |  j
 | j d dƒ |  j
 | j dƒ dƒ |  j
 | j d… dƒ |  j
 | j d‡ dƒ |  j
 | j dŒ dƒ |  j
 | j d dƒ |  j
 | j d“ dƒ |  j
 | j d— dƒ |  j
 | j dš dƒ |  j
 | j d dƒ |  j
 | j d¡ dƒ |  j
 | j d£ dƒ |  j
 | j d¥ dƒ |  j
 | j d© dƒ |  j
 | j d¬ dƒ |  j
 | j d¯ dƒ |  j
 | j d³ dƒ |  j
 | j d¶ dƒ |  j
 | j d¹ dƒ |  j
 | j d¼ dƒ |  j
 | j dÀ dƒ |  j
 | j dÃ d ƒ |  j
 | j dÆ d!ƒ |  j
 | j dÈ d"ƒ |  j
 t | j ƒ d= ƒ |  j
 | j d j dË ƒ |  j
 | j d j dÌ ƒ |  j
 t | j d j ƒ d% ƒ |  j
 | j d j d d#ƒ |  j
 | j d j d d$ƒ |  j
 | j d j dÑ ƒ |  j
 | j d j dÒ ƒ |  j
 t | j d j ƒ d% ƒ |  j
 | j d j d d%ƒ |  j
 | j d j d d&ƒ |  j
 | j d% j dÕ ƒ |  j
 | j d% j dÖ ƒ |  j
 t | j d% j ƒ d% ƒ |  j
 | j d% j d d'ƒ |  j
 | j d% j d d(ƒ |  j
 | j d) j dÙ ƒ |  j
 | j d) j dÚ ƒ |  j
 t | j d) j ƒ d% ƒ |  j
 | j d) j d d)ƒ |  j
 | j d) j d d*ƒ |  j
 | j d. j dÝ ƒ |  j
 | j d. j dÞ ƒ |  j
 t | j d. j ƒ d% ƒ |  j
 | j d. j d d+ƒ |  j
 | j d. j d d,ƒ |  j
 | j d2 j dá ƒ |  j
 | j d2 j dâ ƒ |  j
 t | j d2 j ƒ d% ƒ |  j
 | j d2 j d d-ƒ |  j
 | j d2 j d d.ƒ |  j
 | j d6 j då ƒ |  j
 | j d6 j dæ ƒ |  j
 t | j d6 j ƒ d% ƒ |  j
 | j d6 j d d/ƒ |  j
 | j d6 j d d0ƒ |  j
 | j d9 j dé ƒ |  j
 | j d9 j dê ƒ |  j
 t | j d9 j ƒ d% ƒ |  j
 | j d9 j d d1ƒ |  j
 | j d9 j d d2ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ dí S(3  s   Parsing SwissProt file sp011t   sp011R   R   t   P16235t   LSHR_RATsh   LUTROPIN-CHORIOGONADOTROPIC HORMONE RECEPTOR PRECURSOR (LH/CG-R)
(LSH-R) (LUTEINIZING HORMONE RECEPTOR).sV   Seq('MGRRVPALRQLLVLAVLLLKPSQLQSRELSGSRCPEPCDCAPDGALRCPGPRAG...LTH', ProteinAlphabet())t   P70646t   Q63807t   Q63808t   Q63809R
   R   R   R   R   Rï   R   R   RB   RC   RD   RE   t   Rattusi¼  iÓ0 t   31807E73BAC94F1Fi4   i    R¦   i   i   R   R§   i   s-   LUTROPIN-CHORIOGONADOTROPIC HORMONE RECEPTOR.i   RH   ij  s   EXTRACELLULAR (POTENTIAL).i   R   ik  i†  s   1 (POTENTIAL).i   i‡  i  s   CYTOPLASMIC (POTENTIAL).i   i  i¦  s   2 (POTENTIAL).i   i§  i»  i   i¼  iÒ  s   3 (POTENTIAL).i   iÓ  iæ  i	   iç  iý  s   4 (POTENTIAL).i
   iþ  i  i   i  i'  s   5 (POTENTIAL).i   i(  i>  i   i?  iV  s   6 (POTENTIAL).i   iW  ia  i   ib  iw  s   7 (POTENTIAL).i   ix  i   t   REPEATiK   s   LRR 1.i   i~   i–   s   LRR 2.i   i˜   i¯   s   LRR 3.i   i°   iÈ   s   LRR 4.i   iÊ   ià   s   LRR 5.i   iá   iø   s   LRR 6.i   iú   i  s   LRR 7.i   R©   i  s   BY SIMILARITY.i   R¨   ig   s!   N-LINKED (GLCNAC...) (POTENTIAL).i²   iÇ   i   i'  i   i/  i   i=  i   R   iS   i„   s   MISSING (IN ISOFORM 1950).i    i…   i   s   MISSING (IN ISOFORM 1759).i!   i¸   s   MISSING (IN ISOFORM C2).i"   iè   iû   s?   DISSTKLQALPSHGLESIQT -> PCRATGWSPFRRSSPCLPTH (IN ISOFORM 2075).i#   i%  s5   MISSING (IN ISOFORM E/A2, ISOFORM EB AND ISOFORM B1).i$   iü   s   MISSING (IN ISOFORM 2075).i%   i&  io  s¸   QNFSFSIFENFSKQCESTVRKADNETLYSAIFEENELSGWDYDYGFCSPKTLQCAPEPDAFNPCEDIMGYAFLR -> IFHFPFLKTSPNNAKAQLEKQITRRFIPPSLRRMNSVAGIMIMASVHPRHSNVLQNQMLSTPVKILWAMPSLGS (IN ISOFORM B1 AND ISOFORM B3).i&   s   Q -> P (IN ISOFORM C1).i'   s   MISSING (IN ISOFORM C1).i(   iA  iV  s_   YSAIFEENELSGWDYDYGFCSP -> LHGALPAAHCLRGLPNKRPVL (IN ISOFORM 1834, ISOFORM 1759 AND ISOFORM EB).i)   iW  s8   MISSING (IN ISOFORMS 1834, ISOFORM 1759 AND ISOFORM EB).i*   ip  s'   MISSING (IN ISOFORM B1 AND ISOFORM B3).i+   Rs   iR   s   I -> M (IN ISOFORM 1950).i,   i³   s   E -> G (IN ISOFORM 1759).i-   ié   s   I -> T (IN ISOFORM 1950).i.   i†  s   G -> S (IN ISOFORM 1950).i/   t   MUTAGENi™  s'   D->N: SIGNIFICANT REDUCTION OF BINDING.i0   i´  s(   D->N: NO CHANGE IN BINDING OR CAMP PROD.i1   iÇ  s(   E->Q: NO CHANGE IN BINDING OR CAMP PROD.i2   iF  i3   R]   s   R -> L (IN REF. 7).so   McFarland K.C., Sprengel R., Phillips H.S., Koehler M.,
Rosemblit N., Nikolics K., Segaloff D.L., Seeburg P.H.;sd   "Lutropin-choriogonadotropin receptor: an unusual member of the G
protein-coupled receptor family.";R   t   89332512Rò   t   2502842s=   Aatsinki J.T., Pietila E.M., Lakkakorpi J.T., Rajaniemi H.J.;s‹   "Expression of the LH/CG receptor gene in rat ovarian tissue is
regulated by an extensive alternative splicing of the primary
transcript.";t   92347604t   1353463s"   Koo Y.B., Slaughter R.G., Ji T.H.;s`   "Structure of the luteinizing hormone receptor gene and multiple
exons of the coding sequence.";t   91209270t   2019252s%   Bernard M.P., Myers R.V., Moyle W.R.;sS   "Cloning of rat lutropin (LH) receptor analogs lacking the soybean
lectin domain.";t   91006819t   1976554s3   Segaloff D.L., Sprengel R., Nikolics K., Ascoli M.;s9   "Structure of the lutropin/choriogonadotropin receptor.";t   91126285t   2281186s1   Tsai-Morris C.H., Buczko E., Wang W., Dufau M.L.;s„   "Intronic nature of the rat luteinizing hormone receptor gene defines
a soluble receptor subspecies with hormone binding activity.";t   91060531t   2174034s   Roche P.C., Ryan R.J.;sˆ   "Purification, characterization, and amino-terminal sequence of rat
ovarian receptor for luteinizing hormone/human choriogonadotropin.";t   89174723t   2925659s   Ji I., Ji T.H.;s‰   "Asp383 in the second transmembrane domain of the lutropin receptor
is important for high affinity hormone binding and cAMP production.";t   91332007t   1714448N(   i¼  iÓ0 R  (   R¦   i   i   R   R   (   R§   i   i¼  s-   LUTROPIN-CHORIOGONADOTROPIC HORMONE RECEPTOR.R   (   RH   i   ij  s   EXTRACELLULAR (POTENTIAL).R   (   R   ik  i†  s   1 (POTENTIAL).R   (   RH   i‡  i  s   CYTOPLASMIC (POTENTIAL).R   (   R   i  i¦  s   2 (POTENTIAL).R   (   RH   i§  i»  s   EXTRACELLULAR (POTENTIAL).R   (   R   i¼  iÒ  s   3 (POTENTIAL).R   (   RH   iÓ  iæ  s   CYTOPLASMIC (POTENTIAL).R   (   R   iç  iý  s   4 (POTENTIAL).R   (   RH   iþ  i  s   EXTRACELLULAR (POTENTIAL).R   (   R   i  i'  s   5 (POTENTIAL).R   (   RH   i(  i>  s   CYTOPLASMIC (POTENTIAL).R   (   R   i?  iV  s   6 (POTENTIAL).R   (   RH   iW  ia  s   EXTRACELLULAR (POTENTIAL).R   (   R   ib  iw  s   7 (POTENTIAL).R   (   RH   ix  i¼  s   CYTOPLASMIC (POTENTIAL).R   (   R  i4   iK   s   LRR 1.R   (   R  i~   i–   s   LRR 2.R   (   R  i˜   i¯   s   LRR 3.R   (   R  i°   iÈ   s   LRR 4.R   (   R  iÊ   ià   s   LRR 5.R   (   R  iá   iø   s   LRR 6.R   (   R  iú   i  s   LRR 7.R   (   R©   i»  i  s   BY SIMILARITY.R   (   R¨   ig   ig   s!   N-LINKED (GLCNAC...) (POTENTIAL).R   (   R¨   i²   i²   s!   N-LINKED (GLCNAC...) (POTENTIAL).R   (   R¨   iÇ   iÇ   s!   N-LINKED (GLCNAC...) (POTENTIAL).R   (   R¨   i'  i'  s!   N-LINKED (GLCNAC...) (POTENTIAL).R   (   R¨   i/  i/  s!   N-LINKED (GLCNAC...) (POTENTIAL).R   (   R¨   i=  i=  s!   N-LINKED (GLCNAC...) (POTENTIAL).R   (   R   iS   i„   s   MISSING (IN ISOFORM 1950).R   (   R   i…   i   s   MISSING (IN ISOFORM 1759).R   (   R   i¸   i¼  s   MISSING (IN ISOFORM C2).R   (   R   iè   iû   s?   DISSTKLQALPSHGLESIQT -> PCRATGWSPFRRSSPCLPTH (IN ISOFORM 2075).R   (   R   iè   i%  s5   MISSING (IN ISOFORM E/A2, ISOFORM EB AND ISOFORM B1).R   (   R   iü   i¼  s   MISSING (IN ISOFORM 2075).R   (   R   i&  io  s¸   QNFSFSIFENFSKQCESTVRKADNETLYSAIFEENELSGWDYDYGFCSPKTLQCAPEPDAFNPCEDIMGYAFLR -> IFHFPFLKTSPNNAKAQLEKQITRRFIPPSLRRMNSVAGIMIMASVHPRHSNVLQNQMLSTPVKILWAMPSLGS (IN ISOFORM B1 AND ISOFORM B3).R   (   R   i&  i&  s   Q -> P (IN ISOFORM C1).R   (   R   i'  i¼  s   MISSING (IN ISOFORM C1).R   (   R   iA  iV  s_   YSAIFEENELSGWDYDYGFCSP -> LHGALPAAHCLRGLPNKRPVL (IN ISOFORM 1834, ISOFORM 1759 AND ISOFORM EB).R   (   R   iW  i¼  s8   MISSING (IN ISOFORMS 1834, ISOFORM 1759 AND ISOFORM EB).R   (   R   ip  i¼  s'   MISSING (IN ISOFORM B1 AND ISOFORM B3).R   (   Rs   iR   iR   s   I -> M (IN ISOFORM 1950).R   (   Rs   i³   i³   s   E -> G (IN ISOFORM 1759).R   (   Rs   ié   ié   s   I -> T (IN ISOFORM 1950).R   (   Rs   i†  i†  s   G -> S (IN ISOFORM 1950).R   (   R  i™  i™  s'   D->N: SIGNIFICANT REDUCTION OF BINDING.R   (   R  i´  i´  s(   D->N: NO CHANGE IN BINDING OR CAMP PROD.R   (   R  iÇ  iÇ  s(   E->Q: NO CHANGE IN BINDING OR CAMP PROD.R   (   R  iF  iF  s(   D->N: NO CHANGE IN BINDING OR CAMP PROD.R   (   R]   i!   i!   s   R -> L (IN REF. 7).R   (   R   R  (   Rò   R  (   R   R  (   Rò   R  (   R   R  (   Rò   R	  (   R   R
  (   Rò   R  (   R   R  (   Rò   R  (   R   R  (   Rò   R  (   R   R  (   Rò   R  (   R   R  (   Rò   R  (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp011Å  s    

":        
&
c         C   s  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d g ƒ |  j
 | j d d d	 d
 d d d d d g	 ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ |  j
 t | j ƒ d ƒ |  j
 | j d j d ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d S(   s   Parsing SwissProt file sp012t   sp012R   R   t   Q9Y736s
   UBIQUITIN.sV   Seq('MQIFVKTLTGKTITLEVESSDTIDNVKTKIQDKEGIPPDQQRLIFAGKQLEDGR...GGN', ProteinAlphabet())R
   t   Fungit
   Ascomycotat   Pezizomycotinat   Eurotiomycetest
   Onygenalest   Arthrodermataceaes   mitosporic Arthrodermataceaet   Trichophytoni™   iVC  t   01153CF30C2DEDFFi    i   s/   Kano R., Nakamura Y., Watanabe S., Hasegawa A.;s2   "Trichophyton mentagrophytes mRNA for ubiquitin.";i   s   Kano R.;s6   "Microsporum canis mRNA for ubiquitin, complete cds.";N(   i™   iVC  R  (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp012}  sb    

.  
&
c         C   s  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d g ƒ |  j
 | j d d d	 d
 d d d d d d d d g ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ |  j
 t | j ƒ d ƒ |  j
 | j d j d ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d S(   s   Parsing SwissProt file sp013t   sp013R   R   t   P82909s2   MITOCHONDRIAL 28S RIBOSOMAL PROTEIN S36 (MRP-S36).sV   Seq('MGSKMASASRVVQVVKPHTPLIRFPDRRDNPKPNVSEALRSAGLPSHSSVISQH...GPE', ProteinAlphabet())R
   R   R   R   R   Rï   R   R   R   R   R   R   if   iG,  t   83EF107B42E2FCFDi    i   s   Strausberg R.;R   i   s@   Koc E.C., Burkhart W., Blackburn K., Moseley A., Spremulli L.L.;s   "The small subunit of the mammalian mitochondrial ribosome.
Identification of the full complement ribosomal proteins present.";N(   if   iG,  R"  (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp013Ì  s`    

7  
&
c         C   sK  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d d d	 g ƒ |  j
 | j d
 d d d d d d d d d d d g ƒ |  j
 | j dz ƒ |  j
 t | j ƒ d ƒ |  j
 | j d d{ ƒ |  j
 t | j ƒ d ƒ |  j
 | j d j d ƒ |  j
 | j d j d ƒ |  j
 t | j d j ƒ d ƒ |  j
 | j d j d ƒ |  j
 | j d j d  ƒ |  j
 t | j d j ƒ d! ƒ |  j
 | j d j d d| ƒ |  j
 | j d j d d} ƒ |  j
 | j d! j d ƒ |  j
 | j d! j d ƒ |  j
 t | j d! j ƒ d ƒ |  j
 | j d& j d' ƒ |  j
 | j d& j d( ƒ |  j
 t | j d& j ƒ d ƒ |  j
 | j d) j d* ƒ |  j
 | j d) j d+ ƒ |  j
 t | j d) j ƒ d! ƒ |  j
 | j d) j d d~ ƒ |  j
 | j d) j d d ƒ |  j
 | j d. j d/ ƒ |  j
 | j d. j d0 ƒ |  j
 t | j d. j ƒ d! ƒ |  j
 | j d. j d d€ ƒ |  j
 | j d. j d d ƒ |  j
 | j d3 j d4 ƒ |  j
 | j d3 j d5 ƒ |  j
 t | j d3 j ƒ d! ƒ |  j
 | j d3 j d d‚ ƒ |  j
 | j d3 j d dƒ ƒ |  j
 | j d8 j d9 ƒ |  j
 | j d8 j d: ƒ |  j
 t | j d8 j ƒ d ƒ |  j
 | j d; j d< ƒ |  j
 | j d; j d= ƒ |  j
 t | j d; j ƒ d! ƒ |  j
 | j d; j d d„ ƒ |  j
 | j d; j d d… ƒ |  j
 | j d@ j dA ƒ |  j
 | j d@ j dB ƒ |  j
 t | j d@ j ƒ d! ƒ |  j
 | j d@ j d d† ƒ |  j
 | j d@ j d d‡ ƒ |  j
 | j dE j dF ƒ |  j
 | j dE j dG ƒ |  j
 t | j dE j ƒ d! ƒ |  j
 | j dE j d dˆ ƒ |  j
 | j dE j d d‰ ƒ |  j
 | j dJ j dK ƒ |  j
 | j dJ j d ƒ |  j
 t | j dJ j ƒ d ƒ |  j
 | j dL j dM ƒ |  j
 | j dL j dN ƒ |  j
 t | j dL j ƒ d! ƒ |  j
 | j dL j d dŠ ƒ |  j
 | j dL j d d‹ ƒ |  j
 | j dQ j dR ƒ |  j
 | j dQ j dS ƒ |  j
 t | j dQ j ƒ d! ƒ |  j
 | j dQ j d dŒ ƒ |  j
 | j dQ j d d ƒ |  j
 | j dV j dW ƒ |  j
 | j dV j dX ƒ |  j
 t | j dV j ƒ d! ƒ |  j
 | j dV j d dŽ ƒ |  j
 | j dV j d d ƒ |  j
 | j d[ j d\ ƒ |  j
 | j d[ j d] ƒ |  j
 t | j d[ j ƒ d ƒ |  j
 | j d^ j d_ ƒ |  j
 | j d^ j d` ƒ |  j
 t | j d^ j ƒ d! ƒ |  j
 | j d^ j d d ƒ |  j
 | j d^ j d d‘ ƒ |  j
 | j dc j dd ƒ |  j
 | j dc j de ƒ |  j
 t | j dc j ƒ d! ƒ |  j
 | j dc j d d’ ƒ |  j
 | j dc j d d“ ƒ |  j
 | j dh j di ƒ |  j
 | j dh j dj ƒ |  j
 t | j dh j ƒ d! ƒ |  j
 | j dh j d d” ƒ |  j
 | j dh j d d• ƒ |  j
 | j dm j dn ƒ |  j
 | j dm j d ƒ |  j
 t | j dm j ƒ d ƒ |  j
 | j do j dp ƒ |  j
 | j do j dq ƒ |  j
 t | j do j ƒ d! ƒ |  j
 | j do j d d– ƒ |  j
 | j do j d d— ƒ |  j
 | j dt j du ƒ |  j
 | j dt j dv ƒ |  j
 t | j dt j ƒ d! ƒ |  j
 | j dt j d d˜ ƒ |  j
 | j dt j d d™ ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ dy S(š   s   Parsing SwissProt file sp014t   sp014R   R   t   P12166t
   PSBL_ORYSAs>   PHOTOSYSTEM II REACTION CENTER L PROTEIN (PSII 5 KDA PROTEIN).s?   Seq('TQSNPNEQNVELNRTSLYWGLLLIFVLAVLFSNYFFN', ProteinAlphabet())t   P12167t   Q34007R
   RÚ   RÛ   RÜ   RÝ   RÞ   Rß   t   Poalest   Poaceaet   Ehrhartoideaet   Oryzeaet   Oryzai%   i  t   CC537AEC50B2C784i   i    Rq   R   i   s   Sugiura M.;s»   Hiratsuka J., Shimada H., Whittier R., Ishibashi T., Sakamoto M.,
Mori M., Kondo C., Honji Y., Sun C.-R., Meng B.-Y., Li Y.-Q.,
Kanno A., Nishizawa Y., Hirai A., Shinozaki K., Sugiura M.;sÐ   "The complete sequence of the rice (Oryza sativa) chloroplast genome:
intermolecular recombination between distinct tRNA genes accounts for
a major plastid DNA inversion during the evolution of the cereals.";i   R   t   89364698Rò   t   2770692i   s  Shinozaki K., Ohme M., Tanaka M., Wakasugi T., Hayashida N.,
Matsubayashi T., Zaita N., Chunwongse J., Obokata J.,
Yamaguchi-Shinozaki K., Ohto C., Torazawa K., Meng B.Y., Sugita M.,
Deno H., Kamogashira T., Yamada K., Kusuda J., Takaiwa F., Kato A.,
Tohdoh N., Shimada H., Sugiura M.;sl   "The complete nucleotide sequence of the tobacco chloroplast genome:
its gene organization and expression.";i   s   Chaudhuri S., Maliga P.;s„   "Sequences directing C to U editing of the plastid psbL mRNA are
located within a 22 nucleotide segment spanning the editing site.";t   97076156t   8918473i   sQ   Chakhmakhcheva O.G., Andreeva A.V., Buryakova A.A., Reverdatto S.V.,
Efimov V.A.;sW   "Nucleotide sequence of the barley chloroplast psbE, psbF genes and
flanking regions.";t   89240046t   2654886i   sA   Efimov V.A., Andreeva A.V., Reverdatto S.V., Chakhmakhcheva O.G.;sŒ   "Photosystem II of rye. Nucleotide sequence of the psbB, psbC, psbE,
psbF, psbH genes of rye and chloroplast DNA regions adjacent to
them.";t   92207253t   1804121i   s;   Webber A.N., Hird S.M., Packman L.C., Dyer T.A., Gray J.C.;sŒ   "A photosystem II polypeptide is encoded by an open reading frame
co-transcribed with genes for cytochrome b-559 in wheat chloroplast
DNA.";i   s;   Kudla J., Igloi G.L., Metzlaff M., Hagemann R., Koessel H.;s   "RNA editing in tobacco chloroplasts leads to the formation of a
translatable psbL mRNA by a C to U substitution within the initiation
codon.";t   92191997t   1547774i	   s   Zolotarev A.S., Kolosov V.L.;s[   "Nucleotide sequence of the rye chloroplast DNA fragment, comprising
psbE and psbF genes.";t   89160331t   2646599i
   s<   Kolosov V.L., Klezovich O.N., Abdulaev N.G., Zolotarev A.S.;se   "Photosystem II of rye. Nucleotide sequence of genes psbE, psbF, psbL
and OPC40 of chloroplast DNA.";t   90073796t   2686655i   s   Haley J., Bogorad L.;i   s2   Maier R.M., Neckermann K., Igloi G.L., Koessel H.;s˜   "Complete sequence of the maize chloroplast genome: gene content,
hotspots of divergence and fine tuning of genetic information by
transcript editing.";t   95395841t   7666415i   s   Willey D.L., Gray J.C.;s|   "Two small open reading frames are co-transcribed with the pea
chloroplast genes for the polypeptides of cytochrome b-559.";t   89354671t   2766383i   s+   Bock R., Hagemann R., Koessel H., Kudla J.;sŒ   "Tissue- and stage-specific modulation of RNA editing of the psbF and
psbL transcript from spinach plastids -- a new regulatory mechanism?";t   93360903t   8355656i   s<   Hermann R.G., Alt J., Schiller B., Widger W.R., Cramer W.A.;s•   "Nucleotide sequence of the gene for apocytochrome b-559 on the
spinach plastid chromosome: implications for the structure of the
membrane protein.";i   s,   Kuntz M., Camara B., Weil J.-H., Schantz R.;sx   "The psbL gene from bell pepper (Capsicum annuum): plastid RNA
editing also occurs in non-photosynthetic chromoplasts.";t   93099270t   1463853i   s   Forsthoefel N.R., Cushman J.C.;s¨   "Characterization and expression of photosystem II genes (psbE, psbF,
and psbL) from the facultative crassulacean acid metabolism plant
Mesembryanthemum crystallinum.";t   94345017t   8066140i   s+   Kubo T., Yanai Y., Kinoshita T., Mikami T.;s   "The chloroplast trnP-trnW-petG gene cluster in the mitochondrial
genomes of Beta vulgaris, B. trigyna and B. webbiana: evolutionary
aspects.";t   95254673t   7736615i   s   Naithani S.;i   s   Ikeuchi M., Takio K., Inoue Y.;s”   "N-terminal sequencing of photosystem II low-molecular-mass proteins.
5 and 4.1 kDa components of the O2-evolving core complex from higher
plants.";t   89121082t   2644131i   s9   Zheleva D., Sharma J., Panico M., Morris H.R., Barber J.;si   "Isolation and characterization of monomeric and dimeric
CP47-reaction center photosystem II complexes.";t   98298118t   9632665N(   i%   i  R.  (   Rq   i    i    R   R   (   R   R/  (   Rò   R0  (   R   R1  (   Rò   R2  (   R   R3  (   Rò   R4  (   R   R5  (   Rò   R6  (   R   R7  (   Rò   R8  (   R   R9  (   Rò   R:  (   R   R;  (   Rò   R<  (   R   R=  (   Rò   R>  (   R   R?  (   Rò   R@  (   R   RA  (   Rò   RB  (   R   RC  (   Rò   RD  (   R   RE  (   Rò   RF  (   R   RG  (   Rò   RH  (   R   RI  (   Rò   RJ  (   R   RK  (   Rò   RL  (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R0   R1   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp014  s>   

7                      
&
c         C   sr  d } t  j j d | ƒ } t | ƒ } t j | d ƒ } | j ƒ  |  j t | t	 ƒ ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ t | ƒ } t j | ƒ } | j ƒ  |  j
 | j d ƒ |  j
 | j d g ƒ |  j
 | j d d	 d
 d d d d d d d d d g ƒ |  j
 | j d ƒ |  j
 t | j ƒ d ƒ |  j
 t | j ƒ d ƒ |  j
 | j j ƒ  | j ƒ |  j
 | j | j ƒ |  j
 | j | j ƒ |  j | j | j k ƒ t | ƒ } t t j | d ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t	 ƒ ƒ |  j
 | d j j ƒ  | j j ƒ  ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ t | ƒ } t t j | ƒ ƒ } | j ƒ  |  j
 t | ƒ d ƒ |  j t | d t j ƒ ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ |  j
 | d j | j ƒ d S(   s   Parsing SwissProt file sp015t   sp015R   R   t   IPI00383150s   IPI00383150.2R   sV   Seq('MSFQAPRRLLELAGQSLLRDQALAISVLDELPRELFPRLFVEAFTSRRCEVLKV...TPC', ProteinAlphabet())R
   R   R   R   R   Rï   R   R   R   R   R   R   iÉ  ixÎ  t   5C3151AAADBDE232i    i   N(   iÉ  ixÎ  RP  (   R   R   R   R   R    R   R    R!   R"   R   R#   R$   R%   R&   R'   R(   R   R)   R*   R+   R,   R-   R.   R/   R2   R3   R4   R5   R6   (   R7   R8   R9   R:   R;   R<   R=   (    (    s   test_SwissProt.pyt
   test_sp015ú  sR    

7
&
(   t   __name__t
   __module__R>   RK   Rg   Rw   Rƒ   R‘   R—   RÖ   Rå   Rø   R  R  R#  RM  RQ  (    (    (    s   test_SwissProt.pyR      s    	R	W	š	j	S	Q	Z	ÿ ;	M	„	¸	O	O	ßt   __main__t	   verbosityi   t
   testRunner(   t   __doc__R   t   unittestt   BioR    R   t   Bio.SeqRecordR   t   TestCaseR   RR  t   TextTestRunnert   runnert   main(    (    (    s   test_SwissProt.pyt   <module>   s    ÿ ÿ ÿ ÿ ÿ ÿ ÿ ;