ó
jÜxec           @   sf   d  d l  m Z d  d l Z d  d l Z d  d l m Z m Z e d e
 d „ ƒ Z e d „  ƒ Z d S(   iÿÿÿÿ(   t   divisionN(   t   checkt   handle_errorsc   	      C   s	  t  t |  t j ƒ d t |  ƒ ƒ |  j ƒ  } t  | d d d h k d | ƒ t  t | t ƒ d t | ƒ ƒ | s— t  t j	 | j
 ƒ j ƒ  d ƒ n  t  | j d d k d	 ƒ t  | j d d k d
 ƒ x t | j ƒ D]n \ } } t | ƒ } t  t | t ƒ d | | f ƒ t  t | ƒ d k d | t | ƒ t | ƒ f d ƒ qÛ Wt | j ƒ } | j ƒ  | | } d | j _ y | j j t ƒ } Wn t k
 r²t  t d ƒ n Xt  t | | j k ƒ d ƒ t  t t | j ƒ ƒ t | j ƒ k d ƒ | d k r+t  t |  j
 j ƒ  d k ƒ d ƒ nÚ | d k röt  t |  j
 j ƒ  d k ƒ d ƒ |  j d d ƒ j
 } t  t t j | d ƒ ƒ d ƒ t t j | d ƒ ƒ sd GH|  j
 | d d … t j f |  j  d d … d d … f <|  j ƒ  } qn | d k rn  | S(   sð  
    Checks to make sure that the input dataframe, df, represents a valid
    matrix, i.e., an object that can be displayed as a logo.

    parameters
    ----------

    df: (dataframe)
        A pandas dataframe where each row represents an (integer) position
        and each column represents to a (single) character.

    matrix_type: (None or str)
        If 'probability', validates df as a probability matrix, i.e., all
        elements are in [0,1] and rows are normalized). If 'information',
        validates df as an information matrix, i.e., all elements >= 0.

    allow_nan: (bool)
        Whether to allow NaN entries in the matrix.

    returns
    -------
    out_df: (dataframe)
        A cleaned-up version of df (if possible).
    s<   out_df needs to be a valid pandas out_df, out_df entered: %st   probabilityt   informations?   matrix_type = %s; must be None, "probability", or "information"s+   allow_nan must be of type bool; is type %s.sF   some matrix elements are not finite. Set allow_nan=True to allow this.i    i   s&   df has zero rows. Needs multiple rows.s,   df has zero columns. Needs multiple columns.s-   column number %d is of type %s; must be a strs#   column %d is %s and has length %d; s   must have length 1.t   poss_   could not convert df.index to type int. Check that all positions have integer numerical values.sr   could not convert df.index values to int without changingsome values. Make sure that df.index values are integers.s@   not all values of df.index are unique. Make sure all are unique.s   not all values in df are >=0.t   axisg        s&   some columns in df sum to nearly zero.g      ð?sM   in validate_matrix(): Row sums in df are not close to 1. Reormalizing rows...N(!   R   t
   isinstancet   pdt	   DataFramet   typet   copyt   Nonet   boolt   npt   isfinitet   valuest   allt   shapet	   enumeratet   columnst   strt   lent   reprt   listt   sortt   indext   namet   astypet   intt	   TypeErrort   Falset   sett   ravelt   sumt   anyt   iscloset   newaxist   loc(	   t   dft   matrix_typet	   allow_nant   out_dft   it   colt	   char_colst	   int_indext   sums(    (    s;   /tmp/pip-install-l3LICk/logomaker/logomaker/src/validate.pyt   validate_matrix   sb    


$%9c         C   sÆ   t  |  d t ƒ} t t | j j ƒ  d k ƒ d ƒ | j d d ƒ j } t t t j	 | d ƒ ƒ d ƒ t t j	 | d ƒ ƒ sÂ d	 GH| j | d
 d
 … t j
 f | j d
 d
 … d
 d
 … f <n  | S(   s  
    Verifies that the input dataframe df indeed represents a
    probability matrix. Renormalizes df with a text warning if it is not
    already normalized. Throws an error if df cannot be reliably normalized.

    parameters
    ----------

    df: (dataframe)
        A pandas dataframe where each row represents an (integer) position
        and each column represents to a (single) character.

    returns
    -------
    prob_df: (dataframe)
        A cleaned-up and normalized version of df (if possible).
    R)   i    s   not all values in df are >=0.R   i   g        s+   some columns in prob_df sum to nearly zero.g      ð?sV   in validate_probability_mat(): Row sums in df are not close to 1. Reormalizing rows...N(   R0   R   R   R   R   R!   R"   R#   R   R$   R%   R&   (   R'   t   prob_dfR/   (    (    s;   /tmp/pip-install-l3LICk/logomaker/logomaker/src/validate.pyt   validate_probability_mat„   s    <(   t
   __future__R    t   numpyR   t   pandasR   t   logomaker.src.error_handlingR   R   R   R   R0   R2   (    (    (    s;   /tmp/pip-install-l3LICk/logomaker/logomaker/src/validate.pyt   <module>   s   |