3
i\>                 @   sJ   d dl mZ d dlZd dlZd dlmZmZ ed	ddZ	edd Z
dS )
    )divisionN)checkhandle_errorsFc       	      C   s<  t t| tjdt|   | j }t |dkd|  t t|tdt|  |sft tj|j	j
 d t |jd d	kd
 t |jd	 d	kd x^t|jD ]P\}}t|}t t|td||f  t t|d	kd|t|t|f d  qW t|j}|j  || }d|j_y|jjt}W n  tk
r@   t dd Y nX t t
||jkd t tt|jt|jkd |dkrt t
| j	j dkd n|dkr.t t
| j	j dkd | jd	dj	}t ttj|d d t
tj|ds8td | j	|ddtjf  | jddddf< | j }n
|dkr8|S )a  
    Checks to make sure that the input dataframe, df, represents a valid
    matrix, i.e., an object that can be displayed as a logo.

    parameters
    ----------

    df: (dataframe)
        A pandas dataframe where each row represents an (integer) position
        and each column represents to a (single) character.

    matrix_type: (None or str)
        If 'probability', validates df as a probability matrix, i.e., all
        elements are in [0,1] and rows are normalized). If 'information',
        validates df as an information matrix, i.e., all elements >= 0.

    allow_nan: (bool)
        Whether to allow NaN entries in the matrix.

    returns
    -------
    out_df: (dataframe)
        A cleaned-up version of df (if possible).
    z<out_df needs to be a valid pandas out_df, out_df entered: %sNprobabilityinformationz?matrix_type = %s; must be None, "probability", or "information"z+allow_nan must be of type bool; is type %s.zFsome matrix elements are not finite. Set allow_nan=True to allow this.r      z&df has zero rows. Needs multiple rows.z,df has zero columns. Needs multiple columns.z-column number %d is of type %s; must be a strz#column %d is %s and has length %d; zmust have length 1.posFz_could not convert df.index to type int. Check that all positions have integer numerical values.zrcould not convert df.index values to int without changingsome values. Make sure that df.index values are integers.z@not all values of df.index are unique. Make sure all are unique.znot all values in df are >=0.)axisg        z&some columns in df sum to nearly zero.g      ?zMin validate_matrix(): Row sums in df are not close to 1. Reormalizing rows...>   r   Nr   ) r   
isinstancepd	DataFrametypecopyboolnpisfinitevaluesallshape	enumeratecolumnsstrlenreprlistsortindexnameastypeint	TypeErrorsetravelsumanyiscloseprintnewaxisloc)	dfmatrix_type	allow_nanout_dficolZ	char_cols	int_indexsums r1   ../../logomaker/src/validate.pyvalidate_matrix   s`    




*

r3   c             C   s   t | dd}tt|jj dkd |jddj}tttj|d d ttj|d	st	d
 |j|ddtj
f  |jddddf< |S )a  
    Verifies that the input dataframe df indeed represents a
    probability matrix. Renormalizes df with a text warning if it is not
    already normalized. Throws an error if df cannot be reliably normalized.

    parameters
    ----------

    df: (dataframe)
        A pandas dataframe where each row represents an (integer) position
        and each column represents to a (single) character.

    returns
    -------
    prob_df: (dataframe)
        A cleaned-up and normalized version of df (if possible).
    F)r+   r   znot all values in df are >=0.r   )r	   g        z+some columns in prob_df sum to nearly zero.g      ?zVin validate_probability_mat(): Row sums in df are not close to 1. Reormalizing rows...N)r3   r   r   r   r"   r#   r$   r   r%   r&   r'   r(   )r)   prob_dfr0   r1   r1   r2   validate_probability_mat   s    *r5   )NF)
__future__r   numpyr   pandasr   logomaker.src.error_handlingr   r   r3   r5   r1   r1   r1   r2   <module>   s   |