ó
áp7]c           @  s›   d  Z  d d l m Z m Z d d l Z d d l Z d d l m	 Z	 d d l
 m Z m Z d „  Z d e f d „  ƒ  YZ e e e e e e d	 d
 „ Z d S(   sL   Principal Component Analysis

Author: josef-pktd
Modified by Kevin Sheppard
iÿÿÿÿ(   t   print_functiont   divisionN(   t   range(   t   ValueWarningt   EstimationWarningc         C  s   t  j t  j |  |  ƒ ƒ S(   N(   t   npt   sqrtt   sum(   t   x(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyt   _norm   s    t   PCAc           B  sï   e  Z d  Z d e e e e d d d d d d d d „ Z d „  Z d „  Z d „  Z	 d	 „  Z
 d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d e e d „ Z d „  Z d e e d d „ Z d d d „ Z RS(   s>  
    Principal Component Analysis

    Parameters
    ----------
    data : array-like
        Variables in columns, observations in rows
    ncomp : int, optional
        Number of components to return.  If None, returns the as many as the
        smaller of the number of rows or columns in data
    standardize: bool, optional
        Flag indicating to use standardized data with mean 0 and unit
        variance.  standardized being True implies demean.  Using standardized
        data is equivalent to computing principal components from the
        correlation matrix of data
    demean : bool, optional
        Flag indicating whether to demean data before computing principal
        components.  demean is ignored if standardize is True. Demeaning data
        but not standardizing is equivalent to computing principal components
        from the covariance matrix of data
    normalize : bool , optional
        Indicates whether th normalize the factors to have unit inner product.
        If False, the loadings will have unit inner product.
    weights : array, optional
        Series weights to use after transforming data according to standardize
        or demean when computing the principal components.
    gls : bool, optional
        Flag indicating to implement a two-step GLS estimator where
        in the first step principal components are used to estimate residuals,
        and then the inverse residual variance is used as a set of weights to
        estimate the final principal components.  Setting gls to True requires
        ncomp to be less then the min of the number of rows or columns
    method : str, optional
        Sets the linear algebra routine used to compute eigenvectors
        'svd' uses a singular value decomposition (default).
        'eig' uses an eigenvalue decomposition of a quadratic form
        'nipals' uses the NIPALS algorithm and can be faster than SVD when
        ncomp is small and nvars is large. See notes about additional changes
        when using NIPALS
    tol : float, optional
        Tolerance to use when checking for convergence when using NIPALS
    max_iter : int, optional
        Maximum iterations when using NIPALS
    missing : string
        Method for missing data.  Choices are
        'drop-row' - drop rows with missing values
        'drop-col' - drop columns with missing values
        'drop-min' - drop either rows or columns, choosing by data retention
        'fill-em' - use EM algorithm to fill missing value.  ncomp should be
        set to the number of factors required
    tol_em : float
        Tolerance to use when checking for convergence of the EM algorithm
    max_em_iter : int
        Maximum iterations for the EM algorithm

    Attributes
    ----------
    factors : array or DataFrame
        nobs by ncomp array of of principal components (scores)
    scores :  array or DataFrame
        nobs by ncomp array of of principal components - identical to factors
    loadings : array or DataFrame
        ncomp by nvar array of  principal component loadings for constructing
        the factors
    coeff : array or DataFrame
        nvar by ncomp array of  principal component loadings for constructing
        the projections
    projection : array or DataFrame
        nobs by var array containing the projection of the data onto the ncomp
        estimated factors
    rsquare : array or Series
        ncomp array where the element in the ith position is the R-square
        of including the fist i principal components.  Note: values are
        calculated on the transformed data, not the original data
    ic : array or DataFrame
        ncomp by 3 array containing the Bai and Ng (2003) Information
        criteria.  Each column is a different criteria, and each row
        represents the number of included factors.
    eigenvals : array or Series
        nvar array of eigenvalues
    eigenvecs : array or DataFrame
        nvar by nvar array of eigenvectors
    weights : array
        nvar array of weights used to compute the principal components,
        normalized to unit length
    transformed_data : array
        Standardized, demeaned and weighted data used to compute
        principal components and related quantities
    cols : array
        Array of indices indicating columns used in the PCA
    rows : array
        Array of indices indicating rows used in the PCA

    Examples
    --------
    Basic PCA using the correlation matrix of the data

    >>> import numpy as np
    >>> from statsmodels.multivariate.pca import PCA
    >>> x = np.random.randn(100)[:, None]
    >>> x = x + np.random.randn(100, 100)
    >>> pc = PCA(x)

    Note that the principal components are computed using a SVD and so the
    correlation matrix is never constructed, unless method='eig'.

    PCA using the covariance matrix of the data

    >>> pc = PCA(x, standardize=False)

    Limiting the number of factors returned to 1 computed using NIPALS

    >>> pc = PCA(x, ncomp=1, method='nipals')
    >>> pc.factors.shape
    (100, 1)

    Notes
    -----
    The default options perform principal component analysis on the
    demeaned, unit variance version of data.  Setting standardize to False
    will instead only demean, and setting both standardized and
    demean to False will not alter the data.

    Once the data have been transformed, the following relationships hold when
    the number of components (ncomp) is the same as tne minimum of the number
    of observation or the number of variables.

    .. math:

        X' X = V \Lambda V'

    .. math:

        F = X V

    .. math:

        X = F V'

    where X is the `data`, F is the array of principal components (`factors`
    or `scores`), and V is the array of eigenvectors (`loadings`) and V' is
    the array of factor coefficients (`coeff`).

    When weights are provided, the principal components are computed from the
    modified data

    .. math:

        \Omega^{-\frac{1}{2}} X

    where :math:`\Omega` is a diagonal matrix composed of the weights. For
    example, when using the GLS version of PCA, the elements of :math:`\Omega`
    will be the inverse of the variances of the residuals from

    .. math:

        X - F V'

    where the number of factors is less than the rank of X

    .. [*] J. Bai and S. Ng, "Determining the number of factors in approximate
       factor models," Econometrica, vol. 70, number 1, pp. 191-221, 2002
    t   svdgH¯¼šò×j>iè  id   c         C  sÐ  d  |  _ g  |  _ t | t j ƒ r? | j |  _ | j |  _ n  t j	 | ƒ |  _
 | |  _ | |  _ |
 |  _ d |  j k  o† d k  n sš t d ƒ ‚ n  | |  _ | |  _ | |  _ | |  _ | |  _ |  j
 j \ |  _ |  _ | d  k r t j |  j ƒ } nW t j | ƒ j ƒ  } | j d |  j k r:t d ƒ ‚ n  | t j | d j ƒ  ƒ } | |  _ t |  j |  j ƒ } | d  k r‡| n | |  _ |  j | k rÍd d  l } d } | j  | t! ƒ | |  _ n  | |  _" |  j" d k rýt d j# | ƒ ƒ ‚ n  t j$ |  j ƒ |  _% t j$ |  j ƒ |  _& |	 |  _' |  j
 |  _( |	 d  k	 rÓ|  j) ƒ  |  j( j \ |  _ |  _ |  j t j |  j
 j ƒ k r£t j |  j( j ƒ |  _ qÓ|  j t j |  j( j ƒ k rÓt d ƒ ‚ qÓn  d |  _* d  |  _+ d  |  _, d  |  _- d  |  _. d  |  _/ d  |  _0 d  |  _1 |  _2 d  |  _3 d  |  _4 d  |  _5 d  |  _6 d  |  _7 d  |  _8 d  |  _9 |  j: ƒ  |  _, |  j; ƒ  | r¦|  j< ƒ  |  j: ƒ  |  _, |  j; ƒ  n  |  j= ƒ  |  j d  k	 rÌ|  j> ƒ  n  d  S(   Ni    i   s$   tol must be strictly between 0 and 1s!   weights should have nvar elementsg       @iÿÿÿÿs¥   The requested number of components is more than can be computed from data. The maximum number of components is the minimum of the number of observations or variablest   eigR   t   nipalss   method {0} is not known.s   When adjusting for missing values, user provided ncomp must be no larger than the smallest dimension of the missing-value-adjusted data size.g        (   R   R   R   (?   t   Nonet   _indext   _columnst
   isinstancet   pdt	   DataFramet   indext   columnsR   t   asarrayt   datat   _glst
   _normalizet   _tolt
   ValueErrort	   _max_itert   _max_em_itert   _tol_emt   _standardizet   _demeant   shapet   _nobst   _nvart   onest   arrayt   flattenR   t   meant   weightst   mint   _ncompt   warningst   warnR   t   _methodt   formatt   aranget   rowst   colst   _missingt   _adjusted_datat   _adjust_missingt   _tsst   _esst   transformed_datat   _mut   _sigmat
   _ess_indivt
   _tss_indivt   scorest   factorst   loadingst   coefft	   eigenvalst	   eigenvecst
   projectiont   rsquaret   ict   _prepare_datat   _pcat   _compute_gls_weightst   _compute_rsquare_and_ict
   _to_pandas(   t   selfR   t   ncompt   standardizet   demeant	   normalizet   glsR(   t   methodt   missingt   tolt   max_itert   tol_emt   max_em_itert   min_dimR+   R,   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyt   __init__¹   sŠ    													
														


c   
      C  s×  d „  } d „  } |  j  d k rb | |  j ƒ \ |  _ } t j | ƒ d |  _ |  j | |  _ n|  j  d k r¢ | |  j ƒ \ |  _ } t j | ƒ d |  _ nØ |  j  d k rM| |  j ƒ \ } } | j } | |  j ƒ \ } } | j }	 |	 | k r| |  _ t j | ƒ d |  _ qz| |  _ |  j | |  _ t j | ƒ d |  _ n- |  j  d k rn|  j	 ƒ  |  _ n t
 d ƒ ‚ |  j d
 k	 r²|  j |  j |  _ |  j |  j |  _ n  |  j j d k rÓt
 d	 ƒ ‚ n  d
 S(   sE   
        Implements alternatives for handling missing values
        c         S  s>   t  j t  j t  j |  ƒ d ƒ ƒ } |  d  d  … | f | f S(   Ni    (   R   t   logical_nott   anyt   isnan(   R   R   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyt   keep_col"  s    $c         S  s>   t  j t  j t  j |  ƒ d ƒ ƒ } |  | d  d  … f | f S(   Ni   (   R   RX   RY   RZ   (   R   R   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyt   keep_row&  s    $s   drop-coli    s   drop-rows   drop-mins   fill-ems   missing method is not known.s2   Removal of missing values has eliminated all data.N(   R2   R   R3   R   t   whereR1   R(   R0   t   sizet   _fill_missing_emR   R   R   R   (
   RJ   R[   R\   R   t   drop_colt   drop_col_indext   drop_col_sizet   drop_rowt   drop_row_indext   drop_row_size(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyR4     s8    						c   	      C  s  |  j  t j |  j ƒ } |  j |  j k r: t d ƒ ‚ n  | d j d ƒ } d | } | t j | d j ƒ  ƒ } |  j } d t	 | | j	 ƒ  d ƒ | } | d k  rû t
 t j | | ƒ ƒ } d d l } d j d	 | d
 | ƒ } | j | t ƒ n  | |  _ d S(   sF   
        Computes GLS weights based on percentage of data fit
        sO   gls can only be used when ncomp < nvar so that residuals have non-zero varianceg       @i    g      ð?gš™™™™™¹?iÿÿÿÿNs‰   Many series are being down weighted by GLS. Of the {original} series, the GLS estimates are based on only {effective} (effective) series.t   originalt	   effective(   R7   R   R   RB   R*   R#   R   R'   R   R   t   intt   roundR+   R.   R,   R   R(   (	   RJ   t   errorst   varR(   t   nvart   eff_series_perct
   eff_seriesR+   R,   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyRG   M  s    
	"	c         C  s'   |  j  ƒ  |  j ƒ  |  j ƒ  |  _ d S(   s"   
        Main PCA routine
        N(   t   _compute_eigt   _compute_pca_from_eigt   projectRB   (   RJ   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyRF   f  s    

c         C  s8   |  j  ƒ  } | d  } | d t t |  ƒ ƒ d 7} | S(   Niÿÿÿÿs   , id: t   )(   t   __str__t   hext   id(   RJ   t   string(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyt   __repr__n  s    
c         C  sñ   d } | d t  |  j ƒ d 7} | d t  |  j ƒ d 7} |  j rN d } n |  j r` d } n d } | d | d 7} |  j rŽ | d	 7} n  | d
 t  |  j ƒ d 7} | d t  |  j ƒ d 7} | |  j d k rÜ d n d 7} | d 7} | S(   Ns   Principal Component Analysis(s   nobs: s   , s   nvar: s   Standardize (Correlation)s   Demean (Covariance)R   s   transformation: s   GLS, s   normalization: s   number of components: R   s   method: t
   Eigenvaluet   SVDRr   s   method: Eigenvalue(	   t   strR"   R#   R   R    R   R   R*   R-   (   RJ   Rv   t   kind(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyRs   t  s     					
c         C  sØ   |  j  } t j t j | ƒ ƒ rA t j | j d ƒ j t j ƒ St j | d d ƒ|  _	 t j
 t j | |  j	 d d d ƒƒ |  _ |  j r¥ | |  j	 |  j } n |  j r¾ | |  j	 } n | } | t j
 |  j ƒ S(   s-   
        Standardize or demean data.
        i   t   axisi    g       @(   R3   R   t   allRZ   t   emptyR!   t   fillt   nant   nanmeanR8   R   R9   R   R    R(   (   RJ   t   adj_dataR   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyRE   ‡  s    	 ,		c         C  s@   |  j  d k r |  j ƒ  S|  j  d k r2 |  j ƒ  S|  j ƒ  Sd S(   sz   
        Wrapper for actual eigenvalue method

        This is a workaround to avoid instance methods in __dict__
        R   R   N(   R-   t   _compute_using_eigt   _compute_using_svdt   _compute_using_nipals(   RJ   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyRo   ™  s
    

c         C  sA   |  j  } t j j | ƒ \ } } } | d |  _ | j |  _ d S(   s/   SVD method to compute eigenvalues and eigenvecsg       @N(   R7   R   t   linalgR   R@   t   TRA   (   RJ   R   t   ut   st   v(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyR„   ¦  s    	c         C  s7   |  j  } t j j | j j | ƒ ƒ \ |  _ |  _ d S(   sY   
        Eigenvalue decomposition method to compute eigenvalues and eigenvectors
        N(   R7   R   R†   t   eighR‡   t   dotR@   RA   (   RJ   R   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyRƒ   ­  s    	c         C  sÔ  |  j  } |  j d k r% | d } n  |  j |  j |  j } } } t j |  j ƒ } t j |  j |  j f ƒ } xLt | ƒ D]>} t j | j	 d ƒ ƒ } | d d … | g f }	 d }
 d } x¦ | | k rg|
 | k  rg| j
 j |	 ƒ |	 j
 j |	 ƒ } | t j | j
 j | ƒ ƒ } |	 } | j | ƒ | j
 j | ƒ }	 t |	 | ƒ t |	 ƒ } |
 d 7}
 qÂ W|	 d j ƒ  | | <| | d d … | g f <| d k r| | |	 j | j
 ƒ 8} q| q| W| |  _ | |  _ d S(   sg   
        NIPALS implementation to compute small number of eigenvalues
        and eigenvectors
        i   g        i    Ng      ð?i   (   R7   R*   R   R   R   t   zerosR#   R   t   argmaxRk   R‡   RŒ   R   R	   R   R@   RA   (   RJ   R   RR   RS   RK   t   valst   vecst   it   max_var_indt   factort   _itert   difft   vect   factor_last(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyR…   ´  s0    	"	c         C  së  t  j t  j |  j ƒ ƒ } t  j | ƒ r1 |  j St  j |  j ƒ  ƒ } |  _ |  j } t  j	 | d ƒ } t  j	 | d ƒ } t  j
 | | k  ƒ s¤ t  j
 | | k  ƒ r³ t d ƒ ‚ n  t  j | ƒ } t  j | d ƒ } t  j |  j d f ƒ | } | | }	 |	 | | <d }
 d } x¤ |
 |  j k r¶| |  j k  r¶|	 } | |  _ |  j ƒ  |  j ƒ  t  j |  j d t d t ƒ ƒ } | | }	 |	 | | <| |	 } t | ƒ t |	 ƒ }
 | d 7} qW|  j d } t  j |  j ƒ  ƒ } | | | | <| S(   s5   
        EM algorithm to fill missing values
        i   i    s\   Implementation requires that all columns and all rows have at least ncomp non-missing valuesg      ð?t	   transformt   unweightg        (   R   RX   RZ   R   R}   R   RE   R7   R*   R   RY   R   R   R$   R"   R   R   Ro   Rp   Rq   t   FalseR	   R3   (   RJ   t   non_missingR   RK   t   col_non_missingt   row_non_missingt   maskt   muRB   t   projection_maskedR•   R”   t   last_projection_maskedt   delta(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyR_   Ô  s@    	*

!	




c         C  s¦  |  j  |  j } } t j | ƒ } | d d d … } | | } | d d … | f } | d k j ƒ  rã | j d | d k j ƒ  } | |  j k  rã d d l } | j	 d j
 d | ƒ t ƒ | |  _ t j t j ƒ j | | )qã n  | |  j  } | d d … d |  j … f } | | |  _  |  _ |  j j | ƒ |  _ |  _ | |  _ | j |  _ |  j r¢|  j j t j | ƒ j |  _ |  j t j | ƒ _ |  j |  _ n  d S(   sR   
        Compute relevant statistics after eigenvalues have been computed
        Niÿÿÿÿi    sg   Only {num:d} eigenvalues are positive.  This is the maximum number of components that can be extracted.t   num(   R@   RA   R   t   argsortRY   R!   R   R*   R+   R,   R.   R   t   finfot   float64t   tinyR7   RŒ   R<   R=   R>   R‡   R?   R   R   (   RJ   R   R   t   indicest   num_goodR+   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyRp     s0    
			c         C  sH  |  j  } |  j t j | ƒ } t j | d d ƒ |  _ t j |  j ƒ |  _ t j |  j d ƒ |  _	 t j |  j d |  j
 f ƒ |  _ x t |  j d ƒ D]z } |  j d | d t d t ƒ } | d j d d ƒ } | j ƒ  } |  j | |  j	 | <|  j | |  j | d d … f <qœ Wd	 |  j	 |  j |  _ |  j	 } | d k } | j ƒ  rxt j | ƒ d j ƒ  }	 | |	  } n  t j | ƒ }
 t j | j d ƒ } |  j |  j
 } } | | | | } t | | ƒ } t j | t j d	 | ƒ | t j | ƒ t j | ƒ | g ƒ } | d d … d f } |
 | | } | j |  _ d S(
   s-   
        Final statistics to compute
        i   i    i   RK   R˜   R™   R|   Ng      ð?(   R(   R7   R   R   R   R;   R5   R   R*   R6   R#   R:   R   Rq   Rš   RC   RY   R]   R)   t   logR/   R!   R"   R%   R   R‡   RD   (   RJ   R(   t   ss_dataR‘   RB   t	   indiv_rsst   rsst   esst   invalidt   last_obst   log_esst   rt   nobsRl   t   sum_to_prodRV   t	   penaltiesRD   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyRH   1  s:    	"$	c         C  s8  | d k r |  j n | } | |  j k r9 t d ƒ ‚ n  t j |  j ƒ } t j |  j ƒ } | d d … d | … f j | d | … d d … f ƒ } | s¤ | r½ | t j |  j	 ƒ 9} n  | r|  j
 rÜ | |  j 9} n  |  j
 sî |  j r| |  j 7} qn  |  j d k	 r4t j | d |  j d |  j ƒ} n  | S(   sÚ  
        Project series onto a specific number of factors

        Parameters
        ----------
        ncomp : int, optional
            Number of components to use.  If omitted, all components
            initially computed are used.

        Returns
        -------
        projection : array
            nobs by nvar array of the projection onto ncomp factors
        transform : bool
            Flag indicating whether to return the projection in the original
            space of the data (True, default) or in the space of the
            standardized/demeaned data
        unweight : bool
            Flag indicating whether to undo the effects of the estimation
            weights

        Notes
        -----
        s=   ncomp must be smaller than the number of components computed.NR   R   (   R   R*   R   R   R   R=   R?   RŒ   R   R(   R   R9   R    R8   R   R   R   R   (   RJ   RK   R˜   R™   R=   R?   RB   (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyRq   Z  s$    ;		c         C  sü  |  j  } t j t j |  j ƒ ƒ } d t t | ƒ ƒ d } g  t |  j ƒ D] } | j | ƒ ^ qN } t	 j
 |  j d | d | ƒ} | |  _ |  _ t	 j
 |  j d |  j d | ƒ} | |  _ t	 j
 |  j d | d |  j ƒ} | |  _ t	 j
 |  j d |  j d | ƒ} | |  _ t	 j |  j ƒ |  _ d |  j _ | j d d ƒ } g  t |  j j d ƒ D] } | j | ƒ ^ q_} t	 j
 |  j d | ƒ|  _ t	 j |  j ƒ |  _ d	 |  j j _ d
 |  j _ t	 j
 |  j d d d d g ƒ|  _ d	 |  j j _ d S(   s:   
        Returns pandas DataFrames for all values
        s	   comp_{0:0s   d}R   R   R@   t   compt   eigenveci   RK   RC   t   IC_p1t   IC_p2t   IC_p3N(   R   R   t   ceilt   log10R*   Rz   Rh   R   R.   R   R   R=   R<   RB   R   R?   R>   t   SeriesR@   t   namet   replaceRA   R!   RC   R   RD   (   RJ   R   t	   num_zerost   comp_strR‘   R1   t   dft   vec_str(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyRI   Š  s4    	+					2$c         C  s
  d d l  j j } | j | ƒ \ } } | d k r< |  j n | } t j |  j ƒ } | |  j  } | ry t j	 | ƒ } n  | r | j
 d ƒ n  | j t j | ƒ | |  d ƒ | j d t ƒ t j | j ƒ  ƒ } | d | d }	 | d t j |	 |	 g ƒ 7} | j | ƒ t j | j ƒ  ƒ }
 d } | r˜t j |
 d |
 d ƒ }	 t j t j t j |
 d ƒ | |	 t j |
 d ƒ | |	 g ƒ ƒ }
 n0 |
 d |
 d }	 |
 | t j |	 |	 g ƒ 7}
 | j |
 ƒ | j d	 ƒ | j d
 ƒ | j d ƒ | j ƒ  | S(   sØ  
        Plot of the ordered eigenvalues

        Parameters
        ----------
        ncomp : int, optional
            Number of components ot include in the plot.  If None, will
            included the same as the number of components computed
        log_scale : boot, optional
            Flag indicating whether ot use a log scale for the y-axis
        cumulative : bool, optional
            Flag indicating whether to plot the eigenvalues or cumulative
            eigenvalues
        ax : Matplotlib axes instance, optional
            An axes on which to draw the graph.  If omitted, new a figure
            is created

        Returns
        -------
        fig : figure
            Handle to the figure
        iÿÿÿÿNRª   t   bot   tighti   i    g{®Gáz”?s
   Scree PlotRx   s   Component Number(   t   statsmodels.graphics.utilst   graphicst   utilst   create_mpl_axR   R*   R   R   R@   t   cumsumt
   set_yscalet   plotR/   t	   autoscalet   TrueR%   t   get_xlimt   set_xlimt   get_ylimRª   t   expt   set_ylimt	   set_titlet
   set_ylabelt
   set_xlabelt   tight_layout(   RJ   RK   t	   log_scalet
   cumulativet   axt   gutilst   figR   t   xlimt   spt   ylimt   scale(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyt
   plot_scree±  s:     $'
c         C  s´   d d l  j j } | j | ƒ \ } } | d k r9 d n | } t | |  j ƒ } d |  j |  j } | d } | |  } | j	 | j
 ƒ | j d ƒ | j d ƒ | j d ƒ | S(	   s  
        Box plots of the individual series R-square against the number of PCs

        Parameters
        ----------
        ncomp : int, optional
            Number of components ot include in the plot.  If None, will
            plot the minimum of 10 or the number of computed components
        ax : Matplotlib axes instance, optional
            An axes on which to draw the graph.  If omitted, new a figure
            is created

        Returns
        -------
        fig : figure
            Handle to the figure
        iÿÿÿÿNi
   g      ð?i   s   Individual Input $R^2$s   $R^2$s'   Number of Included Principal Components(   RÆ   RÇ   RÈ   RÉ   R   R)   R*   R:   R;   t   boxplotR‡   RÔ   RÕ   RÖ   (   RJ   RK   RÚ   RÛ   RÜ   t   r2s(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyt   plot_rsquareí  s    

N(   t   __name__t
   __module__t   __doc__R   RÎ   Rš   RW   R4   RG   RF   Rw   Rs   RE   Ro   R„   Rƒ   R…   R_   Rp   RH   Rq   RI   Rá   Rä   (    (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyR
      s.   £	a	0									 	9	$	)0	';R   c   	      C  sd   t  |  d | d | d | d | d | d | d | ƒ} | j | j | j | j | j | j | j f S(   s/
  
    Principal Component Analysis

    Parameters
    ----------
    data : array
        Variables in columns, observations in rows.
    ncomp : int, optional
        Number of components to return.  If None, returns the as many as the
        smaller to the number of rows or columns of data.
    standardize: bool, optional
        Flag indicating to use standardized data with mean 0 and unit
        variance.  standardized being True implies demean.
    demean : bool, optional
        Flag indicating whether to demean data before computing principal
        components.  demean is ignored if standardize is True.
    normalize : bool , optional
        Indicates whether th normalize the factors to have unit inner
        product.  If False, the loadings will have unit inner product.
    weights : array, optional
        Series weights to use after transforming data according to standardize
        or demean when computing the principal components.
    gls : bool, optional
        Flag indicating to implement a two-step GLS estimator where
        in the first step principal components are used to estimate residuals,
        and then the inverse residual variance is used as a set of weights to
        estimate the final principal components
    method : str, optional
        Determines the linear algebra routine uses.  'eig', the default,
        uses an eigenvalue decomposition. 'svd' uses a singular value
        decomposition.

    Returns
    -------
    factors : array or DataFrame
        nobs by ncomp array of of principal components (also known as scores)
    loadings : array or DataFrame
        ncomp by nvar array of  principal component loadings for constructing
        the factors
    projection : array or DataFrame
        nobs by var array containing the projection of the data onto the ncomp
        estimated factors
    rsquare : array or Series
        ncomp array where the element in the ith position is the R-square
        of including the fist i principal components.  The values are
        calculated on the transformed data, not the original data.
    ic : array or DataFrame
        ncomp by 3 array containing the Bai and Ng (2003) Information
        criteria.  Each column is a different criteria, and each row
        represents the number of included factors.
    eigenvals : array or Series
        nvar array of eigenvalues
    eigenvecs : array or DataFrame
        nvar by nvar array of eigenvectors

    Notes
    -----
    This is a simple function wrapper around the PCA class. See PCA for
    more information and additional methods.
    RK   RL   RM   RN   RO   R(   RP   (   R
   R=   R>   RB   RC   RD   R@   RA   (	   R   RK   RL   RM   RN   RO   R(   RP   t   pc(    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyt   pca  s    >(   Rç   t
   __future__R    R   t   numpyR   t   pandasR   t   statsmodels.compat.pythonR   t   statsmodels.tools.sm_exceptionsR   R   R	   t   objectR
   R   RÎ   Rš   Ré   (    (    (    s;   lib/python2.7/site-packages/statsmodels/multivariate/pca.pyt   <module>   s   	ÿ ÿ ÿ  