ó
áp7]c           @  sM  d  Z  d d l m Z d d l Z d d l j Z d d l m	 Z	 d d l
 m Z d e f d „  ƒ  YZ e d k rId	 g Z d	 e k rId
 Z e j e j j d e d f ƒ e j e d	 f ƒ f Z e j e j e j d ƒ d d ƒ e j d ƒ d d d … f j Z e j d d d d g d d d d g d d d d g g ƒ Z e j d d d d g d d d d g d d d d g g ƒ Z e j e e ƒ Z e d e j j d e j ƒ 7Z e j e d d d g ƒ Z e d e j j d e j ƒ Z  e e  e ƒ Z! e" e! j# ƒ  ƒ e" d ƒ e! j$ d d d e& d d ƒ e" e! j# ƒ  ƒ qIn  d S(   sQ   
Created on Sun Nov 14 08:21:41 2010

Author: josef-pktd
License: BSD (3-clause)
iÿÿÿÿ(   t   print_functionN(   t   pca(   t   LeaveOneOutt   FactorModelUnivariatec           B  sM   e  Z d  Z d „  Z d d e d „ Z d „  Z d e d d „ Z d „  Z	 RS(   s  

    Todo:
    check treatment of const, make it optional ?
        add hasconst (0 or 1), needed when selecting nfact+hasconst
    options are arguments in calc_factors, should be more public instead
    cross-validation is slow for large number of observations
    c         C  s(   t  j | ƒ |  _ t  j | ƒ |  _ d  S(   N(   t   npt   asarrayt   endogt   exog(   t   selfR   R   (    (    sH   lib/python2.7/site-packages/statsmodels/sandbox/datarich/factormodels.pyt   __init__   s    i    c         C  s¦   | d k r |  j } n t j | ƒ } t | d | d d ƒ\ } } } } | |  _ | r~ t j | d t ƒ|  _	 d |  _
 n | |  _	 d |  _
 | |  _ | |  _ d S(   sÜ   get factor decomposition of exogenous variables

        This uses principal component analysis to obtain the factors. The number
        of factors kept is the maximum that will be considered in the regression.
        t   keepdimt	   normalizei   t   prependi    N(   t   NoneR   R   R   R   t   exog_reducedt   smt   add_constantt   Truet   factorst   hasconstt   evalst   evecs(   R   t   xR
   t   addconstt   xredt   factR   R   (    (    sH   lib/python2.7/site-packages/statsmodels/sandbox/datarich/factormodels.pyt   calc_factors"   s    $				c         C  sR   t  |  d ƒ s |  j ƒ  n  t j |  j |  j d  d  … d  | d … f ƒ j ƒ  S(   Nt   factors_wconsti   (   t   hasattrR   R   t   OLSR   R   t   fit(   R   t   nfact(    (    sH   lib/python2.7/site-packages/statsmodels/sandbox/datarich/factormodels.pyt   fit_fixed_nfact9   s    c      
   C  sD  t  |  d ƒ s |  j ƒ  n  |  j } | d k rH |  j j d | } n  | | d k  rk t d d ƒ ‚ n  t | d ƒ } |  j } g  } x+t	 d | | ƒ D]} |  j d d … d | … f } t
 j | | ƒ j ƒ  }	 | s…| d k rt t | ƒ ƒ } n  d }
 x„ | D]p \ } } t
 j | | | | d d … f ƒ j ƒ  } |
 | | | j j | j | | d d … f ƒ d 7}
 qWn	 t j }
 | j | |	 j |	 j |	 j |
 g ƒ q Wt j | ƒ |  _ } t j t j | d d … d d	 … f d
 ƒ t j | d d … d	 f d
 ƒ t j | d d … d f d
 ƒ f |  _ d S(   sW  estimate the model and selection criteria for up to maxfact factors

        The selection criteria that are calculated are AIC, BIC, and R2_adj. and
        additionally cross-validation prediction error sum of squares if `skip_crossval`
        is false. Cross-validation is not used by default because it can be
        time consuming to calculate.

        By default the cross-validation method is Leave-one-out on the full dataset.
        A different cross-validation sample can be specified as an argument to
        cv_iter.

        Results are attached in `results_find_nfact`



        R   i   s9   nothing to do, number of factors (incl. constant) should s   be at least 1i
   Ng        g       @i   i    iÿÿÿÿ(   R   R   R   R   R   t   shapet
   ValueErrort   minR   t   rangeR   R   R   R   t   lent   modelt   predictt   paramsR   t   nant   appendt   aict   bict   rsquared_adjt   arrayt   results_find_nfactt   r_t   argmint   argmaxt
   best_nfact(   R   t   maxfactt   skip_crossvalt   cv_iterR   t   y0t   resultst   kR   t   rest   prederr2t   inidxt   outidxt   res_l1o(    (    sH   lib/python2.7/site-packages/statsmodels/sandbox/datarich/factormodels.pyt   fit_find_nfact>   s6    			,
5	)Jc         C  s  t  |  d ƒ s |  j ƒ  n  |  j } d } | d d 7} | d d d t |  j ƒ 7} d d	 l m } d
 j d ƒ } d g d g d } t d | ƒ } | | | d d | ƒ} | d d 7} | d d 7} | d | j
 ƒ  7} | d d 7} | d d 7} | d d 7} | d d 7} | S(   sÇ   provides a summary for the selection of the number of factors

        Returns
        -------
        sumstr : string
            summary of the results for selecting the number of factors

        R/   t    s   
s+   Best result for k, by AIC, BIC, R2_adj, L1Ot    i   s   %5d %4d %6d %5diÿÿÿÿ(   t   SimpleTables   k, AIC, BIC, R2_adj, L1Os   , s   %6ds   %10.3fi   t	   data_fmtst   txt_fmts!   PCA regression on simulated data,s*   DGP: 2 factors and 4 explanatory variabless(   Notes: k is number of components of PCA,s%          constant is added additionallys,          k=0 means regression on constant onlys>          L1O: sum of squared prediction errors for leave-one-outs                      N(   R   R?   R/   t   tupleR3   t   statsmodels.iolib.tableRB   t   splitt   dictR   t   __str__(   R   R8   t   sumstrRB   t   headerst	   numformatt   txt_fmt1t   tabl(    (    sH   lib/python2.7/site-packages/statsmodels/sandbox/datarich/factormodels.pyt   summary_find_nfact€   s&    		N(
   t   __name__t
   __module__t   __doc__R	   R   R   R   R    R?   RO   (    (    (    sH   lib/python2.7/site-packages/statsmodels/sandbox/datarich/factormodels.pyR      s   		Bt   __main__i   iô  t   sizei   i    i   g      ð?g        g      @g       @gš™™™™™¹?g      ø?s   with cross validation - slowerR4   R5   R6   ('   RR   t
   __future__R    t   numpyR   t   statsmodels.apit   apiR   t   statsmodels.sandbox.toolsR   t#   statsmodels.sandbox.tools.cross_valR   t   objectR   RP   t   examplest   nobst   c_t   randomt   normalt   onest   f0t   repeatt   eyet   aranget   Tt   f2xcoefR.   t   dott   x0R!   t   ytrueR7   t   modt   printRO   R?   R   t   False(    (    (    sH   lib/python2.7/site-packages/statsmodels/sandbox/datarich/factormodels.pyt   <module>   s6   “	7D  
