ó
áp7]c           @   sè   d  d l  m Z d  d l m Z m Z m Z d  d l m Z d  d l m	 Z	 d  d l
 Z e d „ Z e d „ Z d d	 „ Z d
 „  Z d „  Z e e e d „ Z d d „ Z i  d „ Z d e f d „  ƒ  YZ d e f d „  ƒ  YZ d S(   iÿÿÿÿ(   t   RegularizedResults(   t   _calc_nodewise_rowt   _calc_nodewise_weightt   _calc_approx_inv_cov(   t   LikelihoodModelResults(   t   OLSNc         C   s/   | d k r t d d ƒ ‚ n  |  j |   j S(   sÁ  estimates the regularized fitted parameters.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    pnum : scalar
        Index of current partition
    partitions : scalar
        Total number of partitions
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit_regularized

    Returns
    -------
    An array of the paramters for the regularized fit
    s!   _est_regularized_naive currently s#   requires that fit_kwds not be None.N(   t   Nonet
   ValueErrort   fit_regularizedt   params(   t   modt   pnumt
   partitionst   fit_kwds(    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyt   _est_regularized_naiveK   s    c         C   s/   | d k r t d d ƒ ‚ n  |  j |   j S(   s¬  estimates the unregularized fitted parameters.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    pnum : scalar
        Index of current partition
    partitions : scalar
        Total number of partitions
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit

    Returns
    -------
    An array of the parameters for the fit
    s#   _est_unregularized_naive currently s#   requires that fit_kwds not be None.N(   R   R   t   fitR	   (   R
   R   R   R   (    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyt   _est_unregularized_naivee   s    i    c         C   sm   t  |  d ƒ } t  |  ƒ } t j | ƒ } x |  D] } | | 7} q2 W| | :} d | t j | ƒ | k  <| S(   s   joins the results from each run of _est_<type>_naive
    and returns the mean estimate of the coefficients

    Parameters
    ----------
    params_l : list
        A list of arrays of coefficients.
    threshold : scalar
        The threshold at which the coefficients will be cut.
    i    (   t   lent   npt   zerost   abs(   t   params_lt	   thresholdt   pR   t	   params_mnR	   (    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyt   _join_naive   s    
c         C   s2   |  j  t j | ƒ |  } | | d | 7} | S(   s  calculates the log-likelihood gradient for the debiasing

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    params : array-like
        The estimated coefficients for the current partition.
    alpha : scalar or array-like
        The penalty weight.  If a scalar, the same penalty weight
        applies to all variables in the model.  If a vector, it
        must have the same length as `params`, and contains a
        penalty weight for each coefficient.
    L1_wt : scalar
        The fraction of the penalty given to the L1 penalty term.
        Must be between 0 and 1 (inclusive).  If 0, the fit is
        a ridge fit, if 1 it is a lasso fit.
    score_kwds : dict-like or None
        Keyword arguments for the score function.

    Returns
    -------
    An array-like object of the same dimension as params

    Notes
    -----
    In general:

    gradient l_k(params)

    where k corresponds to the index of the partition

    For OLS:

    X^T(y - X^T params)
    i   (   t   scoreR   t   asarray(   R
   R	   t   alphat   L1_wtt
   score_kwdst   grad(    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyt
   _calc_grad˜   s    &c         C   s?   t  j |  j t  j | ƒ |  ƒ } | d d … d f |  j S(   sú  calculates the weighted design matrix necessary to generate
    the approximate inverse covariance matrix

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    params : array-like
        The estimated coefficients for the current partition.
    hess_kwds : dict-like or None
        Keyword arguments for the hessian function.

    Returns
    -------
    An array-like object, updated design matrix, same dimension
    as mod.exog
    N(   R   t   sqrtt   hessian_factorR   R   t   exog(   R
   R	   t	   hess_kwdst   rhess(    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyt   _calc_wdesign_matÃ   s    $c         C   ss  | d k r i  n | } | d k r* i  n | } | d k rO t d d ƒ ‚ n
 | d } d | k rr | d } n d } |  j j \ } }	 t t j d |	 | ƒ ƒ }
 |  j |   j } t	 |  | | | | ƒ | } t
 |  | | ƒ } g  } g  } xm t | |
 t | d |
 |	 ƒ ƒ D]G } t | | | ƒ } | j | ƒ t | | | | ƒ } | j | ƒ qW| | | | f S(   s‚  estimates the regularized fitted parameters, is the default
    estimation_method for class DistributedModel.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    mnum : scalar
        Index of current partition.
    partitions : scalar
        Total number of partitions.
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit_regularized
    score_kwds : dict-like or None
        Keyword arguments for the score function.
    hess_kwds : dict-like or None
        Keyword arguments for the Hessian function.

    Returns
    -------
    A tuple of parameters for regularized fit
        An array-like object of the fitted parameters, params
        An array-like object for the gradient
        A list of array like objects for nodewise_row
        A list of array like objects for nodewise_weight
    s$   _est_regularized_debiased currently s#   requires that fit_kwds not be None.R   R   i   g      ð?N(   R   R   R#   t   shapet   intR   t   ceilR   R	   R    R&   t   ranget   minR   t   appendR   (   R
   t   mnumR   R   R   R$   R   R   t   nobsR   t   p_partR	   R   t   wexogt   nodewise_row_lt   nodewise_weight_lt   idxt   nodewise_rowt   nodewise_weight(    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyt   _est_regularized_debiasedÚ   s.    
+	c         C   s  t  |  d d ƒ } t  |  ƒ } t j | ƒ } t j | ƒ } g  } g  } xL |  D]D } | | d 7} | | d 7} | j | d ƒ | j | d ƒ qQ Wt j | ƒ } t j | ƒ } | | :} | d | 9} t | | ƒ }	 | |	 j | ƒ }
 d |
 t j |
 ƒ | k  <|
 S(   s†  joins the results from each run of _est_regularized_debiased
    and returns the debiased estimate of the coefficients

    Parameters
    ----------
    results_l : list
        A list of tuples each one containing the params, grad,
        nodewise_row and nodewise_weight values for each partition.
    threshold : scalar
        The threshold at which the coefficients will be cut.
    i    i   i   i   g      ð¿(   R   R   R   t   extendt   arrayR   t   dotR   (   t	   results_lR   R   R   R   t   grad_mnR1   R2   t   rt   approx_inv_covt   debiased_params(    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyt   _join_debiased  s&    
c   	      C   sY   |  j  j ƒ  } | j | ƒ |  j | | |  } |  j | | |  j d | |  j } | S(   sß  handles the model fitting for each machine. NOTE: this
    is primarily handled outside of DistributedModel because
    joblib can't handle class methods.

    Parameters
    ----------
    self : DistributedModel class instance
        An instance of DistributedModel.
    pnum : scalar
        index of current partition.
    endog : array-like
        endogenous data for current partition.
    exog : array-like
        exogenous data for current partition.
    fit_kwds : dict-like
        Keywords needed for the model fitting.
    init_kwds_e : dict-like
        Additional init_kwds to add for each partition.

    Returns
    -------
    estimation_method result.  For the default,
    _est_regularized_debiased, a tuple.
    R   (   t	   init_kwdst   copyt   updatet   model_classt   estimation_methodR   t   estimation_kwds(	   t   selfR   t   endogR#   R   t   init_kwds_et   temp_init_kwdst   modelt   results(    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyt   _helper_fit_partitionH  s    t   DistributedModelc        	   B   s\   e  Z d  Z d d d d d d d d d „ Z d d d d d „ Z d d „ Z d d „ Z RS(   sÇ  
    Distributed model class

    Parameters
    ----------
    partitions : scalar
        The number of partitions that the data will be split into.
    model_class : statsmodels model class
        The model class which will be used for estimation. If None
        this defaults to OLS.
    init_kwds : dict-like or None
        Keywords needed for initializing the model, in addition to
        endog and exog.
    init_kwds_generator : generator or None
        Additional keyword generator that produces model init_kwds
        that may vary based on data partition.  The current usecase
        is for WLS and GLS
    estimation_method : function or None
        The method that performs the estimation for each partition.
        If None this defaults to _est_regularized_debiased.
    estimation_kwds : dict-like or None
        Keywords to be passed to estimation_method.
    join_method : function or None
        The method used to recombine the results from each partition.
        If None this defaults to _join_debiased.
    join_kwds : dict-like or None
        Keywords to be passed to join_method.
    results_class : results class or None
        The class of results that should be returned.  If None this
        defaults to RegularizedResults.
    results_kwds : dict-like or None
        Keywords to be passed to results class.

    Attributes
    ----------
    partitions : scalar
        See Parameters.
    model_class : statsmodels model class
        See Parameters.
    init_kwds : dict-like
        See Parameters.
    init_kwds_generator : generator or None
        See Parameters.
    estimation_method : function
        See Parameters.
    estimation_kwds : dict-like
        See Parameters.
    join_method : function
        See Parameters.
    join_kwds : dict-like
        See Parameters.
    results_class : results class
        See Parameters.
    results_kwds : dict-like
        See Parameters.

    Examples
    --------

    Notes
    -----
    c
   
      C   s  | |  _  | d  k r! t |  _ n	 | |  _ | d  k rB i  |  _ n	 | |  _ | d  k rc t |  _ n	 | |  _ | d  k r„ i  |  _ n	 | |  _ | d  k r¥ t |  _	 n	 | |  _	 | d  k rÆ i  |  _
 n	 | |  _
 | d  k rç t |  _ n	 | |  _ |	 d  k ri  |  _ n	 |	 |  _ d  S(   N(   R   R   R   RC   R@   R6   RD   RE   R?   t   join_methodt	   join_kwdsR    t   results_classt   results_kwds(
   RF   R   RC   R@   RD   RE   RN   RO   RP   RQ   (    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyt   __init__­  s2    								t
   sequentialc   	      C   s¹   | d k r i  } n  | d k r9 |  j | | | ƒ } n7 | d k r` |  j | | | | ƒ } n t d | ƒ ‚ |  j | |  j  } |  j d g d g |  j  } |  j | | |  j	  S(   se  Performs the distributed estimation using the corresponding
        DistributedModel

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like or None
            Keywords needed for the model fitting.
        parallel_method : str
            type of distributed estimation to be used, currently
            "sequential", "joblib" and "dask" are supported.
        parallel_backend : None or joblib parallel_backend object
            used to allow support for more complicated backends,
            ex: dask.distributed
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        RS   t   joblibs.   parallel_method: %s is currently not supportedi    N(
   R   t   fit_sequentialt
   fit_joblibR   RN   RO   RC   R@   RP   RQ   (	   RF   t   data_generatorR   t   parallel_methodt   parallel_backendt   init_kwds_generatorR:   R	   t   res_mod(    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyR   Ü  s    	
c         C   sÁ   g  } | d k r] x¨ t | ƒ D]7 \ } \ } } t |  | | | | ƒ } | j | ƒ q Wn` t t | | ƒ ƒ }	 xH |	 D]@ \ } \ \ } } }
 t |  | | | | |
 ƒ } | j | ƒ qy W| S(   s*  Sequentially performs the distributed estimation using
        the corresponding DistributedModel

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        N(   R   t	   enumerateRL   R,   t   zip(   RF   RW   R   RZ   R:   R   RG   R#   RK   t   tup_genRH   (    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyRU     s    		c   
         ss  d d l  m } | t ˆ j ƒ \ } ‰  } | d k rn | d k rn | ‡  ‡ ‡ f d †  t | ƒ Dƒ ƒ } n| d k	 r¾ | d k r¾ | - | ‡  ‡ ‡ f d †  t | ƒ Dƒ ƒ } Wd QXn± | d k r| d k	 rt t | | ƒ ƒ }	 | ‡  ‡ ‡ f d †  |	 Dƒ ƒ } n_ | d k	 ro| d k	 rot t | | ƒ ƒ }	 | ' | ‡  ‡ ‡ f d †  |	 Dƒ ƒ } Wd QXn  | S(   s©  Performs the distributed estimation in parallel using joblib

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        parallel_backend : None or joblib parallel_backend object
            used to allow support for more complicated backends,
            ex: dask.distributed
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        iÿÿÿÿ(   t   parallel_funcc         3   s3   |  ]) \ } \ } } ˆ  ˆ | | | ˆ ƒ Vq d  S(   N(    (   t   .0R   RG   R#   (   t   fR   RF   (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pys	   <genexpr>c  s   c         3   s3   |  ]) \ } \ } } ˆ  ˆ | | | ˆ ƒ Vq d  S(   N(    (   R`   R   RG   R#   (   Ra   R   RF   (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pys	   <genexpr>i  s   Nc         3   s<   |  ]2 \ } \ \ } } } ˆ  ˆ | | | ˆ | ƒ Vq d  S(   N(    (   R`   R   RG   R#   R@   (   Ra   R   RF   (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pys	   <genexpr>o  s   c         3   s<   |  ]2 \ } \ \ } } } ˆ  ˆ | | | ˆ | ƒ Vq d  S(   N(    (   R`   R   RG   R#   R@   (   Ra   R   RF   (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pys	   <genexpr>v  s   (   t   statsmodels.tools.parallelR_   RL   R   R   R\   R]   (
   RF   RW   R   RY   RZ   R_   t   part   n_jobsR:   R^   (    (   Ra   R   RF   sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyRV   D  s&    N(   t   __name__t
   __module__t   __doc__R   RR   R   RU   RV   (    (    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyRM   m  s   >	,9/t   DistributedResultsc           B   s    e  Z d  Z d „  Z d „  Z RS(   sS  
    Class to contain model results

    Parameters
    ----------
    model : class instance
        class instance for model used for distributed data,
        this particular instance uses fake data and is really
        only to allow use of methods like predict.
    params : array
        parameter estimates from the fit model.

    c         C   s   t  t |  ƒ j | | ƒ d  S(   N(   t   superRh   RR   (   RF   RJ   R	   (    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyRR     s    c         O   s   |  j  j |  j | | | Ž S(   sh  Calls self.model.predict for the provided exog.  See
        Results.predict.

        Parameters
        ----------
        exog : array-like NOT optional
            The values for which we want to predict, unlike standard
            predict this is NOT optional since the data in self.model
            is fake.
        args, kwargs :
            Some models can take additional arguments or keywords, see the
            predict method of the model for the details.

        Returns
        -------
            prediction : ndarray, pandas.Series or pandas.DataFrame
            See self.model.predict

        (   RJ   t   predictR	   (   RF   R#   t   argst   kwargs(    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyRj     s    (   Re   Rf   Rg   RR   Rj   (    (    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyRh   }  s   	(   t   statsmodels.base.elastic_netR    t(   statsmodels.stats.regularized_covarianceR   R   R   t   statsmodels.base.modelR   t#   statsmodels.regression.linear_modelR   t   numpyR   R   R   R   R   R    R&   R6   R?   RL   t   objectRM   Rh   (    (    (    sF   lib/python2.7/site-packages/statsmodels/base/distributed_estimation.pyt   <module>   s    E	+	@.$ÿ 