
    !gV                         d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	 d dl
Z	 ddZddZddZd	 Zd
 Z	 	 ddZddZi fdZ G d d      Z G d de      Zy)    )RegularizedResults)_calc_nodewise_row_calc_nodewise_weight_calc_approx_inv_cov)LikelihoodModelResults)OLSNc                 T    |t        d       | j                  di |j                  S )a  estimates the regularized fitted parameters.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    pnum : scalar
        Index of current partition
    partitions : scalar
        Total number of partitions
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit_regularized

    Returns
    -------
    An array of the parameters for the regularized fit
    zD_est_regularized_naive currently requires that fit_kwds not be None. )
ValueErrorfit_regularizedparamsmodpnum
partitionsfit_kwdss       d/var/www/dash_apps/app1/venv/lib/python3.12/site-packages/statsmodels/base/distributed_estimation.py_est_regularized_naiver   K   s<    &  ? @ 	@ 3**111    c                 T    |t        d       | j                  di |j                  S )a  estimates the unregularized fitted parameters.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    pnum : scalar
        Index of current partition
    partitions : scalar
        Total number of partitions
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit

    Returns
    -------
    An array of the parameters for the fit
    zF_est_unregularized_naive currently requires that fit_kwds not be None.r
   )r   fitr   r   s       r   _est_unregularized_naiver   e   s:    &  ? @ 	@ 377X%%%r   c                     t        | d         }t        |       }t        j                  |      }| D ]  }||z  }	 ||z  }d|t        j                  |      |k  <   |S )a   joins the results from each run of _est_<type>_naive
    and returns the mean estimate of the coefficients

    Parameters
    ----------
    params_l : list
        A list of arrays of coefficients.
    threshold : scalar
        The threshold at which the coefficients will be cut.
    r   )lennpzerosabs)params_l	thresholdpr   	params_mnr   s         r   _join_naiver"      si     	HQKAXJI V	I/0IbffY)+,r   c                 j     | j                   t        j                  |      fi | }||d|z
  z  z  }|S )a  calculates the log-likelihood gradient for the debiasing

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    params : array_like
        The estimated coefficients for the current partition.
    alpha : scalar or array_like
        The penalty weight.  If a scalar, the same penalty weight
        applies to all variables in the model.  If a vector, it
        must have the same length as `params`, and contains a
        penalty weight for each coefficient.
    L1_wt : scalar
        The fraction of the penalty given to the L1 penalty term.
        Must be between 0 and 1 (inclusive).  If 0, the fit is
        a ridge fit, if 1 it is a lasso fit.
    score_kwds : dict-like or None
        Keyword arguments for the score function.

    Returns
    -------
    An array-like object of the same dimension as params

    Notes
    -----
    In general:

    gradient l_k(params)

    where k corresponds to the index of the partition

    For OLS:

    X^T(y - X^T params)
       )scorer   asarray)r   r   alphaL1_wt
score_kwdsgrads         r   
_calc_gradr+      s>    L CIIbjj(7J77DEQYDKr   c                     t        j                   | j                  t        j                  |      fi |      }|dddf   | j                  z  S )a  calculates the weighted design matrix necessary to generate
    the approximate inverse covariance matrix

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    params : array_like
        The estimated coefficients for the current partition.
    hess_kwds : dict-like or None
        Keyword arguments for the hessian function.

    Returns
    -------
    An array-like object, updated design matrix, same dimension
    as mod.exog
    N)r   sqrthessian_factorr&   exog)r   r   	hess_kwdsrhesss       r   _calc_wdesign_matr2      sE    & GG&C&&rzz&'9GYGHED>CHH$$r   c                    |i n|}|i n|}|t        d      |d   }d|v r|d   }nd}| j                  j                  \  }}	t        t	        j
                  d|	z  |z              }
 | j                  di |j                  }t        | ||||      |z  }t        | ||      }g }g }t        ||
z  t        |dz   |
z  |	            D ]?  }t        |||      }|j                  |       t        ||||      }|j                  |       A ||||fS )a  estimates the regularized fitted parameters, is the default
    estimation_method for class DistributedModel.

    Parameters
    ----------
    mod : statsmodels model class instance
        The model for the current partition.
    mnum : scalar
        Index of current partition.
    partitions : scalar
        Total number of partitions.
    fit_kwds : dict-like or None
        Keyword arguments to be given to fit_regularized
    score_kwds : dict-like or None
        Keyword arguments for the score function.
    hess_kwds : dict-like or None
        Keyword arguments for the Hessian function.

    Returns
    -------
    A tuple of parameters for regularized fit
        An array-like object of the fitted parameters, params
        An array-like object for the gradient
        A list of array like objects for nodewise_row
        A list of array like objects for nodewise_weight
    zG_est_regularized_debiased currently requires that fit_kwds not be None.r'   r(   r$   g      ?r
   )r   r/   shapeintr   ceilr   r   r+   r2   rangeminr   appendr   )r   mnumr   r   r)   r0   r'   r(   nobsr    p_partr   r*   wexognodewise_row_lnodewise_weight_lidxnodewise_rownodewise_weights                      r   _est_regularized_debiasedrC      sJ   : ")zJ'YI ? @ 	@ !(!hhnnGD!"q&J./0F S  ,8,33Fc65%<tCDc695ENTF]CV(;Q$?@ 2)%e<l+/|S057  12 4):::r   c                    t        | d   d         }t        |       }t        j                  |      }t        j                  |      }g }g }| D ]:  }||d   z  }||d   z  }|j                  |d          |j                  |d          < t        j                  |      }t        j                  |      }||z  }|d|z  z  }t        ||      }	||	j                  |      z   }
d|
t        j                  |
      |k  <   |
S )a  joins the results from each run of _est_regularized_debiased
    and returns the debiased estimate of the coefficients

    Parameters
    ----------
    results_l : list
        A list of tuples each one containing the params, grad,
        nodewise_row and nodewise_weight values for each partition.
    threshold : scalar
        The threshold at which the coefficients will be cut.
    r   r$         g      )r   r   r   extendarrayr   dotr   )	results_lr   r    r   r!   grad_mnr>   r?   rapprox_inv_covdebiased_paramss              r   _join_debiasedrO     s    	IaLOAYJIhhqkGN 'QqT	1Q4ad#  1&' XXn-N!23IsZG).:KLN."4"4W"==O;<OBFF?+i78r   c                     | j                   j                         }|j                  |        | j                  ||fi |} | j                  ||| j
                  fd|i| j                  }|S )a  handles the model fitting for each machine. NOTE: this
    is primarily handled outside of DistributedModel because
    joblib cannot handle class methods.

    Parameters
    ----------
    self : DistributedModel class instance
        An instance of DistributedModel.
    pnum : scalar
        index of current partition.
    endog : array_like
        endogenous data for current partition.
    exog : array_like
        exogenous data for current partition.
    fit_kwds : dict-like
        Keywords needed for the model fitting.
    init_kwds_e : dict-like
        Additional init_kwds to add for each partition.

    Returns
    -------
    estimation_method result.  For the default,
    _est_regularized_debiased, a tuple.
    r   )	init_kwdscopyupdatemodel_classestimation_methodr   estimation_kwds)	selfr   endogr/   r   init_kwds_etemp_init_kwdsmodelresultss	            r   _helper_fit_partitionr]   H  sv    6 ^^((*N+&DUD;N;E$d$$UD$// =.6='+';';=G Nr   c                   @    e Zd ZdZ	 	 	 	 ddZ	 	 ddZ	 d	dZ	 d	dZy)
DistributedModela  
    Distributed model class

    Parameters
    ----------
    partitions : scalar
        The number of partitions that the data will be split into.
    model_class : statsmodels model class
        The model class which will be used for estimation. If None
        this defaults to OLS.
    init_kwds : dict-like or None
        Keywords needed for initializing the model, in addition to
        endog and exog.
    init_kwds_generator : generator or None
        Additional keyword generator that produces model init_kwds
        that may vary based on data partition.  The current usecase
        is for WLS and GLS
    estimation_method : function or None
        The method that performs the estimation for each partition.
        If None this defaults to _est_regularized_debiased.
    estimation_kwds : dict-like or None
        Keywords to be passed to estimation_method.
    join_method : function or None
        The method used to recombine the results from each partition.
        If None this defaults to _join_debiased.
    join_kwds : dict-like or None
        Keywords to be passed to join_method.
    results_class : results class or None
        The class of results that should be returned.  If None this
        defaults to RegularizedResults.
    results_kwds : dict-like or None
        Keywords to be passed to results class.

    Attributes
    ----------
    partitions : scalar
        See Parameters.
    model_class : statsmodels model class
        See Parameters.
    init_kwds : dict-like
        See Parameters.
    init_kwds_generator : generator or None
        See Parameters.
    estimation_method : function
        See Parameters.
    estimation_kwds : dict-like
        See Parameters.
    join_method : function
        See Parameters.
    join_kwds : dict-like
        See Parameters.
    results_class : results class
        See Parameters.
    results_kwds : dict-like
        See Parameters.

    Notes
    -----

    Examples
    --------
    Nc
                 B   || _         |t        | _        n|| _        |i | _        n|| _        |t        | _        n|| _        |i | _        n|| _        |t        | _        n|| _        |i | _	        n|| _	        |t        | _        n|| _        |	i | _        y |	| _        y N)r   r   rT   rQ   rC   rU   rV   rO   join_method	join_kwdsr   results_classresults_kwds)
rW   r   rT   rQ   rU   rV   rb   rc   rd   re   s
             r   __init__zDistributedModel.__init__  s    
 %"D*DDN&DN$%>D"%6D""#%D #2D -D*DDN&DN !3D!.D "D ,Dr   c                 B   |i }|dk(  r| j                  |||      }n(|dk(  r| j                  ||||      }nt        d|z         | j                  |fi | j                  } | j
                  dgdgfi | j                  } | j                  ||fi | j                  S )ae  Performs the distributed estimation using the corresponding
        DistributedModel

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like or None
            Keywords needed for the model fitting.
        parallel_method : str
            type of distributed estimation to be used, currently
            "sequential", "joblib" and "dask" are supported.
        parallel_backend : None or joblib parallel_backend object
            used to allow support for more complicated backends,
            ex: dask.distributed
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        
sequentialjoblibz.parallel_method: %s is currently not supportedr   )	fit_sequential
fit_joblibr   rb   rc   rT   rQ   rd   re   )	rW   data_generatorr   parallel_methodparallel_backendinit_kwds_generatorrJ   r   res_mods	            r   r   zDistributedModel.fit  s    < Hl*++NH,?AI ((8(;=I
 M./ 0 0 "!!)>t~~> #$""A3>t~~>!t!!'6GT5F5FGGr   c           	      
   g }|8t        |      D ](  \  }\  }}t        | ||||      }|j                  |       * |S t        t        ||            }	|	D ],  \  }\  \  }}}
t        | |||||
      }|j                  |       . |S )a*  Sequentially performs the distributed estimation using
        the corresponding DistributedModel

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        )	enumerater]   r9   zip)rW   rl   r   ro   rJ   r   rX   r/   r\   tup_genrY   s              r   rj   zDistributedModel.fit_sequential  s    0 	&'0'@ *#mud/dE408:  )	*"   N$7!9 :G 7> *22}tk/dE408+G  )	* r   c                    
 ddl m}  |t         j                        \  }
}| | |
 fdt	        |      D              }|S |+|)|5   |
 fdt	        |      D              }ddd       |S |,|*t	        t        ||            }	 |
 fd|	D              }|S |7|5t	        t        ||            }	|5   |
 fd|	D              }ddd       |S S # 1 sw Y   S xY w# 1 sw Y   S xY w)a  Performs the distributed estimation in parallel using joblib

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        parallel_backend : None or joblib parallel_backend object
            used to allow support for more complicated backends,
            ex: dask.distributed
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current usecase
            is for WLS and GLS

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p length array.
        r   )parallel_funcNc              3   B   K   | ]  \  }\  }} |||        y wra   r
   .0r   rX   r/   fr   rW   s       r   	<genexpr>z.DistributedModel.fit_joblib.<locals>.<genexpr>c  s.      : 3mud dD%x@ :   c              3   B   K   | ]  \  }\  }} |||        y wra   r
   rx   s       r   r{   z.DistributedModel.fit_joblib.<locals>.<genexpr>i  s.       >$7D-5$ !"$eT8 D  >r|   c           	   3   J   K   | ]  \  }\  \  }}} ||||        y wra   r
   ry   r   rX   r/   rQ   rz   r   rW   s        r   r{   z.DistributedModel.fit_joblib.<locals>.<genexpr>o  s5      ( @&@}ti dD%xK (    #c           	   3   J   K   | ]  \  }\  \  }}} ||||        y wra   r
   r   s        r   r{   z.DistributedModel.fit_joblib.<locals>.<genexpr>v  s5       ,$DD*D=E4) !"$eT8Y O  ,r   )statsmodels.tools.parallelrv   r]   r   rr   rs   )rW   rl   r   rn   ro   rv   parn_jobsrJ   rt   rz   s   ` `       @r   rk   zDistributedModel.fit_joblibD  s:   4 	=&'<dooNQ#(;(C :(8: :I. ' ).A.I! >  >#,^#< > >	>$  %*=*IN4G HIG (&( (I  ).A.MN4G HIG! ,  ,#* , ,	,
 y%>$ ,
 s   C=C*C'*C4)NNNNNNNN)Nrh   NNra   )__name__
__module____qualname____doc__rf   r   rj   rk   r
   r   r   r_   r_   m  sD    =G~ 0437CG26--^ BN7;7Ht ,0-` (,6r   r_   c                   (     e Zd ZdZ fdZd Z xZS )DistributedResultsaT  
    Class to contain model results

    Parameters
    ----------
    model : class instance
        Class instance for model used for distributed data,
        this particular instance uses fake data and is really
        only to allow use of methods like predict.
    params : ndarray
        Parameter estimates from the fit model.
    c                 &    t         |   ||       y ra   )superrf   )rW   r[   r   	__class__s      r   rf   zDistributedResults.__init__  s    'r   c                 X     | j                   j                  | j                  |g|i |S )a  Calls self.model.predict for the provided exog.  See
        Results.predict.

        Parameters
        ----------
        exog : array_like NOT optional
            The values for which we want to predict, unlike standard
            predict this is NOT optional since the data in self.model
            is fake.
        *args :
            Some models can take additional arguments. See the
            predict method of the model for the details.
        **kwargs :
            Some models can take additional keywords arguments. See the
            predict method of the model for the details.

        Returns
        -------
            prediction : ndarray, pandas.Series or pandas.DataFrame
            See self.model.predict
        )r[   predictr   )rW   r/   argskwargss       r   r   zDistributedResults.predict  s+    . "tzz!!$++tEdEfEEr   )r   r   r   r   rf   r   __classcell__)r   s   @r   r   r   }  s    (Fr   r   ra   )r   )NNN)statsmodels.base.elastic_netr   (statsmodels.stats.regularized_covariancer   r   r   statsmodels.base.modelr   #statsmodels.regression.linear_modelr   numpyr   r   r   r"   r+   r2   rC   rO   r]   r_   r   r
   r   r   <module>r      sx    ;0 0 9 3 @F24&42(V%. ?C9=>;B*\ ')"JM M`(F/ (Fr   