
    !g~                         d Z ddlZddlmZ ddlmZ ddlmZ ddl	Z	ddl
mZ ddlmZmZ ddZddZdd	Z	 	 dd
Z	 	 ddZ	 	 	 ddZd Z G d d      Z	 	 ddZd ZddZ G d d      Z G d de      Zd dZy)!zM

Created on Fri Aug 17 13:10:52 2012

Author: Josef Perktold
License: BSD-3
    N)svds)	fminbound)Bunch)IterationLimitWarningiteration_limit_docc                     t         j                  j                  |       \  }}t        j                  ||k        }t        j                  |t        j
                  ||      z  |j                        }||fS N)nplinalgeighanydotmaximumT)xvalueevalsevecsclippedx_news         `/var/www/dash_apps/app1/venv/lib/python3.12/site-packages/statsmodels/stats/correlation_tools.py
clip_evalsr      sX    99>>!$LE5ffUU]#GFF52::eU33UWW=E'>    c                    | j                   d   }|| j                   d   k7  rt        d      t        j                  | j                         }| j	                         }t        j
                  |      }t        t        t        |       |z              D ]:  }||z
  }t        ||      \  }	}
|
s|	} |S |	|z
  }|	j	                         }d|||f<   < t        j                  t        t               |S )a  
    Find the nearest correlation matrix that is positive semi-definite.

    The function iteratively adjust the correlation matrix by clipping the
    eigenvalues of a difference matrix. The diagonal elements are set to one.

    Parameters
    ----------
    corr : ndarray, (k, k)
        initial correlation matrix
    threshold : float
        clipping threshold for smallest eigenvalue, see Notes
    n_fact : int or float
        factor to determine the maximum number of iterations. The maximum
        number of iterations is the integer part of the number of columns in
        the correlation matrix times n_fact.

    Returns
    -------
    corr_new : ndarray, (optional)
        corrected correlation matrix

    Notes
    -----
    The smallest eigenvalue of the corrected correlation matrix is
    approximately equal to the ``threshold``.
    If the threshold=0, then the smallest eigenvalue of the correlation matrix
    might be negative, but zero within a numerical error, for example in the
    range of -1e-16.

    Assumes input correlation matrix is symmetric.

    Stops after the first step if correlation matrix is already positive
    semi-definite or positive definite, so that smallest eigenvalue is above
    threshold. In this case, the returned array is not the original, but
    is equal to it within numerical precision.

    See Also
    --------
    corr_clipped
    cov_nearest

    r      zmatrix is not squarer   )shape
ValueErrorr
   zeroscopyarangerangeintlenr   warningswarnr   r   )corr	thresholdn_factk_varsdiffr   diag_idxiix_adjx_psdr   s              r   corr_nearestr0      s    X ZZ]FA/0088DJJDIIKEyy HCD	F*+, 
B#E;wE L u}

$%h !
B 	)+@ALr   c                     t        | |      \  }}|s| S t        j                  t        j                  |            }||z  |dddf   z  }|S )a  
    Find a near correlation matrix that is positive semi-definite

    This function clips the eigenvalues, replacing eigenvalues smaller than
    the threshold by the threshold. The new matrix is normalized, so that the
    diagonal elements are one.
    Compared to corr_nearest, the distance between the original correlation
    matrix and the positive definite correlation matrix is larger, however,
    it is much faster since it only computes eigenvalues once.

    Parameters
    ----------
    corr : ndarray, (k, k)
        initial correlation matrix
    threshold : float
        clipping threshold for smallest eigenvalue, see Notes

    Returns
    -------
    corr_new : ndarray, (optional)
        corrected correlation matrix


    Notes
    -----
    The smallest eigenvalue of the corrected correlation matrix is
    approximately equal to the ``threshold``. In examples, the
    smallest eigenvalue can be by a factor of 10 smaller than the threshold,
    e.g. threshold 1e-8 can result in smallest eigenvalue in the range
    between 1e-9 and 1e-8.
    If the threshold=0, then the smallest eigenvalue of the correlation matrix
    might be negative, but zero within a numerical error, for example in the
    range of -1e-16.

    Assumes input correlation matrix is symmetric. The diagonal elements of
    returned correlation matrix is set to ones.

    If the correlation matrix is already positive semi-definite given the
    threshold, then the original correlation matrix is returned.

    ``cov_clipped`` is 40 or more times faster than ``cov_nearest`` in simple
    example, but has a slightly larger approximation error.

    See Also
    --------
    corr_nearest
    cov_nearest

    r   N)r   r
   sqrtdiag)r'   r(   r   r   x_stds        r   corr_clippedr5   ^   sP    d  I6NE7 GGBGGEN#EEME!T'N*ELr   c                     ddl m}m}  || d      \  }}|dk(  rt        ||      }	nt	        |||      }	 ||	|      }|r||	|fS |S )aw  
    Find the nearest covariance matrix that is positive (semi-) definite

    This leaves the diagonal, i.e. the variance, unchanged

    Parameters
    ----------
    cov : ndarray, (k,k)
        initial covariance matrix
    method : str
        if "clipped", then the faster but less accurate ``corr_clipped`` is
        used.if "nearest", then ``corr_nearest`` is used
    threshold : float
        clipping threshold for smallest eigen value, see Notes
    n_fact : int or float
        factor to determine the maximum number of iterations in
        ``corr_nearest``. See its doc string
    return_all : bool
        if False (default), then only the covariance matrix is returned.
        If True, then correlation matrix and standard deviation are
        additionally returned.

    Returns
    -------
    cov_ : ndarray
        corrected covariance matrix
    corr_ : ndarray, (optional)
        corrected correlation matrix
    std_ : ndarray, (optional)
        standard deviation


    Notes
    -----
    This converts the covariance matrix to a correlation matrix. Then, finds
    the nearest correlation matrix that is positive semidefinite and converts
    it back to a covariance matrix using the initial standard deviation.

    The smallest eigenvalue of the intermediate correlation matrix is
    approximately equal to the ``threshold``.
    If the threshold=0, then the smallest eigenvalue of the correlation matrix
    might be negative, but zero within a numerical error, for example in the
    range of -1e-16.

    Assumes input covariance matrix is symmetric.

    See Also
    --------
    corr_nearest
    corr_clipped
    r   )cov2corrcorr2covT)
return_stdr   )r(   )r(   r)   ) statsmodels.stats.moment_helpersr7   r8   r5   r0   )
covmethodr(   r)   
return_allr7   r8   cov_std_corr_s
             r   cov_nearestrA      s\    l D#$/JD$TY7TYvFE4 DUD  r   c
                 <   d}
 | |      }t        || d       }t        |	      D ]u  } | ||
|z  z         } ||      }||z  j                         }||||
z  |z  z   k  r|
||
|z  z   ||fc S d|
dz  z  |z  ||z
  |
|z  z
  z  }||k  r|||
z  k  r|}
n|
dz  }
|}w y)a   
    Implements the non-monotone line search of Grippo et al. (1986),
    as described in Birgin, Martinez and Raydan (2013).

    Parameters
    ----------
    obj : real-valued function
        The objective function, to be minimized
    grad : vector-valued function
        The gradient of the objective function
    x : array_like
        The starting point for the line search
    d : array_like
        The search direction
    obj_hist : array_like
        Objective function history (must contain at least one value)
    M : positive int
        Number of previous function points to consider (see references
        for details).
    sig1 : real
        Tuning parameter, see references for details.
    sig2 : real
        Tuning parameter, see references for details.
    gam : real
        Tuning parameter, see references for details.
    maxiter : int
        The maximum number of iterations; returns Nones if convergence
        does not occur by this point

    Returns
    -------
    alpha : real
        The step value
    x : Array_like
        The function argument at the final step
    obval : Real
        The function value at the final step
    g : Array_like
        The gradient at the final step

    Notes
    -----
    The basic idea is to take a big step in the direction of the
    gradient, even if the function value is not decreased (but there
    is a maximum allowed increase in terms of the recent history of
    the iterates).

    References
    ----------
    Grippo L, Lampariello F, Lucidi S (1986). A Nonmonotone Line
    Search Technique for Newton's Method. SIAM Journal on Numerical
    Analysis, 23, 707-716.

    E. Birgin, J.M. Martinez, and M. Raydan. Spectral projected
    gradient methods: Review and perspectives. Journal of Statistical
    Software (preprint).
          ?Ng         g       @)NNNN)maxr"   sum)objgradr   dobj_histMsig1sig2gammaxiteralpha
last_obvalobj_maxiterobvalggtda1s                     r   _nmono_linesearchrX      s    x EQJ(A23- Gg AaK G1ukkmGc%im++!eAg+ua//%(]3%*"4uSy"@ABJR4:-ERKE
!$ "r   c                    t        d|z  |	      }|j                         } ||      } | |      g}t        t        |            D ]  }||z
  } ||       ||z  }t	        j
                  t	        j                  |            |k  rt        d
i d||ddc S |||z  z
  } ||       ||z  }t        | ||||||
|||
      \  }}}}|t        d
i d||ddc S |j                  |       ||z
  }||z
  }||z  j                         }|dk  r|	}n,||z  j                         }t        |t        ||z  |	            }|}|} t        d
i d||d	dS )a0  
    Implements the spectral projected gradient method for minimizing a
    differentiable function on a convex domain.

    Parameters
    ----------
    func : real valued function
        The objective function to be minimized.
    grad : real array-valued function
        The gradient of the objective function
    start : array_like
        The starting point
    project : function
        In-place projection of the argument to the domain
        of func.
    ... See notes regarding additional arguments

    Returns
    -------
    rslt : Bunch
        rslt.params is the final iterate, other fields describe
        convergence status.

    Notes
    -----
    This can be an effective heuristic algorithm for problems where no
    guaranteed algorithm for computing a global minimizer is known.

    There are a number of tuning parameters, but these generally
    should not be changed except for `maxiter` (positive integer) and
    `ctol` (small positive real).  See the Birgin et al reference for
    more information about the tuning parameters.

    Reference
    ---------
    E. Birgin, J.M. Martinez, and M. Raydan. Spectral projected
    gradient methods: Review and perspectives. Journal of Statistical
    Software (preprint).  Available at:
    http://www.ime.usp.br/~egbirgin/publications/bmr5.pdf
    
   TzConverged successfully)	Convergedparamsobjective_valuesMessage)rK   rL   rM   rN   rO   FzFailed in nmono_linesearchr   zspg_optim did not converge )minr    r"   r#   r
   rE   absr   rX   appendrF   )funcrH   startprojectrO   rK   ctolmaxiter_nmlslam_minlam_maxrL   rM   rN   lamr\   gvalrJ   itrdfrI   rP   params1fvalgval1sysysss                               r   
_spg_optimru   4  s   X bj'
"CZZ\F<DVHS\" . d]
f66"&&*$ B08'?A B B
 SX
	V '8 
'"#we = F&08'CE F F 	fDLcYY[7CA#Bgs2b5'23C].`  >&(0;= > >r   c                     t        j                  | | z  j                  d            }t        j                  |dkD        }t	        |      dkD  r| |ddfxx   ||   dddf   z  cc<   yy)z
    Project a matrix into the domain of matrices whose row-wise sums
    of squares are less than or equal to 1.

    The input matrix is modified in-place.
    r   r   N)r
   r2   rF   flatnonzeror$   )Xnmr-   s      r   _project_correlation_factorsrz     s^     
!A#1	B	Q	B
2w{	"a%BrF1d7O# r   c                   .    e Zd ZdZd Zd Zd Zd Zd Zy)FactoredPSDMatrixa  
    Representation of a positive semidefinite matrix in factored form.

    The representation is constructed based on a vector `diag` and
    rectangular matrix `root`, such that the PSD matrix represented by
    the class instance is Diag + root * root', where Diag is the
    square diagonal matrix with `diag` on its main diagonal.

    Parameters
    ----------
    diag : 1d array_like
        See above
    root : 2d array_like
        See above

    Notes
    -----
    The matrix is represented internally in the form Diag^{1/2}(I +
    factor * scales * factor')Diag^{1/2}, where `Diag` and `scales`
    are diagonal matrices, and `factor` is an orthogonal matrix.
    c                     || _         || _        |t        j                  |      d d d f   z  }t        j                  j                  |d      \  }}}|| _        |dz  | _        y )Nr   rD   )r3   rootr
   r2   r   svdfactorscales)selfr3   r~   urq   vts         r   __init__zFactoredPSDMatrix.__init__  sW    		bggdmAtG,,99==q)1bdr   c                     t        j                  | j                        t        j                  | j                  | j                  j                        z   S )zh
        Returns the PSD matrix represented by this instance as a full
        (square) matrix.
        )r
   r3   r   r~   r   r   s    r   	to_matrixzFactoredPSDMatrix.to_matrix  s3    
 wwtyy!BFF499diikk$BBBr   c                 ^   ddt        j                  d| j                  z         z  z   }|t        j                  | j                        dddf   z  }t        j                  | j
                  j                  |      }||dddf   z  }t        j                  | j
                  |      }||z  }|S )aE  
        Decorrelate the columns of `rhs`.

        Parameters
        ----------
        rhs : array_like
            A 2 dimensional array with the same number of rows as the
            PSD matrix represented by the class instance.

        Returns
        -------
        C^{-1/2} * rhs, where C is the covariance matrix represented
        by this class instance.

        Notes
        -----
        The returned matrix has the identity matrix as its row-wise
        population covariance matrix.

        This function exploits the factor structure for efficiency.
        r   N)r
   r2   r   r3   r   r   r   )r   rhsqvalrhs1s       r   decorrelatezFactoredPSDMatrix.decorrelate  s    4 ADKK000 BGGDII&q$w//vvdkkmmS)QWvvdkk4(t
r   c                 X   | j                    d| j                   z   z  }t        j                  | j                        }||dddf   z  }|dddf   t        j                  | j
                  j                  |      z  }|t        j                  | j
                  |      z   }||dddf   z  S )a  
        Solve a linear system of equations with factor-structured
        coefficients.

        Parameters
        ----------
        rhs : array_like
            A 2 dimensional array with the same number of rows as the
            PSD matrix represented by the class instance.

        Returns
        -------
        C^{-1} * rhs, where C is the covariance matrix represented
        by this class instance.

        Notes
        -----
        This function exploits the factor structure for efficiency.
        r   N)r   r
   r2   r3   r   r   r   )r   r   r   drmats        r   solvezFactoredPSDMatrix.solve  s    * |q4;;/WWTYYBq$wK1d7mbffT[[]]C88BFF4;;,,R4[  r   c                 J   t        j                  t        j                  | j                              }|t        j                  t        j                  | j                              z  }|t        j                  t        j                  dd| j                  z  z               z  }|S )za
        Returns the logarithm of the determinant of a
        factor-structured matrix.
        r   )r
   rF   logr3   r   )r   logdets     r   r   zFactoredPSDMatrix.logdet  sm     tyy)*"&&,--"&&AO 3455r   N)	__name__
__module____qualname____doc__r   r   r   r   r   r_   r   r   r|   r|     s"    ,C#J!8
r   r|   c           
      D   | j                   \  }}t        | |      \  }}	}
|t        j                  |	      z  }t        j                  |dz  j	                  d            }t        j
                  |dkD        }||ddfxx   ||   dddf   z  cc<   | j                         t              t        j                  u rt        j                  d       nrt        j                        rRj                  t        j                  j                   d                j                          j                          nt!        d      fd}fd}t#        |||t$        ||||	      }|j&                  }d|dz  j	                  d      z
  }t)        ||      }||_        |`|S )
a  
    Find the nearest correlation matrix with factor structure to a
    given square matrix.

    Parameters
    ----------
    corr : square array
        The target matrix (to which the nearest correlation matrix is
        sought).  Must be square, but need not be positive
        semidefinite.
    rank : int
        The rank of the factor structure of the solution, i.e., the
        number of linearly independent columns of X.
    ctol : positive real
        Convergence criterion.
    lam_min : float
        Tuning parameter for spectral projected gradient optimization
        (smallest allowed step in the search direction).
    lam_max : float
        Tuning parameter for spectral projected gradient optimization
        (largest allowed step in the search direction).
    maxiter : int
        Maximum number of iterations in spectral projected gradient
        optimization.

    Returns
    -------
    rslt : Bunch
        rslt.corr is a FactoredPSDMatrix defining the estimated
        correlation structure.  Other fields of `rslt` contain
        returned values from spg_optim.

    Notes
    -----
    A correlation matrix has factor structure if it can be written in
    the form I + XX' - diag(XX'), where X is n x k with linearly
    independent columns, and with each row having sum of squares at
    most equal to 1.  The approximation is made in terms of the
    Frobenius norm.

    This routine is useful when one has an approximate correlation
    matrix that is not positive semidefinite, and there is need to
    estimate the inverse, square root, or inverse square root of the
    population correlation matrix.  The factor structure allows these
    tasks to be done without constructing any n x n matrices.

    This is a non-convex problem with no known guaranteed globally
    convergent algorithm for computing the solution.  Borsdof, Higham
    and Raydan (2010) compared several methods for this problem and
    found the spectral projected gradient (SPG) method (used here) to
    perform best.

    The input matrix `corr` can be a dense numpy array or any scipy
    sparse matrix.  The latter is useful if the input matrix is
    obtained by thresholding a very large sample correlation matrix.
    If `corr` is sparse, the calculations are optimized to save
    memory, so no working matrix with more than 10^6 elements is
    constructed.

    References
    ----------
    .. [*] R Borsdof, N Higham, M Raydan (2010).  Computing a nearest
       correlation matrix with factor structure. SIAM J Matrix Anal Appl,
       31:5, 2603-2622.
       http://eprints.ma.man.ac.uk/1523/01/covered/MIMS_ep2009_87.pdf

    Examples
    --------
    Hard thresholding a correlation matrix may result in a matrix that
    is not positive semidefinite.  We can approximate a hard
    thresholded correlation matrix with a PSD matrix as follows, where
    `corr` is the input correlation matrix.

    >>> import numpy as np
    >>> from statsmodels.stats.correlation_tools import corr_nearest_factor
    >>> np.random.seed(1234)
    >>> b = 1.5 - np.random.rand(10, 1)
    >>> x = np.random.randn(100,1).dot(b.T) + np.random.randn(100,10)
    >>> corr = np.corrcoef(x.T)
    >>> corr = corr * (np.abs(corr) >= 0.3)
    >>> rslt = corr_nearest_factor(corr, 3)
    rD   r   h㈵>Nr   zMatrix type not supportedc                 J   t        j                  | t        j                  | j                  |             }t              t         j                  u r|t        j                  |       z  }n|j                  |       z  }|| | z  j                  d      d d d f   | z  z  }d|z  S )Nr      )r
   r   r   typendarrayrF   )rx   grcorr1s     r   rH   z!corr_nearest_factor.<locals>.grad  s    VVArvvacc1~&;"**$"&&""B%))A,B
qsiil1d7#a''tr   c                    t        	      t        j                  u rPt        j                  | | j                        }t        j
                  |d       |	z  }||z  j                         }|S d}d}t        || j                  d   z        }d}|| j                  d   k  rt        ||z   | j                  d         }t        j                  | ||d d f   | j                        }t        j                  |j                  d         }d||||z   f<   |t        j                  	||d d f   j                               z  }|||z  j                         z  }||z  }|| j                  d   k  r|S )Nr           g    .A)r   r
   r   r   r   fill_diagonalrF   r#   r   r`   r!   asarraytodense)
rx   rK   ro   max_wsbsirir2r   r-   r   s
            r   rc   z!corr_nearest_factor.<locals>.func  sE   ;"**$q!##AQ"JAaC99;DKDFVaggaj()BBqwwqz/"R%,FF1RVQY<-YYqwwqz* "be)RZZbfai 0 8 8 :;;1		#b qwwqz/ Kr   )rf   rh   ri   rO   )r   r   r
   r2   rF   rw   r    r   r   r   sparseissparsesetdiagr   eliminate_zerossort_indicesr   ru   rz   r\   r|   r'   )r'   rankrf   rh   ri   rO   p_r   rq   r   rx   ry   r-   rH   rc   rsltr~   r3   solnr   s                       @r   corr_nearest_factorr     sb   j ::DAq D$HAq"	BGGAJA	!Q$A	B	T		"Bb!eH2q$wH IIKEE{bjj 
"		bhhu{{1~./455. dD!%A%wID;;Da}}QDT4(DDIKr   c                   
 | j                   \  }t        | |      \  }
}t        j                  |       r{t	        j
                  |j                  | j                  |            | j                         j                         | j                  |       j                         j                         nrt	        j
                  |j                  t	        j
                  | |            t	        j                  |       t	        j                  t	        j
                  | |             
fd}t        |dd      }
|z
  }|t	        j                  |      z  }|t	        j                  t        j                        z  }	t        |	|      S )a'  
    Approximate an arbitrary square matrix with a factor-structured
    matrix of the form k*I + XX'.

    Parameters
    ----------
    cov : array_like
        The input array, must be square but need not be positive
        semidefinite
    rank : int
        The rank of the fitted factor structure

    Returns
    -------
    A FactoredPSDMatrix instance containing the fitted matrix

    Notes
    -----
    This routine is useful if one has an estimated covariance matrix
    that is not SPD, and the ultimate goal is to estimate the inverse,
    square root, or inverse square root of the true covariance
    matrix. The factor structure allows these tasks to be performed
    without constructing any n x n matrices.

    The calculations use the fact that if k is known, then X can be
    determined from the eigen-decomposition of cov - k*I, which can
    in turn be easily obtained form the eigen-decomposition of `cov`.
    Thus the problem can be reduced to a 1-dimensional search for k
    that does not require repeated eigen-decompositions.

    If the input matrix is sparse, then cov - k*I is also sparse, so
    the eigen-decomposition can be done efficiently using sparse
    routines.

    The one-dimensional search for the optimal value of k is not
    convex, so a local minimum could be obtained.

    Examples
    --------
    Hard thresholding a covariance matrix may result in a matrix that
    is not positive semidefinite.  We can approximate a hard
    thresholded covariance matrix with a PSD matrix as follows:

    >>> import numpy as np
    >>> np.random.seed(1234)
    >>> b = 1.5 - np.random.rand(10, 1)
    >>> x = np.random.randn(100,1).dot(b.T) + np.random.randn(100,10)
    >>> cov = np.cov(x)
    >>> cov = cov * (np.abs(cov) >= 0.3)
    >>> rslt = cov_nearest_factor_homog(cov, 3)
    c                    | z
  }| dz  z  z   t        j                  |dz        z   d| z  z  z
  }|d| z  t        j                  |      z  dt        j                  t        j                        |z        z  z
  z  }|S )NrD   )r
   rF   r3   )kLambda_tvLambdaQSQmtstsss      r   funz%cov_nearest_factor_homog.<locals>.fun  s}    A:!QT(NRVVHaK001Q3r69	QqS!!AbffRWWS\H-D&E$EEEr   r   g     j@dtype)r   r   r   r   r
   r   r   diagonalrF   tracer   r2   onesfloat64r|   )r;   r   nQr   r   k_opt
Lambda_optfac_optr3   r   r   r   r   r   s             @@@@@r   cov_nearest_factor_homogr     s   j 99DAqT?LAvqsffQSS#''!*%\\^!ggcl##%))+ffQSS"&&a.)XXc]hhrvvc3'(  c1c"E%J"''*%%G2771BJJ//DT7++r   c                    | j                   \  }}|dt        |      z  }| j                         } | | j                  d      dddf   z  } | j	                  dd      }t        j                  |dkD        }| |ddfxx   ||   dddf   z  cc<   t        j                  |dk        }d| |ddf<   t        t        j                  ||z              }g g g }
}	}d}||k  rt        | j                   d   ||z         }t        j                  | ||ddf   | j                        |dz
  z  }t        j                  |      }t        j                  ||k\        \  }}|j                  ||z          |	j                  |       |
j                  |||f          ||z  }||k  rt        j                  |      }t        j                  |	      }t        j                  |
      }
t!        j"                  |
||ff||f      }|S )ap  
    Construct a sparse matrix containing the thresholded row-wise
    correlation matrix from a data array.

    Parameters
    ----------
    data : array_like
        The data from which the row-wise thresholded correlation
        matrix is to be computed.
    minabs : non-negative real
        The threshold value; correlation coefficients smaller in
        magnitude than minabs are set to zero.  If None, defaults
        to 1 / sqrt(n), see Notes for more information.

    Returns
    -------
    cormat : sparse.coo_matrix
        The thresholded correlation matrix, in COO format.

    Notes
    -----
    This is an alternative to C = np.corrcoef(data); C \*= (np.abs(C)
    >= absmin), suitable for very tall data matrices.

    If the data are jointly Gaussian, the marginal sampling
    distributions of the elements of the sample correlation matrix are
    approximately Gaussian with standard deviation 1 / sqrt(n).  The
    default value of ``minabs`` is thus equal to 1 standard error, which
    will set to zero approximately 68% of the estimated correlation
    coefficients for which the population value is zero.

    No intermediate matrix with more than ``max_elt`` values will be
    constructed.  However memory use could still be high if a large
    number of correlation values exceed `minabs` in magnitude.

    The thresholded matrix is returned in COO format, which can easily
    be converted to other sparse formats.

    Examples
    --------
    Here X is a tall data matrix (e.g. with 100,000 rows and 50
    columns).  The row-wise correlation matrix of X is calculated
    and stored in sparse form, with all entries smaller than 0.3
    treated as 0.

    >>> import numpy as np
    >>> np.random.seed(1234)
    >>> b = 1.5 - np.random.rand(10, 1)
    >>> x = np.random.randn(100,1).dot(b.T) + np.random.randn(100,10)
    >>> cmat = corr_thresholded(x, 0.3)
    NrC   r   )ddofr   r   )r   floatr    meanstdr
   rw   r#   floorr`   r   r   ra   nonzerorb   concatenater   
coo_matrix)dataminabsmax_eltnrowncolsdr-   r   ipos_alljpos_all
cor_valuesr   r   cmcmaiposjposcmats                     r   corr_thresholdedr     s   j JD$~eDk! 99;DDIIaLD!!D	!!	B	T		"BQK2b6!T'?"K	d
	#BDQK 
RXXgn%	&B%'R
hH	
B
t)$**Q-b)VVDCOTVV,q9ffRjZZv.
dr	""T4Z.)
b t) >>(#D>>(#D
+Jj4,7$FDKr   c                   *    e Zd ZdZd Zd Zd ZddZy)MultivariateKernelz
    Base class for multivariate kernels.

    An instance of MultivariateKernel implements a `call` method having
    signature `call(x, loc)`, returning the kernel weights comparing `x`
    (a 1d ndarray) to each row of `loc` (a 2d ndarray).
    c                     t         r	   )NotImplementedErrorr   r   locs      r   callzMultivariateKernel.calli  s    !!r   c                 2    || _         | j                          y)z
        Set the bandwidth to the given vector.

        Parameters
        ----------
        bw : array_like
            A vector of non-negative bandwidth values.
        N)bw_setup)r   r   s     r   set_bandwidthz MultivariateKernel.set_bandwidthl  s     r   c                     t        j                  | j                        | _        | j                  | j                  z  | _        y r	   )r
   prodr   bwkbw2r   s    r   r   zMultivariateKernel._setupy  s,     77477#77TWW$r   Nc                 V   |j                  d      }t        j                  |ddgd      \  }}||z
  dz  }t        j                  ||k  ||      }|d|j                  d   dz  z  z  }|||z  }t        j
                  |t        j                  	      | _        | j                          y)
aP  
        Set default bandwiths based on domain values.

        Parameters
        ----------
        loc : array_like
            Values from the domain to which the kernel will
            be applied.
        bwm : scalar, optional
            A non-negative scalar that is used to multiply
            the default bandwidth.
        r      K   )axisg/$??g?Nr   )	r   r
   
percentilewherer   r   r   r   r   )r   r   bwmr   q25q75iqrr   s           r   set_default_bwz!MultivariateKernel.set_default_bw  s     WWQZ==r2hQ7SSyE!XXcBhR(
cCIIaLC'''?#IB **Rrzz2r   r	   )r   r   r   r   r   r   r   r   r_   r   r   r   r   `  s    "%r   r   c                       e Zd ZdZd Zy)GaussianMultivariateKernelzA
    The Gaussian (squared exponential) multivariate kernel.
    c                     t        j                  ||z
  dz   d| j                  z  z        j                  d      | j                  z  S )NrD   r   )r
   expr   rF   r   r   s      r   r   zGaussianMultivariateKernel.call  s<    vvC!|mq488|4599!<txxGGr   N)r   r   r   r   r   r_   r   r   r  r    s    Hr   r  c                    
 t        j                          t        j                        t        j                  |      }j                  dk(  r	dddf    j                  d   j                  d   t	        |      g}t        |      t        |      k7  rd}t        |      i 
t        |      D ]"  \  }}|
vrg 
|<   
|   j                  |       $ 
j                         D ]  }t        j                  
|         
|<    
t               |j                         n:t        j                  |      rj                  |       nj                  |        
fd}	|	S )a  
    Use kernel averaging to estimate a multivariate covariance function.

    The goal is to estimate a covariance function C(x, y) =
    cov(Z(x), Z(y)) where x, y are vectors in R^p (e.g. representing
    locations in time or space), and Z(.) represents a multivariate
    process on R^p.

    The data used for estimation can be observed at arbitrary values of the
    position vector, and there can be multiple independent observations
    from the process.

    Parameters
    ----------
    exog : array_like
        The rows of exog are realizations of the process obtained at
        specified points.
    loc : array_like
        The rows of loc are the locations (e.g. in space or time) at
        which the rows of exog are observed.
    groups : array_like
        The values of groups are labels for distinct independent copies
        of the process.
    kernel : MultivariateKernel instance, optional
        An instance of MultivariateKernel, defaults to
        GaussianMultivariateKernel.
    bw : array_like or scalar
        A bandwidth vector, or bandwidth multiplier.  If a 1d array, it
        contains kernel bandwidths for each component of the process, and
        must have length equal to the number of columns of exog.  If a scalar,
        bw is a bandwidth multiplier used to adjust the default bandwidth; if
        None, a default bandwidth is used.

    Returns
    -------
    A real-valued function C(x, y) that returns an estimate of the covariance
    between values of the process located at x and y.

    References
    ----------
    .. [1] Genton M, W Kleiber (2015).  Cross covariance functions for
        multivariate geostatics.  Statistical Science 30(2).
        https://arxiv.org/pdf/1507.08017.pdf
    r   Nr   z7exog, loc, and groups must have the same number of rows)r   c           
      (   j                  |       }j                  |      }d\  }}j                         D ]  \  }}t        |      }t        j                  ||f      \  }	}
||	j
                     }	||
j
                     }
||	   ||
   z  }|t        j                  d|	d d f   |
d d f   |      z  }||j                         z  } |dk  r=d}t        j                  |       t        j                  t        j                  |      z  S ||z  S )N)r   r   zij,ik,i->jkg|=zgEffective sample size is 0.  The bandwidth may be too small, or you are outside the range of your data.)r   itemsr$   r
   indicesflateinsumrF   r%   r&   nan	ones_like)r   rr   kxkyr   cwrU   r-   r   j1j2wmsgexogixkernelr   s                r   r;   zkernel_covariance.<locals>.cov  s   [[C [[C BXXZ 	EArBAZZA'FBBGGBBGGB2BA "))M4A;RUQGGB!%%'MB	 :GCMM#66BLL,,,Bwr   )r
   r   ndimr   r$   r`   rE   r   	enumeraterb   keyssortr  r   isscalarr   )r  r   groupsr  r   r   r  irU   r;   r  s   `` `      @r   kernel_covariancer    sJ   \ ::dD
**S/CZZF
xx1}!T'l	A		!c&k2A
1vQGo 
B&! 1B;BqE
1Q WWY 11 ~+-	zc"	Rcr*R 8 Jr   )r   )V瞯<d   )r  )r   r  r  F)rZ   皙?r   -C6?r  )	g     @rZ   gMbP?   KH9ꌠ9Y>)Fr   r   r!  )gư>r#  r$  i  )Ng    cA)NN)r   numpyr
   scipy.sparser   scipy.sparse.linalgr   scipy.optimizer   r%   statsmodels.tools.toolsr   statsmodels.tools.sm_exceptionsr   r   r   r0   r5   rA   rX   ru   rz   r|   r   r   r   r   r  r  r_   r   r   <module>r+     s      $ $  )0@F9x @C BJ =@25R"j ;=4959e>P
$q qh 8=.2SlN,bYx9 9xH!3 Hir   