
    !gT                        d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	 d dl
Zd dlZerd dlmZ nd Zd dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZmZmZ dZ  ejB                  e       dz  Z"d Z#d9dZ$d9dZ%d9dZ&d9dZ'd9dZ(d9dZ)e$ejT                  ejV                  ejX                  ejZ                  e%ej\                  e)e&e(e'dZ/d Z0d Z1d9dZ2dZ3dZ4e4D  cg c]	  } | e3vs|  c} Z5e3 e6e5      z   Z7 G d d      Z8 ee8jr                        Z:e:jw                  d edd d!g             e:jw                  d"g        e:jw                  d#d$gd%gfd&gd'gfg        e e<e:            	 d:d(d(d)d*e d+d,d-eejz                  ej|                  ej~                  f   d.e	e<   d/e@d0e@d1eAd2e@d3e	eeBeAf      d4e@d5ej~                  fd6       ZC G d7 d8      ZDyc c} w );    )PD_LT_2Appenderis_numeric_dtype)SP_LT_19)Union)SequenceN)is_categorical_dtypec                 6    t        | t        j                        S N)
isinstancepdCategoricalDtypedtypes    _/var/www/dash_apps/app1/venv/lib/python3.12/site-packages/statsmodels/stats/descriptivestats.pyr	   r	      s    %!4!455    )stats)SimpleTable)jarque_bera)cache_readonly)	Docstring	Parameter)
array_like	bool_like
float_likeint_like)	      
      2   K   Z   _   c   g      Y@c                 D    | j                         | j                         z
  S r   )maxmin)dfs    r   pd_ptpr*   "   s    668bffhr   c                 R    dt        j                  |       z
  j                  |      S )Nr   axis)npisnansum)xr-   s     r   nancountr2   &   s"    O  d ++r   c                 `    t        j                  | |      t        j                  | |      z
  S Nr,   )r.   nanmaxnanminarrr-   s     r   nanptpr9   *   s#    99St$ryy4'@@@r   c                 6    t        j                  | dz  |      S )N   r,   )r.   nansumr7   s     r   nanussr=   .   s    99SAXD))r   c                 :    t        j                  | t        |      S r4   )r.   nanpercentilePERCENTILESr7   s     r   r?   r?   2   s    C488r   c                 2    t        j                  | |d      S Nomit)r-   
nan_policy)r   kurtosisr7   s     r   nankurtosisrF   6   s    >>#DV<<r   c                 2    t        j                  | |d      S rB   )r   skewr7   s     r   nanskewnessrI   :   s    ::c88r   )obsmeanstdr'   r(   ptpvarrH   ussrE   percentilesc                 r    	 t        j                  |       }|S # t        $ r t        j                  }Y |S w xY w)zi
    wrapper for scipy.stats.kurtosis that returns nan instead of raising Error

    missing options
    )r   rE   
ValueErrorr.   nanaress     r   	_kurtosisrW   M   s;    nnQ J  ffJ    66c                 r    	 t        j                  |       }|S # t        $ r t        j                  }Y |S w xY w)ze
    wrapper for scipy.stats.skew that returns nan instead of raising Error

    missing options
    )r   rH   rR   r.   rS   rT   s     r   _skewrZ   Z   s:    jjm J  ffJrX   c                 n   t        j                  |       } t        j                  | |kD        }t        j                  | |k        }||z
  dz  }	 t        j                  t        ||      ||z   d      j                  }||fS # t        $ r* t        j                  t        ||      ||z   d      }Y ||fS w xY w)a8  
    Signs test

    Parameters
    ----------
    samp : array_like
        1d array. The sample for which you want to perform the sign test.
    mu0 : float
        See Notes for the definition of the sign test. mu0 is 0 by
        default, but it is common to set it to the median.

    Returns
    -------
    M
    p-value

    Notes
    -----
    The signs test returns

    M = (N(+) - N(-))/2

    where N(+) is the number of values above `mu0`, N(-) is the number of
    values below.  Values equal to `mu0` are discarded.

    The p-value for M is calculated using the binomial distribution
    and can be interpreted the same as for a t-test. The test-statistic
    is distributed Binom(min(N(+), N(-)), n_trials, .5) where n_trials
    equals N(+) + N(-).

    See Also
    --------
    scipy.stats.wilcoxon
    g       @      ?)	r.   asarrayr0   r   	binomtestr(   pvalueAttributeError
binom_test)sampmu0posnegMps         r   	sign_testrh   g   s    F ::dD
&&
C
&&
C	scA<OOCSM39c:AA a4K  <Sc]C#Is;a4K<s   .B -B43B4)nobsmissingrK   std_errcirL   iqr
iqr_normalmad
mad_normalcoef_varranger'   r(   rH   rE   r   modemedianrP   )ri   rj   distincttopfreqc                      e Zd ZdZg dZeZeZe	Z
	 dddddeddd	eej                  ej                   ej"                  f   d
ee   dededededeeeef      defdZdej"                  dej"                  fdZedej"                  fd       Zedej"                  fd       Zedej"                  fd       ZdefdZdefdZy)Descriptiona  
    Extended descriptive statistics for data

    Parameters
    ----------
    data : array_like
        Data to describe. Must be convertible to a pandas DataFrame.
    stats : Sequence[str], optional
        Statistics to include. If not provided the full set of statistics is
        computed. This list may evolve across versions to reflect best
        practices. Supported options are:
        "nobs", "missing", "mean", "std_err", "ci", "ci", "std", "iqr",
        "iqr_normal", "mad", "mad_normal", "coef_var", "range", "max",
        "min", "skew", "kurtosis", "jarque_bera", "mode", "freq",
        "median", "percentiles", "distinct", "top", and "freq". See Notes for
        details.
    numeric : bool, default True
        Whether to include numeric columns in the descriptive statistics.
    categorical : bool, default True
        Whether to include categorical columns in the descriptive statistics.
    alpha : float, default 0.05
        A number between 0 and 1 representing the size used to compute the
        confidence interval, which has coverage 1 - alpha.
    use_t : bool, default False
        Use the Student's t distribution to construct confidence intervals.
    percentiles : sequence[float]
        A distinct sequence of floating point values all between 0 and 100.
        The default percentiles are 1, 5, 10, 25, 50, 75, 90, 95, 99.
    ntop : int, default 5
        The number of top categorical labels to report. Default is

    Attributes
    ----------
    numeric_statistics
        The list of supported statistics for numeric data
    categorical_statistics
        The list of supported statistics for categorical data
    default_statistics
        The default list of statistics

    See Also
    --------
    pandas.DataFrame.describe
        Basic descriptive statistics
    describe
        A simplified version that returns a DataFrame

    Notes
    -----
    The selectable statistics include:

    * "nobs" - Number of observations
    * "missing" - Number of missing observations
    * "mean" - Mean
    * "std_err" - Standard Error of the mean assuming no correlation
    * "ci" - Confidence interval with coverage (1 - alpha) using the normal or
      t. This option creates two entries in any tables: lower_ci and upper_ci.
    * "std" - Standard Deviation
    * "iqr" - Interquartile range
    * "iqr_normal" - Interquartile range relative to a Normal
    * "mad" - Mean absolute deviation
    * "mad_normal" - Mean absolute deviation relative to a Normal
    * "coef_var" - Coefficient of variation
    * "range" - Range between the maximum and the minimum
    * "max" - The maximum
    * "min" - The minimum
    * "skew" - The skewness defined as the standardized 3rd central moment
    * "kurtosis" - The kurtosis defined as the standardized 4th central moment
    * "jarque_bera" - The Jarque-Bera test statistic for normality based on
      the skewness and kurtosis. This option creates two entries, jarque_bera
      and jarque_beta_pval.
    * "mode" - The mode of the data. This option creates two entries in all tables,
      mode and mode_freq which is the empirical frequency of the modal value.
    * "median" - The median of the data.
    * "percentiles" - The percentiles. Values included depend on the input value of
      ``percentiles``.
    * "distinct" - The number of distinct categories in a categorical.
    * "top" - The mode common categories. Labeled top_n for n in 1, 2, ..., ``ntop``.
    * "freq" - The frequency of the common categories. Labeled freq_n for n in 1,
      2, ..., ``ntop``.
    ri   rj   ru   NT皙?Fr   numericcategoricalalphause_trP   ntopdatar   r}   r~   r   r   rP   r   c          	         |}	t        |t        j                  t        j                  f      st	        |dd      }	|	j
                  dk(  rt        j                  |      }t        |d      }t        |d      }g }
d}|r!|
j                  t        j                         d}|r"|
j                  d       ||dk7  rd	ndz  }|dz  }|s|st        d
      t        j                  |      j                  |
      | _        | j                  j                  d   dk(  rt        d| d      | j                  j                  D cg c]  }t        |       c}| _        | j                  j                  D cg c]  }t#        |       c}| _        |7|D cg c]  }|t&        vs| }}|rt        dj)                  |       d      |t+        t&              n
t+        |      | _        t/        |d      | _        d| j,                  v | _        d| j,                  v | _        | j2                  r4| j0                  dcxk  r"t7        | j$                        k  rt        d       ddgddgddgt9        d| j0                  dz         D cg c]  }d| 	 c}t9        d| j0                  dz         D cg c]  }d| 	 c}d}|D ]Y  }|| j,                  v s| j,                  j;                  |      }| j,                  d | ||   z   | j,                  |dz   d  z   | _        [ t	        |ddd      | _        t        j>                  | j<                        | _        t        j@                  | j<                        j                  d   | j<                  j                  d   k7  rt        d       t        jB                  | j<                  d!k\        s"t        jB                  | j<                  dk        rt        d"      tE        |d#      | _#        d|cxk  rdk  st        d$       t        d$      t        |d%      | _$        y c c}w c c}w c c}w c c}w c c}w )&Nr   r;   )maxdimr   r}   r~    categoryzand z4At least one of numeric and categorical must be Truer   z
Selecting z results in an empty DataFramez, z are not known statisticsr   rv   rw   z"top must be a non-negative integerrs   	mode_frequpper_cilower_cir   jarque_bera_pvaltop_freq_)rs   rl   r   rv   rw   rP   d)r   r   zpercentiles must be distinctd   z.percentiles must be strictly between 0 and 100r   z&alpha must be strictly between 0 and 1r   )%r   r   Series	DataFramer   ndimr   appendr.   numberrR   select_dtypes_datashapedtypesr   _is_numericr	   _is_cat_likeDEFAULT_STATISTICSjoinlist_statsr   _ntop_compute_top_compute_freqr0   rr   index_percentilessortuniqueanyr   _alpha_use_t)selfr   r   r}   r~   r   r   rP   r   data_arrinclude	col_typesdtstatundefireplacementskeyidxs                      r   __init__zDescription.__init__  s    $BLL 9:!$q9H==A99T?DGY/];	NN299%!INN:&9?:I&I{F  \\$'55g>
::A!#YK'EF  <@::;L;LMR,R0M/3zz/@/@
)+ $
 &+Ndt;M/MTNEN yy'((AB  ).D#$4; 	 dF+
!T[[0#t{{2q!I3t7H7H3I!IABB "J
 [)z*)+=>(-aa(@A1d1#JA*/4::>*BCQuQC[C
   	Cdkk!kk'',KK%"3'(kk#'),- 	 'q
 GGD$5$5699T&&'--a0D4E4E4K4KA4NN;<<66$##s*+rvvd6G6G16L/MMNN 05}1}EFF EFFw/c N

 O& BCs$   =P3/P8P=P=>Q
'Qr)   returnc                 x    |j                   | j                  D cg c]  }||j                  v s| c}   S c c}w r   )locr   r   )r   r)   ss      r   _reorderzDescription._reorderd  s-    vv$++?Qbhhq?@@?s   77c                    | j                   }| j                  }|j                  d   dk(  r|S |j                  d   dk(  r|S t        j                  ||gd      }| j                  || j                  j                           S )z
        Descriptive statistics for both numeric and categorical data

        Returns
        -------
        DataFrame
            The statistics
        r   r   r,   )r}   r~   r   r   concatr   r   columns)r   r}   r~   r)   s       r   framezDescription.frameg  sz     ,,&&Q1$N]]1"YY-A6}}R

 2 2344r   c           	        $ | j                   j                  dd| j                  f   }|j                  }|j                  \  }}|j                         }|j                         }|j                         }||z
  j                         j                         }|j                         }	|	j                  |dkD  xx   |j                  |dkD     dz  z  cc<   | j                  r8t        j                  |dz
        j                  d| j                  dz  z
        }
n/t        j                  j                  d| j                  dz  z
        }
d }|j!                  |      j"                  }|j$                  dkD  rt'        |t(        j*                        rGt-        j.                  |d   t0              }t-        j.                  |d   t,        j2                        }ng }g }|j4                  D ]9  }|j                  |   }|j7                  |d          |j7                  |d          ; t-        j8                  |      }t-        j8                  |      }nt-        j:                  d      x}}|dkD  }t-        j<                  |j                  d   t,        j>                        }||   |j                  |   z  ||<   |}	 dd	l m!} |j                         }|D ]_  } |||   jD                        s||   jG                         jI                         s;||   jK                  t,        j>                        ||<   a 	 |j                  d   dkD  r$|jO                  d
      |jO                  d      z
  }n|}d $|j!                  $fdd      j"                  }|j                         }t,        j>                  |j                  |dk(  <   ||z  }i dt)        jP                  t-        jR                  |t,        j2                        |j                  d   z  |      d|j                  d   |z
  d|d|	d||
|	z  z   d||
|	z  z
  d|d|d|d|dtU        |      d|jW                         d|jY                         d|d   d|d    d!|t-        jZ                  t        j                  j                  dd
g            z  d"|t-        j\                  dt,        j^                  z        z  |d   |d   t)        jP                  ||      t)        jP                  ||      |ja                         d#}|jc                         D ci c]  \  }}|| jd                  v s|| }}}t)        j*                  tg        |ji                               |tg        |jk                               $      }d%| jd                  vr|S |j                  d   dkD  r2|jO                  | jl                  d&z        jo                  t0              }n(t)        j*                  | jl                  d&z  t0        '      }t-        jp                  t-        jr                  d&|j4                  z        d&|j4                  z  k(        r/|j4                  D cg c]  }tu        d&|z         d( c}|_        nd)}d&} |j4                  }!|rW| d*z  } t-        jr                  | |j4                  z        }t-        jp                  t-        jZ                  |      dkD        rd+}|rWt-        jr                  | |!z        | d&z  z  }!d,tw        ty        | d&z              dz
   d-}"d.|" d/}#|!D cg c]  }|#j{                  |       c}|_        | jd                  |j4                  j}                         z   | _2        | j                  t)        j                  ||gd0            S # tL        $ r Y w xY wc c}}w c c}w c c}w )1z
        Descriptive statistics for numeric data

        Returns
        -------
        DataFrame
            The statistics of the numeric columns
        Nr   r\   r   g      ?r;   c                    t        | j                  t        j                        r| j                  n| j                  j                  }| j	                         j                  |      }t        ri nddi}t        j                  |fi |}t        j                  |d         rt        |d         |d   fS |d   j                  d   dkD  r|D cg c]  }t        |       c}S t        j                  t        j                  fS c c}w )Nr   keepdimsTr   r   )r   r   r.   numpy_dtypedropnato_numpyr   r   rs   isscalarfloatr   rS   )serr   ser_no_missingkwargsmode_resvals         r   _modez"Description.numeric.<locals>._mode  s    !+CIIrxx!@CIIciiF[F[E ZZ\222?N#R*d);Fzz.;F;H{{8A;'Xa[)8A;66{  #a'.67sc
7766266>! 8s   Dr   )is_extension_array_dtypeg      ?g      ?c                     t        j                  |       }|j                  d   dk  rt         j                  fdz  S t	        |      S )Nr   r;      )r.   r]   r   rS   r   )crU   s     r   _safe_jarque_beraz.Description.numeric.<locals>._safe_jarque_bera  s8    

1AwwqzA~y1}$q>!r   c                 B    t         | j                                     S r   )r   r   )r1   r   s    r   <lambda>z%Description.numeric.<locals>.<lambda>  s    d,QXXZ89 r   expand)result_typeri   r   rj   rK   rk   r   r   rL   rm   ro   rq   rr   r'   r(   rH   rE      rn   rp   )r   r   rs   r   rt   )r   r   rP   r   )r   r   %Tr   Fz0.fz{0:z}%r,   )Ar   r   r   r   r   rL   countrK   abscopyr   r   tppfr   normapplyTsizer   r   r   r.   r]   r   int64r   r   
atleast_1demptyfullrS   pandas.api.typesr   r   isnullr   fillnaImportErrorquantiler   onesr*   r'   r(   diffsqrtpirt   itemsr   r   valueskeysr   astypeallfloorintlenstrformattolistr   r   )%r   r)   cols_krL   r   rK   ro   rk   qr   mode_valuesrs   mode_countsr   r   r   r   _dfr   colrm   jbnan_meanrq   resultsvfinal
results_dfpercdupescaler   fmtoutputr   s%                                       @r   r}   zDescription.numericz  s     ::>>!T-=-=*=>zzxx1ffh
wwyDyoo$$&((*EAI%))EAI"6#"==;;	"&&sT[[1_'<=A

sT[[1_45A
	" hhuo''a+r||4zz+a.> jjQrxxH  &,, /C%//#.CKKA'&&s1v./ }}T* mmK8!#!,D;aiGGDJJqM2662	$S)EIIcN:	# 	A'')C ;+BsGMM:3x(,,.#&s8??266#:C; 88A;?,,t$s||D'99CC	" XX9x  

! 	 99;&(ffX]#>
BII*RXXa[8
 rxx{U*	

 D
 w
 q7{*
 q7{*
 3
 3
 3
 
 VBZ
 2668
 2668
  BqE!
" 1#
$ #

d|(D EE%
& #BEE	 22'
( a5 "1IId$/9D9iik1
4 #*--/F$!QQ$++5EAFF\\ $d5::<6H

 +88A;?<< 1 1C 78??FD<<d&7&7#&=UKD66"((3+,tzz1ABC:>**E3Ss^,A.EDJDEJJEhhutzz1266"''#,*+ D	 
 HHUU]+us{;Es3ucz?+A-.a0CC5_F8=>&--,>DJ kkDJJ$5$5$77}}RYY
D'9BCC]  		^ G F ?s6   20^. #!^. '^. 6^>^>*_:_	.	^;:^;c                 $   | j                   j                  dd| j                  D cg c]  }| c}f   }|j                  d   }|j                  }|D ci c]  }|||   j                  d       }}t        j                  |D ci c]  }|||   j                  d    c}t        j                        }i }i }|D ]  }||   }	|	j                  d   | j                  k\  rB|	j                  d| j                   ||<   t        j                  |	j                  dd       ||<   ft        |	j                        }
|
dg| j                  t        |
      z
  z  z  }
|
||<   t        |	      }|t        j                   g| j                  t        |      z
  z  z  }t        j                  |      ||<    t#        d| j                  dz         D cg c]  }d| 	 }}t        j$                  |d	||
      }t#        d| j                  dz         D cg c]  }d| 	 }}t        j$                  |d	||
      }t        j                  t        j&                  |t        j                        |j                  d   z  |      |j                  d   |j)                         z
  |d}|j+                         D ci c]  \  }}|| j,                  v s|| }}}t        j$                  t        |j/                               |t        |j1                               d	      }| j2                  rt        j4                  ||gd      }| j6                  rt        j4                  ||gd      }| j9                  |      S c c}w c c}w c c}w c c}w c c}w c c}}w )z
        Descriptive statistics for categorical data

        Returns
        -------
        DataFrame
            The statistics of the categorical columns
        Nr   T)	normalizer   r   r   r   object)r   r   r   r   r   rz   )r   r   r   r,   )r   r   r   r   r   value_countsr   r   r.   r   r   r   r]   ilocr   r   rS   rr   r   r   r   r   r   r   r   r   r   r   r   )r   r   r)   r   r   vcru   rv   rw   singler   freq_valr   r   top_dffreq_dfr  r  r  r  s                       r   r~   zDescription.categorical  s    ZZ^^At/@/@AAABHHQKzzCEFCc2c7''$'77FF99.01sS"S'--""1
  	1CWF||A$**,!<<$**5CJJv{{2A7S	6<<(vc#h!677C<RVVH

S](BCCJJx0S		1 &+1djj1n%=>4s>>cM&+AtzzA~&>?5??,,t85$O II*RXXa[8 xx{RXXZ/ 
 #*--/F$!QQ$++5EAFF\\ uzz|$	

 J#7a@JJ#8qAJ}}Z(([  B G1  ?? Gs)   	M3M8M=
NN2N	Nc                    | j                   j                  t              }|j                         j	                         j	                         r|j                  d      }|j                  D cg c]  }t        |       }}|j                  D cg c]  }t        |       }}g }|j                         D ]$  \  }}|j                  |D 	cg c]  }	|	 c}	       & d }
t        |||ddd|
didgt        |      z        S c c}w c c}w c c}	w )	z
        Summary table of the descriptive statistics

        Returns
        -------
        SimpleTable
            A table instance supporting export to text, csv and LaTeX
        r   c                 f    t        | t              r| S | dz  | k(  rt        t        |             S | dS )Nr   z0.4g)r   r   r   )r  s    r   
_formatterz'Description.summary.<locals>._formatterY  s4    !S!a13q6{"Xr   zDescriptive Statistics	data_fmtsz%s)r   r   r   )headerstubstitletxt_fmt	datatypes)r   r   r  r   r   r   r   r   r   iterrowsr   r   r   )r   r)   r   r   r   r  r   r   rowr  r  s              r   summaryzDescription.summaryG  s     ZZv&99;??  "2B$&JJ/SC//%'XX.cS..kkm 	*FAsKKC(q()	*	 * dz":;cCIo
 	
 0. )s   +D D	D

c                 P    t        | j                         j                               S r   )r   r!  as_text)r   s    r   __str__zDescription.__str__i  s    4<<>))+,,r   r   ) __name__
__module____qualname____doc___int_fmtNUMERIC_STATISTICSnumeric_statisticsCATEGORICAL_STATISTICScategorical_statisticsr   default_statisticsr@   r   r.   ndarrayr   r   r   r   r   boolr   r   r   r   r   r   r}   r~   r   r!  r$   r   r   ry   ry      sd   Pd /H+3+
  $W0
  3>W0BJJ		2<<78W0 }W0
 W0 W0 W0 W0 eCJ/0W0 W0rA2<< ABLL A 5r|| 5 5$ PD PD PDd 7)R\\ 7) 7)r 
  
D- -r   ry   Returnsr   zDescriptive statistics
AttributeszSee Also)zpandas.DataFrame.describeNzBasic descriptive statistics)ry   Nz;Descriptive statistics class with additional output optionsTr{   Fr   r|   r   r   r}   r~   r   r   rP   r   r   c          
      <    t        | |||||||      j                  S )Nr|   )ry   r   )r   r   r}   r~   r   r   rP   r   s           r   describer5    s0     	 e	r   c                       e Zd ZdZd Zy)Describez
    Removed.
    c                     t        d      )NzDescribe has been removed)NotImplementedError)r   datasets     r   r   zDescribe.__init__  s    !"=>>r   N)r%  r&  r'  r(  r   r1  r   r   r7  r7    s    ?r   r7  )r   r   )Estatsmodels.compat.pandasr   r   r   statsmodels.compat.scipyr   typingr   collections.abcr   numpyr.   pandasr   pandas.core.dtypes.commonr	   scipyr   statsmodels.iolib.tabler   statsmodels.stats.stattoolsr   statsmodels.tools.decoratorsr   statsmodels.tools.docstringr   r   statsmodels.tools.validationr   r   r   r   r@   array	QUANTILESr*   r2   r9   r=   r?   rF   rI   nanmeannanstdr5   r6   nanvarMISSINGrW   rZ   rh   r*  r,  _additionaltupler   ry   r(  dsreplace_blockr   r/  r   r   r0  r   r   r5  r7  )r   s   0r   <module>rR     sj   I I -  $  
>6  / 3 7 <  1BHH[!E)	,A*9=9
 JJ99999999 

,^ , H +t;M/MD (%*<< w- w-t {""#   y{-E,FG   r "    11+,	

 ##JK	
	 
#b'   /:


BIIr||3
4C= 	
    %U
+,  \\ .? ?Ys   !	G+G