
    !gY                        d dl mZ d dlZd dlZd dlmZmZ d dlZd dl	m
Z
 d dlmZmZmZ d dlmZ d dlmZ d Zd	 Zd
 Zd Zd Zd Zd Zd Z G d d      Z G d d      Zedk(  r\d dlZd dlmZ  ej@                  ddg d      Z! ede!      jE                         Z# ede!      jE                         Z$ ee#d      Z%yy)    )lrangeN)	DataFrameIndex)stats)_has_intercept_intercept_idx_remove_intercept_patsy)summary2)OLSc                     || j                         S |dk(  r| j                  S |dk(  r| j                  S |dk(  r| j                  S |dk(  r| j                  S t        d|z        )Nhc0hc1hc2hc3z robust options %s not understood)
cov_paramscov_HC0cov_HC1cov_HC2cov_HC3
ValueError)modelrobusts     T/var/www/dash_apps/app1/venv/lib/python3.12/site-packages/statsmodels/stats/anova.py_get_covariancer      sk    ~!!	5}}	5}}	5}}	5}};fDEE    c                 4   |j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|r|j                         }| j                  j                  }| j                  j                  }|j
                  d   }| j                  j                  }	| j                  j                  j                  }
| j                  j                  }t        |
j                        t        |
      z
  dz   }d	|z  }d
dd||g}t        t        j                  |df      |      }|dv rt!        | ||||
|||||
      S |dv rt#        | |
||||      S |dv rt%        | |
||||      S |dv rt'        d      t)        dt+        |      z        )a9  
    Anova table for one fitted linear model.

    Parameters
    ----------
    model : fitted linear model results instance
        A fitted linear model
    typ : int or str {1,2,3} or {"I","II","III"}
        Type of sum of squares to use.

    **kwargs**

    scale : float
        Estimate of variance, If None, will be estimated from the largest
    model. Default is None.
        test : str {"F", "Chisq", "Cp"} or None
        Test statistics to provide. Default is "F".

    Notes
    -----
    Use of this function is discouraged. Use anova_lm instead.
    testFscaleNtyp   r   r   zPR(>%s)dfsum_sqmean_sq   columnsr!   I)   II)   III)   IVzType IV not yet implementedzType %s not understood)getlowerr   endogexogshapeendog_namesdatadesign_info
exog_nameslentermsr   r   npzerosanova1_lm_singleanova2_lm_singleanova3_lm_singleNotImplementedErrorr   str)r   kwargsr   r   r    r   r2   r3   nobsresponse_namer7   r8   n_rowspr_testnamestables                   r   anova_singlerI   #   s   . ::fc"DJJw%E
**UA
CZZ$'FKKE;;D::a=DKK++M++""..K''J+##$~k'BBQFF$G8Yg6Ebhh{+U;E
hudD+u &gv? 	?			{FD' &( 	(	
	{FD' &( 	(			!"?@@1CH<==r   c
                    t        | dd      }
|
Bt        j                  j                  |      \  }}t        j                  |j
                  |      }
t        j                  t        |j                        t        |j                        f      }|j                  D cg c]  }|j                  |       }}t        |      D ]  \  }}d|||f<    t        j                  ||
dz        }t        |      }||    }t        j                  |j                        }||    }|j                         }t!        |dgz         |_        t        j$                  ||    j'                  d      |f   |j(                  |ddgf<   | j*                  | j,                  f|j(                  dddgf<   |dk(  r|d   |d   z  | j*                  | j,                  z  z  ||<   t.        j0                  j3                  |d   |d   | j,                        ||<   t        j4                  t        j4                  f|j(                  d||gf<   |d   |d   z  |d	<   |S c c}w )
a  
    Anova table for one fitted linear model.

    Parameters
    ----------
    model : fitted linear model results instance
        A fitted linear model

    **kwargs**

    scale : float
        Estimate of variance, If None, will be estimated from the largest
    model. Default is None.
        test : str {"F", "Chisq", "Cp"} or None
        Test statistics to provide. Default is "F".

    Notes
    -----
    Use of this function is discouraged. Use anova_lm instead.
    effectsNr!   r*   Residualr"   r#   r   r$   )getattrr;   linalgqrdotTr<   r9   r:   column_names
term_namesslice	enumerater   arraytolistr   indexc_sumlocssrdf_residr   fsfnan)r   r2   r3   rC   r7   rH   rE   r   rF   r   rK   qrarrnameslicesislice_r#   idxrS   rX   s                         r   r=   r=   _   s   . eY-Giill4 !&&e$
((C))*C0H0H,IJ
KC2=2H2HI$k%IFIf% &AvI VVC!$F

%CSD\F+001JSD!JE,-EK)+sC4y}}Q/?/G)HEIIedH%%&-2YY-FEIIj8D/)*s{h%+5		ENN24dE#Jd$)NN4g13		*tWo-.Xt4E)L/ Js   Ic                 6   |j                   dd }t        |      }dd||g}t        t        j                  |df      |      }t        | d      }	t        | |      }
g }g }t        |      D ]  \  }}|j                  |      }t        |j                  |j                        }g }t        |j                        }|D ]  }t        |j                        }|j                  |      s*||k(  r0|j                  |      }|j                  t        |j                  |j                               |j                  t        |j                  |j                                t        j                  | j                   j"                  j$                  d         |   }t        j                  | j                   j"                  j$                  d         |   }|j&                  rt        j(                  t        j(                  ||
      |j*                        }ddlm} |j1                  |      \  }}|j$                  d   |j$                  d   z
  }t        j(                  |dd| df   j*                  |      }n|}|j$                  d   }|d	k(  re| j3                  ||

      }|j4                  x|j6                  |j8                  |   |f<   }|j:                  |j6                  |j8                  |   |f<   ||j6                  |j8                  |   df<   |j=                  |j                         |j=                  |j?                                 tA        |dgz         |_        |jB                  t        jD                  || j                   j"                  j$                  d   dz   gz            }||   |d   z  | jF                  z  | jH                  z  }||d<   | jF                  | jH                  t        jJ                  t        jJ                  f|j6                  ddd||gf<   |S )a  
    Anova type II table for one fitted linear model.

    Parameters
    ----------
    model : fitted linear model results instance
        A fitted linear model

    **kwargs**

    scale : float
        Estimate of variance, If None, will be estimated from the largest
    model. Default is None.
        test : str {"F", "Chisq", "Cp"} or None
        Test statistics to provide. Default is "F".

    Notes
    -----
    Use of this function is discouraged. Use anova_lm instead.

    Type II
    Sum of Squares compares marginal contribution of terms. Thus, it is
    not particularly useful for models with significant interaction terms.
    Nr#   r"   r.   r&   r!   r   )rN   r   cov_prL   )&r:   r	   r   r;   r<   r   rU   rT   r   startstopsetfactorsissubsetextendeyer   r3   r4   sizerP   rQ   scipyrN   rO   f_testfvaluer[   rX   pvalueappendrd   r   ilocargsortr\   r]   r`   )r   r7   rE   r   rF   r   
terms_inforG   rH   cov
robust_cov	col_orderrX   rf   termcolsL1L2term_sett	other_setcolLVLrN   
orth_compl_rb   L12r^   
test_valuer\   s                                  r   r>   r>      sg   2 ""1%J(4JtT7+Ebhh{+u=E
%
&C /JIEZ( '"4   &DJJ		*t||$ 	7AAIII  +H	4I!''*		&CHH56		&CHH56	7 VVEKK$$**1-.r2VVEKK$$**1-.r277&&:.rtt4C$!99S>LJqbhhqk)A &&AqbcE*,,b1CCA3;S
3A;<88CEIIekk!nd*+j12EIIekk!ng-. +,		%++a.$&'$TYY[!O'"R ,-EKJJrzz)u{{/?/?/E/Ea/H/J.K"KLME
+d
#eii
/
>CE(O=BYY<ANN<>FFBFF=LEIIj8D$889 Lr   c                    |t        |      z  }|j                  }dd||g}t        t        j                  |df      |      }t        | |      }	g }
g }t        |      D ]  \  }}|j                  |      }t        j                  | j                  j                  j                  d         |   }|}|j                  d   }|dk(  re| j                  ||	      }|j                  x|j                  |j                  |   |f<   }|j                   |j                  |j                  |   |f<   ||j                  |j                  |   df<   |j#                  |j%                                 t'        |d	gz         |_        ||   |d   z  | j(                  z  | j*                  z  }||d<   | j(                  | j*                  t        j,                  t        j,                  f|j                  d	dd||gf<   |S )
Nr#   r"   r.   r&   r!   r   r   rj   rL   )r   r:   r   r;   r<   r   rU   rT   rr   r   r3   r4   ru   rv   r[   rX   rw   rx   rd   r   r\   r]   r`   )r   r7   rE   r   rF   r   r{   rG   rH   r|   r~   rX   rf   r   r   r   r   rb   r^   r   r\   s                        r   r?   r?      s   
n[))F""JtT7+Ebhh{+u=E
%
(CIEZ( "4  &VVEKK$$**1-.t4HHQK3;S,A;<88CEIIekk!nd*+j12EIIekk!ng-. +,		%++a.$&'TYY[!"" ,-EK +d
#eii
/
>CE(O=BYY<ANN<>FFBFF=LEIIj8D$889 Lr   c                     |j                  dd      }t        |       dk(  r| d   }t        |fi |S |dvrt        dt	        |      z        |j                  dd      }|j                  dd	      }t        |       }d
|z  }dddd||g}t        t        j                  |df      |      }	|s| d   j                  }| D 
cg c]  }
|
j                   c}
|	d<   | D 
cg c]  }
|
j                   c}
|	d<   t        j                  |	d   j                         |	j                  |	j                  dd	 df<   |	d   j                          |	d<   |dk(  rn|	d   |	d   z  |z  |	d<   t        j                   j#                  |	d   |	d   |	d         |	|<   t        j$                  |	j                  |	d   j'                         |f<   |	S c c}
w c c}
w )a	  
    Anova table for one or more fitted linear models.

    Parameters
    ----------
    args : fitted linear model results instance
        One or more fitted linear models
    scale : float
        Estimate of variance, If None, will be estimated from the largest
        model. Default is None.
    test : str {"F", "Chisq", "Cp"} or None
        Test statistics to provide. Default is "F".
    typ : str or int {"I","II","III"} or {1,2,3}
        The type of Anova test to perform. See notes.
    robust : {None, "hc0", "hc1", "hc2", "hc3"}
        Use heteroscedasticity-corrected coefficient covariance matrix.
        If robust covariance is desired, it is recommended to use `hc3`.

    Returns
    -------
    anova : DataFrame
        When args is a single model, return is DataFrame with columns:

        sum_sq : float64
            Sum of squares for model terms.
        df : float64
            Degrees of freedom for model terms.
        F : float64
            F statistic value for significance of adding model terms.
        PR(>F) : float64
            P-value for significance of adding model terms.

        When args is multiple models, return is DataFrame with columns:

        df_resid : float64
            Degrees of freedom of residuals in models.
        ssr : float64
            Sum of squares of residuals in models.
        df_diff : float64
            Degrees of freedom difference from previous model in args
        ss_dff : float64
            Difference in ssr from previous model in args
        F : float64
            F statistic comparing to previous model in args
        PR(>F): float64
            P-value for significance comparing to previous model in args

    Notes
    -----
    Model statistics are given in the order of args. Models must have been fit
    using the formula api.

    See Also
    --------
    model_results.compare_f_test, model_results.compare_lm_test

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> from statsmodels.formula.api import ols
    >>> moore = sm.datasets.get_rdataset("Moore", "carData", cache=True) # load
    >>> data = moore.data
    >>> data = data.rename(columns={"partner.status" :
    ...                             "partner_status"}) # make name pythonic
    >>> moore_lm = ols('conformity ~ C(fcategory, Sum)*C(partner_status, Sum)',
    ...                 data=data).fit()
    >>> table = sm.stats.anova_lm(moore_lm, typ=2) # Type 2 Anova DataFrame
    >>> print(table)
    r    r!   r   r(   z6Multiple models only supported for type I. Got type %sr   r   r   NzPr(>%s)r]   r\   df_diffss_diff   r&   )r0   r9   rI   r   rA   r   r;   r<   r   r\   r]   diffvaluesr[   rX   r   r^   r_   r`   isnull)argsrB   r    r   r   r   n_modelsrF   rG   rH   mdls              r   anova_lmr     s   L **UA
C 4yA~QE,V,,
( '),S2 3 	3 ::fc"DJJw%E4yH$G	9dGDEbhh!}-u=ER'+,CGG,E%L156#6E*-/WWU:5F5M5M-N,NEIIekk!"oy()e))++E)s{9%i(885@c
E#Ji0@$)*$57g 35&&		%*##%w./L -6s    GGc                 Z    t        j                  dg|z        }|D ]  }| |   }d||<    |S )NTF)r;   rV   )re   slices_to_excludenindr   ss         r   
_not_slicer     s>    
((D6!8
C! 4LA Jr   c                    t        |||j                  d         }||   }t        j                  | |dd|f   j	                  |            }|j
                  j	                  |      }t        |       t        |      z
  }||fS )ah  
    Residual sum of squares of OLS model excluding factors in `keys`
    Assumes x matrix is orthogonal

    Parameters
    ----------
    y : array_like
        dependent variable
    x : array_like
        independent variables
    term_slices : a dict of slices
        term_slices[key] is a boolean array specifies the parameters
        associated with the factor `key`
    params : ndarray
        OLS solution of y = x * params
    keys : keys for term_slices
        factors to be excluded

    Returns
    -------
    rss : float
        residual sum of squares
    df : int
        degrees of freedom
    r!   N)r   r4   r;   subtractrP   rQ   r9   )	yxterm_slicesparamskeysr   params1r\   r]   s	            r   _ssr_reduced_modelr     sq    4 [$

3CSkG
++a1c6w/
0C
%%))C.C1vG$H=r   c                   .    e Zd ZdZ	 	 ddZd Zd Zd Zy)AnovaRMa  
    Repeated measures Anova using least squares regression

    The full model regression residual sum of squares is
    used to compare with the reduced model for calculating the
    within-subject effect sum of squares [1].

    Currently, only fully balanced within-subject designs are supported.
    Calculation of between-subject effects and corrections for violation of
    sphericity are not yet implemented.

    Parameters
    ----------
    data : DataFrame
    depvar : str
        The dependent variable in `data`
    subject : str
        Specify the subject id
    within : list[str]
        The within-subject factors
    between : list[str]
        The between-subject factors, this is not yet implemented
    aggregate_func : {None, 'mean', callable}
        If the data set contains more than a single observation per subject
        and cell of the specified model, this function will be used to
        aggregate the data before running the Anova. `None` (the default) will
        not perform any aggregation; 'mean' is s shortcut to `numpy.mean`.
        An exception will be raised if aggregation is required, but no
        aggregation function was specified.

    Returns
    -------
    results : AnovaResults instance

    Raises
    ------
    ValueError
        If the data need to be aggregated, but `aggregate_func` was not
        specified.

    Notes
    -----
    This implementation currently only supports fully balanced designs. If the
    data contain more than one observation per subject and cell of the design,
    these observations need to be aggregated into a single observation
    before the Anova is calculated, either manually or by passing an aggregation
    function via the `aggregate_func` keyword argument.
    Note that if the input data set was not balanced before performing the
    aggregation, the implied heteroscedasticity of the data is ignored.

    References
    ----------
    .. [*] Rutherford, Andrew. Anova and ANCOVA: a GLM approach. John Wiley & Sons, 2011.
    Nc                    || _         || _        || _        d|v rt        d      || _        |t        d      || _        |dk(  r t        j                  j                  | _
        n|| _
        |j                  |j                  |g|z               s*| j                  | j                          nd}t        |      | j                          y )NCzSFactor name cannot be 'C'! This is in conflict with patsy's contrast function name.z)Between subject effect not yet supported!mean)subsetzThe data set contains more than one observation per subject and cell. Either aggregate the data manually, or pass the `aggregate_func` parameter.)r6   depvarwithinr   betweenr@   subjectpdSeriesr   aggregate_funcequalsdrop_duplicates
_aggregate_check_data_balanced)selfr6   r   r   r   r   r   msgs           r   __init__zAnovaRM.__init__  s    	&= D E E% '7 8 8V#"$))..D"0D{{4//y67I/JK"".!A !o%!!#r   c                     | j                   j                  | j                  g| j                  z   d      | j                     j                  | j                        | _         y )NF)as_index)r6   groupbyr   r   r   aggr   r   s    r   r   zAnovaRM._aggregate  sQ    YYgt||nt{{:',  ..2kk; c$--. 		r   c                 R   d}| j                   D ]+  }|t        | j                  |   j                               z  }- i }t	        | j                  j
                  d         D ]`  }g }| j                   D ]-  }|j                  | j                  |   j                  |          / t        |      }||v r||   dz   ||<   \d||<   b d}t        |      |k7  rt        |      |   }|D ]  }|||   k7  st        |       | j                  j
                  d   ||z  kD  rt        d      y)zraise if data is not balanced

        This raises a ValueError if the data is not balanced, and
        returns None if it is balance

        Return might change
        r!   r   zData is unbalanced.z9There are more than 1 element in a cell! Missing factors?N)
r   r9   r6   uniqueranger4   rx   ry   tupler   )	r   factor_levelswi
cell_countrX   keyr   error_messagecounts	            r   r   zAnovaRM._check_data_balanced  sG    ++ 	9BS2!5!5!788M	9 
499??1-. 	$EC{{ 7

499S>..u567*Cj ",S/A"5
3"#
3	$ .z?m+]++3 	0C
3' //	0 99??1 55 ) * * 6r   c           	      
   | j                   | j                     j                  }| j                  D cg c]  }d|z  	 }}d| j                  z  }||gz   }t        j                  dj                  |      | j                         }|j                  j                  }|D ]H  }t        j                  dg|j                  d   z        }	d|	||   <   t        j                  |	      ||<   J dj                  |      g}
t        ||
|j                  d         }	|dd|	f   }t        ||      }|j                         }|j                   |j                  d   k  rt#        d	      |
D ]  }|j%                  |        |D ]  }||   |	   ||<    |j&                  }|j(                  }|j*                  }g d
}t-        j.                  t        j0                  d      |      }|D ]  }| j                  |vs|dk7  st3        |||||g      \  }}||z
  }||z
  |z  }|dj                  |dd       k(  s
|dz   |z   |vr||z  }|}n&t3        |||||dz   |z   g      \  }}||z
  }||z
  |z  }||z  }t4        j6                  j9                  |||      }|j;                  dd      j;                  dd      }||j<                  |df<   ||j<                  |df<   ||j<                  |df<   ||j<                  |df<    t?        |      S c c}w )zvestimate the model and compute the Anova table

        Returns
        -------
        AnovaResults instance
        z
C(%s, Sum)*r6   Fr!   T:Nz$Independent variables are collinear.)F ValueNum DFDen DFPr > F)r   r.   r&   	Interceptr   zC( z, Sum)r   r   r   r   ) r6   r   r   r   r   patsydmatrixjoinr7   term_name_slicesr;   rV   r4   r   r   fitrankr   popr   r]   r\   r   r   r<   r   r   r^   r_   replacer[   AnovaResults)r   r   rf   r   r   ro   r   r   r   r   term_excluder   resultsr   r]   r\   r'   anova_tablessr1	df_resid1df1msmmsedf2r   pr   s                              r   r   zAnovaRM.fit&  s    IIdkk")) -1KK8q,"88-G9$MM#((7+$))<mm44 	-C((E71771:-.C$(CC !!xx}K	- )*lAGGAJ?afI Aq	))+::
"CDD 	AOOA	 	5C*3/4K	5##kk;ll288F#3WE 	4C||3&3++="4q+vu#6i(*czS(388GCRL11sW,K?.C"C&81k6sW,-'/OD) $h.C#:,C#IGGJJq#s+{{4,44XrB34i025h/25h/23h//	42 K((m 9s   L )NNN)__name__
__module____qualname____doc__r   r   r   r    r   r   r   r     s(    5n DH $$<0*B@)r   r   c                   "    e Zd ZdZd Zd Zd Zy)r   zX
    Anova results class

    Attributes
    ----------
    anova_table : DataFrame
    c                     || _         y N)r   )r   r   s     r   r   zAnovaResults.__init__q  s
    &r   c                 >    | j                         j                         S r   )summary__str__r   s    r   r   zAnovaResults.__str__t  s    ||~%%''r   c                     t        j                         }|j                  d       |j                  | j                         |S )zlcreate summary results

        Returns
        -------
        summary : summary2.Summary instance
        Anova)r
   Summary	add_titleadd_dfr   )r   summs     r   r   zAnovaResults.summaryw  s5     !wD$$%r   N)r   r   r   r   r   r   r   r   r   r   r   r   i  s    '(r   r   __main__)olsz	moore.csvr!   )partner_status
conformity	fcategoryfscore)skiprowsrG   z5conformity ~ C(fcategory, Sum)*C(partner_status, Sum)r   z#conformity ~ C(partner_status, Sum)r*   )r    )&statsmodels.compat.pythonr   numpyr;   pandasr   r   r   r   rt   r    statsmodels.formula.formulatoolsr   r   r	   statsmodels.iolibr
   #statsmodels.regression.linear_modelr   r   rI   r=   r>   r?   r   r   r   r   r   r   statsmodels.formula.apir   read_csvmoorer   moore_lmmooreBrH   r   r   r   <module>r     s    ,   #   
 ' 3F"9>x4nVp%NgTD}) })@ 8 z+
 FOOK!#9:E J  #  6UCGGIF X1%E/ r   