
    !g#                     
   d Z ddlmZ ddlZddlmZ d1dZd Z	d Z
dZdZd	 Zd
 Zd Zedk(  rdZej$                  j'                  dedf      j)                  defdefdefdefg      Zej$                  j/                  edf      j)                  defdefg      ZddlZej6                  j8                  j;                  eefd      Z ej>                  edfe      Z ejB                  jD                  D ]
  Z#ee#   e e#<    ejB                  jD                  D ]
  Z#ee#   e e#<    	  ede       \  Z$Z" ede       \  Z$Z" ejP                  e"D  cg c]  } e$|    	 c}       Z)e)jU                  d      dej$                  j/                  e      z  z   Z+ ejX                  e+e)      j[                         Z. e&e.j^                          e&e ee.      z          ejP                   ede"      D  cg c]  } e$|    	 c}       Z)e)jU                  d      dej$                  j/                  e      z  z   Z+ ejX                  e+e)      j[                         Z. e&e.j^                          e&e ee.      z          ejB                  defd efd!efd"efd#efd$efd%efd&efg      Z0 ejb                  d'e0d(d)      Z2 e&d*e2jB                  jD                  D cg c]  }e2jf                  |   jU                         ! c}       e2jf                  j)                  e4      Z5e5jm                  d+ e7e2jB                  jD                              jq                  d      Z9e2e9 ddf   jt                  Z; e&e;jx                          e&e;jB                          ed,e;      \  Z=Z> ejP                   ed-e>      D  cg c]  } e=|    	 c}       Z?e;d&   Z@ ejX                  e@e?      j[                         ZA e&eAj^                          e&e eeA      z         d.j                  e2jB                  jD                  dd+       ZC ed/e;      \  ZDZE ejP                   ed-eE      D  cg c]  } eD|    	 c}       ZFe;d&   ZG ejX                  eGeF      j[                         ZH e&eHj^                          e&e eeH      z         eED ]m  ZI e&d0eIf        ejP                   eeIeE      D  cg c]  } eD|    	 c}       ZJe;d&   ZK ejX                  eKeJ      j[                         ZL e&e eeL      z         o yyc c} w c c} w c c}w c c} w c c} w c c} w )2a   convenience functions for ANOVA type analysis with OLS

Note: statistical results of ANOVA are not checked, OLS is
checked but not whether the reported results are the ones used
in ANOVA

includes form2design for creating dummy variables

TODO:
 * ...
 *

    )lmapNc                     | j                         } t        j                  |       }|r| dddf   |k(  j                  t              S | dddf   |k(  j                  t              ddddf   S )z|convert array of categories to dummy variables
    by default drops dummy variable for last category
    uses ravel, 1d onlyN)ravelnpuniqueastypeint)x	returnallgroupss      i/var/www/dash_apps/app1/venv/lib/python3.12/site-packages/statsmodels/sandbox/regression/try_ols_anova.py
data2dummyr      sh     	
	AYYq\F!T'
f$,,S11!T'
f$,,S1!CRC%88    c                     t        j                  t        t        | j	                                     }| |dddddf   k(  j                  d      j                  j                  t              ddddf   S )zcreates product dummy variables from 2 columns of 2d array

    drops last dummy variable, but not from each category
    singular with simple dummy variable but not with constant

    quickly written, no safeguards

    Nr   )	r   r   r   tupletolistallTr	   r
   )r   r   s     r   data2proddummyr      s^     YYtE188:./FvaQh$$R(**11#6q"u==r   c                 V    |j                   dk(  r	|dddf   }t        | d      }||z  S )zcreate dummy continuous variable

    Parameters
    ----------
    x1 : 1d array
        label or group array
    x2 : 1d array (float)
        continuous variable

    Notes
    -----
    useful for group specific slope coefficients in regression
       NT)r   )ndimr   )x1x2dummys      r   data2groupcontr   .   s2     
ww!|$ZrT*E2:r   aW  
ANOVA statistics (model sum of squares excludes constant)
Source    DF  Sum Squares   Mean Square    F Value    Pr > F
Model     %(df_model)i        %(ess)f       %(mse_model)f   %(fvalue)f %(f_pvalue)f
Error     %(df_resid)i     %(ssr)f       %(mse_resid)f
CTotal    %(nobs)i    %(uncentered_tss)f     %(mse_total)f

R squared  %(rsquared)f
a]  
ANOVA statistics (model sum of squares includes constant)
Source    DF  Sum Squares   Mean Square    F Value    Pr > F
Model     %(df_model)i      %(ssmwithmean)f       %(mse_model)f   %(fvalue)f %(f_pvalue)f
Error     %(df_resid)i     %(ssr)f       %(mse_resid)f
CTotal    %(nobs)i    %(uncentered_tss)f     %(mse_total)f

R squared  %(rsquared)f
c                     i }|j                  | j                         g d}|D ]  }t        | |      ||<    | j                  j                  |d<   | j
                  | j                  z
  |d<   |S )zjupdate regression results dictionary with ANOVA specific statistics

    not checked for completeness
    )df_modeldf_residessssruncentered_tss	mse_model	mse_resid	mse_totalfvaluef_pvaluersquarednobsssmwithmean)update__dict__getattrmodelr*   r#   r"   )resad
anova_attrkeys       r   	anovadictr4   [   sp    
 
BIIcllJ  $#s#3$BvJ**SWW4B}Ir   c                    i }g }| j                         D ]  }|dk(  r7t        j                  |j                  d         |d<   |j	                  d       @d|vr||   ||<   |j	                  |       ^|dd dk(  r7|j                  d      d   }t        ||         ||<   |j	                  |       |dd d	k(  r|j                  d      d   j                  d
      }t        t        j                  ||d      ||d      f         |dj                  |      <   |j	                  dj                  |             &|dd dk(  ro|j                  d      d   j                  d
      }t        ||d      ||d            |dj                  |      <   |j	                  dj                  |             t        d       ||fS )a  convert string formula to data dictionary

    ss : str
     * I : add constant
     * varname : for simple varnames data is used as is
     * F:varname : create dummy variables for factor varname
     * P:varname1*varname2 : create product dummy variables for
       varnames
     * G:varname1*varname2 : create product between factor and
       continuous variable
    data : dict or structured array
       data set, access of variables by name as in dictionaries

    Returns
    -------
    vars : dictionary
        dictionary of variables with converted dummy variables
    names : list
        list of names, product (P:) and grouped continuous
        variables (G:) have name by joining individual names
        sorted according to input

    Examples
    --------
    >>> xx, n = form2design('I a F:b P:c*d G:c*f', testdata)
    >>> xx.keys()
    ['a', 'b', 'const', 'cf', 'cd']
    >>> n
    ['const', 'a', 'b', 'cd', 'cf']

    Notes
    -----

    with sorted dict, separate name list would not be necessary
    Ir   const:N   zF:r   zP:* zG:zunknown expression in formula)splitr   onesshapeappendr   r   c_joinr   
ValueError)ssdatavarsnamesitemvs         r   form2designrI   l   s   H DE
 >3;GGDJJqM2DMLL!_dDJLL"1X

3"A a)DGLLO"1X

3"((-A-beeD1JtAaDz4I.JKDLL$"1X

3"((-A-d1Q4j$qt*EDLL$<==)>* ;r   c                 \    |dd }| j                         D ]  }|j                  |        |S )zwdrop names from a list of strings,
    names to drop are in space delimited list
    does not change original list
    N)r<   remove)rC   linewlirG   s       r   dropnamerN      s3    
 qEE
 TLr   __main__i        )sizeabcdr9   efT)flattenr   zI a F:b P:c*dzI a F:b P:c*d G:a*e fg{Gz?zae fbreedsexlitterpenpigagebageyzdftest3.data.)missingusemaskrc   r   zI F:sex ager;    z'I F:breed F:sex F:litter F:pen age bagez
Results dropping)F)M__doc__statsmodels.compat.pythonr   numpyr   statsmodels.apiapismr   r   r   
anova_str0	anova_strr4   rI   rN   __name__r*   randomrandintviewr
   testdataintnormalfloattestdatacontnumpy.lib.recfunctionslibrecfunctions	zip_descrdt2emptytestdatadtyperF   namexxnprintr@   column_stackXsumra   OLSfitrest1paramsdt_b
genfromtxtdtamaskboolmreshapelenanydroprowsrD   
dta_use_b1r>   xx_b1names_b1X_b1y_b1rest_b1rA   allexogxx_b1a	names_b1aX_b1ay_b1arest_b1adropnX_b1a_y_b1a_	rest_b1a_)nnks   00r   <module>r      s>   +  	9>0
	";z z
 D))##AT!H#5::SIs3iQTUXPY[^_bZc;deK99##4(#3883u+E{9STL!
))
 
 
*
*K+Ft
*
TCrxxa#&H!!'' +$T*+""(( ,%d+, 	 OX6IB3X>IB 	%0BB01A	a4		((t(555ABFF1QKOOE	%,,	)i&
&' 	(65*ABBBBCA	a4		((t(555ABFF1QKOOE	%,,	)i&
&' 288gs^eS\Hc?3<%u~E?S%L2 3D "--S$
GC	)AAchhqkoo'ABdAyyC		0155a8H hYq[!&&J	*

	*

 "-<OE82??X0FG"E"IGHDc?DbffT4 $$&G	'..	)i(
() hhsyys+,G $$MzZFIBOO(2y2IJBVBZJKEsOErvveU#'')H	(//	)i)
)* 0#U+, xy7Q!R&*!RSCBFF66*..0	i)I../0I B 1 C* B H" K "Ss$   T%/T*$T/
.T4T9T>
