
    !g                        d Z ddlZddlmZ ddlmZ ddlm	Z	  G d d      Z
edk(  rdgZdev rd	Zej                  ej                  j!                  ed
f       ej"                  edf      f   Zej                   ej&                   ej(                  d
      d
d       ej*                  d      ddd   f   j,                  Z ej0                  g dg dg dg      Z ej0                  g dg dg dg      Z ej2                  ee      Zedej                  j!                  ej6                        z  z  Z ej2                  eg d      Zedej                  j!                  ej6                        z  z   Z e
ee      Z eejA                                 ed       ejC                  ddd        eejA                                yyy)zQ
Created on Sun Nov 14 08:21:41 2010

Author: josef-pktd
License: BSD (3-clause)
    N)pca)LeaveOneOutc                   2    e Zd ZdZd ZddZd Zd	dZd Zy)
FactorModelUnivariatea  

    Todo:
    check treatment of const, make it optional ?
        add hasconst (0 or 1), needed when selecting nfact+hasconst
    options are arguments in calc_factors, should be more public instead
    cross-validation is slow for large number of observations
    c                 l    t        j                  |      | _        t        j                  |      | _        y )N)npasarrayendogexog)selfr
   r   s      f/var/www/dash_apps/app1/venv/lib/python3.12/site-packages/statsmodels/sandbox/datarich/factormodels.py__init__zFactorModelUnivariate.__init__   s"    ZZ&
JJt$	    Nc                    || j                   }nt        j                  |      }t        ||d      \  }}}}|| _        |r$t        j                  |d      | _        d| _        n|| _        d| _        || _	        || _
        y)zget factor decomposition of exogenous variables

        This uses principal component analysis to obtain the factors. The number
        of factors kept is the maximum that will be considered in the regression.
        N   )keepdim	normalizeT)prependr   )r   r   r	   r   exog_reducedsmadd_constantfactorshasconstevalsevecs)r   xr   addconstxredfactr   r   s           r   calc_factorsz"FactorModelUnivariate.calc_factors    sv     9		A

1A$'7a$H dE5 ??4>DLDMDLDM

r   c                     t        | d      s| j                          t        j                  | j                  | j
                  d d d |dz   f         j                         S )Nfactors_wconstr   )hasattrr    r   OLSr
   r   fit)r   nfacts     r   fit_fixed_nfactz%FactorModelUnivariate.fit_fixed_nfact7   sL    t-.vvdjj$,,q%'z":;??AAr   c                 <   t        | d      s| j                          | j                  }|| j                  j                  d   |z
  }||z   dk  rt        d      t        |d      }| j                  }g }t        d||z         D ]  }| j                  ddd|f   }t        j                  ||      j                         }	|s|t        t        |            }d}
|D ]l  \  }}t        j                  ||   ||ddf         j                         }|
||   |j                  j                  |j                   ||ddf         z
  dz  z  }
n nt"        j$                  }
|j'                  ||	j(                  |	j*                  |	j,                  |
g        t#        j.                  |      x| _        }t"        j2                  t#        j4                  |ddddf   d	      t#        j6                  |dddf   d	      t#        j4                  |ddd
f   d	      f   | _        y)aW  estimate the model and selection criteria for up to maxfact factors

        The selection criteria that are calculated are AIC, BIC, and R2_adj. and
        additionally cross-validation prediction error sum of squares if `skip_crossval`
        is false. Cross-validation is not used by default because it can be
        time consuming to calculate.

        By default the cross-validation method is Leave-one-out on the full dataset.
        A different cross-validation sample can be specified as an argument to
        cv_iter.

        Results are attached in `results_find_nfact`



        r   Nr   zFnothing to do, number of factors (incl. constant) should be at least 1
                  @   r   )r#   r    r   r   shape
ValueErrorminr
   ranger   r$   r%   r   lenmodelpredictparamsr   nanappendaicbicrsquared_adjarrayresults_find_nfactr_argminargmax
best_nfact)r   maxfactskip_crossvalcv_iterr   y0resultskr   resprederr2inidxoutidxres_l1os                 r   fit_find_nfactz$FactorModelUnivariate.fit_find_nfact<   s   $ tY'==?ll((+h6GH! - . . gr"ZZq'(*+ 	NA <<"1"%D&&T"&&(C !?)#b'2G%, \ME6 ffRYU1W>BBDGF!(!6!6w~~tFSTH~!V"WY[!\ \H\ 66NNAsww1A1A8LM3	N6 -/HHW,=='%%71QqS5>!!<biiPQRSPSUV>WYYwqt}Q/!1 2r   c                 z   t        | d      s| j                          | j                  }d}|dz  }|ddt        | j                        z  z   z  }ddlm} dj                  d	      }d
gdgdz  z   }t        |      } |||d|      }|dz  }|dz  }|d|j                         z   z  }|dz  }|dz  }|dz  }|dz  }|S )zprovides a summary for the selection of the number of factors

        Returns
        -------
        sumstr : str
            summary of the results for selecting the number of factors

        r<    z,
Best result for k, by AIC, BIC, R2_adj, L1Oz
                   z%5d %4d %6d %5dr   )SimpleTablezk, AIC, BIC, R2_adj, L1Oz, z%6dz%10.3f   )	data_fmtsN)txt_fmtz"
PCA regression on simulated data,z+
DGP: 2 factors and 4 explanatory variables
z)
Notes: k is number of components of PCA,z&
       constant is added additionallyz-
       k=0 means regression on constant onlyz?
       L1O: sum of squared prediction errors for leave-one-out)
r#   rL   r<   tupler@   statsmodels.iolib.tablerO   splitdict__str__)r   rE   sumstrrO   headers	numformattxt_fmt1tabls           r   summary_find_nfactz(FactorModelUnivariate.summary_find_nfact~   s     t12! ))FF 	-"3eDOO6L"LLL7,2248Gxjl*	I.7GT8D<<EE$''CC@@GGYYr   )Nr   T)NTN)	__name__
__module____qualname____doc__r   r    r'   rL   r^    r   r   r   r      s#    %.B
@2D#r   r   __main__r   i     )sizerP   r-   )      ?rg   r*   r*   )r*   r*   rg   rg   )      @r+   rg   r*   )皙?rh   rg   r*   )r*   r*   g      ?ri   ri   )rg   rg   rg   zwith cross validation - slowerF)rA   rB   rC   )"rb   numpyr   statsmodels.apiapir   statsmodels.sandbox.toolsr   #statsmodels.sandbox.tools.cross_valr   r   r_   examplesnobsc_randomnormalonesf0repeateyearangeTf2xcoefr;   dotx0r.   ytruerD   modprintr^   rL   rc   r   r   <module>r      s     ) ;P Pf zsHH}UU299##$q#2GBGGT!H4EEF%%			&"&&)Aa01dd1CCDFF"((0002 3 "((3324 5 RVVB 
c"))"""111r,'S))u{{);;;#B+c$$&'./4udKc$$&''  r   