add glm/gwr base code; start gwr crankshaft func
This commit is contained in:
parent
538ab9a071
commit
01c9195ea5
@ -0,0 +1,444 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Import GLM and pysal\n",
|
||||
"import os\n",
|
||||
"import numpy as np\n",
|
||||
"os.chdir('/Users/toshan/dev/pysal/pysal/contrib/glm')\n",
|
||||
"from glm import GLM\n",
|
||||
"import pysal\n",
|
||||
"import pandas as pd\n",
|
||||
"import statsmodels.formula.api as smf\n",
|
||||
"import statsmodels.api as sm\n",
|
||||
"from family import Gaussian, Binomial, Poisson, QuasiPoisson\n",
|
||||
"\n",
|
||||
"from statsmodels.api import families"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Prepare some test data - columbus example\n",
|
||||
"db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')\n",
|
||||
"y = np.array(db.by_col(\"HOVAL\"))\n",
|
||||
"y = np.reshape(y, (49,1))\n",
|
||||
"X = []\n",
|
||||
"#X.append(np.ones(len(y)))\n",
|
||||
"X.append(db.by_col(\"INC\"))\n",
|
||||
"X.append(db.by_col(\"CRIME\"))\n",
|
||||
"X = np.array(X).T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[ 46.42818268]\n",
|
||||
" [ 0.62898397]\n",
|
||||
" [ -0.48488854]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#First fit pysal OLS model\n",
|
||||
"from pysal.spreg import ols\n",
|
||||
"OLS = ols.OLS(y, X)\n",
|
||||
"print OLS.betas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"[ 46.42818268 0.62898397 -0.48488854]\n",
|
||||
"[ 46.42818268 0.62898397 -0.48488854]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#Then fit Gaussian GLM\n",
|
||||
"\n",
|
||||
"#create Gaussian GLM model object\n",
|
||||
"model = GLM(y, X, Gaussian())\n",
|
||||
"model\n",
|
||||
"\n",
|
||||
"#Fit model to estimate coefficients and return GLMResults object\n",
|
||||
"results = model.fit()\n",
|
||||
"\n",
|
||||
"#Check coefficients - R betas [46.4282, 0.6290, -0.4849]\n",
|
||||
"print results.params\n",
|
||||
"\n",
|
||||
"# Gaussian GLM results from statsmodels\n",
|
||||
"sm_model = smf.GLM(y, sm.add_constant(X), family=families.Gaussian())\n",
|
||||
"sm_results = sm_model.fit()\n",
|
||||
"print sm_results.params"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2 2\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print results.df_model, sm_results.df_model\n",
|
||||
"print np.allclose(results.aic, sm_results.aic)\n",
|
||||
"print np.allclose(results.bic, sm_results.bic)\n",
|
||||
"print np.allclose(results.deviance, sm_results.deviance)\n",
|
||||
"print np.allclose(results.df_model, sm_results.df_model)\n",
|
||||
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
|
||||
"print np.allclose(results.llf, sm_results.llf)\n",
|
||||
"print np.allclose(results.mu, sm_results.mu)\n",
|
||||
"print np.allclose(results.n, sm_results.nobs)\n",
|
||||
"print np.allclose(results.null, sm_results.null)\n",
|
||||
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
|
||||
"print np.allclose(results.params, sm_results.params)\n",
|
||||
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
|
||||
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
|
||||
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
|
||||
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
|
||||
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
|
||||
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
|
||||
"print np.allclose(results.scale, sm_results.scale)\n",
|
||||
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
|
||||
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
|
||||
"print np.allclose(results.bse, sm_results.bse)\n",
|
||||
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
|
||||
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
|
||||
"print np.allclose(results.tvalues, sm_results.tvalues)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'family.Poisson'>\n",
|
||||
"<class 'family.Poisson'>\n",
|
||||
"<class 'family.Poisson'>\n",
|
||||
"[ 3.92159085 0.01183491 -0.01371397]\n",
|
||||
"[ 3.92159085 0.01183491 -0.01371397]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#Now fit a Poisson GLM \n",
|
||||
"\n",
|
||||
"poisson_y = np.round(y).astype(int)\n",
|
||||
"\n",
|
||||
"#create Poisson GLM model object\n",
|
||||
"model = GLM(poisson_y, X, Poisson())\n",
|
||||
"model\n",
|
||||
"\n",
|
||||
"#Fit model to estimate coefficients and return GLMResults object\n",
|
||||
"results = model.fit()\n",
|
||||
"\n",
|
||||
"#Check coefficients - R betas [3.91926, 0.01198, -0.01371]\n",
|
||||
"print results.params.T\n",
|
||||
"\n",
|
||||
"# Poisson GLM results from statsmodels\n",
|
||||
"sm_results = smf.GLM(poisson_y, sm.add_constant(X), family=families.Poisson()).fit()\n",
|
||||
"print sm_results.params"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'family.Poisson'>\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"<class 'family.Poisson'>\n",
|
||||
"<class 'family.Poisson'>\n",
|
||||
"<class 'family.Poisson'>\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"[ 0.13049161 0.00511599 0.00193769] [ 0.13049161 0.00511599 0.00193769]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print np.allclose(results.aic, sm_results.aic)\n",
|
||||
"print np.allclose(results.bic, sm_results.bic)\n",
|
||||
"print np.allclose(results.deviance, sm_results.deviance)\n",
|
||||
"print np.allclose(results.df_model, sm_results.df_model)\n",
|
||||
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
|
||||
"print np.allclose(results.llf, sm_results.llf)\n",
|
||||
"print np.allclose(results.mu, sm_results.mu)\n",
|
||||
"print np.allclose(results.n, sm_results.nobs)\n",
|
||||
"print np.allclose(results.null, sm_results.null)\n",
|
||||
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
|
||||
"print np.allclose(results.params, sm_results.params)\n",
|
||||
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
|
||||
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
|
||||
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
|
||||
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
|
||||
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
|
||||
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
|
||||
"print np.allclose(results.scale, sm_results.scale)\n",
|
||||
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
|
||||
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
|
||||
"print np.allclose(results.bse, sm_results.bse)\n",
|
||||
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
|
||||
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
|
||||
"print np.allclose(results.tvalues, sm_results.tvalues)\n",
|
||||
"print results.bse, sm_results.bse"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-5.33638276 0.0287754 ]\n",
|
||||
"[-5.33638276 0.0287754 ]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#Now fit a binomial GLM\n",
|
||||
"londonhp = pd.read_csv('/Users/toshan/projects/londonhp.csv')\n",
|
||||
"#londonhp = pd.read_csv('/Users/qszhao/Dropbox/pysal/pysal/contrib/gwr/londonhp.csv')\n",
|
||||
"y = londonhp['BATH2'].values\n",
|
||||
"y = np.reshape(y, (316,1))\n",
|
||||
"X = londonhp['FLOORSZ'].values\n",
|
||||
"X = np.reshape(X, (316,1))\n",
|
||||
"\n",
|
||||
"#create logistic GLM model object\n",
|
||||
"model = GLM(y, X, Binomial())\n",
|
||||
"model\n",
|
||||
"\n",
|
||||
"#Fit model to estimate coefficients and return GLMResults object\n",
|
||||
"results = model.fit()\n",
|
||||
"\n",
|
||||
"#Check coefficients - R betas [-5.33638, 0.02878]\n",
|
||||
"print results.params.T\n",
|
||||
"\n",
|
||||
"# Logistic GLM results from statsmodels\n",
|
||||
"sm_results = smf.GLM(y, sm.add_constant(X), family=families.Binomial()).fit()\n",
|
||||
"print sm_results.params"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1 1\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print results.df_model, sm_results.df_model\n",
|
||||
"print np.allclose(results.aic, sm_results.aic)\n",
|
||||
"print np.allclose(results.bic, sm_results.bic)\n",
|
||||
"print np.allclose(results.deviance, sm_results.deviance)\n",
|
||||
"print np.allclose(results.df_model, sm_results.df_model)\n",
|
||||
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
|
||||
"print np.allclose(results.llf, sm_results.llf)\n",
|
||||
"print np.allclose(results.mu, sm_results.mu)\n",
|
||||
"print np.allclose(results.n, sm_results.nobs)\n",
|
||||
"print np.allclose(results.null, sm_results.null)\n",
|
||||
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
|
||||
"print np.allclose(results.params, sm_results.params)\n",
|
||||
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
|
||||
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
|
||||
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
|
||||
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
|
||||
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
|
||||
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
|
||||
"print np.allclose(results.scale, sm_results.scale)\n",
|
||||
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
|
||||
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
|
||||
"print np.allclose(results.bse, sm_results.bse)\n",
|
||||
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
|
||||
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
|
||||
"print np.allclose(results.tvalues, sm_results.tvalues)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'family.QuasiPoisson'>\n",
|
||||
"<class 'family.QuasiPoisson'>\n",
|
||||
"<class 'family.QuasiPoisson'>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#create QUasiPoisson GLM model object\n",
|
||||
"model = GLM(poisson_y, X, QuasiPoisson())\n",
|
||||
"model\n",
|
||||
"\n",
|
||||
"#Fit model to estimate coefficients and return GLMResults object\n",
|
||||
"results = model.fit()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 2",
|
||||
"language": "python",
|
||||
"name": "python2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
4
src/py/crankshaft/crankshaft/regression/glm/__init__.py
Normal file
4
src/py/crankshaft/crankshaft/regression/glm/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
import glm
|
||||
import family
|
||||
import utils
|
||||
import iwls
|
959
src/py/crankshaft/crankshaft/regression/glm/base.py
Normal file
959
src/py/crankshaft/crankshaft/regression/glm/base.py
Normal file
@ -0,0 +1,959 @@
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
from utils import cache_readonly
|
||||
|
||||
class Results(object):
|
||||
"""
|
||||
Class to contain model results
|
||||
Parameters
|
||||
----------
|
||||
model : class instance
|
||||
the previously specified model instance
|
||||
params : array
|
||||
parameter estimates from the fit model
|
||||
"""
|
||||
def __init__(self, model, params, **kwd):
|
||||
self.__dict__.update(kwd)
|
||||
self.initialize(model, params, **kwd)
|
||||
self._data_attr = []
|
||||
|
||||
def initialize(self, model, params, **kwd):
|
||||
self.params = params
|
||||
self.model = model
|
||||
if hasattr(model, 'k_constant'):
|
||||
self.k_constant = model.k_constant
|
||||
|
||||
def predict(self, exog=None, transform=True, *args, **kwargs):
|
||||
"""
|
||||
Call self.model.predict with self.params as the first argument.
|
||||
Parameters
|
||||
----------
|
||||
exog : array-like, optional
|
||||
The values for which you want to predict.
|
||||
transform : bool, optional
|
||||
If the model was fit via a formula, do you want to pass
|
||||
exog through the formula. Default is True. E.g., if you fit
|
||||
a model y ~ log(x1) + log(x2), and transform is True, then
|
||||
you can pass a data structure that contains x1 and x2 in
|
||||
their original form. Otherwise, you'd need to log the data
|
||||
first.
|
||||
args, kwargs :
|
||||
Some models can take additional arguments or keywords, see the
|
||||
predict method of the model for the details.
|
||||
Returns
|
||||
-------
|
||||
prediction : ndarray or pandas.Series
|
||||
See self.model.predict
|
||||
"""
|
||||
if transform and hasattr(self.model, 'formula') and exog is not None:
|
||||
from patsy import dmatrix
|
||||
exog = dmatrix(self.model.data.design_info.builder,
|
||||
exog)
|
||||
|
||||
if exog is not None:
|
||||
exog = np.asarray(exog)
|
||||
if exog.ndim == 1 and (self.model.exog.ndim == 1 or
|
||||
self.model.exog.shape[1] == 1):
|
||||
exog = exog[:, None]
|
||||
exog = np.atleast_2d(exog) # needed in count model shape[1]
|
||||
|
||||
return self.model.predict(self.params, exog, *args, **kwargs)
|
||||
|
||||
|
||||
#TODO: public method?
|
||||
class LikelihoodModelResults(Results):
|
||||
"""
|
||||
Class to contain results from likelihood models
|
||||
Parameters
|
||||
-----------
|
||||
model : LikelihoodModel instance or subclass instance
|
||||
LikelihoodModelResults holds a reference to the model that is fit.
|
||||
params : 1d array_like
|
||||
parameter estimates from estimated model
|
||||
normalized_cov_params : 2d array
|
||||
Normalized (before scaling) covariance of params. (dot(X.T,X))**-1
|
||||
scale : float
|
||||
For (some subset of models) scale will typically be the
|
||||
mean square error from the estimated model (sigma^2)
|
||||
Returns
|
||||
-------
|
||||
**Attributes**
|
||||
mle_retvals : dict
|
||||
Contains the values returned from the chosen optimization method if
|
||||
full_output is True during the fit. Available only if the model
|
||||
is fit by maximum likelihood. See notes below for the output from
|
||||
the different methods.
|
||||
mle_settings : dict
|
||||
Contains the arguments passed to the chosen optimization method.
|
||||
Available if the model is fit by maximum likelihood. See
|
||||
LikelihoodModel.fit for more information.
|
||||
model : model instance
|
||||
LikelihoodResults contains a reference to the model that is fit.
|
||||
params : ndarray
|
||||
The parameters estimated for the model.
|
||||
scale : float
|
||||
The scaling factor of the model given during instantiation.
|
||||
tvalues : array
|
||||
The t-values of the standard errors.
|
||||
Notes
|
||||
-----
|
||||
The covariance of params is given by scale times normalized_cov_params.
|
||||
Return values by solver if full_output is True during fit:
|
||||
'newton'
|
||||
fopt : float
|
||||
The value of the (negative) loglikelihood at its
|
||||
minimum.
|
||||
iterations : int
|
||||
Number of iterations performed.
|
||||
score : ndarray
|
||||
The score vector at the optimum.
|
||||
Hessian : ndarray
|
||||
The Hessian at the optimum.
|
||||
warnflag : int
|
||||
1 if maxiter is exceeded. 0 if successful convergence.
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
allvecs : list
|
||||
List of solutions at each iteration.
|
||||
'nm'
|
||||
fopt : float
|
||||
The value of the (negative) loglikelihood at its
|
||||
minimum.
|
||||
iterations : int
|
||||
Number of iterations performed.
|
||||
warnflag : int
|
||||
1: Maximum number of function evaluations made.
|
||||
2: Maximum number of iterations reached.
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
allvecs : list
|
||||
List of solutions at each iteration.
|
||||
'bfgs'
|
||||
fopt : float
|
||||
Value of the (negative) loglikelihood at its minimum.
|
||||
gopt : float
|
||||
Value of gradient at minimum, which should be near 0.
|
||||
Hinv : ndarray
|
||||
value of the inverse Hessian matrix at minimum. Note
|
||||
that this is just an approximation and will often be
|
||||
different from the value of the analytic Hessian.
|
||||
fcalls : int
|
||||
Number of calls to loglike.
|
||||
gcalls : int
|
||||
Number of calls to gradient/score.
|
||||
warnflag : int
|
||||
1: Maximum number of iterations exceeded. 2: Gradient
|
||||
and/or function calls are not changing.
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
allvecs : list
|
||||
Results at each iteration.
|
||||
'lbfgs'
|
||||
fopt : float
|
||||
Value of the (negative) loglikelihood at its minimum.
|
||||
gopt : float
|
||||
Value of gradient at minimum, which should be near 0.
|
||||
fcalls : int
|
||||
Number of calls to loglike.
|
||||
warnflag : int
|
||||
Warning flag:
|
||||
- 0 if converged
|
||||
- 1 if too many function evaluations or too many iterations
|
||||
- 2 if stopped for another reason
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
'powell'
|
||||
fopt : float
|
||||
Value of the (negative) loglikelihood at its minimum.
|
||||
direc : ndarray
|
||||
Current direction set.
|
||||
iterations : int
|
||||
Number of iterations performed.
|
||||
fcalls : int
|
||||
Number of calls to loglike.
|
||||
warnflag : int
|
||||
1: Maximum number of function evaluations. 2: Maximum number
|
||||
of iterations.
|
||||
converged : bool
|
||||
True : converged. False: did not converge.
|
||||
allvecs : list
|
||||
Results at each iteration.
|
||||
'cg'
|
||||
fopt : float
|
||||
Value of the (negative) loglikelihood at its minimum.
|
||||
fcalls : int
|
||||
Number of calls to loglike.
|
||||
gcalls : int
|
||||
Number of calls to gradient/score.
|
||||
warnflag : int
|
||||
1: Maximum number of iterations exceeded. 2: Gradient and/
|
||||
or function calls not changing.
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
allvecs : list
|
||||
Results at each iteration.
|
||||
'ncg'
|
||||
fopt : float
|
||||
Value of the (negative) loglikelihood at its minimum.
|
||||
fcalls : int
|
||||
Number of calls to loglike.
|
||||
gcalls : int
|
||||
Number of calls to gradient/score.
|
||||
hcalls : int
|
||||
Number of calls to hessian.
|
||||
warnflag : int
|
||||
1: Maximum number of iterations exceeded.
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
allvecs : list
|
||||
Results at each iteration.
|
||||
"""
|
||||
|
||||
# by default we use normal distribution
|
||||
# can be overwritten by instances or subclasses
|
||||
use_t = False
|
||||
|
||||
def __init__(self, model, params, normalized_cov_params=None, scale=1.,
|
||||
**kwargs):
|
||||
super(LikelihoodModelResults, self).__init__(model, params)
|
||||
self.normalized_cov_params = normalized_cov_params
|
||||
self.scale = scale
|
||||
|
||||
# robust covariance
|
||||
# We put cov_type in kwargs so subclasses can decide in fit whether to
|
||||
# use this generic implementation
|
||||
if 'use_t' in kwargs:
|
||||
use_t = kwargs['use_t']
|
||||
if use_t is not None:
|
||||
self.use_t = use_t
|
||||
if 'cov_type' in kwargs:
|
||||
cov_type = kwargs.get('cov_type', 'nonrobust')
|
||||
cov_kwds = kwargs.get('cov_kwds', {})
|
||||
|
||||
if cov_type == 'nonrobust':
|
||||
self.cov_type = 'nonrobust'
|
||||
self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
|
||||
'covariance matrix of the errors is correctly ' +
|
||||
'specified.'}
|
||||
else:
|
||||
from statsmodels.base.covtype import get_robustcov_results
|
||||
if cov_kwds is None:
|
||||
cov_kwds = {}
|
||||
use_t = self.use_t
|
||||
# TODO: we shouldn't need use_t in get_robustcov_results
|
||||
get_robustcov_results(self, cov_type=cov_type, use_self=True,
|
||||
use_t=use_t, **cov_kwds)
|
||||
|
||||
|
||||
def normalized_cov_params(self):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def _get_robustcov_results(self, cov_type='nonrobust', use_self=True,
|
||||
use_t=None, **cov_kwds):
|
||||
from statsmodels.base.covtype import get_robustcov_results
|
||||
if cov_kwds is None:
|
||||
cov_kwds = {}
|
||||
|
||||
if cov_type == 'nonrobust':
|
||||
self.cov_type = 'nonrobust'
|
||||
self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
|
||||
'covariance matrix of the errors is correctly ' +
|
||||
'specified.'}
|
||||
else:
|
||||
# TODO: we shouldn't need use_t in get_robustcov_results
|
||||
get_robustcov_results(self, cov_type=cov_type, use_self=True,
|
||||
use_t=use_t, **cov_kwds)
|
||||
|
||||
@cache_readonly
|
||||
def llf(self):
|
||||
return self.model.loglike(self.params)
|
||||
|
||||
@cache_readonly
|
||||
def bse(self):
|
||||
return np.sqrt(np.diag(self.cov_params()))
|
||||
|
||||
@cache_readonly
|
||||
def tvalues(self):
|
||||
"""
|
||||
Return the t-statistic for a given parameter estimate.
|
||||
"""
|
||||
return self.params / self.bse
|
||||
|
||||
@cache_readonly
|
||||
def pvalues(self):
|
||||
if self.use_t:
|
||||
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
|
||||
return stats.t.sf(np.abs(self.tvalues), df_resid)*2
|
||||
else:
|
||||
return stats.norm.sf(np.abs(self.tvalues))*2
|
||||
|
||||
|
||||
def cov_params(self, r_matrix=None, column=None, scale=None, cov_p=None,
|
||||
other=None):
|
||||
"""
|
||||
Returns the variance/covariance matrix.
|
||||
The variance/covariance matrix can be of a linear contrast
|
||||
of the estimates of params or all params multiplied by scale which
|
||||
will usually be an estimate of sigma^2. Scale is assumed to be
|
||||
a scalar.
|
||||
Parameters
|
||||
----------
|
||||
r_matrix : array-like
|
||||
Can be 1d, or 2d. Can be used alone or with other.
|
||||
column : array-like, optional
|
||||
Must be used on its own. Can be 0d or 1d see below.
|
||||
scale : float, optional
|
||||
Can be specified or not. Default is None, which means that
|
||||
the scale argument is taken from the model.
|
||||
other : array-like, optional
|
||||
Can be used when r_matrix is specified.
|
||||
Returns
|
||||
-------
|
||||
cov : ndarray
|
||||
covariance matrix of the parameter estimates or of linear
|
||||
combination of parameter estimates. See Notes.
|
||||
Notes
|
||||
-----
|
||||
(The below are assumed to be in matrix notation.)
|
||||
If no argument is specified returns the covariance matrix of a model
|
||||
``(scale)*(X.T X)^(-1)``
|
||||
If contrast is specified it pre and post-multiplies as follows
|
||||
``(scale) * r_matrix (X.T X)^(-1) r_matrix.T``
|
||||
If contrast and other are specified returns
|
||||
``(scale) * r_matrix (X.T X)^(-1) other.T``
|
||||
If column is specified returns
|
||||
``(scale) * (X.T X)^(-1)[column,column]`` if column is 0d
|
||||
OR
|
||||
``(scale) * (X.T X)^(-1)[column][:,column]`` if column is 1d
|
||||
"""
|
||||
if (hasattr(self, 'mle_settings') and
|
||||
self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
|
||||
dot_fun = nan_dot
|
||||
else:
|
||||
dot_fun = np.dot
|
||||
|
||||
if (cov_p is None and self.normalized_cov_params is None and
|
||||
not hasattr(self, 'cov_params_default')):
|
||||
raise ValueError('need covariance of parameters for computing '
|
||||
'(unnormalized) covariances')
|
||||
if column is not None and (r_matrix is not None or other is not None):
|
||||
raise ValueError('Column should be specified without other '
|
||||
'arguments.')
|
||||
if other is not None and r_matrix is None:
|
||||
raise ValueError('other can only be specified with r_matrix')
|
||||
|
||||
if cov_p is None:
|
||||
if hasattr(self, 'cov_params_default'):
|
||||
cov_p = self.cov_params_default
|
||||
else:
|
||||
if scale is None:
|
||||
scale = self.scale
|
||||
cov_p = self.normalized_cov_params * scale
|
||||
|
||||
if column is not None:
|
||||
column = np.asarray(column)
|
||||
if column.shape == ():
|
||||
return cov_p[column, column]
|
||||
else:
|
||||
#return cov_p[column][:, column]
|
||||
return cov_p[column[:, None], column]
|
||||
elif r_matrix is not None:
|
||||
r_matrix = np.asarray(r_matrix)
|
||||
if r_matrix.shape == ():
|
||||
raise ValueError("r_matrix should be 1d or 2d")
|
||||
if other is None:
|
||||
other = r_matrix
|
||||
else:
|
||||
other = np.asarray(other)
|
||||
tmp = dot_fun(r_matrix, dot_fun(cov_p, np.transpose(other)))
|
||||
return tmp
|
||||
else: # if r_matrix is None and column is None:
|
||||
return cov_p
|
||||
|
||||
#TODO: make sure this works as needed for GLMs
|
||||
def t_test(self, r_matrix, cov_p=None, scale=None,
|
||||
use_t=None):
|
||||
"""
|
||||
Compute a t-test for a each linear hypothesis of the form Rb = q
|
||||
Parameters
|
||||
----------
|
||||
r_matrix : array-like, str, tuple
|
||||
- array : If an array is given, a p x k 2d array or length k 1d
|
||||
array specifying the linear restrictions. It is assumed
|
||||
that the linear combination is equal to zero.
|
||||
- str : The full hypotheses to test can be given as a string.
|
||||
See the examples.
|
||||
- tuple : A tuple of arrays in the form (R, q). If q is given,
|
||||
can be either a scalar or a length p row vector.
|
||||
cov_p : array-like, optional
|
||||
An alternative estimate for the parameter covariance matrix.
|
||||
If None is given, self.normalized_cov_params is used.
|
||||
scale : float, optional
|
||||
An optional `scale` to use. Default is the scale specified
|
||||
by the model fit.
|
||||
use_t : bool, optional
|
||||
If use_t is None, then the default of the model is used.
|
||||
If use_t is True, then the p-values are based on the t
|
||||
distribution.
|
||||
If use_t is False, then the p-values are based on the normal
|
||||
distribution.
|
||||
Returns
|
||||
-------
|
||||
res : ContrastResults instance
|
||||
The results for the test are attributes of this results instance.
|
||||
The available results have the same elements as the parameter table
|
||||
in `summary()`.
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> import statsmodels.api as sm
|
||||
>>> data = sm.datasets.longley.load()
|
||||
>>> data.exog = sm.add_constant(data.exog)
|
||||
>>> results = sm.OLS(data.endog, data.exog).fit()
|
||||
>>> r = np.zeros_like(results.params)
|
||||
>>> r[5:] = [1,-1]
|
||||
>>> print(r)
|
||||
[ 0. 0. 0. 0. 0. 1. -1.]
|
||||
r tests that the coefficients on the 5th and 6th independent
|
||||
variable are the same.
|
||||
>>> T_test = results.t_test(r)
|
||||
>>> print(T_test)
|
||||
<T contrast: effect=-1829.2025687192481, sd=455.39079425193762,
|
||||
t=-4.0167754636411717, p=0.0015163772380899498, df_denom=9>
|
||||
>>> T_test.effect
|
||||
-1829.2025687192481
|
||||
>>> T_test.sd
|
||||
455.39079425193762
|
||||
>>> T_test.tvalue
|
||||
-4.0167754636411717
|
||||
>>> T_test.pvalue
|
||||
0.0015163772380899498
|
||||
Alternatively, you can specify the hypothesis tests using a string
|
||||
>>> from statsmodels.formula.api import ols
|
||||
>>> dta = sm.datasets.longley.load_pandas().data
|
||||
>>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
|
||||
>>> results = ols(formula, dta).fit()
|
||||
>>> hypotheses = 'GNPDEFL = GNP, UNEMP = 2, YEAR/1829 = 1'
|
||||
>>> t_test = results.t_test(hypotheses)
|
||||
>>> print(t_test)
|
||||
See Also
|
||||
---------
|
||||
tvalues : individual t statistics
|
||||
f_test : for F tests
|
||||
patsy.DesignInfo.linear_constraint
|
||||
"""
|
||||
from patsy import DesignInfo
|
||||
names = self.model.data.param_names
|
||||
LC = DesignInfo(names).linear_constraint(r_matrix)
|
||||
r_matrix, q_matrix = LC.coefs, LC.constants
|
||||
num_ttests = r_matrix.shape[0]
|
||||
num_params = r_matrix.shape[1]
|
||||
|
||||
if (cov_p is None and self.normalized_cov_params is None and
|
||||
not hasattr(self, 'cov_params_default')):
|
||||
raise ValueError('Need covariance of parameters for computing '
|
||||
'T statistics')
|
||||
if num_params != self.params.shape[0]:
|
||||
raise ValueError('r_matrix and params are not aligned')
|
||||
if q_matrix is None:
|
||||
q_matrix = np.zeros(num_ttests)
|
||||
else:
|
||||
q_matrix = np.asarray(q_matrix)
|
||||
q_matrix = q_matrix.squeeze()
|
||||
if q_matrix.size > 1:
|
||||
if q_matrix.shape[0] != num_ttests:
|
||||
raise ValueError("r_matrix and q_matrix must have the same "
|
||||
"number of rows")
|
||||
|
||||
if use_t is None:
|
||||
#switch to use_t false if undefined
|
||||
use_t = (hasattr(self, 'use_t') and self.use_t)
|
||||
|
||||
_t = _sd = None
|
||||
|
||||
_effect = np.dot(r_matrix, self.params)
|
||||
# nan_dot multiplies with the convention nan * 0 = 0
|
||||
|
||||
# Perform the test
|
||||
if num_ttests > 1:
|
||||
_sd = np.sqrt(np.diag(self.cov_params(
|
||||
r_matrix=r_matrix, cov_p=cov_p)))
|
||||
else:
|
||||
_sd = np.sqrt(self.cov_params(r_matrix=r_matrix, cov_p=cov_p))
|
||||
_t = (_effect - q_matrix) * recipr(_sd)
|
||||
|
||||
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
|
||||
|
||||
if use_t:
|
||||
return ContrastResults(effect=_effect, t=_t, sd=_sd,
|
||||
df_denom=df_resid)
|
||||
else:
|
||||
return ContrastResults(effect=_effect, statistic=_t, sd=_sd,
|
||||
df_denom=df_resid,
|
||||
distribution='norm')
|
||||
|
||||
def f_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None):
|
||||
"""
|
||||
Compute the F-test for a joint linear hypothesis.
|
||||
This is a special case of `wald_test` that always uses the F
|
||||
distribution.
|
||||
Parameters
|
||||
----------
|
||||
r_matrix : array-like, str, or tuple
|
||||
- array : An r x k array where r is the number of restrictions to
|
||||
test and k is the number of regressors. It is assumed
|
||||
that the linear combination is equal to zero.
|
||||
- str : The full hypotheses to test can be given as a string.
|
||||
See the examples.
|
||||
- tuple : A tuple of arrays in the form (R, q), ``q`` can be
|
||||
either a scalar or a length k row vector.
|
||||
cov_p : array-like, optional
|
||||
An alternative estimate for the parameter covariance matrix.
|
||||
If None is given, self.normalized_cov_params is used.
|
||||
scale : float, optional
|
||||
Default is 1.0 for no scaling.
|
||||
invcov : array-like, optional
|
||||
A q x q array to specify an inverse covariance matrix based on a
|
||||
restrictions matrix.
|
||||
Returns
|
||||
-------
|
||||
res : ContrastResults instance
|
||||
The results for the test are attributes of this results instance.
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> import statsmodels.api as sm
|
||||
>>> data = sm.datasets.longley.load()
|
||||
>>> data.exog = sm.add_constant(data.exog)
|
||||
>>> results = sm.OLS(data.endog, data.exog).fit()
|
||||
>>> A = np.identity(len(results.params))
|
||||
>>> A = A[1:,:]
|
||||
This tests that each coefficient is jointly statistically
|
||||
significantly different from zero.
|
||||
>>> print(results.f_test(A))
|
||||
<F contrast: F=330.28533923463488, p=4.98403052872e-10,
|
||||
df_denom=9, df_num=6>
|
||||
Compare this to
|
||||
>>> results.fvalue
|
||||
330.2853392346658
|
||||
>>> results.f_pvalue
|
||||
4.98403096572e-10
|
||||
>>> B = np.array(([0,0,1,-1,0,0,0],[0,0,0,0,0,1,-1]))
|
||||
This tests that the coefficient on the 2nd and 3rd regressors are
|
||||
equal and jointly that the coefficient on the 5th and 6th regressors
|
||||
are equal.
|
||||
>>> print(results.f_test(B))
|
||||
<F contrast: F=9.740461873303655, p=0.00560528853174, df_denom=9,
|
||||
df_num=2>
|
||||
Alternatively, you can specify the hypothesis tests using a string
|
||||
>>> from statsmodels.datasets import longley
|
||||
>>> from statsmodels.formula.api import ols
|
||||
>>> dta = longley.load_pandas().data
|
||||
>>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
|
||||
>>> results = ols(formula, dta).fit()
|
||||
>>> hypotheses = '(GNPDEFL = GNP), (UNEMP = 2), (YEAR/1829 = 1)'
|
||||
>>> f_test = results.f_test(hypotheses)
|
||||
>>> print(f_test)
|
||||
See Also
|
||||
--------
|
||||
statsmodels.stats.contrast.ContrastResults
|
||||
wald_test
|
||||
t_test
|
||||
patsy.DesignInfo.linear_constraint
|
||||
Notes
|
||||
-----
|
||||
The matrix `r_matrix` is assumed to be non-singular. More precisely,
|
||||
r_matrix (pX pX.T) r_matrix.T
|
||||
is assumed invertible. Here, pX is the generalized inverse of the
|
||||
design matrix of the model. There can be problems in non-OLS models
|
||||
where the rank of the covariance of the noise is not full.
|
||||
"""
|
||||
res = self.wald_test(r_matrix, cov_p=cov_p, scale=scale,
|
||||
invcov=invcov, use_f=True)
|
||||
return res
|
||||
|
||||
#TODO: untested for GLMs?
|
||||
def wald_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None,
|
||||
use_f=None):
|
||||
"""
|
||||
Compute a Wald-test for a joint linear hypothesis.
|
||||
Parameters
|
||||
----------
|
||||
r_matrix : array-like, str, or tuple
|
||||
- array : An r x k array where r is the number of restrictions to
|
||||
test and k is the number of regressors. It is assumed that the
|
||||
linear combination is equal to zero.
|
||||
- str : The full hypotheses to test can be given as a string.
|
||||
See the examples.
|
||||
- tuple : A tuple of arrays in the form (R, q), ``q`` can be
|
||||
either a scalar or a length p row vector.
|
||||
cov_p : array-like, optional
|
||||
An alternative estimate for the parameter covariance matrix.
|
||||
If None is given, self.normalized_cov_params is used.
|
||||
scale : float, optional
|
||||
Default is 1.0 for no scaling.
|
||||
invcov : array-like, optional
|
||||
A q x q array to specify an inverse covariance matrix based on a
|
||||
restrictions matrix.
|
||||
use_f : bool
|
||||
If True, then the F-distribution is used. If False, then the
|
||||
asymptotic distribution, chisquare is used. If use_f is None, then
|
||||
the F distribution is used if the model specifies that use_t is True.
|
||||
The test statistic is proportionally adjusted for the distribution
|
||||
by the number of constraints in the hypothesis.
|
||||
Returns
|
||||
-------
|
||||
res : ContrastResults instance
|
||||
The results for the test are attributes of this results instance.
|
||||
See also
|
||||
--------
|
||||
statsmodels.stats.contrast.ContrastResults
|
||||
f_test
|
||||
t_test
|
||||
patsy.DesignInfo.linear_constraint
|
||||
Notes
|
||||
-----
|
||||
The matrix `r_matrix` is assumed to be non-singular. More precisely,
|
||||
r_matrix (pX pX.T) r_matrix.T
|
||||
is assumed invertible. Here, pX is the generalized inverse of the
|
||||
design matrix of the model. There can be problems in non-OLS models
|
||||
where the rank of the covariance of the noise is not full.
|
||||
"""
|
||||
if use_f is None:
|
||||
#switch to use_t false if undefined
|
||||
use_f = (hasattr(self, 'use_t') and self.use_t)
|
||||
|
||||
from patsy import DesignInfo
|
||||
names = self.model.data.param_names
|
||||
LC = DesignInfo(names).linear_constraint(r_matrix)
|
||||
r_matrix, q_matrix = LC.coefs, LC.constants
|
||||
|
||||
if (self.normalized_cov_params is None and cov_p is None and
|
||||
invcov is None and not hasattr(self, 'cov_params_default')):
|
||||
raise ValueError('need covariance of parameters for computing '
|
||||
'F statistics')
|
||||
|
||||
cparams = np.dot(r_matrix, self.params[:, None])
|
||||
J = float(r_matrix.shape[0]) # number of restrictions
|
||||
if q_matrix is None:
|
||||
q_matrix = np.zeros(J)
|
||||
else:
|
||||
q_matrix = np.asarray(q_matrix)
|
||||
if q_matrix.ndim == 1:
|
||||
q_matrix = q_matrix[:, None]
|
||||
if q_matrix.shape[0] != J:
|
||||
raise ValueError("r_matrix and q_matrix must have the same "
|
||||
"number of rows")
|
||||
Rbq = cparams - q_matrix
|
||||
if invcov is None:
|
||||
cov_p = self.cov_params(r_matrix=r_matrix, cov_p=cov_p)
|
||||
if np.isnan(cov_p).max():
|
||||
raise ValueError("r_matrix performs f_test for using "
|
||||
"dimensions that are asymptotically "
|
||||
"non-normal")
|
||||
invcov = np.linalg.inv(cov_p)
|
||||
|
||||
if (hasattr(self, 'mle_settings') and
|
||||
self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
|
||||
F = nan_dot(nan_dot(Rbq.T, invcov), Rbq)
|
||||
else:
|
||||
F = np.dot(np.dot(Rbq.T, invcov), Rbq)
|
||||
|
||||
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
|
||||
if use_f:
|
||||
F /= J
|
||||
return ContrastResults(F=F, df_denom=df_resid,
|
||||
df_num=invcov.shape[0])
|
||||
else:
|
||||
return ContrastResults(chi2=F, df_denom=J, statistic=F,
|
||||
distribution='chi2', distargs=(J,))
|
||||
|
||||
|
||||
def wald_test_terms(self, skip_single=False, extra_constraints=None,
|
||||
combine_terms=None):
|
||||
"""
|
||||
Compute a sequence of Wald tests for terms over multiple columns
|
||||
This computes joined Wald tests for the hypothesis that all
|
||||
coefficients corresponding to a `term` are zero.
|
||||
`Terms` are defined by the underlying formula or by string matching.
|
||||
Parameters
|
||||
----------
|
||||
skip_single : boolean
|
||||
If true, then terms that consist only of a single column and,
|
||||
therefore, refers only to a single parameter is skipped.
|
||||
If false, then all terms are included.
|
||||
extra_constraints : ndarray
|
||||
not tested yet
|
||||
combine_terms : None or list of strings
|
||||
Each string in this list is matched to the name of the terms or
|
||||
the name of the exogenous variables. All columns whose name
|
||||
includes that string are combined in one joint test.
|
||||
Returns
|
||||
-------
|
||||
test_result : result instance
|
||||
The result instance contains `table` which is a pandas DataFrame
|
||||
with the test results: test statistic, degrees of freedom and
|
||||
pvalues.
|
||||
Examples
|
||||
--------
|
||||
>>> res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
|
||||
data).fit()
|
||||
>>> res_ols.wald_test_terms()
|
||||
<class 'statsmodels.stats.contrast.WaldTestResults'>
|
||||
F P>F df constraint df denom
|
||||
Intercept 279.754525 2.37985521351e-22 1 51
|
||||
C(Duration, Sum) 5.367071 0.0245738436636 1 51
|
||||
C(Weight, Sum) 12.432445 3.99943118767e-05 2 51
|
||||
C(Duration, Sum):C(Weight, Sum) 0.176002 0.83912310946 2 51
|
||||
>>> res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)",
|
||||
data).fit(cov_type='HC0')
|
||||
>>> wt = res_poi.wald_test_terms(skip_single=False,
|
||||
combine_terms=['Duration', 'Weight'])
|
||||
>>> print(wt)
|
||||
chi2 P>chi2 df constraint
|
||||
Intercept 15.695625 7.43960374424e-05 1
|
||||
C(Weight) 16.132616 0.000313940174705 2
|
||||
C(Duration) 1.009147 0.315107378931 1
|
||||
C(Weight):C(Duration) 0.216694 0.897315972824 2
|
||||
Duration 11.187849 0.010752286833 3
|
||||
Weight 30.263368 4.32586407145e-06 4
|
||||
"""
|
||||
# lazy import
|
||||
from collections import defaultdict
|
||||
|
||||
result = self
|
||||
if extra_constraints is None:
|
||||
extra_constraints = []
|
||||
if combine_terms is None:
|
||||
combine_terms = []
|
||||
design_info = getattr(result.model.data.orig_exog, 'design_info', None)
|
||||
|
||||
if design_info is None and extra_constraints is None:
|
||||
raise ValueError('no constraints, nothing to do')
|
||||
|
||||
|
||||
identity = np.eye(len(result.params))
|
||||
constraints = []
|
||||
combined = defaultdict(list)
|
||||
if design_info is not None:
|
||||
for term in design_info.terms:
|
||||
cols = design_info.slice(term)
|
||||
name = term.name()
|
||||
constraint_matrix = identity[cols]
|
||||
|
||||
# check if in combined
|
||||
for cname in combine_terms:
|
||||
if cname in name:
|
||||
combined[cname].append(constraint_matrix)
|
||||
|
||||
k_constraint = constraint_matrix.shape[0]
|
||||
if skip_single:
|
||||
if k_constraint == 1:
|
||||
continue
|
||||
|
||||
constraints.append((name, constraint_matrix))
|
||||
|
||||
combined_constraints = []
|
||||
for cname in combine_terms:
|
||||
combined_constraints.append((cname, np.vstack(combined[cname])))
|
||||
else:
|
||||
# check by exog/params names if there is no formula info
|
||||
for col, name in enumerate(result.model.exog_names):
|
||||
constraint_matrix = identity[col]
|
||||
|
||||
# check if in combined
|
||||
for cname in combine_terms:
|
||||
if cname in name:
|
||||
combined[cname].append(constraint_matrix)
|
||||
|
||||
if skip_single:
|
||||
continue
|
||||
|
||||
constraints.append((name, constraint_matrix))
|
||||
|
||||
combined_constraints = []
|
||||
for cname in combine_terms:
|
||||
combined_constraints.append((cname, np.vstack(combined[cname])))
|
||||
|
||||
use_t = result.use_t
|
||||
distribution = ['chi2', 'F'][use_t]
|
||||
|
||||
res_wald = []
|
||||
index = []
|
||||
for name, constraint in constraints + combined_constraints + extra_constraints:
|
||||
wt = result.wald_test(constraint)
|
||||
row = [wt.statistic.item(), wt.pvalue, constraint.shape[0]]
|
||||
if use_t:
|
||||
row.append(wt.df_denom)
|
||||
res_wald.append(row)
|
||||
index.append(name)
|
||||
|
||||
# distribution nerutral names
|
||||
col_names = ['statistic', 'pvalue', 'df_constraint']
|
||||
if use_t:
|
||||
col_names.append('df_denom')
|
||||
# TODO: maybe move DataFrame creation to results class
|
||||
from pandas import DataFrame
|
||||
table = DataFrame(res_wald, index=index, columns=col_names)
|
||||
res = WaldTestResults(None, distribution, None, table=table)
|
||||
# TODO: remove temp again, added for testing
|
||||
res.temp = constraints + combined_constraints + extra_constraints
|
||||
return res
|
||||
|
||||
|
||||
def conf_int(self, alpha=.05, cols=None, method='default'):
|
||||
"""
|
||||
Returns the confidence interval of the fitted parameters.
|
||||
Parameters
|
||||
----------
|
||||
alpha : float, optional
|
||||
The significance level for the confidence interval.
|
||||
ie., The default `alpha` = .05 returns a 95% confidence interval.
|
||||
cols : array-like, optional
|
||||
`cols` specifies which confidence intervals to return
|
||||
method : string
|
||||
Not Implemented Yet
|
||||
Method to estimate the confidence_interval.
|
||||
"Default" : uses self.bse which is based on inverse Hessian for MLE
|
||||
"hjjh" :
|
||||
"jac" :
|
||||
"boot-bse"
|
||||
"boot_quant"
|
||||
"profile"
|
||||
Returns
|
||||
--------
|
||||
conf_int : array
|
||||
Each row contains [lower, upper] limits of the confidence interval
|
||||
for the corresponding parameter. The first column contains all
|
||||
lower, the second column contains all upper limits.
|
||||
Examples
|
||||
--------
|
||||
>>> import statsmodels.api as sm
|
||||
>>> data = sm.datasets.longley.load()
|
||||
>>> data.exog = sm.add_constant(data.exog)
|
||||
>>> results = sm.OLS(data.endog, data.exog).fit()
|
||||
>>> results.conf_int()
|
||||
array([[-5496529.48322745, -1467987.78596704],
|
||||
[ -177.02903529, 207.15277984],
|
||||
[ -0.1115811 , 0.03994274],
|
||||
[ -3.12506664, -0.91539297],
|
||||
[ -1.5179487 , -0.54850503],
|
||||
[ -0.56251721, 0.460309 ],
|
||||
[ 798.7875153 , 2859.51541392]])
|
||||
>>> results.conf_int(cols=(2,3))
|
||||
array([[-0.1115811 , 0.03994274],
|
||||
[-3.12506664, -0.91539297]])
|
||||
Notes
|
||||
-----
|
||||
The confidence interval is based on the standard normal distribution.
|
||||
Models wish to use a different distribution should overwrite this
|
||||
method.
|
||||
"""
|
||||
bse = self.bse
|
||||
|
||||
if self.use_t:
|
||||
dist = stats.t
|
||||
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
|
||||
q = dist.ppf(1 - alpha / 2, df_resid)
|
||||
else:
|
||||
dist = stats.norm
|
||||
q = dist.ppf(1 - alpha / 2)
|
||||
|
||||
if cols is None:
|
||||
lower = self.params - q * bse
|
||||
upper = self.params + q * bse
|
||||
else:
|
||||
cols = np.asarray(cols)
|
||||
lower = self.params[cols] - q * bse[cols]
|
||||
upper = self.params[cols] + q * bse[cols]
|
||||
return np.asarray(lzip(lower, upper))
|
||||
|
||||
def save(self, fname, remove_data=False):
|
||||
'''
|
||||
save a pickle of this instance
|
||||
Parameters
|
||||
----------
|
||||
fname : string or filehandle
|
||||
fname can be a string to a file path or filename, or a filehandle.
|
||||
remove_data : bool
|
||||
If False (default), then the instance is pickled without changes.
|
||||
If True, then all arrays with length nobs are set to None before
|
||||
pickling. See the remove_data method.
|
||||
In some cases not all arrays will be set to None.
|
||||
Notes
|
||||
-----
|
||||
If remove_data is true and the model result does not implement a
|
||||
remove_data method then this will raise an exception.
|
||||
'''
|
||||
|
||||
from statsmodels.iolib.smpickle import save_pickle
|
||||
|
||||
if remove_data:
|
||||
self.remove_data()
|
||||
|
||||
save_pickle(self, fname)
|
||||
|
||||
@classmethod
|
||||
def load(cls, fname):
|
||||
'''
|
||||
load a pickle, (class method)
|
||||
Parameters
|
||||
----------
|
||||
fname : string or filehandle
|
||||
fname can be a string to a file path or filename, or a filehandle.
|
||||
Returns
|
||||
-------
|
||||
unpickled instance
|
||||
'''
|
||||
|
||||
from statsmodels.iolib.smpickle import load_pickle
|
||||
return load_pickle(fname)
|
||||
|
||||
def remove_data(self):
|
||||
'''remove data arrays, all nobs arrays from result and model
|
||||
This reduces the size of the instance, so it can be pickled with less
|
||||
memory. Currently tested for use with predict from an unpickled
|
||||
results and model instance.
|
||||
.. warning:: Since data and some intermediate results have been removed
|
||||
calculating new statistics that require them will raise exceptions.
|
||||
The exception will occur the first time an attribute is accessed
|
||||
that has been set to None.
|
||||
Not fully tested for time series models, tsa, and might delete too much
|
||||
for prediction or not all that would be possible.
|
||||
The list of arrays to delete is maintained as an attribute of the
|
||||
result and model instance, except for cached values. These lists could
|
||||
be changed before calling remove_data.
|
||||
'''
|
||||
def wipe(obj, att):
|
||||
#get to last element in attribute path
|
||||
p = att.split('.')
|
||||
att_ = p.pop(-1)
|
||||
try:
|
||||
obj_ = reduce(getattr, [obj] + p)
|
||||
|
||||
#print(repr(obj), repr(att))
|
||||
#print(hasattr(obj_, att_))
|
||||
if hasattr(obj_, att_):
|
||||
#print('removing3', att_)
|
||||
setattr(obj_, att_, None)
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
model_attr = ['model.' + i for i in self.model._data_attr]
|
||||
for att in self._data_attr + model_attr:
|
||||
#print('removing', att)
|
||||
wipe(self, att)
|
||||
|
||||
data_in_cache = getattr(self, 'data_in_cache', [])
|
||||
data_in_cache += ['fittedvalues', 'resid', 'wresid']
|
||||
for key in data_in_cache:
|
||||
try:
|
||||
self._cache[key] = None
|
||||
except (AttributeError, KeyError):
|
||||
pass
|
||||
|
||||
def lzip(*args, **kwargs):
|
||||
return list(zip(*args, **kwargs))
|
1845
src/py/crankshaft/crankshaft/regression/glm/family.py
Normal file
1845
src/py/crankshaft/crankshaft/regression/glm/family.py
Normal file
File diff suppressed because it is too large
Load Diff
326
src/py/crankshaft/crankshaft/regression/glm/glm.py
Normal file
326
src/py/crankshaft/crankshaft/regression/glm/glm.py
Normal file
@ -0,0 +1,326 @@
|
||||
|
||||
import numpy as np
|
||||
import numpy.linalg as la
|
||||
from pysal.spreg.utils import RegressionPropsY, spdot
|
||||
import pysal.spreg.user_output as USER
|
||||
from utils import cache_readonly
|
||||
from base import LikelihoodModelResults
|
||||
import family
|
||||
from iwls import iwls
|
||||
|
||||
__all__ = ['GLM']
|
||||
|
||||
class GLM(RegressionPropsY):
|
||||
"""
|
||||
Generalised linear models. Can currently estimate Guassian, Poisson and
|
||||
Logisitc regression coefficients. GLM object prepares model input and fit
|
||||
method performs estimation which then returns a GLMResults object.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y : array
|
||||
n*1, dependent variable.
|
||||
X : array
|
||||
n*k, independent variable, exlcuding the constant.
|
||||
family : string
|
||||
Model type: 'Gaussian', 'Poisson', 'Binomial'
|
||||
|
||||
Attributes
|
||||
----------
|
||||
y : array
|
||||
n*1, dependent variable.
|
||||
X : array
|
||||
n*k, independent variable, including constant.
|
||||
family : string
|
||||
Model type: 'Gaussian', 'Poisson', 'logistic'
|
||||
n : integer
|
||||
Number of observations
|
||||
k : integer
|
||||
Number of independent variables
|
||||
df_model : float
|
||||
k-1, where k is the number of variables (including
|
||||
intercept)
|
||||
df_residual : float
|
||||
observations minus variables (n-k)
|
||||
mean_y : float
|
||||
Mean of y
|
||||
std_y : float
|
||||
Standard deviation of y
|
||||
fit_params : dict
|
||||
Parameters passed into fit method to define estimation
|
||||
routine.
|
||||
normalized_cov_params : array
|
||||
k*k, approximates [X.T*X]-1
|
||||
"""
|
||||
def __init__(self, y, X, family=family.Gaussian(), constant=True):
|
||||
"""
|
||||
Initialize class
|
||||
"""
|
||||
self.n = USER.check_arrays(y, X)
|
||||
USER.check_y(y, self.n)
|
||||
self.y = y
|
||||
if constant:
|
||||
self.X = USER.check_constant(X)
|
||||
else:
|
||||
self.X = X
|
||||
self.family = family
|
||||
self.k = self.X.shape[1]
|
||||
self.fit_params = {}
|
||||
|
||||
def fit(self, ini_betas=None, tol=1.0e-6, max_iter=200, solve='iwls'):
|
||||
"""
|
||||
Method that fits a model with a particular estimation routine.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
ini_betas : array
|
||||
k*1, initial coefficient values, including constant.
|
||||
Default is None, which calculates initial values during
|
||||
estimation.
|
||||
tol: float
|
||||
Tolerence for estimation convergence.
|
||||
max_iter : integer
|
||||
Maximum number of iterations if convergence not
|
||||
achieved.
|
||||
solve :string
|
||||
Technique to solve MLE equations.
|
||||
'iwls' = iteratively (re)weighted least squares (default)
|
||||
"""
|
||||
self.fit_params['ini_betas'] = ini_betas
|
||||
self.fit_params['tol'] = tol
|
||||
self.fit_params['max_iter'] = max_iter
|
||||
self.fit_params['solve']=solve
|
||||
if solve.lower() == 'iwls':
|
||||
params, predy, w, n_iter = iwls(self.y, self.X, self.family,
|
||||
ini_betas=ini_betas, tol=tol, max_iter=max_iter)
|
||||
self.fit_params['n_iter'] = n_iter
|
||||
return GLMResults(self, params.flatten(), predy, w)
|
||||
|
||||
@cache_readonly
|
||||
def df_model(self):
|
||||
return self.X.shape[1] - 1
|
||||
|
||||
@cache_readonly
|
||||
def df_resid(self):
|
||||
return self.n - self.df_model - 1
|
||||
|
||||
class GLMResults(LikelihoodModelResults):
|
||||
"""
|
||||
Results of estimated GLM and diagnostics.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model : GLM object
|
||||
Pointer to GLM object with estimation parameters.
|
||||
params : array
|
||||
k*1, estimared coefficients
|
||||
mu : array
|
||||
n*1, predicted y values.
|
||||
w : array
|
||||
n*1, final weight used for iwls
|
||||
|
||||
Attributes
|
||||
----------
|
||||
model : GLM Object
|
||||
Points to GLM object for which parameters have been
|
||||
estimated.
|
||||
y : array
|
||||
n*1, dependent variable.
|
||||
x : array
|
||||
n*k, independent variable, including constant.
|
||||
family : string
|
||||
Model type: 'Gaussian', 'Poisson', 'Logistic'
|
||||
n : integer
|
||||
Number of observations
|
||||
k : integer
|
||||
Number of independent variables
|
||||
df_model : float
|
||||
k-1, where k is the number of variables (including
|
||||
intercept)
|
||||
df_residual : float
|
||||
observations minus variables (n-k)
|
||||
fit_params : dict
|
||||
parameters passed into fit method to define estimation
|
||||
routine.
|
||||
scale : float
|
||||
sigma squared used for subsequent computations.
|
||||
params : array
|
||||
n*k, estimared beta coefficients
|
||||
w : array
|
||||
n*1, final weight values of x
|
||||
mu : array
|
||||
n*1, predicted value of y (i.e., fittedvalues)
|
||||
cov_params : array
|
||||
Variance covariance matrix (kxk) of betas which has been
|
||||
appropriately scaled by sigma-squared
|
||||
bse : array
|
||||
k*1, standard errors of betas
|
||||
pvalues : array
|
||||
k*1, two-tailed pvalues of parameters
|
||||
tvalues : array
|
||||
k*1, the tvalues of the standard errors
|
||||
null : array
|
||||
n*1, predicted values of y for null model
|
||||
deviance : float
|
||||
value of the deviance function evalued at params;
|
||||
see family.py for distribution-specific deviance
|
||||
null_deviance : float
|
||||
value of the deviance function for the model fit with
|
||||
a constant as the only regressor
|
||||
llf : float
|
||||
value of the loglikelihood function evalued at params;
|
||||
see family.py for distribution-specific loglikelihoods
|
||||
llnull : float
|
||||
value of log-likelihood function evaluated at null
|
||||
aic : float
|
||||
AIC
|
||||
bic : float
|
||||
BIC
|
||||
D2 : float
|
||||
percent deviance explained
|
||||
adj_D2 : float
|
||||
adjusted percent deviance explained
|
||||
pseudo_R2 : float
|
||||
McFadden's pseudo R2 (coefficient of determination)
|
||||
adj_pseudoR2 : float
|
||||
adjusted McFadden's pseudo R2
|
||||
resid_response : array
|
||||
response residuals; defined as y-mu
|
||||
resid_pearson : array
|
||||
Pearson residuals; defined as (y-mu)/sqrt(VAR(mu))
|
||||
where VAR is the distribution specific variance
|
||||
function; see family.py and varfuncs.py for more information.
|
||||
resid_working : array
|
||||
Working residuals; the working residuals are defined as
|
||||
resid_response/link'(mu); see links.py for the
|
||||
derivatives of the link functions.
|
||||
|
||||
resid_anscombe : array
|
||||
Anscombe residuals; see family.py for
|
||||
distribution-specific Anscombe residuals.
|
||||
|
||||
resid_deviance : array
|
||||
deviance residuals; see family.py for
|
||||
distribution-specific deviance residuals.
|
||||
|
||||
pearson_chi2 : float
|
||||
chi-Squared statistic is defined as the sum
|
||||
of the squares of the Pearson residuals
|
||||
|
||||
normalized_cov_params : array
|
||||
k*k, approximates [X.T*X]-1
|
||||
"""
|
||||
def __init__(self, model, params, mu, w):
|
||||
self.model = model
|
||||
self.n = model.n
|
||||
self.y = model.y.T.flatten()
|
||||
self.X = model.X
|
||||
self.k = model.k
|
||||
self.family = model.family
|
||||
self.fit_params = model.fit_params
|
||||
self.params = params
|
||||
self.w = w
|
||||
self.mu = mu.flatten()
|
||||
self._cache = {}
|
||||
|
||||
@cache_readonly
|
||||
def df_model(self):
|
||||
return self.model.df_model
|
||||
|
||||
@cache_readonly
|
||||
def df_resid(self):
|
||||
return self.model.df_resid
|
||||
|
||||
@cache_readonly
|
||||
def normalized_cov_params(self):
|
||||
return la.inv(spdot(self.w.T, self.w))
|
||||
|
||||
@cache_readonly
|
||||
def resid_response(self):
|
||||
return (self.y-self.mu)
|
||||
|
||||
@cache_readonly
|
||||
def resid_pearson(self):
|
||||
return ((self.y-self.mu) /
|
||||
np.sqrt(self.family.variance(self.mu)))
|
||||
|
||||
@cache_readonly
|
||||
def resid_working(self):
|
||||
return (self.resid_response / self.family.link.deriv(self.mu))
|
||||
|
||||
@cache_readonly
|
||||
def resid_anscombe(self):
|
||||
return (self.family.resid_anscombe(self.y, self.mu))
|
||||
|
||||
@cache_readonly
|
||||
def resid_deviance(self):
|
||||
return (self.family.resid_dev(self.y, self.mu))
|
||||
|
||||
@cache_readonly
|
||||
def pearson_chi2(self):
|
||||
chisq = (self.y - self.mu)**2 / self.family.variance(self.mu)
|
||||
chisqsum = np.sum(chisq)
|
||||
return chisqsum
|
||||
|
||||
@cache_readonly
|
||||
def null(self):
|
||||
y = np.reshape(self.y, (-1,1))
|
||||
model = self.model
|
||||
X = np.ones((len(y), 1))
|
||||
null_mod = GLM(y, X, family=self.family, constant=False)
|
||||
return null_mod.fit().mu
|
||||
|
||||
@cache_readonly
|
||||
def scale(self):
|
||||
if isinstance(self.family, (family.Binomial, family.Poisson)):
|
||||
return 1.
|
||||
else:
|
||||
return (((np.power(self.resid_response, 2) /
|
||||
self.family.variance(self.mu))).sum() /
|
||||
(self.df_resid))
|
||||
@cache_readonly
|
||||
def deviance(self):
|
||||
return self.family.deviance(self.y, self.mu)
|
||||
|
||||
@cache_readonly
|
||||
def null_deviance(self):
|
||||
return self.family.deviance(self.y, self.null)
|
||||
|
||||
@cache_readonly
|
||||
def llnull(self):
|
||||
return self.family.loglike(self.y, self.null, scale=self.scale)
|
||||
|
||||
@cache_readonly
|
||||
def llf(self):
|
||||
return self.family.loglike(self.y, self.mu, scale=self.scale)
|
||||
|
||||
@cache_readonly
|
||||
def aic(self):
|
||||
if isinstance(self.family, family.QuasiPoisson):
|
||||
return np.nan
|
||||
else:
|
||||
return -2 * self.llf + 2*(self.df_model+1)
|
||||
|
||||
@cache_readonly
|
||||
def bic(self):
|
||||
return (self.deviance -
|
||||
(self.model.n - self.df_model - 1) *
|
||||
np.log(self.model.n))
|
||||
|
||||
@cache_readonly
|
||||
def D2(self):
|
||||
return 1 - (self.deviance / self.null_deviance)
|
||||
|
||||
@cache_readonly
|
||||
def adj_D2(self):
|
||||
return 1.0 - (float(self.n) - 1.0)/(float(self.n) - float(self.k)) * (1.0-self.D2)
|
||||
|
||||
@cache_readonly
|
||||
def pseudoR2(self):
|
||||
return 1 - (self.llf/self.llnull)
|
||||
|
||||
@cache_readonly
|
||||
def adj_pseudoR2(self):
|
||||
return 1 - ((self.llf-self.k)/self.llnull)
|
||||
|
84
src/py/crankshaft/crankshaft/regression/glm/iwls.py
Normal file
84
src/py/crankshaft/crankshaft/regression/glm/iwls.py
Normal file
@ -0,0 +1,84 @@
|
||||
import numpy as np
|
||||
import numpy.linalg as la
|
||||
from scipy import sparse as sp
|
||||
from scipy.sparse import linalg as spla
|
||||
from pysal.spreg.utils import spdot, spmultiply
|
||||
from family import Binomial, Poisson
|
||||
|
||||
def _compute_betas(y, x):
|
||||
"""
|
||||
compute MLE coefficients using iwls routine
|
||||
|
||||
Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
|
||||
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
|
||||
Geographically weighted regression: the analysis of spatially varying relationships.
|
||||
"""
|
||||
xT = x.T
|
||||
xtx = spdot(xT, x)
|
||||
xtx_inv = la.inv(xtx)
|
||||
xtx_inv = sp.csr_matrix(xtx_inv)
|
||||
xTy = spdot(xT, y, array_out=False)
|
||||
betas = spdot(xtx_inv, xTy)
|
||||
return betas
|
||||
|
||||
def _compute_betas_gwr(y, x, wi):
|
||||
"""
|
||||
compute MLE coefficients using iwls routine
|
||||
|
||||
Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
|
||||
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
|
||||
Geographically weighted regression: the analysis of spatially varying relationships.
|
||||
"""
|
||||
xT = (x * wi).T
|
||||
xtx = np.dot(xT, x)
|
||||
xtx_inv = la.inv(xtx)
|
||||
xtx_inv_xt = np.dot(xtx_inv, xT)
|
||||
betas = np.dot(xtx_inv_xt, y)
|
||||
return betas, xtx_inv_xt
|
||||
|
||||
def iwls(y, x, family, offset=1.0, ini_betas=None, tol=1.0e-8, max_iter=200, wi=None):
|
||||
"""
|
||||
Iteratively re-weighted least squares estimation routine
|
||||
"""
|
||||
n_iter = 0
|
||||
diff = 1.0e6
|
||||
if ini_betas is None:
|
||||
betas = np.zeros((x.shape[1], 1), np.float)
|
||||
else:
|
||||
betas = ini_betas
|
||||
if isinstance(family, Binomial):
|
||||
y = family.link._clean(y)
|
||||
if isinstance(family, Poisson):
|
||||
y_off = y/offset
|
||||
y_off = family.starting_mu(y_off)
|
||||
v = family.predict(y_off)
|
||||
mu = family.starting_mu(y)
|
||||
else:
|
||||
mu = family.starting_mu(y)
|
||||
v = family.predict(mu)
|
||||
|
||||
while diff > tol and n_iter < max_iter:
|
||||
n_iter += 1
|
||||
w = family.weights(mu)
|
||||
z = v + (family.link.deriv(mu)*(y-mu))
|
||||
w = np.sqrt(w)
|
||||
if type(x) != np.ndarray:
|
||||
w = sp.csr_matrix(w)
|
||||
z = sp.csr_matrix(z)
|
||||
wx = spmultiply(x, w, array_out=False)
|
||||
wz = spmultiply(z, w, array_out=False)
|
||||
if wi is None:
|
||||
n_betas = _compute_betas(wz, wx)
|
||||
else:
|
||||
n_betas, xtx_inv_xt = _compute_betas_gwr(wz, wx, wi)
|
||||
v = spdot(x, n_betas)
|
||||
mu = family.fitted(v)
|
||||
if isinstance(family, Poisson):
|
||||
mu = mu * offset
|
||||
diff = min(abs(n_betas-betas))
|
||||
betas = n_betas
|
||||
|
||||
if wi is None:
|
||||
return betas, mu, wx, n_iter
|
||||
else:
|
||||
return betas, mu, v, w, z, xtx_inv_xt, n_iter
|
953
src/py/crankshaft/crankshaft/regression/glm/links.py
Normal file
953
src/py/crankshaft/crankshaft/regression/glm/links.py
Normal file
@ -0,0 +1,953 @@
|
||||
'''
|
||||
Defines the link functions to be used with GLM and GEE families.
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
import scipy.stats
|
||||
FLOAT_EPS = np.finfo(float).eps
|
||||
|
||||
|
||||
class Link(object):
|
||||
"""
|
||||
A generic link function for one-parameter exponential family.
|
||||
|
||||
`Link` does nothing, but lays out the methods expected of any subclass.
|
||||
"""
|
||||
|
||||
def __call__(self, p):
|
||||
"""
|
||||
Return the value of the link function. This is just a placeholder.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Probabilities
|
||||
|
||||
Returns
|
||||
-------
|
||||
g(p) : array-like
|
||||
The value of the link function g(p) = z
|
||||
"""
|
||||
return NotImplementedError
|
||||
|
||||
def inverse(self, z):
|
||||
"""
|
||||
Inverse of the link function. Just a placeholder.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array-like
|
||||
`z` is usually the linear predictor of the transformed variable
|
||||
in the IRLS algorithm for GLM.
|
||||
|
||||
Returns
|
||||
-------
|
||||
g^(-1)(z) : array
|
||||
The value of the inverse of the link function g^(-1)(z) = p
|
||||
|
||||
|
||||
"""
|
||||
return NotImplementedError
|
||||
|
||||
def deriv(self, p):
|
||||
"""
|
||||
Derivative of the link function g'(p). Just a placeholder.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
|
||||
Returns
|
||||
-------
|
||||
g'(p) : array
|
||||
The value of the derivative of the link function g'(p)
|
||||
"""
|
||||
return NotImplementedError
|
||||
|
||||
def deriv2(self, p):
|
||||
"""Second derivative of the link function g''(p)
|
||||
|
||||
implemented through numerical differentiation
|
||||
"""
|
||||
from statsmodels.tools.numdiff import approx_fprime_cs
|
||||
# TODO: workaround proplem with numdiff for 1d
|
||||
return np.diag(approx_fprime_cs(p, self.deriv))
|
||||
|
||||
def inverse_deriv(self, z):
|
||||
"""
|
||||
Derivative of the inverse link function g^(-1)(z).
|
||||
|
||||
Notes
|
||||
-----
|
||||
This reference implementation gives the correct result but is
|
||||
inefficient, so it can be overriden in subclasses.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array-like
|
||||
`z` is usually the linear predictor for a GLM or GEE model.
|
||||
|
||||
Returns
|
||||
-------
|
||||
g'^(-1)(z) : array
|
||||
The value of the derivative of the inverse of the link function
|
||||
|
||||
"""
|
||||
return 1 / self.deriv(self.inverse(z))
|
||||
|
||||
|
||||
class Logit(Link):
|
||||
"""
|
||||
The logit transform
|
||||
|
||||
Notes
|
||||
-----
|
||||
call and derivative use a private method _clean to make trim p by
|
||||
machine epsilon so that p is in (0,1)
|
||||
|
||||
Alias of Logit:
|
||||
logit = Logit()
|
||||
"""
|
||||
|
||||
def _clean(self, p):
|
||||
"""
|
||||
Clip logistic values to range (eps, 1-eps)
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
p : array-like
|
||||
Probabilities
|
||||
|
||||
Returns
|
||||
--------
|
||||
pclip : array
|
||||
Clipped probabilities
|
||||
"""
|
||||
return np.clip(p, FLOAT_EPS, 1. - FLOAT_EPS)
|
||||
|
||||
def __call__(self, p):
|
||||
"""
|
||||
The logit transform
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Probabilities
|
||||
|
||||
Returns
|
||||
-------
|
||||
z : array
|
||||
Logit transform of `p`
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(p) = log(p / (1 - p))
|
||||
"""
|
||||
p = self._clean(p)
|
||||
return np.log(p / (1. - p))
|
||||
|
||||
def inverse(self, z):
|
||||
"""
|
||||
Inverse of the logit transform
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array-like
|
||||
The value of the logit transform at `p`
|
||||
|
||||
Returns
|
||||
-------
|
||||
p : array
|
||||
Probabilities
|
||||
|
||||
Notes
|
||||
-----
|
||||
g^(-1)(z) = exp(z)/(1+exp(z))
|
||||
"""
|
||||
z = np.asarray(z)
|
||||
t = np.exp(-z)
|
||||
return 1. / (1. + t)
|
||||
|
||||
def deriv(self, p):
|
||||
|
||||
"""
|
||||
Derivative of the logit transform
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p: array-like
|
||||
Probabilities
|
||||
|
||||
Returns
|
||||
-------
|
||||
g'(p) : array
|
||||
Value of the derivative of logit transform at `p`
|
||||
|
||||
Notes
|
||||
-----
|
||||
g'(p) = 1 / (p * (1 - p))
|
||||
|
||||
Alias for `Logit`:
|
||||
logit = Logit()
|
||||
"""
|
||||
p = self._clean(p)
|
||||
return 1. / (p * (1 - p))
|
||||
|
||||
def inverse_deriv(self, z):
|
||||
"""
|
||||
Derivative of the inverse of the logit transform
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array-like
|
||||
`z` is usually the linear predictor for a GLM or GEE model.
|
||||
|
||||
Returns
|
||||
-------
|
||||
g'^(-1)(z) : array
|
||||
The value of the derivative of the inverse of the logit function
|
||||
|
||||
"""
|
||||
t = np.exp(z)
|
||||
return t/(1 + t)**2
|
||||
|
||||
|
||||
def deriv2(self, p):
|
||||
"""
|
||||
Second derivative of the logit function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
probabilities
|
||||
|
||||
Returns
|
||||
-------
|
||||
g''(z) : array
|
||||
The value of the second derivative of the logit function
|
||||
"""
|
||||
v = p * (1 - p)
|
||||
return (2*p - 1) / v**2
|
||||
|
||||
class logit(Logit):
|
||||
pass
|
||||
|
||||
|
||||
class Power(Link):
|
||||
"""
|
||||
The power transform
|
||||
|
||||
Parameters
|
||||
----------
|
||||
power : float
|
||||
The exponent of the power transform
|
||||
|
||||
Notes
|
||||
-----
|
||||
Aliases of Power:
|
||||
inverse = Power(power=-1)
|
||||
sqrt = Power(power=.5)
|
||||
inverse_squared = Power(power=-2.)
|
||||
identity = Power(power=1.)
|
||||
"""
|
||||
|
||||
def __init__(self, power=1.):
|
||||
self.power = power
|
||||
|
||||
def __call__(self, p):
|
||||
"""
|
||||
Power transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
z : array-like
|
||||
Power transform of x
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(p) = x**self.power
|
||||
"""
|
||||
|
||||
z = np.power(p, self.power)
|
||||
return z
|
||||
|
||||
def inverse(self, z):
|
||||
"""
|
||||
Inverse of the power transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
`z` : array-like
|
||||
Value of the transformed mean parameters at `p`
|
||||
|
||||
Returns
|
||||
-------
|
||||
`p` : array
|
||||
Mean parameters
|
||||
|
||||
Notes
|
||||
-----
|
||||
g^(-1)(z`) = `z`**(1/`power`)
|
||||
"""
|
||||
|
||||
p = np.power(z, 1. / self.power)
|
||||
return p
|
||||
|
||||
def deriv(self, p):
|
||||
"""
|
||||
Derivative of the power transform
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
--------
|
||||
g'(p) : array
|
||||
Derivative of power transform of `p`
|
||||
|
||||
Notes
|
||||
-----
|
||||
g'(`p`) = `power` * `p`**(`power` - 1)
|
||||
"""
|
||||
return self.power * np.power(p, self.power - 1)
|
||||
|
||||
def deriv2(self, p):
|
||||
"""
|
||||
Second derivative of the power transform
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
--------
|
||||
g''(p) : array
|
||||
Second derivative of the power transform of `p`
|
||||
|
||||
Notes
|
||||
-----
|
||||
g''(`p`) = `power` * (`power` - 1) * `p`**(`power` - 2)
|
||||
"""
|
||||
return self.power * (self.power - 1) * np.power(p, self.power - 2)
|
||||
|
||||
def inverse_deriv(self, z):
|
||||
"""
|
||||
Derivative of the inverse of the power transform
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array-like
|
||||
`z` is usually the linear predictor for a GLM or GEE model.
|
||||
|
||||
Returns
|
||||
-------
|
||||
g^(-1)'(z) : array
|
||||
The value of the derivative of the inverse of the power transform
|
||||
function
|
||||
"""
|
||||
return np.power(z, (1 - self.power)/self.power) / self.power
|
||||
|
||||
|
||||
class inverse_power(Power):
|
||||
"""
|
||||
The inverse transform
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(p) = 1/p
|
||||
|
||||
Alias of statsmodels.family.links.Power(power=-1.)
|
||||
"""
|
||||
def __init__(self):
|
||||
super(inverse_power, self).__init__(power=-1.)
|
||||
|
||||
|
||||
class sqrt(Power):
|
||||
"""
|
||||
The square-root transform
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(`p`) = sqrt(`p`)
|
||||
|
||||
Alias of statsmodels.family.links.Power(power=.5)
|
||||
"""
|
||||
def __init__(self):
|
||||
super(sqrt, self).__init__(power=.5)
|
||||
|
||||
|
||||
class inverse_squared(Power):
|
||||
"""
|
||||
The inverse squared transform
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(`p`) = 1/(`p`\ \*\*2)
|
||||
|
||||
Alias of statsmodels.family.links.Power(power=2.)
|
||||
"""
|
||||
def __init__(self):
|
||||
super(inverse_squared, self).__init__(power=-2.)
|
||||
|
||||
|
||||
class identity(Power):
|
||||
"""
|
||||
The identity transform
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(`p`) = `p`
|
||||
|
||||
Alias of statsmodels.family.links.Power(power=1.)
|
||||
"""
|
||||
def __init__(self):
|
||||
super(identity, self).__init__(power=1.)
|
||||
|
||||
|
||||
class Log(Link):
|
||||
"""
|
||||
The log transform
|
||||
|
||||
Notes
|
||||
-----
|
||||
call and derivative call a private method _clean to trim the data by
|
||||
machine epsilon so that p is in (0,1). log is an alias of Log.
|
||||
"""
|
||||
|
||||
def _clean(self, x):
|
||||
return np.clip(x, FLOAT_EPS, np.inf)
|
||||
|
||||
def __call__(self, p, **extra):
|
||||
"""
|
||||
Log transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
z : array
|
||||
log(x)
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(p) = log(p)
|
||||
"""
|
||||
x = self._clean(p)
|
||||
return np.log(x)
|
||||
|
||||
def inverse(self, z):
|
||||
"""
|
||||
Inverse of log transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array
|
||||
The inverse of the link function at `p`
|
||||
|
||||
Returns
|
||||
-------
|
||||
p : array
|
||||
The mean probabilities given the value of the inverse `z`
|
||||
|
||||
Notes
|
||||
-----
|
||||
g^{-1}(z) = exp(z)
|
||||
"""
|
||||
return np.exp(z)
|
||||
|
||||
def deriv(self, p):
|
||||
"""
|
||||
Derivative of log transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
g'(p) : array
|
||||
derivative of log transform of x
|
||||
|
||||
Notes
|
||||
-----
|
||||
g'(x) = 1/x
|
||||
"""
|
||||
p = self._clean(p)
|
||||
return 1. / p
|
||||
|
||||
def deriv2(self, p):
|
||||
"""
|
||||
Second derivative of the log transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
g''(p) : array
|
||||
Second derivative of log transform of x
|
||||
|
||||
Notes
|
||||
-----
|
||||
g''(x) = -1/x^2
|
||||
"""
|
||||
p = self._clean(p)
|
||||
return -1. / p**2
|
||||
|
||||
def inverse_deriv(self, z):
|
||||
"""
|
||||
Derivative of the inverse of the log transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array
|
||||
The inverse of the link function at `p`
|
||||
|
||||
Returns
|
||||
-------
|
||||
g^(-1)'(z) : array
|
||||
The value of the derivative of the inverse of the log function,
|
||||
the exponential function
|
||||
"""
|
||||
return np.exp(z)
|
||||
|
||||
|
||||
class log(Log):
|
||||
"""
|
||||
The log transform
|
||||
|
||||
Notes
|
||||
-----
|
||||
log is a an alias of Log.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
# TODO: the CDFLink is untested
|
||||
class CDFLink(Logit):
|
||||
"""
|
||||
The use the CDF of a scipy.stats distribution
|
||||
|
||||
CDFLink is a subclass of logit in order to use its _clean method
|
||||
for the link and its derivative.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
dbn : scipy.stats distribution
|
||||
Default is dbn=scipy.stats.norm
|
||||
|
||||
Notes
|
||||
-----
|
||||
The CDF link is untested.
|
||||
"""
|
||||
|
||||
def __init__(self, dbn=scipy.stats.norm):
|
||||
self.dbn = dbn
|
||||
|
||||
def __call__(self, p):
|
||||
"""
|
||||
CDF link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
z : array
|
||||
(ppf) inverse of CDF transform of p
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(`p`) = `dbn`.ppf(`p`)
|
||||
"""
|
||||
p = self._clean(p)
|
||||
return self.dbn.ppf(p)
|
||||
|
||||
def inverse(self, z):
|
||||
"""
|
||||
The inverse of the CDF link
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array-like
|
||||
The value of the inverse of the link function at `p`
|
||||
|
||||
Returns
|
||||
-------
|
||||
p : array
|
||||
Mean probabilities. The value of the inverse of CDF link of `z`
|
||||
|
||||
Notes
|
||||
-----
|
||||
g^(-1)(`z`) = `dbn`.cdf(`z`)
|
||||
"""
|
||||
return self.dbn.cdf(z)
|
||||
|
||||
def deriv(self, p):
|
||||
"""
|
||||
Derivative of CDF link
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
g'(p) : array
|
||||
The derivative of CDF transform at `p`
|
||||
|
||||
Notes
|
||||
-----
|
||||
g'(`p`) = 1./ `dbn`.pdf(`dbn`.ppf(`p`))
|
||||
"""
|
||||
p = self._clean(p)
|
||||
return 1. / self.dbn.pdf(self.dbn.ppf(p))
|
||||
|
||||
def deriv2(self, p):
|
||||
"""
|
||||
Second derivative of the link function g''(p)
|
||||
|
||||
implemented through numerical differentiation
|
||||
"""
|
||||
from statsmodels.tools.numdiff import approx_fprime
|
||||
p = np.atleast_1d(p)
|
||||
# Note: special function for norm.ppf does not support complex
|
||||
return np.diag(approx_fprime(p, self.deriv, centered=True))
|
||||
|
||||
def inverse_deriv(self, z):
|
||||
"""
|
||||
Derivative of the inverse of the CDF transformation link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array
|
||||
The inverse of the link function at `p`
|
||||
|
||||
Returns
|
||||
-------
|
||||
g^(-1)'(z) : array
|
||||
The value of the derivative of the inverse of the logit function
|
||||
"""
|
||||
return 1/self.deriv(self.inverse(z))
|
||||
|
||||
|
||||
class probit(CDFLink):
|
||||
"""
|
||||
The probit (standard normal CDF) transform
|
||||
|
||||
Notes
|
||||
--------
|
||||
g(p) = scipy.stats.norm.ppf(p)
|
||||
|
||||
probit is an alias of CDFLink.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class cauchy(CDFLink):
|
||||
"""
|
||||
The Cauchy (standard Cauchy CDF) transform
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(p) = scipy.stats.cauchy.ppf(p)
|
||||
|
||||
cauchy is an alias of CDFLink with dbn=scipy.stats.cauchy
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super(cauchy, self).__init__(dbn=scipy.stats.cauchy)
|
||||
|
||||
def deriv2(self, p):
|
||||
"""
|
||||
Second derivative of the Cauchy link function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p: array-like
|
||||
Probabilities
|
||||
|
||||
Returns
|
||||
-------
|
||||
g''(p) : array
|
||||
Value of the second derivative of Cauchy link function at `p`
|
||||
"""
|
||||
a = np.pi * (p - 0.5)
|
||||
d2 = 2 * np.pi**2 * np.sin(a) / np.cos(a)**3
|
||||
return d2
|
||||
|
||||
class CLogLog(Logit):
|
||||
"""
|
||||
The complementary log-log transform
|
||||
|
||||
CLogLog inherits from Logit in order to have access to its _clean method
|
||||
for the link and its derivative.
|
||||
|
||||
Notes
|
||||
-----
|
||||
CLogLog is untested.
|
||||
"""
|
||||
def __call__(self, p):
|
||||
"""
|
||||
C-Log-Log transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
z : array
|
||||
The CLogLog transform of `p`
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(p) = log(-log(1-p))
|
||||
"""
|
||||
p = self._clean(p)
|
||||
return np.log(-np.log(1 - p))
|
||||
|
||||
def inverse(self, z):
|
||||
"""
|
||||
Inverse of C-Log-Log transform link function
|
||||
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array-like
|
||||
The value of the inverse of the CLogLog link function at `p`
|
||||
|
||||
Returns
|
||||
-------
|
||||
p : array
|
||||
Mean parameters
|
||||
|
||||
Notes
|
||||
-----
|
||||
g^(-1)(`z`) = 1-exp(-exp(`z`))
|
||||
"""
|
||||
return 1 - np.exp(-np.exp(z))
|
||||
|
||||
def deriv(self, p):
|
||||
"""
|
||||
Derivative of C-Log-Log transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
g'(p) : array
|
||||
The derivative of the CLogLog transform link function
|
||||
|
||||
Notes
|
||||
-----
|
||||
g'(p) = - 1 / ((p-1)*log(1-p))
|
||||
"""
|
||||
p = self._clean(p)
|
||||
return 1. / ((p - 1) * (np.log(1 - p)))
|
||||
|
||||
def deriv2(self, p):
|
||||
"""
|
||||
Second derivative of the C-Log-Log ink function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
g''(p) : array
|
||||
The second derivative of the CLogLog link function
|
||||
"""
|
||||
p = self._clean(p)
|
||||
fl = np.log(1 - p)
|
||||
d2 = -1 / ((1 - p)**2 * fl)
|
||||
d2 *= 1 + 1 / fl
|
||||
return d2
|
||||
|
||||
def inverse_deriv(self, z):
|
||||
"""
|
||||
Derivative of the inverse of the C-Log-Log transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
z : array-like
|
||||
The value of the inverse of the CLogLog link function at `p`
|
||||
|
||||
Returns
|
||||
-------
|
||||
g^(-1)'(z) : array
|
||||
The derivative of the inverse of the CLogLog link function
|
||||
"""
|
||||
return np.exp(z - np.exp(z))
|
||||
|
||||
|
||||
class cloglog(CLogLog):
|
||||
"""
|
||||
The CLogLog transform link function.
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(`p`) = log(-log(1-`p`))
|
||||
|
||||
cloglog is an alias for CLogLog
|
||||
cloglog = CLogLog()
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class NegativeBinomial(object):
|
||||
'''
|
||||
The negative binomial link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
alpha : float, optional
|
||||
Alpha is the ancillary parameter of the Negative Binomial link
|
||||
function. It is assumed to be nonstochastic. The default value is 1.
|
||||
Permissible values are usually assumed to be in (.01, 2).
|
||||
'''
|
||||
|
||||
def __init__(self, alpha=1.):
|
||||
self.alpha = alpha
|
||||
|
||||
def _clean(self, x):
|
||||
return np.clip(x, FLOAT_EPS, np.inf)
|
||||
|
||||
def __call__(self, p):
|
||||
'''
|
||||
Negative Binomial transform link function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
z : array
|
||||
The negative binomial transform of `p`
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(p) = log(p/(p + 1/alpha))
|
||||
'''
|
||||
p = self._clean(p)
|
||||
return np.log(p/(p + 1/self.alpha))
|
||||
|
||||
def inverse(self, z):
|
||||
'''
|
||||
Inverse of the negative binomial transform
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
z : array-like
|
||||
The value of the inverse of the negative binomial link at `p`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
p : array
|
||||
Mean parameters
|
||||
|
||||
Notes
|
||||
-----
|
||||
g^(-1)(z) = exp(z)/(alpha*(1-exp(z)))
|
||||
'''
|
||||
return -1/(self.alpha * (1 - np.exp(-z)))
|
||||
|
||||
def deriv(self, p):
|
||||
'''
|
||||
Derivative of the negative binomial transform
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
g'(p) : array
|
||||
The derivative of the negative binomial transform link function
|
||||
|
||||
Notes
|
||||
-----
|
||||
g'(x) = 1/(x+alpha*x^2)
|
||||
'''
|
||||
return 1/(p + self.alpha * p**2)
|
||||
|
||||
def deriv2(self,p):
|
||||
'''
|
||||
Second derivative of the negative binomial link function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array-like
|
||||
Mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
g''(p) : array
|
||||
The second derivative of the negative binomial transform link
|
||||
function
|
||||
|
||||
Notes
|
||||
-----
|
||||
g''(x) = -(1+2*alpha*x)/(x+alpha*x^2)^2
|
||||
'''
|
||||
numer = -(1 + 2 * self.alpha * p)
|
||||
denom = (p + self.alpha * p**2)**2
|
||||
return numer / denom
|
||||
|
||||
def inverse_deriv(self, z):
|
||||
'''
|
||||
Derivative of the inverse of the negative binomial transform
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
z : array-like
|
||||
Usually the linear predictor for a GLM or GEE model
|
||||
|
||||
Returns
|
||||
-------
|
||||
g^(-1)'(z) : array
|
||||
The value of the derivative of the inverse of the negative
|
||||
binomial link
|
||||
'''
|
||||
t = np.exp(z)
|
||||
return t / (self.alpha * (1-t)**2)
|
||||
|
||||
|
||||
class nbinom(NegativeBinomial):
|
||||
"""
|
||||
The negative binomial link function.
|
||||
|
||||
Notes
|
||||
-----
|
||||
g(p) = log(p/(p + 1/alpha))
|
||||
|
||||
nbinom is an alias of NegativeBinomial.
|
||||
nbinom = NegativeBinomial(alpha=1.)
|
||||
"""
|
||||
pass
|
993
src/py/crankshaft/crankshaft/regression/glm/tests/test_glm.py
Normal file
993
src/py/crankshaft/crankshaft/regression/glm/tests/test_glm.py
Normal file
@ -0,0 +1,993 @@
|
||||
"""
|
||||
Tests for generalized linear models. Majority of code either directly borrowed
|
||||
or closely adapted from statsmodels package. Model results verfiied using glm
|
||||
function in R and GLM function in statsmodels.
|
||||
"""
|
||||
|
||||
__author__ = 'Taylor Oshan tayoshan@gmail.com'
|
||||
|
||||
from pysal.contrib.glm.glm import GLM
|
||||
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial, QuasiPoisson
|
||||
import numpy as np
|
||||
import pysal
|
||||
import unittest
|
||||
import math
|
||||
|
||||
|
||||
class TestGaussian(unittest.TestCase):
|
||||
"""
|
||||
Tests for Poisson GLM
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')
|
||||
y = np.array(db.by_col("HOVAL"))
|
||||
self.y = np.reshape(y, (49,1))
|
||||
X = []
|
||||
X.append(db.by_col("INC"))
|
||||
X.append(db.by_col("CRIME"))
|
||||
self.X = np.array(X).T
|
||||
|
||||
def testIWLS(self):
|
||||
model = GLM(self.y, self.X, family=Gaussian())
|
||||
results = model.fit()
|
||||
self.assertEqual(results.n, 49)
|
||||
self.assertEqual(results.df_model, 2)
|
||||
self.assertEqual(results.df_resid, 46)
|
||||
self.assertEqual(results.aic, 408.73548964604873)
|
||||
self.assertEqual(results.bic, 10467.991340493107)
|
||||
self.assertEqual(results.deviance, 10647.015074206196)
|
||||
self.assertEqual(results.llf, -201.36774482302437)
|
||||
self.assertEqual(results.null_deviance, 16367.794631703124)
|
||||
self.assertEqual(results.scale, 231.45684943926514)
|
||||
np.testing.assert_allclose(results.params, [ 46.42818268, 0.62898397,
|
||||
-0.48488854])
|
||||
np.testing.assert_allclose(results.bse, [ 13.19175703, 0.53591045,
|
||||
0.18267291])
|
||||
np.testing.assert_allclose(results.cov_params(),
|
||||
[[ 1.74022453e+02, -6.52060364e+00, -2.15109867e+00],
|
||||
[ -6.52060364e+00, 2.87200008e-01, 6.80956787e-02],
|
||||
[ -2.15109867e+00, 6.80956787e-02, 3.33693910e-02]])
|
||||
np.testing.assert_allclose(results.tvalues, [ 3.51948437, 1.17367365,
|
||||
-2.65440864])
|
||||
np.testing.assert_allclose(results.pvalues, [ 0.00043239, 0.24052577,
|
||||
0.00794475], atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.conf_int(),
|
||||
[[ 20.57281401, 72.28355135],
|
||||
[ -0.42138121, 1.67934915],
|
||||
[ -0.84292086, -0.12685622]])
|
||||
np.testing.assert_allclose(results.normalized_cov_params,
|
||||
[[ 7.51857004e-01, -2.81720055e-02, -9.29373521e-03],
|
||||
[ -2.81720055e-02, 1.24083607e-03, 2.94204638e-04],
|
||||
[ -9.29373521e-03, 2.94204638e-04, 1.44171110e-04]])
|
||||
np.testing.assert_allclose(results.mu,
|
||||
[ 51.08752105, 50.66601521, 41.61367567, 33.53969014,
|
||||
28.90638232, 43.87074227, 51.64910882, 34.92671563,
|
||||
42.69267622, 38.49449134, 20.92815471, 25.25228436,
|
||||
29.78223486, 25.02403635, 29.07959539, 24.63352275,
|
||||
34.71372149, 33.40443052, 27.29864225, 65.86219802,
|
||||
33.69854751, 37.44976435, 50.01304928, 36.81219959,
|
||||
22.02674837, 31.64775955, 27.63563294, 23.7697291 ,
|
||||
22.43119725, 21.76987089, 48.51169321, 49.05891819,
|
||||
32.31656426, 44.20550354, 35.49244888, 51.27811308,
|
||||
36.55047181, 27.37048914, 48.78812922, 57.31744163,
|
||||
51.22914162, 54.70515578, 37.06622277, 44.5075759 ,
|
||||
41.24328983, 49.93821824, 44.85644299, 40.93838609, 47.32045464])
|
||||
self.assertEqual(results.pearson_chi2, 10647.015074206196)
|
||||
np.testing.assert_allclose(results.resid_response,
|
||||
[ 29.37948195, -6.09901421, -15.26367567, -0.33968914,
|
||||
-5.68138232, -15.12074227, 23.35089118, 2.19828437,
|
||||
9.90732178, 57.90551066, -1.22815371, -5.35228436,
|
||||
11.91776614, 17.87596565, -11.07959539, -5.83352375,
|
||||
7.03627851, 26.59556948, 3.30135775, 15.40479998,
|
||||
-13.72354751, -6.99976335, -2.28004728, 16.38780141,
|
||||
-4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
|
||||
10.06880275, 0.73012911, -16.71169421, -8.75891919,
|
||||
-8.71656426, -15.75550254, -8.49244888, -14.97811408,
|
||||
6.74952719, -4.67048814, -9.18813122, 4.63255937,
|
||||
-9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
|
||||
-13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564])
|
||||
np.testing.assert_allclose(results.resid_working,
|
||||
[ 29.37948195, -6.09901421, -15.26367567, -0.33968914,
|
||||
-5.68138232, -15.12074227, 23.35089118, 2.19828437,
|
||||
9.90732178, 57.90551066, -1.22815371, -5.35228436,
|
||||
11.91776614, 17.87596565, -11.07959539, -5.83352375,
|
||||
7.03627851, 26.59556948, 3.30135775, 15.40479998,
|
||||
-13.72354751, -6.99976335, -2.28004728, 16.38780141,
|
||||
-4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
|
||||
10.06880275, 0.73012911, -16.71169421, -8.75891919,
|
||||
-8.71656426, -15.75550254, -8.49244888, -14.97811408,
|
||||
6.74952719, -4.67048814, -9.18813122, 4.63255937,
|
||||
-9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
|
||||
-13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564])
|
||||
np.testing.assert_allclose(results.resid_pearson,
|
||||
[ 29.37948195, -6.09901421, -15.26367567, -0.33968914,
|
||||
-5.68138232, -15.12074227, 23.35089118, 2.19828437,
|
||||
9.90732178, 57.90551066, -1.22815371, -5.35228436,
|
||||
11.91776614, 17.87596565, -11.07959539, -5.83352375,
|
||||
7.03627851, 26.59556948, 3.30135775, 15.40479998,
|
||||
-13.72354751, -6.99976335, -2.28004728, 16.38780141,
|
||||
-4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
|
||||
10.06880275, 0.73012911, -16.71169421, -8.75891919,
|
||||
-8.71656426, -15.75550254, -8.49244888, -14.97811408,
|
||||
6.74952719, -4.67048814, -9.18813122, 4.63255937,
|
||||
-9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
|
||||
-13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564])
|
||||
np.testing.assert_allclose(results.resid_anscombe,
|
||||
[ 29.37948195, -6.09901421, -15.26367567, -0.33968914,
|
||||
-5.68138232, -15.12074227, 23.35089118, 2.19828437,
|
||||
9.90732178, 57.90551066, -1.22815371, -5.35228436,
|
||||
11.91776614, 17.87596565, -11.07959539, -5.83352375,
|
||||
7.03627851, 26.59556948, 3.30135775, 15.40479998,
|
||||
-13.72354751, -6.99976335, -2.28004728, 16.38780141,
|
||||
-4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
|
||||
10.06880275, 0.73012911, -16.71169421, -8.75891919,
|
||||
-8.71656426, -15.75550254, -8.49244888, -14.97811408,
|
||||
6.74952719, -4.67048814, -9.18813122, 4.63255937,
|
||||
-9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
|
||||
-13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564])
|
||||
np.testing.assert_allclose(results.resid_deviance,
|
||||
[ 29.37948195, -6.09901421, -15.26367567, -0.33968914,
|
||||
-5.68138232, -15.12074227, 23.35089118, 2.19828437,
|
||||
9.90732178, 57.90551066, -1.22815371, -5.35228436,
|
||||
11.91776614, 17.87596565, -11.07959539, -5.83352375,
|
||||
7.03627851, 26.59556948, 3.30135775, 15.40479998,
|
||||
-13.72354751, -6.99976335, -2.28004728, 16.38780141,
|
||||
-4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
|
||||
10.06880275, 0.73012911, -16.71169421, -8.75891919,
|
||||
-8.71656426, -15.75550254, -8.49244888, -14.97811408,
|
||||
6.74952719, -4.67048814, -9.18813122, 4.63255937,
|
||||
-9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
|
||||
-13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564])
|
||||
np.testing.assert_allclose(results.null,
|
||||
[ 38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447,
|
||||
38.43622447, 38.43622447, 38.43622447, 38.43622447, 38.43622447])
|
||||
self.assertAlmostEqual(results.D2, .349514377851)
|
||||
self.assertAlmostEqual(results.adj_D2, 0.32123239427957673)
|
||||
|
||||
class TestPoisson(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')
|
||||
y = np.array(db.by_col("HOVAL"))
|
||||
y = np.reshape(y, (49,1))
|
||||
self.y = np.round(y).astype(int)
|
||||
X = []
|
||||
X.append(db.by_col("INC"))
|
||||
X.append(db.by_col("CRIME"))
|
||||
self.X = np.array(X).T
|
||||
|
||||
def testIWLS(self):
|
||||
model = GLM(self.y, self.X, family=Poisson())
|
||||
results = model.fit()
|
||||
self.assertEqual(results.n, 49)
|
||||
self.assertEqual(results.df_model, 2)
|
||||
self.assertEqual(results.df_resid, 46)
|
||||
self.assertAlmostEqual(results.aic, 500.85184179938756)
|
||||
self.assertAlmostEqual(results.bic, 51.436404535087661)
|
||||
self.assertAlmostEqual(results.deviance, 230.46013824817649)
|
||||
self.assertAlmostEqual(results.llf, -247.42592089969378)
|
||||
self.assertAlmostEqual(results.null_deviance, 376.97293610347361)
|
||||
self.assertEqual(results.scale, 1.0)
|
||||
np.testing.assert_allclose(results.params, [ 3.92159085, 0.01183491,
|
||||
-0.01371397], atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.bse, [ 0.13049161, 0.00511599,
|
||||
0.00193769], atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.cov_params(),
|
||||
[[ 1.70280610e-02, -6.18628383e-04, -2.21386966e-04],
|
||||
[ -6.18628383e-04, 2.61733917e-05, 6.77496445e-06],
|
||||
[ -2.21386966e-04, 6.77496445e-06, 3.75463502e-06]])
|
||||
np.testing.assert_allclose(results.tvalues, [ 30.0524361 , 2.31331634,
|
||||
-7.07748998])
|
||||
np.testing.assert_allclose(results.pvalues, [ 2.02901657e-198,
|
||||
2.07052532e-002, 1.46788805e-012])
|
||||
np.testing.assert_allclose(results.conf_int(),
|
||||
[[ 3.66583199e+00, 4.17734972e+00],
|
||||
[ 1.80774841e-03, 2.18620753e-02],
|
||||
[ -1.75117666e-02, -9.91616901e-03]])
|
||||
np.testing.assert_allclose(results.normalized_cov_params,
|
||||
[[ 1.70280610e-02, -6.18628383e-04, -2.21386966e-04],
|
||||
[ -6.18628383e-04, 2.61733917e-05, 6.77496445e-06],
|
||||
[ -2.21386966e-04, 6.77496445e-06, 3.75463502e-06]])
|
||||
np.testing.assert_allclose(results.mu,
|
||||
[ 51.26831574, 50.15022766, 40.06142973, 34.13799739,
|
||||
28.76119226, 42.6836241 , 55.64593703, 34.08277997,
|
||||
40.90389582, 37.19727958, 23.47459217, 26.12384057,
|
||||
29.78303507, 25.96888223, 29.14073823, 26.04369592,
|
||||
34.18996367, 32.28924005, 27.42284396, 72.69207879,
|
||||
33.05316347, 36.52276972, 49.2551479 , 35.33439632,
|
||||
24.07252457, 31.67153709, 27.81699478, 25.38021219,
|
||||
24.31759259, 23.13586161, 48.40724678, 48.57969818,
|
||||
31.92596006, 43.3679231 , 34.32925819, 51.78908089,
|
||||
34.49778584, 27.56236198, 48.34273194, 57.50829097,
|
||||
50.66038226, 54.68701352, 35.77103116, 43.21886784,
|
||||
40.07615759, 49.98658004, 43.13352883, 40.28520774, 46.28910294])
|
||||
self.assertAlmostEqual(results.pearson_chi2, 264.62262932090221)
|
||||
np.testing.assert_allclose(results.resid_response,
|
||||
[ 28.73168426, -5.15022766, -14.06142973, -1.13799739,
|
||||
-5.76119226, -13.6836241 , 19.35406297, 2.91722003,
|
||||
12.09610418, 58.80272042, -3.47459217, -6.12384057,
|
||||
12.21696493, 17.03111777, -11.14073823, -7.04369592,
|
||||
7.81003633, 27.71075995, 3.57715604, 8.30792121,
|
||||
-13.05316347, -6.52276972, -1.2551479 , 17.66560368,
|
||||
-6.07252457, -11.67153709, 6.18300522, -2.38021219,
|
||||
7.68240741, -1.13586161, -16.40724678, -8.57969818,
|
||||
-7.92596006, -15.3679231 , -7.32925819, -15.78908089,
|
||||
8.50221416, -4.56236198, -8.34273194, 4.49170903,
|
||||
-8.66038226, -10.68701352, -9.77103116, -9.21886784,
|
||||
-12.07615759, 26.01341996, -1.13352883, -13.28520774, -10.28910294])
|
||||
np.testing.assert_allclose(results.resid_working,
|
||||
[ 1473.02506034, -258.28508941, -563.32097891, -38.84895192,
|
||||
-165.69875817, -584.06666725, 1076.97496919, 99.42696848,
|
||||
494.77778514, 2187.30123163, -81.56463405, -159.97823479,
|
||||
363.858295 , 442.27909165, -324.64933645, -183.44387481,
|
||||
267.02485844, 894.75938 , 98.09579187, 603.9200634 ,
|
||||
-431.44834594, -238.2296165 , -61.82249568, 624.20344168,
|
||||
-146.18099686, -369.65551968, 171.99262399, -60.41029031,
|
||||
186.81765356, -26.27913713, -794.22964417, -416.79914795,
|
||||
-253.04388425, -666.47490701, -251.6079969 , -817.70198717,
|
||||
293.30756327, -125.74947222, -403.31045369, 258.31051005,
|
||||
-438.73827602, -584.440853 , -349.51985996, -398.42903071,
|
||||
-483.96599444, 1300.32189904, -48.89309853, -535.19735391,
|
||||
-476.27334527])
|
||||
np.testing.assert_allclose(results.resid_pearson,
|
||||
[ 4.01269878, -0.72726045, -2.221602 , -0.19477008, -1.07425881,
|
||||
-2.09445239, 2.59451042, 0.49969118, 1.89131202, 9.64143836,
|
||||
-0.71714142, -1.19813392, 2.23861212, 3.34207756, -2.0637814 ,
|
||||
-1.3802231 , 1.33568403, 4.87662684, 0.68309584, 0.97442591,
|
||||
-2.27043598, -1.07931992, -0.17884182, 2.97186889, -1.23768025,
|
||||
-2.07392709, 1.1723155 , -0.47246327, 1.55789092, -0.23614708,
|
||||
-2.35819937, -1.23096188, -1.40274877, -2.33362391, -1.25091503,
|
||||
-2.19400568, 1.44755952, -0.8690235 , -1.19989348, 0.59230634,
|
||||
-1.21675413, -1.44515442, -1.63370888, -1.40229988, -1.90759306,
|
||||
3.67934693, -0.17259375, -2.09312684, -1.51230062])
|
||||
np.testing.assert_allclose(results.resid_anscombe,
|
||||
[ 3.70889134, -0.74031295, -2.37729865, -0.19586855, -1.11374751,
|
||||
-2.22611959, 2.46352013, 0.49282126, 1.80857757, 8.06444452,
|
||||
-0.73610811, -1.25061371, 2.10820431, 3.05467547, -2.22437611,
|
||||
-1.45136173, 1.28939698, 4.35942058, 0.66904552, 0.95674923,
|
||||
-2.45438937, -1.11429881, -0.17961012, 2.76715848, -1.29658591,
|
||||
-2.22816691, 1.13269136, -0.48017382, 1.48562248, -0.23812278,
|
||||
-2.51664399, -1.2703721 , -1.4683091 , -2.49907536, -1.30026484,
|
||||
-2.32398309, 1.39380683, -0.89495368, -1.23735395, 0.58485202,
|
||||
-1.25435224, -1.4968484 , -1.71888038, -1.45756652, -2.01906267,
|
||||
3.41729922, -0.17335867, -2.22921828, -1.57470549])
|
||||
np.testing.assert_allclose(results.resid_deviance,
|
||||
[ 3.70529668, -0.74027329, -2.37536322, -0.19586751, -1.11349765,
|
||||
-2.22466106, 2.46246446, 0.4928057 , 1.80799655, 8.02696525,
|
||||
-0.73602255, -1.25021555, 2.10699958, 3.05084608, -2.22214376,
|
||||
-1.45072221, 1.28913747, 4.35106213, 0.6689982 , 0.95669662,
|
||||
-2.45171913, -1.11410444, -0.17960956, 2.76494217, -1.29609865,
|
||||
-2.22612429, 1.13247453, -0.48015254, 1.48508549, -0.23812 ,
|
||||
-2.51476072, -1.27015583, -1.46777697, -2.49699318, -1.29992892,
|
||||
-2.32263069, 1.39348459, -0.89482132, -1.23715363, 0.58483655,
|
||||
-1.25415329, -1.49653039, -1.7181055 , -1.45719072, -2.01791949,
|
||||
3.41437156, -0.1733581 , -2.22765605, -1.57426046])
|
||||
np.testing.assert_allclose(results.null,
|
||||
[ 38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143, 38.42857143])
|
||||
self.assertAlmostEqual(results.D2, .388656011675)
|
||||
self.assertAlmostEqual(results.adj_D2, 0.36207583826952761)#.375648692774)
|
||||
|
||||
def testQuasi(self):
|
||||
model = GLM(self.y, self.X, family=QuasiPoisson())
|
||||
results = model.fit()
|
||||
self.assertEqual(results.n, 49)
|
||||
self.assertEqual(results.df_model, 2)
|
||||
self.assertEqual(results.df_resid, 46)
|
||||
self.assertTrue(math.isnan(results.aic))
|
||||
self.assertAlmostEqual(results.bic, 51.436404535087661)
|
||||
self.assertAlmostEqual(results.deviance, 230.46013824817649)
|
||||
self.assertTrue(math.isnan(results.llf))
|
||||
self.assertAlmostEqual(results.null_deviance, 376.97293610347361)
|
||||
self.assertAlmostEqual(results.scale, 5.7526658548022223)
|
||||
np.testing.assert_allclose(results.params, [ 3.92159085, 0.01183491,
|
||||
-0.01371397], atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.bse, [ 0.31298042, 0.01227057,
|
||||
0.00464749], atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.cov_params(),
|
||||
[[ 9.79567451e-02, -3.55876238e-03, -1.27356524e-03],
|
||||
[ -3.55876238e-03, 1.50566777e-04, 3.89741067e-05],
|
||||
[ -1.27356524e-03, 3.89741067e-05, 2.15991606e-05]])
|
||||
np.testing.assert_allclose(results.tvalues, [ 12.52982796, 0.96449604,
|
||||
-2.95083339])
|
||||
np.testing.assert_allclose(results.pvalues, [ 5.12737770e-36,
|
||||
3.34797291e-01, 3.16917819e-03])
|
||||
np.testing.assert_allclose(results.conf_int(),
|
||||
[[ 3.3081605 , 4.53502121],
|
||||
[-0.01221495, 0.03588478],
|
||||
[-0.02282288, -0.00460506]], atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.normalized_cov_params,
|
||||
[[ 1.70280610e-02, -6.18628383e-04, -2.21386966e-04],
|
||||
[ -6.18628383e-04, 2.61733917e-05, 6.77496445e-06],
|
||||
[ -2.21386966e-04, 6.77496445e-06, 3.75463502e-06]])
|
||||
np.testing.assert_allclose(results.mu,
|
||||
[ 51.26831574, 50.15022766, 40.06142973, 34.13799739,
|
||||
28.76119226, 42.6836241 , 55.64593703, 34.08277997,
|
||||
40.90389582, 37.19727958, 23.47459217, 26.12384057,
|
||||
29.78303507, 25.96888223, 29.14073823, 26.04369592,
|
||||
34.18996367, 32.28924005, 27.42284396, 72.69207879,
|
||||
33.05316347, 36.52276972, 49.2551479 , 35.33439632,
|
||||
24.07252457, 31.67153709, 27.81699478, 25.38021219,
|
||||
24.31759259, 23.13586161, 48.40724678, 48.57969818,
|
||||
31.92596006, 43.3679231 , 34.32925819, 51.78908089,
|
||||
34.49778584, 27.56236198, 48.34273194, 57.50829097,
|
||||
50.66038226, 54.68701352, 35.77103116, 43.21886784,
|
||||
40.07615759, 49.98658004, 43.13352883, 40.28520774, 46.28910294])
|
||||
self.assertAlmostEqual(results.pearson_chi2, 264.62262932090221)
|
||||
np.testing.assert_allclose(results.resid_response,
|
||||
[ 28.73168426, -5.15022766, -14.06142973, -1.13799739,
|
||||
-5.76119226, -13.6836241 , 19.35406297, 2.91722003,
|
||||
12.09610418, 58.80272042, -3.47459217, -6.12384057,
|
||||
12.21696493, 17.03111777, -11.14073823, -7.04369592,
|
||||
7.81003633, 27.71075995, 3.57715604, 8.30792121,
|
||||
-13.05316347, -6.52276972, -1.2551479 , 17.66560368,
|
||||
-6.07252457, -11.67153709, 6.18300522, -2.38021219,
|
||||
7.68240741, -1.13586161, -16.40724678, -8.57969818,
|
||||
-7.92596006, -15.3679231 , -7.32925819, -15.78908089,
|
||||
8.50221416, -4.56236198, -8.34273194, 4.49170903,
|
||||
-8.66038226, -10.68701352, -9.77103116, -9.21886784,
|
||||
-12.07615759, 26.01341996, -1.13352883, -13.28520774, -10.28910294])
|
||||
np.testing.assert_allclose(results.resid_working,
|
||||
[ 1473.02506034, -258.28508941, -563.32097891, -38.84895192,
|
||||
-165.69875817, -584.06666725, 1076.97496919, 99.42696848,
|
||||
494.77778514, 2187.30123163, -81.56463405, -159.97823479,
|
||||
363.858295 , 442.27909165, -324.64933645, -183.44387481,
|
||||
267.02485844, 894.75938 , 98.09579187, 603.9200634 ,
|
||||
-431.44834594, -238.2296165 , -61.82249568, 624.20344168,
|
||||
-146.18099686, -369.65551968, 171.99262399, -60.41029031,
|
||||
186.81765356, -26.27913713, -794.22964417, -416.79914795,
|
||||
-253.04388425, -666.47490701, -251.6079969 , -817.70198717,
|
||||
293.30756327, -125.74947222, -403.31045369, 258.31051005,
|
||||
-438.73827602, -584.440853 , -349.51985996, -398.42903071,
|
||||
-483.96599444, 1300.32189904, -48.89309853, -535.19735391,
|
||||
-476.27334527])
|
||||
np.testing.assert_allclose(results.resid_pearson,
|
||||
[ 4.01269878, -0.72726045, -2.221602 , -0.19477008, -1.07425881,
|
||||
-2.09445239, 2.59451042, 0.49969118, 1.89131202, 9.64143836,
|
||||
-0.71714142, -1.19813392, 2.23861212, 3.34207756, -2.0637814 ,
|
||||
-1.3802231 , 1.33568403, 4.87662684, 0.68309584, 0.97442591,
|
||||
-2.27043598, -1.07931992, -0.17884182, 2.97186889, -1.23768025,
|
||||
-2.07392709, 1.1723155 , -0.47246327, 1.55789092, -0.23614708,
|
||||
-2.35819937, -1.23096188, -1.40274877, -2.33362391, -1.25091503,
|
||||
-2.19400568, 1.44755952, -0.8690235 , -1.19989348, 0.59230634,
|
||||
-1.21675413, -1.44515442, -1.63370888, -1.40229988, -1.90759306,
|
||||
3.67934693, -0.17259375, -2.09312684, -1.51230062])
|
||||
np.testing.assert_allclose(results.resid_anscombe,
|
||||
[ 3.70889134, -0.74031295, -2.37729865, -0.19586855, -1.11374751,
|
||||
-2.22611959, 2.46352013, 0.49282126, 1.80857757, 8.06444452,
|
||||
-0.73610811, -1.25061371, 2.10820431, 3.05467547, -2.22437611,
|
||||
-1.45136173, 1.28939698, 4.35942058, 0.66904552, 0.95674923,
|
||||
-2.45438937, -1.11429881, -0.17961012, 2.76715848, -1.29658591,
|
||||
-2.22816691, 1.13269136, -0.48017382, 1.48562248, -0.23812278,
|
||||
-2.51664399, -1.2703721 , -1.4683091 , -2.49907536, -1.30026484,
|
||||
-2.32398309, 1.39380683, -0.89495368, -1.23735395, 0.58485202,
|
||||
-1.25435224, -1.4968484 , -1.71888038, -1.45756652, -2.01906267,
|
||||
3.41729922, -0.17335867, -2.22921828, -1.57470549])
|
||||
np.testing.assert_allclose(results.resid_deviance,
|
||||
[ 3.70529668, -0.74027329, -2.37536322, -0.19586751, -1.11349765,
|
||||
-2.22466106, 2.46246446, 0.4928057 , 1.80799655, 8.02696525,
|
||||
-0.73602255, -1.25021555, 2.10699958, 3.05084608, -2.22214376,
|
||||
-1.45072221, 1.28913747, 4.35106213, 0.6689982 , 0.95669662,
|
||||
-2.45171913, -1.11410444, -0.17960956, 2.76494217, -1.29609865,
|
||||
-2.22612429, 1.13247453, -0.48015254, 1.48508549, -0.23812 ,
|
||||
-2.51476072, -1.27015583, -1.46777697, -2.49699318, -1.29992892,
|
||||
-2.32263069, 1.39348459, -0.89482132, -1.23715363, 0.58483655,
|
||||
-1.25415329, -1.49653039, -1.7181055 , -1.45719072, -2.01791949,
|
||||
3.41437156, -0.1733581 , -2.22765605, -1.57426046])
|
||||
np.testing.assert_allclose(results.null,
|
||||
[ 38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143,
|
||||
38.42857143, 38.42857143, 38.42857143, 38.42857143, 38.42857143])
|
||||
self.assertAlmostEqual(results.D2, .388656011675)
|
||||
self.assertAlmostEqual(results.adj_D2, 0.36207583826952761)
|
||||
|
||||
class TestBinomial(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
#London house price data
|
||||
#y: 'BATH2'
|
||||
y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
|
||||
0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
|
||||
self.y = y.reshape((316,1))
|
||||
#X: 'FLOORSZ'
|
||||
X = np.array([ 77, 75, 64, 95, 107, 100, 81, 151, 98, 260, 171, 161, 91,
|
||||
80, 50, 85, 52, 69, 60, 84, 155, 97, 69, 126, 90, 43,
|
||||
51, 41, 140, 80, 52, 86, 66, 60, 40, 155, 138, 97, 115,
|
||||
148, 206, 60, 53, 96, 88, 160, 31, 43, 154, 60, 131, 60,
|
||||
46, 61, 125, 150, 76, 92, 96, 100, 105, 72, 48, 41, 72,
|
||||
65, 60, 65, 98, 33, 144, 111, 91, 108, 38, 48, 95, 63,
|
||||
98, 129, 108, 51, 131, 66, 48, 127, 76, 68, 52, 64, 57,
|
||||
121, 67, 76, 112, 96, 90, 53, 93, 64, 97, 58, 44, 157,
|
||||
53, 70, 71, 167, 47, 70, 96, 77, 75, 71, 67, 47, 71,
|
||||
90, 69, 64, 65, 95, 60, 60, 65, 54, 121, 105, 50, 85,
|
||||
69, 69, 62, 65, 93, 93, 70, 62, 155, 68, 117, 80, 80,
|
||||
75, 98, 114, 86, 70, 50, 51, 163, 124, 59, 95, 51, 63,
|
||||
85, 53, 46, 102, 114, 83, 47, 40, 63, 123, 100, 63, 110,
|
||||
79, 98, 99, 120, 52, 48, 37, 81, 30, 88, 50, 35, 116,
|
||||
67, 45, 80, 86, 109, 59, 75, 60, 71, 141, 121, 50, 168,
|
||||
90, 51, 133, 75, 133, 127, 37, 68, 105, 61, 123, 151, 110,
|
||||
77, 220, 94, 77, 70, 100, 98, 126, 55, 105, 60, 176, 104,
|
||||
68, 62, 70, 48, 102, 80, 97, 66, 80, 102, 160, 55, 60,
|
||||
71, 125, 85, 85, 190, 137, 48, 41, 42, 51, 57, 60, 114,
|
||||
88, 84, 108, 66, 85, 42, 98, 90, 127, 100, 55, 76, 82,
|
||||
63, 80, 71, 76, 121, 109, 92, 160, 109, 185, 100, 90, 90,
|
||||
86, 88, 95, 116, 135, 61, 74, 60, 235, 76, 66, 100, 49,
|
||||
50, 37, 100, 88, 90, 52, 95, 81, 79, 96, 75, 91, 86,
|
||||
83, 180, 108, 80, 96, 49, 117, 117, 86, 46, 66, 95, 57,
|
||||
120, 137, 68, 240])
|
||||
self.X = X.reshape((316,1))
|
||||
|
||||
def testIWLS(self):
|
||||
model = GLM(self.y, self.X, family=Binomial())
|
||||
results = model.fit()
|
||||
self.assertEqual(results.n, 316)
|
||||
self.assertEqual(results.df_model, 1)
|
||||
self.assertEqual(results.df_resid, 314)
|
||||
self.assertEqual(results.aic, 155.19347530342466)
|
||||
self.assertEqual(results.bic, -1656.1095797628657)
|
||||
self.assertEqual(results.deviance, 151.19347530342466)
|
||||
self.assertEqual(results.llf, -75.596737651712331)
|
||||
self.assertEqual(results.null_deviance, 189.16038985881212)
|
||||
self.assertEqual(results.scale, 1.0)
|
||||
np.testing.assert_allclose(results.params, [-5.33638276, 0.0287754 ])
|
||||
np.testing.assert_allclose(results.bse, [ 0.64499904, 0.00518312],
|
||||
atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.cov_params(),
|
||||
[[ 4.16023762e-01, -3.14338457e-03],
|
||||
[ -3.14338457e-03, 2.68646833e-05]])
|
||||
np.testing.assert_allclose(results.tvalues, [-8.27347396, 5.55175826])
|
||||
np.testing.assert_allclose(results.pvalues, [ 1.30111233e-16,
|
||||
2.82810512e-08])
|
||||
np.testing.assert_allclose(results.conf_int(),
|
||||
[[-6.60055765, -4.07220787],
|
||||
[ 0.01861668, 0.03893412]], atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.normalized_cov_params,
|
||||
[[ 4.16023762e-01, -3.14338457e-03],
|
||||
[ -3.14338457e-03, 2.68646833e-05]])
|
||||
np.testing.assert_allclose(results.mu,
|
||||
[ 0.04226237, 0.03999333, 0.02946178, 0.0689636 , 0.09471181,
|
||||
0.07879431, 0.04717464, 0.27065598, 0.07471691, 0.89522144,
|
||||
0.39752487, 0.33102718, 0.06192993, 0.04589793, 0.01988679,
|
||||
0.0526265 , 0.02104007, 0.03386636, 0.02634295, 0.05121018,
|
||||
0.29396682, 0.07275173, 0.03386636, 0.15307528, 0.06027915,
|
||||
0.01631789, 0.02045547, 0.01541937, 0.2128508 , 0.04589793,
|
||||
0.02104007, 0.05407977, 0.0311527 , 0.02634295, 0.01498855,
|
||||
0.29396682, 0.20336776, 0.07275173, 0.11637537, 0.25395607,
|
||||
0.64367488, 0.02634295, 0.02164101, 0.07083428, 0.05710047,
|
||||
0.32468619, 0.01160845, 0.01631789, 0.28803008, 0.02634295,
|
||||
0.17267234, 0.02634295, 0.01776301, 0.02709115, 0.14938186,
|
||||
0.26501331, 0.04111287, 0.06362285, 0.07083428, 0.07879431,
|
||||
0.08989109, 0.03680743, 0.0187955 , 0.01541937, 0.03680743,
|
||||
0.03029581, 0.02634295, 0.03029581, 0.07471691, 0.01228768,
|
||||
0.23277197, 0.10505173, 0.06192993, 0.09720799, 0.01416217,
|
||||
0.0187955 , 0.0689636 , 0.02865003, 0.07471691, 0.16460503,
|
||||
0.09720799, 0.02045547, 0.17267234, 0.0311527 , 0.0187955 ,
|
||||
0.15684317, 0.04111287, 0.03293737, 0.02104007, 0.02946178,
|
||||
0.02421701, 0.1353385 , 0.03203302, 0.04111287, 0.10778798,
|
||||
0.07083428, 0.06027915, 0.02164101, 0.06535882, 0.02946178,
|
||||
0.07275173, 0.02490638, 0.01678627, 0.30605146, 0.02164101,
|
||||
0.03482061, 0.03580075, 0.37030921, 0.0182721 , 0.03482061,
|
||||
0.07083428, 0.04226237, 0.03999333, 0.03580075, 0.03203302,
|
||||
0.0182721 , 0.03580075, 0.06027915, 0.03386636, 0.02946178,
|
||||
0.03029581, 0.0689636 , 0.02634295, 0.02634295, 0.03029581,
|
||||
0.02225873, 0.1353385 , 0.08989109, 0.01988679, 0.0526265 ,
|
||||
0.03386636, 0.03386636, 0.02786 , 0.03029581, 0.06535882,
|
||||
0.06535882, 0.03482061, 0.02786 , 0.29396682, 0.03293737,
|
||||
0.12242534, 0.04589793, 0.04589793, 0.03999333, 0.07471691,
|
||||
0.11344884, 0.05407977, 0.03482061, 0.01988679, 0.02045547,
|
||||
0.34389327, 0.14576223, 0.02561486, 0.0689636 , 0.02045547,
|
||||
0.02865003, 0.0526265 , 0.02164101, 0.01776301, 0.08307425,
|
||||
0.11344884, 0.04982997, 0.0182721 , 0.01498855, 0.02865003,
|
||||
0.14221564, 0.07879431, 0.02865003, 0.10237696, 0.04465416,
|
||||
0.07471691, 0.07673078, 0.13200634, 0.02104007, 0.0187955 ,
|
||||
0.01376599, 0.04717464, 0.01128289, 0.05710047, 0.01988679,
|
||||
0.01300612, 0.11936722, 0.03203302, 0.01726786, 0.04589793,
|
||||
0.05407977, 0.09976271, 0.02561486, 0.03999333, 0.02634295,
|
||||
0.03580075, 0.21771181, 0.1353385 , 0.01988679, 0.37704374,
|
||||
0.06027915, 0.02045547, 0.18104935, 0.03999333, 0.18104935,
|
||||
0.15684317, 0.01376599, 0.03293737, 0.08989109, 0.02709115,
|
||||
0.14221564, 0.27065598, 0.10237696, 0.04226237, 0.72991785,
|
||||
0.06713876, 0.04226237, 0.03482061, 0.07879431, 0.07471691,
|
||||
0.15307528, 0.02289366, 0.08989109, 0.02634295, 0.43243779,
|
||||
0.08756457, 0.03293737, 0.02786 , 0.03482061, 0.0187955 ,
|
||||
0.08307425, 0.04589793, 0.07275173, 0.0311527 , 0.04589793,
|
||||
0.08307425, 0.32468619, 0.02289366, 0.02634295, 0.03580075,
|
||||
0.14938186, 0.0526265 , 0.0526265 , 0.53268924, 0.19874565,
|
||||
0.0187955 , 0.01541937, 0.01586237, 0.02045547, 0.02421701,
|
||||
0.02634295, 0.11344884, 0.05710047, 0.05121018, 0.09720799,
|
||||
0.0311527 , 0.0526265 , 0.01586237, 0.07471691, 0.06027915,
|
||||
0.15684317, 0.07879431, 0.02289366, 0.04111287, 0.04848506,
|
||||
0.02865003, 0.04589793, 0.03580075, 0.04111287, 0.1353385 ,
|
||||
0.09976271, 0.06362285, 0.32468619, 0.09976271, 0.49676673,
|
||||
0.07879431, 0.06027915, 0.06027915, 0.05407977, 0.05710047,
|
||||
0.0689636 , 0.11936722, 0.18973955, 0.02709115, 0.03890304,
|
||||
0.02634295, 0.80625182, 0.04111287, 0.0311527 , 0.07879431,
|
||||
0.0193336 , 0.01988679, 0.01376599, 0.07879431, 0.05710047,
|
||||
0.06027915, 0.02104007, 0.0689636 , 0.04717464, 0.04465416,
|
||||
0.07083428, 0.03999333, 0.06192993, 0.05407977, 0.04982997,
|
||||
0.46087756, 0.09720799, 0.04589793, 0.07083428, 0.0193336 ,
|
||||
0.12242534, 0.12242534, 0.05407977, 0.01776301, 0.0311527 ,
|
||||
0.0689636 , 0.02421701, 0.13200634, 0.19874565, 0.03293737,
|
||||
0.82774282], atol=1.0e-8)
|
||||
self.assertAlmostEqual(results.pearson_chi2, 271.21110541713801)
|
||||
np.testing.assert_allclose(results.resid_response,
|
||||
[-0.04226237, -0.03999333, -0.02946178, -0.0689636 , -0.09471181,
|
||||
-0.07879431, -0.04717464, -0.27065598, -0.07471691, 0.10477856,
|
||||
-0.39752487, 0.66897282, -0.06192993, -0.04589793, -0.01988679,
|
||||
-0.0526265 , -0.02104007, -0.03386636, -0.02634295, -0.05121018,
|
||||
-0.29396682, 0.92724827, -0.03386636, -0.15307528, -0.06027915,
|
||||
-0.01631789, -0.02045547, -0.01541937, -0.2128508 , -0.04589793,
|
||||
-0.02104007, -0.05407977, -0.0311527 , -0.02634295, -0.01498855,
|
||||
-0.29396682, 0.79663224, -0.07275173, -0.11637537, 0.74604393,
|
||||
-0.64367488, -0.02634295, -0.02164101, -0.07083428, -0.05710047,
|
||||
-0.32468619, -0.01160845, -0.01631789, -0.28803008, -0.02634295,
|
||||
-0.17267234, -0.02634295, -0.01776301, -0.02709115, 0.85061814,
|
||||
0.73498669, -0.04111287, -0.06362285, -0.07083428, -0.07879431,
|
||||
0.91010891, -0.03680743, -0.0187955 , -0.01541937, -0.03680743,
|
||||
-0.03029581, -0.02634295, -0.03029581, -0.07471691, -0.01228768,
|
||||
0.76722803, -0.10505173, -0.06192993, -0.09720799, -0.01416217,
|
||||
-0.0187955 , -0.0689636 , -0.02865003, -0.07471691, -0.16460503,
|
||||
-0.09720799, -0.02045547, 0.82732766, -0.0311527 , -0.0187955 ,
|
||||
-0.15684317, -0.04111287, -0.03293737, -0.02104007, -0.02946178,
|
||||
-0.02421701, -0.1353385 , -0.03203302, -0.04111287, -0.10778798,
|
||||
-0.07083428, -0.06027915, -0.02164101, -0.06535882, -0.02946178,
|
||||
-0.07275173, -0.02490638, -0.01678627, -0.30605146, -0.02164101,
|
||||
-0.03482061, -0.03580075, 0.62969079, -0.0182721 , -0.03482061,
|
||||
-0.07083428, -0.04226237, -0.03999333, -0.03580075, -0.03203302,
|
||||
-0.0182721 , -0.03580075, -0.06027915, -0.03386636, -0.02946178,
|
||||
-0.03029581, -0.0689636 , -0.02634295, -0.02634295, -0.03029581,
|
||||
-0.02225873, -0.1353385 , -0.08989109, -0.01988679, -0.0526265 ,
|
||||
-0.03386636, -0.03386636, -0.02786 , -0.03029581, -0.06535882,
|
||||
-0.06535882, -0.03482061, -0.02786 , -0.29396682, -0.03293737,
|
||||
-0.12242534, -0.04589793, -0.04589793, -0.03999333, -0.07471691,
|
||||
-0.11344884, -0.05407977, -0.03482061, -0.01988679, -0.02045547,
|
||||
0.65610673, 0.85423777, -0.02561486, -0.0689636 , -0.02045547,
|
||||
-0.02865003, -0.0526265 , -0.02164101, -0.01776301, -0.08307425,
|
||||
-0.11344884, -0.04982997, -0.0182721 , -0.01498855, -0.02865003,
|
||||
-0.14221564, -0.07879431, -0.02865003, -0.10237696, -0.04465416,
|
||||
-0.07471691, -0.07673078, -0.13200634, -0.02104007, -0.0187955 ,
|
||||
-0.01376599, -0.04717464, -0.01128289, 0.94289953, -0.01988679,
|
||||
-0.01300612, -0.11936722, -0.03203302, -0.01726786, -0.04589793,
|
||||
-0.05407977, -0.09976271, -0.02561486, -0.03999333, -0.02634295,
|
||||
-0.03580075, -0.21771181, 0.8646615 , -0.01988679, 0.62295626,
|
||||
-0.06027915, -0.02045547, -0.18104935, 0.96000667, -0.18104935,
|
||||
-0.15684317, -0.01376599, -0.03293737, -0.08989109, -0.02709115,
|
||||
-0.14221564, 0.72934402, -0.10237696, -0.04226237, -0.72991785,
|
||||
-0.06713876, -0.04226237, -0.03482061, -0.07879431, -0.07471691,
|
||||
-0.15307528, 0.97710634, 0.91010891, -0.02634295, -0.43243779,
|
||||
-0.08756457, -0.03293737, -0.02786 , -0.03482061, -0.0187955 ,
|
||||
0.91692575, -0.04589793, -0.07275173, -0.0311527 , -0.04589793,
|
||||
-0.08307425, 0.67531381, -0.02289366, -0.02634295, -0.03580075,
|
||||
-0.14938186, -0.0526265 , -0.0526265 , 0.46731076, -0.19874565,
|
||||
-0.0187955 , -0.01541937, -0.01586237, -0.02045547, -0.02421701,
|
||||
-0.02634295, -0.11344884, -0.05710047, -0.05121018, -0.09720799,
|
||||
0.9688473 , -0.0526265 , -0.01586237, -0.07471691, -0.06027915,
|
||||
-0.15684317, -0.07879431, -0.02289366, -0.04111287, -0.04848506,
|
||||
-0.02865003, -0.04589793, -0.03580075, -0.04111287, -0.1353385 ,
|
||||
-0.09976271, -0.06362285, 0.67531381, -0.09976271, -0.49676673,
|
||||
-0.07879431, -0.06027915, -0.06027915, -0.05407977, -0.05710047,
|
||||
-0.0689636 , -0.11936722, -0.18973955, -0.02709115, -0.03890304,
|
||||
-0.02634295, 0.19374818, -0.04111287, -0.0311527 , -0.07879431,
|
||||
-0.0193336 , -0.01988679, -0.01376599, -0.07879431, 0.94289953,
|
||||
-0.06027915, -0.02104007, -0.0689636 , -0.04717464, -0.04465416,
|
||||
0.92916572, -0.03999333, -0.06192993, -0.05407977, -0.04982997,
|
||||
-0.46087756, -0.09720799, -0.04589793, -0.07083428, -0.0193336 ,
|
||||
-0.12242534, -0.12242534, -0.05407977, -0.01776301, -0.0311527 ,
|
||||
-0.0689636 , -0.02421701, -0.13200634, -0.19874565, -0.03293737,
|
||||
-0.82774282], atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.resid_working,
|
||||
[ -1.71062283e-03, -1.53549840e-03, -8.42423701e-04,
|
||||
-4.42798906e-03, -8.12073047e-03, -5.71934606e-03,
|
||||
-2.12046213e-03, -5.34278480e-02, -5.16550074e-03,
|
||||
9.82823035e-03, -9.52067472e-02, 1.48142818e-01,
|
||||
-3.59779501e-03, -2.00993083e-03, -3.87619325e-04,
|
||||
-2.62379729e-03, -4.33370579e-04, -1.10808799e-03,
|
||||
-6.75670103e-04, -2.48818484e-03, -6.10129090e-02,
|
||||
6.25511612e-02, -1.10808799e-03, -1.98451739e-02,
|
||||
-3.41454749e-03, -2.61928659e-04, -4.09867263e-04,
|
||||
-2.34090923e-04, -3.56621577e-02, -2.00993083e-03,
|
||||
-4.33370579e-04, -2.76645832e-03, -9.40257152e-04,
|
||||
-6.75670103e-04, -2.21289369e-04, -6.10129090e-02,
|
||||
1.29061842e-01, -4.90775251e-03, -1.19671283e-02,
|
||||
1.41347263e-01, -1.47631680e-01, -6.75670103e-04,
|
||||
-4.58198217e-04, -4.66208406e-03, -3.07429001e-03,
|
||||
-7.11923401e-02, -1.33191898e-04, -2.61928659e-04,
|
||||
-5.90659690e-02, -6.75670103e-04, -2.46673839e-02,
|
||||
-6.75670103e-04, -3.09919962e-04, -7.14047519e-04,
|
||||
1.08085429e-01, 1.43161630e-01, -1.62077632e-03,
|
||||
-3.79032977e-03, -4.66208406e-03, -5.71934606e-03,
|
||||
7.44566288e-02, -1.30492035e-03, -3.46630910e-04,
|
||||
-2.34090923e-04, -1.30492035e-03, -8.90029618e-04,
|
||||
-6.75670103e-04, -8.90029618e-04, -5.16550074e-03,
|
||||
-1.49131762e-04, 1.37018624e-01, -9.87652847e-03,
|
||||
-3.59779501e-03, -8.53083698e-03, -1.97726627e-04,
|
||||
-3.46630910e-04, -4.42798906e-03, -7.97307494e-04,
|
||||
-5.16550074e-03, -2.26348718e-02, -8.53083698e-03,
|
||||
-4.09867263e-04, 1.18189219e-01, -9.40257152e-04,
|
||||
-3.46630910e-04, -2.07414715e-02, -1.62077632e-03,
|
||||
-1.04913757e-03, -4.33370579e-04, -8.42423701e-04,
|
||||
-5.72261321e-04, -1.58375811e-02, -9.93244730e-04,
|
||||
-1.62077632e-03, -1.03659408e-02, -4.66208406e-03,
|
||||
-3.41454749e-03, -4.58198217e-04, -3.99257703e-03,
|
||||
-8.42423701e-04, -4.90775251e-03, -6.04877746e-04,
|
||||
-2.77048947e-04, -6.50004229e-02, -4.58198217e-04,
|
||||
-1.17025566e-03, -1.23580799e-03, 1.46831486e-01,
|
||||
-3.27769165e-04, -1.17025566e-03, -4.66208406e-03,
|
||||
-1.71062283e-03, -1.53549840e-03, -1.23580799e-03,
|
||||
-9.93244730e-04, -3.27769165e-04, -1.23580799e-03,
|
||||
-3.41454749e-03, -1.10808799e-03, -8.42423701e-04,
|
||||
-8.90029618e-04, -4.42798906e-03, -6.75670103e-04,
|
||||
-6.75670103e-04, -8.90029618e-04, -4.84422741e-04,
|
||||
-1.58375811e-02, -7.35405096e-03, -3.87619325e-04,
|
||||
-2.62379729e-03, -1.10808799e-03, -1.10808799e-03,
|
||||
-7.54555329e-04, -8.90029618e-04, -3.99257703e-03,
|
||||
-3.99257703e-03, -1.17025566e-03, -7.54555329e-04,
|
||||
-6.10129090e-02, -1.04913757e-03, -1.31530576e-02,
|
||||
-2.00993083e-03, -2.00993083e-03, -1.53549840e-03,
|
||||
-5.16550074e-03, -1.14104800e-02, -2.76645832e-03,
|
||||
-1.17025566e-03, -3.87619325e-04, -4.09867263e-04,
|
||||
1.48037813e-01, 1.06365931e-01, -6.39314594e-04,
|
||||
-4.42798906e-03, -4.09867263e-04, -7.97307494e-04,
|
||||
-2.62379729e-03, -4.58198217e-04, -3.09919962e-04,
|
||||
-6.32800839e-03, -1.14104800e-02, -2.35929680e-03,
|
||||
-3.27769165e-04, -2.21289369e-04, -7.97307494e-04,
|
||||
-1.73489362e-02, -5.71934606e-03, -7.97307494e-04,
|
||||
-9.40802551e-03, -1.90495384e-03, -5.16550074e-03,
|
||||
-5.43585191e-03, -1.51253748e-02, -4.33370579e-04,
|
||||
-3.46630910e-04, -1.86893696e-04, -2.12046213e-03,
|
||||
-1.25867293e-04, 5.07657192e-02, -3.87619325e-04,
|
||||
-1.66959104e-04, -1.25477263e-02, -9.93244730e-04,
|
||||
-2.93030065e-04, -2.00993083e-03, -2.76645832e-03,
|
||||
-8.95970087e-03, -6.39314594e-04, -1.53549840e-03,
|
||||
-6.75670103e-04, -1.23580799e-03, -3.70792339e-02,
|
||||
1.01184411e-01, -3.87619325e-04, 1.46321062e-01,
|
||||
-3.41454749e-03, -4.09867263e-04, -2.68442736e-02,
|
||||
3.68583645e-02, -2.68442736e-02, -2.07414715e-02,
|
||||
-1.86893696e-04, -1.04913757e-03, -7.35405096e-03,
|
||||
-7.14047519e-04, -1.73489362e-02, 1.43973473e-01,
|
||||
-9.40802551e-03, -1.71062283e-03, -1.43894386e-01,
|
||||
-4.20497779e-03, -1.71062283e-03, -1.17025566e-03,
|
||||
-5.71934606e-03, -5.16550074e-03, -1.98451739e-02,
|
||||
2.18574168e-02, 7.44566288e-02, -6.75670103e-04,
|
||||
-1.06135519e-01, -6.99614755e-03, -1.04913757e-03,
|
||||
-7.54555329e-04, -1.17025566e-03, -3.46630910e-04,
|
||||
6.98449121e-02, -2.00993083e-03, -4.90775251e-03,
|
||||
-9.40257152e-04, -2.00993083e-03, -6.32800839e-03,
|
||||
1.48072729e-01, -5.12120512e-04, -6.75670103e-04,
|
||||
-1.23580799e-03, -1.89814939e-02, -2.62379729e-03,
|
||||
-2.62379729e-03, 1.16328328e-01, -3.16494123e-02,
|
||||
-3.46630910e-04, -2.34090923e-04, -2.47623705e-04,
|
||||
-4.09867263e-04, -5.72261321e-04, -6.75670103e-04,
|
||||
-1.14104800e-02, -3.07429001e-03, -2.48818484e-03,
|
||||
-8.53083698e-03, 2.92419496e-02, -2.62379729e-03,
|
||||
-2.47623705e-04, -5.16550074e-03, -3.41454749e-03,
|
||||
-2.07414715e-02, -5.71934606e-03, -5.12120512e-04,
|
||||
-1.62077632e-03, -2.23682205e-03, -7.97307494e-04,
|
||||
-2.00993083e-03, -1.23580799e-03, -1.62077632e-03,
|
||||
-1.58375811e-02, -8.95970087e-03, -3.79032977e-03,
|
||||
1.48072729e-01, -8.95970087e-03, -1.24186489e-01,
|
||||
-5.71934606e-03, -3.41454749e-03, -3.41454749e-03,
|
||||
-2.76645832e-03, -3.07429001e-03, -4.42798906e-03,
|
||||
-1.25477263e-02, -2.91702648e-02, -7.14047519e-04,
|
||||
-1.45456868e-03, -6.75670103e-04, 3.02653681e-02,
|
||||
-1.62077632e-03, -9.40257152e-04, -5.71934606e-03,
|
||||
-3.66561274e-04, -3.87619325e-04, -1.86893696e-04,
|
||||
-5.71934606e-03, 5.07657192e-02, -3.41454749e-03,
|
||||
-4.33370579e-04, -4.42798906e-03, -2.12046213e-03,
|
||||
-1.90495384e-03, 6.11546973e-02, -1.53549840e-03,
|
||||
-3.59779501e-03, -2.76645832e-03, -2.35929680e-03,
|
||||
-1.14513988e-01, -8.53083698e-03, -2.00993083e-03,
|
||||
-4.66208406e-03, -3.66561274e-04, -1.31530576e-02,
|
||||
-1.31530576e-02, -2.76645832e-03, -3.09919962e-04,
|
||||
-9.40257152e-04, -4.42798906e-03, -5.72261321e-04,
|
||||
-1.51253748e-02, -3.16494123e-02, -1.04913757e-03,
|
||||
-1.18023417e-01])
|
||||
np.testing.assert_allclose(results.resid_pearson,
|
||||
[-0.21006498, -0.20410641, -0.17423009, -0.27216147, -0.3234511 ,
|
||||
-0.29246179, -0.22250903, -0.60917574, -0.28416602, 0.3421141 ,
|
||||
-0.81229277, 1.42158361, -0.25694055, -0.21933056, -0.142444 ,
|
||||
-0.23569027, -0.14660243, -0.18722578, -0.16448609, -0.2323235 ,
|
||||
-0.64526275, 3.57006696, -0.18722578, -0.42513819, -0.25327023,
|
||||
-0.12879668, -0.14450826, -0.12514332, -0.5200069 , -0.21933056,
|
||||
-0.14660243, -0.23910582, -0.17931646, -0.16448609, -0.12335569,
|
||||
-0.64526275, 1.97919183, -0.28010679, -0.36290807, 1.71396874,
|
||||
-1.3440334 , -0.16448609, -0.14872695, -0.27610555, -0.24608613,
|
||||
-0.69339243, -0.1083734 , -0.12879668, -0.63604537, -0.16448609,
|
||||
-0.45684893, -0.16448609, -0.13447767, -0.16686977, 2.3862634 ,
|
||||
1.66535145, -0.20706426, -0.26066405, -0.27610555, -0.29246179,
|
||||
3.18191348, -0.19548397, -0.13840353, -0.12514332, -0.19548397,
|
||||
-0.17675498, -0.16448609, -0.17675498, -0.28416602, -0.11153719,
|
||||
1.81550268, -0.34261205, -0.25694055, -0.32813846, -0.11985666,
|
||||
-0.13840353, -0.27216147, -0.17174127, -0.28416602, -0.44389026,
|
||||
-0.32813846, -0.14450826, 2.18890738, -0.17931646, -0.13840353,
|
||||
-0.43129917, -0.20706426, -0.18455132, -0.14660243, -0.17423009,
|
||||
-0.1575374 , -0.39562855, -0.18191506, -0.20706426, -0.34757708,
|
||||
-0.27610555, -0.25327023, -0.14872695, -0.26444152, -0.17423009,
|
||||
-0.28010679, -0.15982038, -0.13066317, -0.66410018, -0.14872695,
|
||||
-0.189939 , -0.19269154, 1.30401147, -0.13642648, -0.189939 ,
|
||||
-0.27610555, -0.21006498, -0.20410641, -0.19269154, -0.18191506,
|
||||
-0.13642648, -0.19269154, -0.25327023, -0.18722578, -0.17423009,
|
||||
-0.17675498, -0.27216147, -0.16448609, -0.16448609, -0.17675498,
|
||||
-0.15088226, -0.39562855, -0.3142763 , -0.142444 , -0.23569027,
|
||||
-0.18722578, -0.18722578, -0.169288 , -0.17675498, -0.26444152,
|
||||
-0.26444152, -0.189939 , -0.169288 , -0.64526275, -0.18455132,
|
||||
-0.3735026 , -0.21933056, -0.21933056, -0.20410641, -0.28416602,
|
||||
-0.35772404, -0.23910582, -0.189939 , -0.142444 , -0.14450826,
|
||||
1.38125991, 2.42084442, -0.16213645, -0.27216147, -0.14450826,
|
||||
-0.17174127, -0.23569027, -0.14872695, -0.13447767, -0.30099975,
|
||||
-0.35772404, -0.22900483, -0.13642648, -0.12335569, -0.17174127,
|
||||
-0.4071783 , -0.29246179, -0.17174127, -0.33771794, -0.21619749,
|
||||
-0.28416602, -0.28828407, -0.38997712, -0.14660243, -0.13840353,
|
||||
-0.11814455, -0.22250903, -0.10682532, 4.06361781, -0.142444 ,
|
||||
-0.11479334, -0.36816723, -0.18191506, -0.1325567 , -0.21933056,
|
||||
-0.23910582, -0.33289374, -0.16213645, -0.20410641, -0.16448609,
|
||||
-0.19269154, -0.52754269, 2.52762346, -0.142444 , 1.28538406,
|
||||
-0.25327023, -0.14450826, -0.47018591, 4.89940505, -0.47018591,
|
||||
-0.43129917, -0.11814455, -0.18455132, -0.3142763 , -0.16686977,
|
||||
-0.4071783 , 1.64156241, -0.33771794, -0.21006498, -1.6439517 ,
|
||||
-0.26827373, -0.21006498, -0.189939 , -0.29246179, -0.28416602,
|
||||
-0.42513819, 6.53301013, 3.18191348, -0.16448609, -0.87288109,
|
||||
-0.30978696, -0.18455132, -0.169288 , -0.189939 , -0.13840353,
|
||||
3.32226189, -0.21933056, -0.28010679, -0.17931646, -0.21933056,
|
||||
-0.30099975, 1.44218477, -0.1530688 , -0.16448609, -0.19269154,
|
||||
-0.41906522, -0.23569027, -0.23569027, 0.93662539, -0.4980393 ,
|
||||
-0.13840353, -0.12514332, -0.12695686, -0.14450826, -0.1575374 ,
|
||||
-0.16448609, -0.35772404, -0.24608613, -0.2323235 , -0.32813846,
|
||||
5.57673284, -0.23569027, -0.12695686, -0.28416602, -0.25327023,
|
||||
-0.43129917, -0.29246179, -0.1530688 , -0.20706426, -0.22573357,
|
||||
-0.17174127, -0.21933056, -0.19269154, -0.20706426, -0.39562855,
|
||||
-0.33289374, -0.26066405, 1.44218477, -0.33289374, -0.99355423,
|
||||
-0.29246179, -0.25327023, -0.25327023, -0.23910582, -0.24608613,
|
||||
-0.27216147, -0.36816723, -0.48391225, -0.16686977, -0.20119082,
|
||||
-0.16448609, 0.49021146, -0.20706426, -0.17931646, -0.29246179,
|
||||
-0.14040923, -0.142444 , -0.11814455, -0.29246179, 4.06361781,
|
||||
-0.25327023, -0.14660243, -0.27216147, -0.22250903, -0.21619749,
|
||||
3.6218033 , -0.20410641, -0.25694055, -0.23910582, -0.22900483,
|
||||
-0.92458976, -0.32813846, -0.21933056, -0.27610555, -0.14040923,
|
||||
-0.3735026 , -0.3735026 , -0.23910582, -0.13447767, -0.17931646,
|
||||
-0.27216147, -0.1575374 , -0.38997712, -0.4980393 , -0.18455132,
|
||||
-2.19209332])
|
||||
np.testing.assert_allclose(results.resid_anscombe,
|
||||
[-0.31237627, -0.3036605 , -0.25978208, -0.40240831, -0.47552289,
|
||||
-0.43149255, -0.33053793, -0.85617194, -0.41962951, 0.50181328,
|
||||
-1.0954382 , 1.66940149, -0.38048321, -0.3259044 , -0.21280762,
|
||||
-0.34971301, -0.21896842, -0.27890356, -0.2454118 , -0.34482158,
|
||||
-0.90063409, 2.80452413, -0.27890356, -0.61652596, -0.37518169,
|
||||
-0.19255932, -0.2158664 , -0.18713159, -0.74270558, -0.3259044 ,
|
||||
-0.21896842, -0.35467084, -0.2672722 , -0.2454118 , -0.18447466,
|
||||
-0.90063409, 2.05763941, -0.41381347, -0.53089521, 1.88552083,
|
||||
-1.60654218, -0.2454118 , -0.22211425, -0.40807333, -0.3647888 ,
|
||||
-0.95861559, -0.16218047, -0.19255932, -0.88935802, -0.2454118 ,
|
||||
-0.65930821, -0.2454118 , -0.20099345, -0.24892975, 2.28774016,
|
||||
1.85167195, -0.30798858, -0.38585584, -0.40807333, -0.43149255,
|
||||
2.65398426, -0.2910267 , -0.20681747, -0.18713159, -0.2910267 ,
|
||||
-0.26350118, -0.2454118 , -0.26350118, -0.41962951, -0.16689207,
|
||||
1.95381191, -0.50251231, -0.38048321, -0.48214234, -0.17927213,
|
||||
-0.20681747, -0.40240831, -0.25611424, -0.41962951, -0.64189694,
|
||||
-0.48214234, -0.2158664 , 2.18071204, -0.2672722 , -0.20681747,
|
||||
-0.62488429, -0.30798858, -0.27497271, -0.21896842, -0.25978208,
|
||||
-0.23514749, -0.57618899, -0.27109582, -0.30798858, -0.50947546,
|
||||
-0.40807333, -0.37518169, -0.22211425, -0.39130036, -0.25978208,
|
||||
-0.41381347, -0.2385213 , -0.19533116, -0.92350689, -0.22211425,
|
||||
-0.28288904, -0.28692985, 1.5730846 , -0.20388497, -0.28288904,
|
||||
-0.40807333, -0.31237627, -0.3036605 , -0.28692985, -0.27109582,
|
||||
-0.20388497, -0.28692985, -0.37518169, -0.27890356, -0.25978208,
|
||||
-0.26350118, -0.40240831, -0.2454118 , -0.2454118 , -0.26350118,
|
||||
-0.22530448, -0.57618899, -0.46253505, -0.21280762, -0.34971301,
|
||||
-0.27890356, -0.27890356, -0.25249702, -0.26350118, -0.39130036,
|
||||
-0.39130036, -0.28288904, -0.25249702, -0.90063409, -0.27497271,
|
||||
-0.5456246 , -0.3259044 , -0.3259044 , -0.3036605 , -0.41962951,
|
||||
-0.52366614, -0.35467084, -0.28288904, -0.21280762, -0.2158664 ,
|
||||
1.63703418, 2.30570989, -0.24194253, -0.40240831, -0.2158664 ,
|
||||
-0.25611424, -0.34971301, -0.22211425, -0.20099345, -0.44366892,
|
||||
-0.52366614, -0.33999576, -0.20388497, -0.18447466, -0.25611424,
|
||||
-0.59203547, -0.43149255, -0.25611424, -0.49563627, -0.32133344,
|
||||
-0.41962951, -0.42552227, -0.56840788, -0.21896842, -0.20681747,
|
||||
-0.17672552, -0.33053793, -0.15987433, 2.9768074 , -0.21280762,
|
||||
-0.17173916, -0.53821445, -0.27109582, -0.19814236, -0.3259044 ,
|
||||
-0.35467084, -0.48884654, -0.24194253, -0.3036605 , -0.2454118 ,
|
||||
-0.28692985, -0.75249089, 2.35983933, -0.21280762, 1.55726719,
|
||||
-0.37518169, -0.2158664 , -0.67712261, 3.23165236, -0.67712261,
|
||||
-0.62488429, -0.17672552, -0.27497271, -0.46253505, -0.24892975,
|
||||
-0.59203547, 1.83482464, -0.49563627, -0.31237627, -1.83652534,
|
||||
-0.39681759, -0.31237627, -0.28288904, -0.43149255, -0.41962951,
|
||||
-0.61652596, 3.63983609, 2.65398426, -0.2454118 , -1.16171662,
|
||||
-0.45616505, -0.27497271, -0.25249702, -0.28288904, -0.20681747,
|
||||
2.71015945, -0.3259044 , -0.41381347, -0.2672722 , -0.3259044 ,
|
||||
-0.44366892, 1.68567947, -0.22853969, -0.2454118 , -0.28692985,
|
||||
-0.60826548, -0.34971301, -0.34971301, 1.2290223 , -0.71397735,
|
||||
-0.20681747, -0.18713159, -0.1898263 , -0.2158664 , -0.23514749,
|
||||
-0.2454118 , -0.52366614, -0.3647888 , -0.34482158, -0.48214234,
|
||||
3.41271513, -0.34971301, -0.1898263 , -0.41962951, -0.37518169,
|
||||
-0.62488429, -0.43149255, -0.22853969, -0.30798858, -0.3352348 ,
|
||||
-0.25611424, -0.3259044 , -0.28692985, -0.30798858, -0.57618899,
|
||||
-0.48884654, -0.38585584, 1.68567947, -0.48884654, -1.28709718,
|
||||
-0.43149255, -0.37518169, -0.37518169, -0.35467084, -0.3647888 ,
|
||||
-0.40240831, -0.53821445, -0.69534436, -0.24892975, -0.29939131,
|
||||
-0.2454118 , 0.70366797, -0.30798858, -0.2672722 , -0.43149255,
|
||||
-0.2097915 , -0.21280762, -0.17672552, -0.43149255, 2.9768074 ,
|
||||
-0.37518169, -0.21896842, -0.40240831, -0.33053793, -0.32133344,
|
||||
2.82351017, -0.3036605 , -0.38048321, -0.35467084, -0.33999576,
|
||||
-1.21650102, -0.48214234, -0.3259044 , -0.40807333, -0.2097915 ,
|
||||
-0.5456246 , -0.5456246 , -0.35467084, -0.20099345, -0.2672722 ,
|
||||
-0.40240831, -0.23514749, -0.56840788, -0.71397735, -0.27497271,
|
||||
-2.18250381])
|
||||
np.testing.assert_allclose(results.resid_deviance,
|
||||
[-0.29387552, -0.2857098 , -0.24455876, -0.37803944, -0.44609851,
|
||||
-0.40514674, -0.31088148, -0.79449324, -0.39409528, 0.47049798,
|
||||
-1.00668653, 1.48698001, -0.35757692, -0.30654405, -0.20043547,
|
||||
-0.32882173, -0.20622595, -0.26249995, -0.23106769, -0.32424676,
|
||||
-0.83437766, 2.28941155, -0.26249995, -0.57644334, -0.35262564,
|
||||
-0.18139734, -0.20331052, -0.17629229, -0.69186337, -0.30654405,
|
||||
-0.20622595, -0.33345774, -0.251588 , -0.23106769, -0.17379306,
|
||||
-0.83437766, 1.78479093, -0.38867448, -0.4974393 , 1.65565332,
|
||||
-1.43660134, -0.23106769, -0.20918228, -0.38332275, -0.34291558,
|
||||
-0.88609006, -0.15281596, -0.18139734, -0.82428104, -0.23106769,
|
||||
-0.61571821, -0.23106769, -0.18932865, -0.234371 , 1.94999969,
|
||||
1.62970871, -0.2897651 , -0.36259328, -0.38332275, -0.40514674,
|
||||
2.19506559, -0.27386827, -0.19480442, -0.17629229, -0.27386827,
|
||||
-0.24804925, -0.23106769, -0.24804925, -0.39409528, -0.15725009,
|
||||
1.7074519 , -0.47114617, -0.35757692, -0.4522457 , -0.16889886,
|
||||
-0.19480442, -0.37803944, -0.24111595, -0.39409528, -0.59975102,
|
||||
-0.4522457 , -0.20331052, 1.87422489, -0.251588 , -0.19480442,
|
||||
-0.5841272 , -0.2897651 , -0.25881274, -0.20622595, -0.24455876,
|
||||
-0.22142749, -0.53929061, -0.25517563, -0.2897651 , -0.47760126,
|
||||
-0.38332275, -0.35262564, -0.20918228, -0.36767536, -0.24455876,
|
||||
-0.38867448, -0.2245965 , -0.18400413, -0.85481866, -0.20918228,
|
||||
-0.26623785, -0.27002708, 1.40955093, -0.19204738, -0.26623785,
|
||||
-0.38332275, -0.29387552, -0.2857098 , -0.27002708, -0.25517563,
|
||||
-0.19204738, -0.27002708, -0.35262564, -0.26249995, -0.24455876,
|
||||
-0.24804925, -0.37803944, -0.23106769, -0.23106769, -0.24804925,
|
||||
-0.21218006, -0.53929061, -0.43402996, -0.20043547, -0.32882173,
|
||||
-0.26249995, -0.26249995, -0.23772023, -0.24804925, -0.36767536,
|
||||
-0.36767536, -0.26623785, -0.23772023, -0.83437766, -0.25881274,
|
||||
-0.51106408, -0.30654405, -0.30654405, -0.2857098 , -0.39409528,
|
||||
-0.49074728, -0.33345774, -0.26623785, -0.20043547, -0.20331052,
|
||||
1.46111186, 1.96253843, -0.22780971, -0.37803944, -0.20331052,
|
||||
-0.24111595, -0.32882173, -0.20918228, -0.18932865, -0.41648237,
|
||||
-0.49074728, -0.31973217, -0.19204738, -0.17379306, -0.24111595,
|
||||
-0.55389988, -0.40514674, -0.24111595, -0.46476893, -0.30226435,
|
||||
-0.39409528, -0.39958581, -0.53211065, -0.20622595, -0.19480442,
|
||||
-0.16650295, -0.31088148, -0.15064545, 2.39288231, -0.20043547,
|
||||
-0.16181126, -0.5042114 , -0.25517563, -0.18664773, -0.30654405,
|
||||
-0.33345774, -0.45846897, -0.22780971, -0.2857098 , -0.23106769,
|
||||
-0.27002708, -0.7007597 , 1.99998811, -0.20043547, 1.39670618,
|
||||
-0.35262564, -0.20331052, -0.63203077, 2.53733821, -0.63203077,
|
||||
-0.5841272 , -0.16650295, -0.25881274, -0.43402996, -0.234371 ,
|
||||
-0.55389988, 1.61672923, -0.46476893, -0.29387552, -1.61804148,
|
||||
-0.37282386, -0.29387552, -0.26623785, -0.40514674, -0.39409528,
|
||||
-0.57644334, 2.74841605, 2.19506559, -0.23106769, -1.06433539,
|
||||
-0.42810736, -0.25881274, -0.23772023, -0.26623785, -0.19480442,
|
||||
2.23070414, -0.30654405, -0.38867448, -0.251588 , -0.30654405,
|
||||
-0.41648237, 1.49993075, -0.21521982, -0.23106769, -0.27002708,
|
||||
-0.5688444 , -0.32882173, -0.32882173, 1.12233423, -0.66569789,
|
||||
-0.19480442, -0.17629229, -0.17882689, -0.20331052, -0.22142749,
|
||||
-0.23106769, -0.49074728, -0.34291558, -0.32424676, -0.4522457 ,
|
||||
2.63395309, -0.32882173, -0.17882689, -0.39409528, -0.35262564,
|
||||
-0.5841272 , -0.40514674, -0.21521982, -0.2897651 , -0.3152773 ,
|
||||
-0.24111595, -0.30654405, -0.27002708, -0.2897651 , -0.53929061,
|
||||
-0.45846897, -0.36259328, 1.49993075, -0.45846897, -1.17192274,
|
||||
-0.40514674, -0.35262564, -0.35262564, -0.33345774, -0.34291558,
|
||||
-0.37803944, -0.5042114 , -0.64869028, -0.234371 , -0.28170899,
|
||||
-0.23106769, 0.65629132, -0.2897651 , -0.251588 , -0.40514674,
|
||||
-0.19760028, -0.20043547, -0.16650295, -0.40514674, 2.39288231,
|
||||
-0.35262564, -0.20622595, -0.37803944, -0.31088148, -0.30226435,
|
||||
2.30104857, -0.2857098 , -0.35757692, -0.33345774, -0.31973217,
|
||||
-1.11158678, -0.4522457 , -0.30654405, -0.38332275, -0.19760028,
|
||||
-0.51106408, -0.51106408, -0.33345774, -0.18932865, -0.251588 ,
|
||||
-0.37803944, -0.22142749, -0.53211065, -0.66569789, -0.25881274,
|
||||
-1.87550882])
|
||||
np.testing.assert_allclose(results.null,
|
||||
[ 0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759])
|
||||
self.assertAlmostEqual(results.D2, .200712816165)
|
||||
self.assertAlmostEqual(results.adj_D2, 0.19816731557930456)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
350
src/py/crankshaft/crankshaft/regression/glm/utils.py
Normal file
350
src/py/crankshaft/crankshaft/regression/glm/utils.py
Normal file
@ -0,0 +1,350 @@
|
||||
|
||||
from __future__ import absolute_import, print_function
|
||||
import numpy as np
|
||||
import warnings
|
||||
|
||||
|
||||
def _bit_length_26(x):
|
||||
if x == 0:
|
||||
return 0
|
||||
elif x == 1:
|
||||
return 1
|
||||
else:
|
||||
return len(bin(x)) - 2
|
||||
|
||||
|
||||
try:
|
||||
from scipy.lib._version import NumpyVersion
|
||||
except ImportError:
|
||||
import re
|
||||
string_types = basestring
|
||||
|
||||
class NumpyVersion():
|
||||
"""Parse and compare numpy version strings.
|
||||
Numpy has the following versioning scheme (numbers given are examples; they
|
||||
can be >9) in principle):
|
||||
- Released version: '1.8.0', '1.8.1', etc.
|
||||
- Alpha: '1.8.0a1', '1.8.0a2', etc.
|
||||
- Beta: '1.8.0b1', '1.8.0b2', etc.
|
||||
- Release candidates: '1.8.0rc1', '1.8.0rc2', etc.
|
||||
- Development versions: '1.8.0.dev-f1234afa' (git commit hash appended)
|
||||
- Development versions after a1: '1.8.0a1.dev-f1234afa',
|
||||
'1.8.0b2.dev-f1234afa',
|
||||
'1.8.1rc1.dev-f1234afa', etc.
|
||||
- Development versions (no git hash available): '1.8.0.dev-Unknown'
|
||||
Comparing needs to be done against a valid version string or other
|
||||
`NumpyVersion` instance.
|
||||
Parameters
|
||||
----------
|
||||
vstring : str
|
||||
Numpy version string (``np.__version__``).
|
||||
Notes
|
||||
-----
|
||||
All dev versions of the same (pre-)release compare equal.
|
||||
Examples
|
||||
--------
|
||||
>>> from scipy.lib._version import NumpyVersion
|
||||
>>> if NumpyVersion(np.__version__) < '1.7.0':
|
||||
... print('skip')
|
||||
skip
|
||||
>>> NumpyVersion('1.7') # raises ValueError, add ".0"
|
||||
"""
|
||||
|
||||
def __init__(self, vstring):
|
||||
self.vstring = vstring
|
||||
ver_main = re.match(r'\d[.]\d+[.]\d+', vstring)
|
||||
if not ver_main:
|
||||
raise ValueError("Not a valid numpy version string")
|
||||
|
||||
self.version = ver_main.group()
|
||||
self.major, self.minor, self.bugfix = [int(x) for x in
|
||||
self.version.split('.')]
|
||||
if len(vstring) == ver_main.end():
|
||||
self.pre_release = 'final'
|
||||
else:
|
||||
alpha = re.match(r'a\d', vstring[ver_main.end():])
|
||||
beta = re.match(r'b\d', vstring[ver_main.end():])
|
||||
rc = re.match(r'rc\d', vstring[ver_main.end():])
|
||||
pre_rel = [m for m in [alpha, beta, rc] if m is not None]
|
||||
if pre_rel:
|
||||
self.pre_release = pre_rel[0].group()
|
||||
else:
|
||||
self.pre_release = ''
|
||||
|
||||
self.is_devversion = bool(re.search(r'.dev-', vstring))
|
||||
|
||||
def _compare_version(self, other):
|
||||
"""Compare major.minor.bugfix"""
|
||||
if self.major == other.major:
|
||||
if self.minor == other.minor:
|
||||
if self.bugfix == other.bugfix:
|
||||
vercmp = 0
|
||||
elif self.bugfix > other.bugfix:
|
||||
vercmp = 1
|
||||
else:
|
||||
vercmp = -1
|
||||
elif self.minor > other.minor:
|
||||
vercmp = 1
|
||||
else:
|
||||
vercmp = -1
|
||||
elif self.major > other.major:
|
||||
vercmp = 1
|
||||
else:
|
||||
vercmp = -1
|
||||
|
||||
return vercmp
|
||||
|
||||
def _compare_pre_release(self, other):
|
||||
"""Compare alpha/beta/rc/final."""
|
||||
if self.pre_release == other.pre_release:
|
||||
vercmp = 0
|
||||
elif self.pre_release == 'final':
|
||||
vercmp = 1
|
||||
elif other.pre_release == 'final':
|
||||
vercmp = -1
|
||||
elif self.pre_release > other.pre_release:
|
||||
vercmp = 1
|
||||
else:
|
||||
vercmp = -1
|
||||
|
||||
return vercmp
|
||||
|
||||
def _compare(self, other):
|
||||
if not isinstance(other, (string_types, NumpyVersion)):
|
||||
raise ValueError("Invalid object to compare with NumpyVersion.")
|
||||
|
||||
if isinstance(other, string_types):
|
||||
other = NumpyVersion(other)
|
||||
|
||||
vercmp = self._compare_version(other)
|
||||
if vercmp == 0:
|
||||
# Same x.y.z version, check for alpha/beta/rc
|
||||
vercmp = self._compare_pre_release(other)
|
||||
if vercmp == 0:
|
||||
# Same version and same pre-release, check if dev version
|
||||
if self.is_devversion is other.is_devversion:
|
||||
vercmp = 0
|
||||
elif self.is_devversion:
|
||||
vercmp = -1
|
||||
else:
|
||||
vercmp = 1
|
||||
|
||||
return vercmp
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other) < 0
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other) <= 0
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other) == 0
|
||||
|
||||
def __ne__(self, other):
|
||||
return self._compare(other) != 0
|
||||
|
||||
def __gt__(self, other):
|
||||
return self._compare(other) > 0
|
||||
|
||||
def __ge__(self, other):
|
||||
return self._compare(other) >= 0
|
||||
|
||||
def __repr(self):
|
||||
return "NumpyVersion(%s)" % self.vstring
|
||||
|
||||
|
||||
def _next_regular(target):
|
||||
"""
|
||||
Find the next regular number greater than or equal to target.
|
||||
Regular numbers are composites of the prime factors 2, 3, and 5.
|
||||
Also known as 5-smooth numbers or Hamming numbers, these are the optimal
|
||||
size for inputs to FFTPACK.
|
||||
Target must be a positive integer.
|
||||
"""
|
||||
if target <= 6:
|
||||
return target
|
||||
|
||||
# Quickly check if it's already a power of 2
|
||||
if not (target & (target - 1)):
|
||||
return target
|
||||
|
||||
match = float('inf') # Anything found will be smaller
|
||||
p5 = 1
|
||||
while p5 < target:
|
||||
p35 = p5
|
||||
while p35 < target:
|
||||
# Ceiling integer division, avoiding conversion to float
|
||||
# (quotient = ceil(target / p35))
|
||||
quotient = -(-target // p35)
|
||||
# Quickly find next power of 2 >= quotient
|
||||
try:
|
||||
p2 = 2 ** ((quotient - 1).bit_length())
|
||||
except AttributeError:
|
||||
# Fallback for Python <2.7
|
||||
p2 = 2 ** _bit_length_26(quotient - 1)
|
||||
|
||||
N = p2 * p35
|
||||
if N == target:
|
||||
return N
|
||||
elif N < match:
|
||||
match = N
|
||||
p35 *= 3
|
||||
if p35 == target:
|
||||
return p35
|
||||
if p35 < match:
|
||||
match = p35
|
||||
p5 *= 5
|
||||
if p5 == target:
|
||||
return p5
|
||||
if p5 < match:
|
||||
match = p5
|
||||
return match
|
||||
if NumpyVersion(np.__version__) >= '1.7.1':
|
||||
np_matrix_rank = np.linalg.matrix_rank
|
||||
else:
|
||||
def np_matrix_rank(M, tol=None):
|
||||
"""
|
||||
Return matrix rank of array using SVD method
|
||||
Rank of the array is the number of SVD singular values of the array that are
|
||||
greater than `tol`.
|
||||
Parameters
|
||||
----------
|
||||
M : {(M,), (M, N)} array_like
|
||||
array of <=2 dimensions
|
||||
tol : {None, float}, optional
|
||||
threshold below which SVD values are considered zero. If `tol` is
|
||||
None, and ``S`` is an array with singular values for `M`, and
|
||||
``eps`` is the epsilon value for datatype of ``S``, then `tol` is
|
||||
set to ``S.max() * max(M.shape) * eps``.
|
||||
Notes
|
||||
-----
|
||||
The default threshold to detect rank deficiency is a test on the magnitude
|
||||
of the singular values of `M`. By default, we identify singular values less
|
||||
than ``S.max() * max(M.shape) * eps`` as indicating rank deficiency (with
|
||||
the symbols defined above). This is the algorithm MATLAB uses [1]. It also
|
||||
appears in *Numerical recipes* in the discussion of SVD solutions for linear
|
||||
least squares [2].
|
||||
This default threshold is designed to detect rank deficiency accounting for
|
||||
the numerical errors of the SVD computation. Imagine that there is a column
|
||||
in `M` that is an exact (in floating point) linear combination of other
|
||||
columns in `M`. Computing the SVD on `M` will not produce a singular value
|
||||
exactly equal to 0 in general: any difference of the smallest SVD value from
|
||||
0 will be caused by numerical imprecision in the calculation of the SVD.
|
||||
Our threshold for small SVD values takes this numerical imprecision into
|
||||
account, and the default threshold will detect such numerical rank
|
||||
deficiency. The threshold may declare a matrix `M` rank deficient even if
|
||||
the linear combination of some columns of `M` is not exactly equal to
|
||||
another column of `M` but only numerically very close to another column of
|
||||
`M`.
|
||||
We chose our default threshold because it is in wide use. Other thresholds
|
||||
are possible. For example, elsewhere in the 2007 edition of *Numerical
|
||||
recipes* there is an alternative threshold of ``S.max() *
|
||||
np.finfo(M.dtype).eps / 2. * np.sqrt(m + n + 1.)``. The authors describe
|
||||
this threshold as being based on "expected roundoff error" (p 71).
|
||||
The thresholds above deal with floating point roundoff error in the
|
||||
calculation of the SVD. However, you may have more information about the
|
||||
sources of error in `M` that would make you consider other tolerance values
|
||||
to detect *effective* rank deficiency. The most useful measure of the
|
||||
tolerance depends on the operations you intend to use on your matrix. For
|
||||
example, if your data come from uncertain measurements with uncertainties
|
||||
greater than floating point epsilon, choosing a tolerance near that
|
||||
uncertainty may be preferable. The tolerance may be absolute if the
|
||||
uncertainties are absolute rather than relative.
|
||||
References
|
||||
----------
|
||||
.. [1] MATLAB reference documention, "Rank"
|
||||
http://www.mathworks.com/help/techdoc/ref/rank.html
|
||||
.. [2] W. H. Press, S. A. Teukolsky, W. T. Vetterling and B. P. Flannery,
|
||||
"Numerical Recipes (3rd edition)", Cambridge University Press, 2007,
|
||||
page 795.
|
||||
Examples
|
||||
--------
|
||||
>>> from numpy.linalg import matrix_rank
|
||||
>>> matrix_rank(np.eye(4)) # Full rank matrix
|
||||
4
|
||||
>>> I=np.eye(4); I[-1,-1] = 0. # rank deficient matrix
|
||||
>>> matrix_rank(I)
|
||||
3
|
||||
>>> matrix_rank(np.ones((4,))) # 1 dimension - rank 1 unless all 0
|
||||
1
|
||||
>>> matrix_rank(np.zeros((4,)))
|
||||
0
|
||||
"""
|
||||
M = np.asarray(M)
|
||||
if M.ndim > 2:
|
||||
raise TypeError('array should have 2 or fewer dimensions')
|
||||
if M.ndim < 2:
|
||||
return int(not all(M == 0))
|
||||
S = np.linalg.svd(M, compute_uv=False)
|
||||
if tol is None:
|
||||
tol = S.max() * max(M.shape) * np.finfo(S.dtype).eps
|
||||
return np.sum(S > tol)
|
||||
|
||||
|
||||
|
||||
class CacheWriteWarning(UserWarning):
|
||||
pass
|
||||
|
||||
class CachedAttribute(object):
|
||||
|
||||
def __init__(self, func, cachename=None, resetlist=None):
|
||||
self.fget = func
|
||||
self.name = func.__name__
|
||||
self.cachename = cachename or '_cache'
|
||||
self.resetlist = resetlist or ()
|
||||
|
||||
def __get__(self, obj, type=None):
|
||||
if obj is None:
|
||||
return self.fget
|
||||
# Get the cache or set a default one if needed
|
||||
_cachename = self.cachename
|
||||
_cache = getattr(obj, _cachename, None)
|
||||
if _cache is None:
|
||||
setattr(obj, _cachename, resettable_cache())
|
||||
_cache = getattr(obj, _cachename)
|
||||
# Get the name of the attribute to set and cache
|
||||
name = self.name
|
||||
_cachedval = _cache.get(name, None)
|
||||
# print("[_cachedval=%s]" % _cachedval)
|
||||
if _cachedval is None:
|
||||
# Call the "fget" function
|
||||
_cachedval = self.fget(obj)
|
||||
# Set the attribute in obj
|
||||
# print("Setting %s in cache to %s" % (name, _cachedval))
|
||||
try:
|
||||
_cache[name] = _cachedval
|
||||
except KeyError:
|
||||
setattr(_cache, name, _cachedval)
|
||||
# Update the reset list if needed (and possible)
|
||||
resetlist = self.resetlist
|
||||
if resetlist is not ():
|
||||
try:
|
||||
_cache._resetdict[name] = self.resetlist
|
||||
except AttributeError:
|
||||
pass
|
||||
# else:
|
||||
# print("Reading %s from cache (%s)" % (name, _cachedval))
|
||||
return _cachedval
|
||||
|
||||
def __set__(self, obj, value):
|
||||
errmsg = "The attribute '%s' cannot be overwritten" % self.name
|
||||
warnings.warn(errmsg, CacheWriteWarning)
|
||||
|
||||
|
||||
class _cache_readonly(object):
|
||||
"""
|
||||
Decorator for CachedAttribute
|
||||
"""
|
||||
|
||||
def __init__(self, cachename=None, resetlist=None):
|
||||
self.func = None
|
||||
self.cachename = cachename
|
||||
self.resetlist = resetlist or None
|
||||
|
||||
def __call__(self, func):
|
||||
return CachedAttribute(func,
|
||||
cachename=self.cachename,
|
||||
resetlist=self.resetlist)
|
||||
cache_readonly = _cache_readonly()
|
||||
|
||||
|
284
src/py/crankshaft/crankshaft/regression/glm/varfuncs.py
Normal file
284
src/py/crankshaft/crankshaft/regression/glm/varfuncs.py
Normal file
@ -0,0 +1,284 @@
|
||||
"""
|
||||
Variance functions for use with the link functions in statsmodels.family.links
|
||||
"""
|
||||
|
||||
__docformat__ = 'restructuredtext'
|
||||
|
||||
import numpy as np
|
||||
FLOAT_EPS = np.finfo(float).eps
|
||||
|
||||
class VarianceFunction(object):
|
||||
"""
|
||||
Relates the variance of a random variable to its mean. Defaults to 1.
|
||||
|
||||
Methods
|
||||
-------
|
||||
call
|
||||
Returns an array of ones that is the same shape as `mu`
|
||||
|
||||
Notes
|
||||
-----
|
||||
After a variance function is initialized, its call method can be used.
|
||||
|
||||
Alias for VarianceFunction:
|
||||
constant = VarianceFunction()
|
||||
|
||||
See also
|
||||
--------
|
||||
statsmodels.family.family
|
||||
"""
|
||||
|
||||
def __call__(self, mu):
|
||||
"""
|
||||
Default variance function
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
mu : array-like
|
||||
mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
v : array
|
||||
ones(mu.shape)
|
||||
"""
|
||||
mu = np.asarray(mu)
|
||||
return np.ones(mu.shape, np.float64)
|
||||
|
||||
|
||||
def deriv(self, mu):
|
||||
"""
|
||||
Derivative of the variance function v'(mu)
|
||||
"""
|
||||
from statsmodels.tools.numdiff import approx_fprime_cs
|
||||
# TODO: diag workaround proplem with numdiff for 1d
|
||||
return np.diag(approx_fprime_cs(mu, self))
|
||||
|
||||
|
||||
constant = VarianceFunction()
|
||||
constant.__doc__ = """
|
||||
The call method of constant returns a constant variance, i.e., a vector of ones.
|
||||
|
||||
constant is an alias of VarianceFunction()
|
||||
"""
|
||||
|
||||
class Power(object):
|
||||
"""
|
||||
Power variance function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
power : float
|
||||
exponent used in power variance function
|
||||
|
||||
Methods
|
||||
-------
|
||||
call
|
||||
Returns the power variance
|
||||
|
||||
Formulas
|
||||
--------
|
||||
V(mu) = numpy.fabs(mu)**power
|
||||
|
||||
Notes
|
||||
-----
|
||||
Aliases for Power:
|
||||
mu = Power()
|
||||
mu_squared = Power(power=2)
|
||||
mu_cubed = Power(power=3)
|
||||
"""
|
||||
|
||||
def __init__(self, power=1.):
|
||||
self.power = power
|
||||
|
||||
def __call__(self, mu):
|
||||
"""
|
||||
Power variance function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
mu : array-like
|
||||
mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
variance : array
|
||||
numpy.fabs(mu)**self.power
|
||||
"""
|
||||
return np.power(np.fabs(mu), self.power)
|
||||
|
||||
|
||||
def deriv(self, mu):
|
||||
"""
|
||||
Derivative of the variance function v'(mu)
|
||||
"""
|
||||
from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime
|
||||
#return approx_fprime_cs(mu, self) # TODO fix breaks in `fabs
|
||||
# TODO: diag is workaround problem with numdiff for 1d
|
||||
return np.diag(approx_fprime(mu, self))
|
||||
|
||||
|
||||
mu = Power()
|
||||
mu.__doc__ = """
|
||||
Returns np.fabs(mu)
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is an alias of Power()
|
||||
"""
|
||||
mu_squared = Power(power=2)
|
||||
mu_squared.__doc__ = """
|
||||
Returns np.fabs(mu)**2
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is an alias of statsmodels.family.links.Power(power=2)
|
||||
"""
|
||||
mu_cubed = Power(power=3)
|
||||
mu_cubed.__doc__ = """
|
||||
Returns np.fabs(mu)**3
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is an alias of statsmodels.family.links.Power(power=3)
|
||||
"""
|
||||
|
||||
class Binomial(object):
|
||||
"""
|
||||
Binomial variance function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n : int, optional
|
||||
The number of trials for a binomial variable. The default is 1 for
|
||||
p in (0,1)
|
||||
|
||||
Methods
|
||||
-------
|
||||
call
|
||||
Returns the binomial variance
|
||||
|
||||
Formulas
|
||||
--------
|
||||
V(mu) = p * (1 - p) * n
|
||||
|
||||
where p = mu / n
|
||||
|
||||
Notes
|
||||
-----
|
||||
Alias for Binomial:
|
||||
binary = Binomial()
|
||||
|
||||
A private method _clean trims the data by machine epsilon so that p is
|
||||
in (0,1)
|
||||
"""
|
||||
|
||||
def __init__(self, n=1):
|
||||
self.n = n
|
||||
|
||||
def _clean(self, p):
|
||||
return np.clip(p, FLOAT_EPS, 1 - FLOAT_EPS)
|
||||
|
||||
def __call__(self, mu):
|
||||
"""
|
||||
Binomial variance function
|
||||
|
||||
Parameters
|
||||
-----------
|
||||
mu : array-like
|
||||
mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
variance : array
|
||||
variance = mu/n * (1 - mu/n) * self.n
|
||||
"""
|
||||
p = self._clean(mu / self.n)
|
||||
return p * (1 - p) * self.n
|
||||
|
||||
#TODO: inherit from super
|
||||
def deriv(self, mu):
|
||||
"""
|
||||
Derivative of the variance function v'(mu)
|
||||
"""
|
||||
from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime
|
||||
# TODO: diag workaround proplem with numdiff for 1d
|
||||
return np.diag(approx_fprime_cs(mu, self))
|
||||
|
||||
|
||||
binary = Binomial()
|
||||
binary.__doc__ = """
|
||||
The binomial variance function for n = 1
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is an alias of Binomial(n=1)
|
||||
"""
|
||||
|
||||
class NegativeBinomial(object):
|
||||
'''
|
||||
Negative binomial variance function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
alpha : float
|
||||
The ancillary parameter for the negative binomial variance function.
|
||||
`alpha` is assumed to be nonstochastic. The default is 1.
|
||||
|
||||
Methods
|
||||
-------
|
||||
call
|
||||
Returns the negative binomial variance
|
||||
|
||||
Formulas
|
||||
--------
|
||||
V(mu) = mu + alpha*mu**2
|
||||
|
||||
Notes
|
||||
-----
|
||||
Alias for NegativeBinomial:
|
||||
nbinom = NegativeBinomial()
|
||||
|
||||
A private method _clean trims the data by machine epsilon so that p is
|
||||
in (0,inf)
|
||||
'''
|
||||
|
||||
def __init__(self, alpha=1.):
|
||||
self.alpha = alpha
|
||||
|
||||
def _clean(self, p):
|
||||
return np.clip(p, FLOAT_EPS, np.inf)
|
||||
|
||||
def __call__(self, mu):
|
||||
"""
|
||||
Negative binomial variance function
|
||||
|
||||
Parameters
|
||||
----------
|
||||
mu : array-like
|
||||
mean parameters
|
||||
|
||||
Returns
|
||||
-------
|
||||
variance : array
|
||||
variance = mu + alpha*mu**2
|
||||
"""
|
||||
p = self._clean(mu)
|
||||
return p + self.alpha*p**2
|
||||
|
||||
def deriv(self, mu):
|
||||
"""
|
||||
Derivative of the negative binomial variance function.
|
||||
"""
|
||||
|
||||
p = self._clean(mu)
|
||||
return 1 + 2 * self.alpha * p
|
||||
|
||||
nbinom = NegativeBinomial()
|
||||
nbinom.__doc__ = """
|
||||
Negative Binomial variance function.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is an alias of NegativeBinomial(alpha=1.)
|
||||
"""
|
@ -0,0 +1,169 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import pysal as ps\n",
|
||||
"sys.path.append('/Users/toshan/Dropbox/GWR/PyGWRJing/PyGWR/')\n",
|
||||
"from M_FBGWR_May2016 import FBGWR\n",
|
||||
"from M_GWGLM import GWGLM\n",
|
||||
"from M_selection import Band_Sel\n",
|
||||
"import scipy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"path = ps.examples.get_path('GData_utm.csv')\n",
|
||||
"shp = pd.read_csv(path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prep data into design matrix and coordinates\n",
|
||||
"\n",
|
||||
"#Dependent variable\n",
|
||||
"y = shp.PctBach.reshape((-1,1))\n",
|
||||
"\n",
|
||||
"#Design matrix - covariates - intercept added automatically\n",
|
||||
"pov = shp.PctPov.reshape((-1,1))\n",
|
||||
"rural = shp.PctRural.reshape((-1,1))\n",
|
||||
"blk = shp.PctBlack.reshape((-1,1))\n",
|
||||
"X = np.hstack([pov, rural, blk])\n",
|
||||
"labels = ['Intercept', 'PctPov', 'PctRural', 'PctBlack']\n",
|
||||
"\n",
|
||||
"#Coordinates for calibration points\n",
|
||||
"u = shp.X\n",
|
||||
"v = shp.Y\n",
|
||||
"coords = zip(u,v)\n",
|
||||
"\n",
|
||||
"coords_dict = {}\n",
|
||||
"for i, x in enumerate(coords):\n",
|
||||
" coords_dict[i] = x"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(60.0, <Kernel.GWR_W object at 0x10b73a450>, [(89.0, 1088.4132720309442), (73.0, 1082.7676645259439), (62.0, 1079.8414440181946), (62.0, 1079.8414440181946), (62.0, 1079.8414440181946), (60.0, 1079.6668177916372), (60.0, 1079.6668177916372), (60.0, 1079.6668177916372)])\n",
|
||||
"CPU times: user 1.75 s, sys: 80.7 ms, total: 1.83 s\n",
|
||||
"Wall time: 1.75 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"print Band_Sel(y, None, X, coords_dict)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pysal.contrib.gwr.sel_bw import Sel_BW\n",
|
||||
"from pysal.contrib.gwr.gwr import GWR"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"60.0\n",
|
||||
"CPU times: user 1.1 s, sys: 3.5 ms, total: 1.11 s\n",
|
||||
"Wall time: 1.1 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"print Sel_BW(coords, y, X, [], kernel='bisquare', constant=False).search()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"results = FBGWR(y, X, coords_dict, tolFB=1e-03)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"sel = Sel_BW(coords, y, X, [], kernel='bisquare', fb=True, constant=False)\n",
|
||||
"results = sel.search(tol_fb=1e-03)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "Python [fbgwr]",
|
||||
"language": "python",
|
||||
"name": "Python [fbgwr]"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@ -0,0 +1,6 @@
|
||||
{
|
||||
"cells": [],
|
||||
"metadata": {},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,6 @@
|
||||
{
|
||||
"cells": [],
|
||||
"metadata": {},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
@ -0,0 +1,6 @@
|
||||
{
|
||||
"cells": [],
|
||||
"metadata": {},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@ -0,0 +1,571 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pysal as ps\n",
|
||||
"from pysal.weights.Distance import Kernel\n",
|
||||
"import sys\n",
|
||||
"sys.path.append('/Users/toshan/dev/pysal/pysal/weights')\n",
|
||||
"from Distance import Kernel as kn\n",
|
||||
"from util import full2W as f2w"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n = 5 #number of observations\n",
|
||||
"m = 3 #number of calibration points"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"x = np.random.randint(1,1000, n)\n",
|
||||
"y = np.random.randint(1,1000, n)\n",
|
||||
"D1 = zip(x,y)\n",
|
||||
"W1 = kn(D1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"x = np.random.randint(1,1000, m)\n",
|
||||
"y = np.random.randint(1,1000, m)\n",
|
||||
"D2 = zip(x,y)\n",
|
||||
"W2 = kn(D2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[ 0.39894228, 0.1009935 , 0.05619733, 0.04896355, 0.32650695,\n",
|
||||
" 0.37879836, 0.31411809, 0.1073076 ],\n",
|
||||
" [ 0.1009935 , 0.39894228, 0.36195557, 0.36626279, 0.07720723,\n",
|
||||
" 0.11754135, 0.03669346, 0.33721825],\n",
|
||||
" [ 0.05619733, 0.36195557, 0.39894228, 0.39108403, 0.0345001 ,\n",
|
||||
" 0.06151089, 0.02103471, 0.24420603],\n",
|
||||
" [ 0.04896355, 0.36626279, 0.39108403, 0.39894228, 0.0335092 ,\n",
|
||||
" 0.05713611, 0.01604658, 0.26976648],\n",
|
||||
" [ 0.32650695, 0.07720723, 0.0345001 , 0.0335092 , 0.39894228,\n",
|
||||
" 0.37224696, 0.18985479, 0.11774827],\n",
|
||||
" [ 0.37879836, 0.11754135, 0.06151089, 0.05713611, 0.37224696,\n",
|
||||
" 0.39894228, 0.24197075, 0.14519262],\n",
|
||||
" [ 0.31411809, 0.03669346, 0.02103471, 0.01604658, 0.18985479,\n",
|
||||
" 0.24197075, 0.39894228, 0.03119528],\n",
|
||||
" [ 0.1073076 , 0.33721825, 0.24420603, 0.26976648, 0.11774827,\n",
|
||||
" 0.14519262, 0.03119528, 0.39894228]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"D3 = np.vstack([D1,D2])\n",
|
||||
"W3 = Kernel(D3, function='gaussian')\n",
|
||||
"W3 = kn(D3, function='gaussian', truncate=False)\n",
|
||||
"\n",
|
||||
"W3.full()[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n",
|
||||
"a\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"coord_ids = np.arange(n)\n",
|
||||
"points_ids = np.arange(n, n+m)\n",
|
||||
"all_ids = np.arange(n+m)\n",
|
||||
"dists = np.zeros((m,n))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"for i in all_ids:\n",
|
||||
" if i in points_ids:\n",
|
||||
" for j in coord_ids:\n",
|
||||
" if j in W3[j].keys():\n",
|
||||
" if i >= n:\n",
|
||||
" print 'a'\n",
|
||||
" dists[i-n][j] = W3.full()[0][i][j]\n",
|
||||
" elif j >= m:\n",
|
||||
" print 'b'\n",
|
||||
" dists[i][j-m] = W3.full()[0][i][j]\n",
|
||||
" elif (i >= n) & (j >= m):\n",
|
||||
" print 'c'\n",
|
||||
" dists[i-n][j-m] = W3.full()[0][i][j]\n",
|
||||
" else:\n",
|
||||
" print 'd'\n",
|
||||
" dists[i][j] = W3.full()[0][i][j]\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[ 0.37879836, 0.11754135, 0.06151089, 0.05713611, 0.37224696],\n",
|
||||
" [ 0.31411809, 0.03669346, 0.02103471, 0.01604658, 0.18985479],\n",
|
||||
" [ 0.1073076 , 0.33721825, 0.24420603, 0.26976648, 0.11774827]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dists"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "3 is not in list",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-22-3036666a721b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf2w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdists\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||
"\u001b[0;32m//anaconda/lib/python2.7/site-packages/pysal/weights/weights.pyc\u001b[0m in \u001b[0;36mfull\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 953\u001b[0m \"\"\"\n\u001b[0;32m--> 954\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 955\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 956\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtowsp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m//anaconda/lib/python2.7/site-packages/pysal/weights/util.pyc\u001b[0m in \u001b[0;36mfull\u001b[0;34m(w)\u001b[0m\n\u001b[1;32m 711\u001b[0m \u001b[0mw_i\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweights\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 712\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwij\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_i\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw_i\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 713\u001b[0;31m \u001b[0mc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 714\u001b[0m \u001b[0mwfull\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwij\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mwfull\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mValueError\u001b[0m: 3 is not in list"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"w = f2w(dists)\n",
|
||||
"w.full()[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"new_neighbs = {}\n",
|
||||
"new_weights = {}\n",
|
||||
"for each in W2.neighbors:\n",
|
||||
" three = set(W3.neighbors[each])\n",
|
||||
" two = set(W2.neighbors[each])\n",
|
||||
" new_neighbs[each] = list(three.difference(two))\n",
|
||||
" new_weights[each] = {}\n",
|
||||
" for weight in new_neighbs[each]:\n",
|
||||
" new_weights[each][weight] = W3[each][weight]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\nfor ids in all_ids:\\n if ids in points_ids:\\n _all = set(W3.neighbors[ids])\\n points = set(W2.neighbors[ids])\\n new_neighbs[ids] = list(_all.difference(points))\\n new_weights[ids] = {}\\n for weight in new_neighbs[ids]:\\n new_weights[ids][weight] = W3[ids][weight]\\n\\n else:\\n new_weights[ids] = {}\\n'"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"coord_ids = W1.id_order\n",
|
||||
"points_ids = W2.id_order\n",
|
||||
"all_ids = W3.id_order\n",
|
||||
"dists = np.zeros((2,3))\n",
|
||||
"\n",
|
||||
"'''\n",
|
||||
"for ids in all_ids:\n",
|
||||
" if ids in points_ids:\n",
|
||||
" _all = set(W3.neighbors[ids])\n",
|
||||
" points = set(W2.neighbors[ids])\n",
|
||||
" new_neighbs[ids] = list(_all.difference(points))\n",
|
||||
" new_weights[ids] = {}\n",
|
||||
" for weight in new_neighbs[ids]:\n",
|
||||
" new_weights[ids][weight] = W3[ids][weight]\n",
|
||||
"\n",
|
||||
" else:\n",
|
||||
" new_weights[ids] = {}\n",
|
||||
"''' \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"3 0\n",
|
||||
"a\n",
|
||||
"3 1\n",
|
||||
"a\n",
|
||||
"3 2\n",
|
||||
"a\n",
|
||||
"4 0\n",
|
||||
"a\n",
|
||||
"4 1\n",
|
||||
"a\n",
|
||||
"4 2\n",
|
||||
"a\n",
|
||||
"5 0\n",
|
||||
"a\n",
|
||||
"5 1\n",
|
||||
"a\n",
|
||||
"5 2\n",
|
||||
"a\n",
|
||||
"6 0\n",
|
||||
"a\n",
|
||||
"6 1\n",
|
||||
"a\n",
|
||||
"6 2\n",
|
||||
"a\n",
|
||||
"7 0\n",
|
||||
"a\n",
|
||||
"7 1\n",
|
||||
"a\n",
|
||||
"7 2\n",
|
||||
"a\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"n = 3 #number of observations\n",
|
||||
"m = 2 #number of \n",
|
||||
"for i in all_ids:\n",
|
||||
" if i in points_ids:\n",
|
||||
" for j in coord_ids:\n",
|
||||
" if j in W3[j].keys():\n",
|
||||
" print i,j\n",
|
||||
" if i >= n:\n",
|
||||
" print 'a'\n",
|
||||
" dists[i-n][j] = W3.full()[0][i][j]\n",
|
||||
" elif j >= m:\n",
|
||||
" print 'b'\n",
|
||||
" dists[i][j-m] = W3.full()[0][i][j]\n",
|
||||
" elif (i >= n) & (j >= m):\n",
|
||||
" print 'c'\n",
|
||||
" dists[i-n][j-m] = W3.full()[0][i][j]\n",
|
||||
" else:\n",
|
||||
" print 'd'\n",
|
||||
" dists[i][j] = W3.full()[0][i][j]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([[ 0. , 0.45118883, 0.85404129],\n",
|
||||
" [ 0.27754126, 0.40233088, 0.20596816],\n",
|
||||
" [ 0. , 0.37941649, 0.6032733 ],\n",
|
||||
" [ 0. , 0. , 0. ],\n",
|
||||
" [ 0.35446728, 0.73305802, 0.50022967]])"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dists"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[0, 1]"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"points_ids"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"W4 = ps.W(new_neighbs, new_weights)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{0: [3, 4, 5, 6, 7], 1: [3, 4, 5, 6, 7], 2: [3, 4, 5, 6, 7]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"W4.neighbors"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{0: {3: -1.0482874068379293,\n",
|
||||
" 4: 0.36696681575222334,\n",
|
||||
" 5: 0.678090925231672,\n",
|
||||
" 6: 0.308555499289517,\n",
|
||||
" 7: -0.620566108018012},\n",
|
||||
" 1: {3: 0.5865615659374835,\n",
|
||||
" 4: -0.8123595700046831,\n",
|
||||
" 5: -0.5633467635255329,\n",
|
||||
" 6: -1.1845906645769202,\n",
|
||||
" 7: 0.42019589403802593},\n",
|
||||
" 2: {3: 0.8005291815217084,\n",
|
||||
" 4: -1.212624893235362,\n",
|
||||
" 5: -0.9337024333901489,\n",
|
||||
" 6: -1.4259607135415542,\n",
|
||||
" 7: 0.009238162970930053}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"W4.weights"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[0, 1, 2]"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"W4.id_order"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "3 is not in list",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-29-bea43da3bca2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mW4\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||
"\u001b[0;32m//anaconda/lib/python2.7/site-packages/pysal/weights/weights.pyc\u001b[0m in \u001b[0;36mfull\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 953\u001b[0m \"\"\"\n\u001b[0;32m--> 954\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 955\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 956\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtowsp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m//anaconda/lib/python2.7/site-packages/pysal/weights/util.pyc\u001b[0m in \u001b[0;36mfull\u001b[0;34m(w)\u001b[0m\n\u001b[1;32m 711\u001b[0m \u001b[0mw_i\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweights\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 712\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwij\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_i\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw_i\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 713\u001b[0;31m \u001b[0mc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 714\u001b[0m \u001b[0mwfull\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwij\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mwfull\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mValueError\u001b[0m: 3 is not in list"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"W4.full()[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 244,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{1: 1}"
|
||||
]
|
||||
},
|
||||
"execution_count": 244,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"W4."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"array([5])"
|
||||
]
|
||||
},
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"np.arange(5,6)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "Python [Root]",
|
||||
"language": "python",
|
||||
"name": "Python [Root]"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@ -0,0 +1,6 @@
|
||||
{
|
||||
"cells": [],
|
||||
"metadata": {},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@ -0,0 +1,4 @@
|
||||
import gwr
|
||||
import sel_bw
|
||||
import diagnostics
|
||||
import kernels
|
@ -0,0 +1,81 @@
|
||||
"""
|
||||
Diagnostics for estimated gwr modesl
|
||||
"""
|
||||
__author__ = "Taylor Oshan tayoshan@gmail.com"
|
||||
|
||||
import numpy as np
|
||||
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial
|
||||
|
||||
def get_AICc(gwr):
|
||||
"""
|
||||
Get AICc value
|
||||
|
||||
Gaussian: p61, (2.33), Fotheringham, Brunsdon and Charlton (2002)
|
||||
|
||||
GWGLM: AICc=AIC+2k(k+1)/(n-k-1), Nakaya et al. (2005): p2704, (36)
|
||||
|
||||
"""
|
||||
n = gwr.n
|
||||
k = gwr.tr_S
|
||||
if isinstance(gwr.family, Gaussian):
|
||||
aicc = -2.0*gwr.llf + 2.0*n*(k + 1.0)/(n-k-2.0)
|
||||
elif isinstance(gwr.family, (Poisson, Binomial)):
|
||||
aicc = get_AIC(gwr) + 2.0 * k * (k+1.0) / (n - k - 1.0)
|
||||
return aicc
|
||||
|
||||
def get_AIC(gwr):
|
||||
"""
|
||||
Get AIC calue
|
||||
|
||||
Gaussian: p96, (4.22), Fotheringham, Brunsdon and Charlton (2002)
|
||||
|
||||
GWGLM: AIC(G)=D(G) + 2K(G), where D and K denote the deviance and the effective
|
||||
number of parameters in the model with bandwidth G, respectively.
|
||||
|
||||
"""
|
||||
k = gwr.tr_S
|
||||
#deviance = -2*log-likelihood
|
||||
y = gwr.y
|
||||
mu = gwr.mu
|
||||
if isinstance(gwr.family, Gaussian):
|
||||
aic = -2.0 * gwr.llf + 2.0 * (k+1)
|
||||
elif isinstance(gwr.family, (Poisson, Binomial)):
|
||||
aic = np.sum(gwr.family.resid_dev(y, mu)**2) + 2.0 * k
|
||||
return aic
|
||||
|
||||
def get_BIC(gwr):
|
||||
"""
|
||||
Get BIC value
|
||||
|
||||
Gaussian: p61 (2.34), Fotheringham, Brunsdon and Charlton (2002)
|
||||
BIC = -2log(L)+klog(n)
|
||||
|
||||
GWGLM: BIC = dev + tr_S * log(n)
|
||||
|
||||
"""
|
||||
n = gwr.n # (scalar) number of observations
|
||||
k = gwr.tr_S
|
||||
y = gwr.y
|
||||
mu = gwr.mu
|
||||
if isinstance(gwr.family, Gaussian):
|
||||
bic = -2.0 * gwr.llf + (k+1) * np.log(n)
|
||||
elif isinstance(gwr.family, (Poisson, Binomial)):
|
||||
bic = np.sum(gwr.family.resid_dev(y, mu)**2) + k * np.log(n)
|
||||
return bic
|
||||
|
||||
def get_CV(gwr):
|
||||
"""
|
||||
Get CV value
|
||||
|
||||
Gaussian only
|
||||
|
||||
Methods: p60, (2.31) or p212 (9.4)
|
||||
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
|
||||
Geographically weighted regression: the analysis of spatially varying relationships.
|
||||
Modification: sum of residual squared is divided by n according to GWR4 results
|
||||
|
||||
"""
|
||||
aa = gwr.resid_response.reshape((-1,1))/(1.0-gwr.influ)
|
||||
cv = np.sum(aa**2)/gwr.n
|
||||
return cv
|
||||
|
1019
src/py/crankshaft/crankshaft/regression/gwr/base/gwr.py
Normal file
1019
src/py/crankshaft/crankshaft/regression/gwr/base/gwr.py
Normal file
File diff suppressed because it is too large
Load Diff
120
src/py/crankshaft/crankshaft/regression/gwr/base/kernels.py
Normal file
120
src/py/crankshaft/crankshaft/regression/gwr/base/kernels.py
Normal file
@ -0,0 +1,120 @@
|
||||
# GWR kernel function specifications
|
||||
|
||||
__author__ = "Taylor Oshan tayoshan@gmail.com"
|
||||
|
||||
#from pysal.weights.Distance import Kernel
|
||||
import scipy
|
||||
from scipy.spatial.kdtree import KDTree
|
||||
import numpy as np
|
||||
|
||||
#adaptive specifications should be parameterized with nn-1 to match original gwr
|
||||
#implementation. That is, pysal counts self neighbors with knn automatically.
|
||||
|
||||
def fix_gauss(points, bw):
|
||||
w = _Kernel(points, function='gwr_gaussian', bandwidth=bw,
|
||||
truncate=False)
|
||||
return w.kernel
|
||||
|
||||
def adapt_gauss(points, nn):
|
||||
w = _Kernel(points, fixed=False, k=nn-1, function='gwr_gaussian',
|
||||
truncate=False)
|
||||
return w.kernel
|
||||
|
||||
def fix_bisquare(points, bw):
|
||||
w = _Kernel(points, function='bisquare', bandwidth=bw)
|
||||
return w.kernel
|
||||
|
||||
def adapt_bisquare(points, nn):
|
||||
w = _Kernel(points, fixed=False, k=nn-1, function='bisquare')
|
||||
return w.kernel
|
||||
|
||||
def fix_exp(points, bw):
|
||||
w = _Kernel(points, function='exponential', bandwidth=bw,
|
||||
truncate=False)
|
||||
return w.kernel
|
||||
|
||||
def adapt_exp(points, nn):
|
||||
w = _Kernel(points, fixed=False, k=nn-1, function='exponential',
|
||||
truncate=False)
|
||||
return w.kernel
|
||||
|
||||
from scipy.spatial.distance import cdist
|
||||
|
||||
class _Kernel(object):
|
||||
"""
|
||||
|
||||
"""
|
||||
def __init__(self, data, bandwidth=None, fixed=True, k=None,
|
||||
function='triangular', eps=1.0000001, ids=None, truncate=True): #Added truncate flag
|
||||
if issubclass(type(data), scipy.spatial.KDTree):
|
||||
self.kdt = data
|
||||
self.data = self.kdt.data
|
||||
data = self.data
|
||||
else:
|
||||
self.data = data
|
||||
self.kdt = KDTree(self.data)
|
||||
if k is not None:
|
||||
self.k = int(k) + 1
|
||||
else:
|
||||
self.k = k
|
||||
self.dmat = cdist(self.data, self.data)
|
||||
self.function = function.lower()
|
||||
self.fixed = fixed
|
||||
self.eps = eps
|
||||
self.trunc = truncate
|
||||
if bandwidth:
|
||||
try:
|
||||
bandwidth = np.array(bandwidth)
|
||||
bandwidth.shape = (len(bandwidth), 1)
|
||||
except:
|
||||
bandwidth = np.ones((len(data), 1), 'float') * bandwidth
|
||||
self.bandwidth = bandwidth
|
||||
else:
|
||||
self._set_bw()
|
||||
self.kernel = self._kernel_funcs(self.dmat/self.bandwidth)
|
||||
|
||||
if self.trunc:
|
||||
mask = np.repeat(self.bandwidth, len(self.bandwidth), axis=1)
|
||||
kernel_mask = self._kernel_funcs(1.0/mask)
|
||||
self.kernel[(self.dmat >= mask)] = 0
|
||||
|
||||
|
||||
|
||||
def _set_bw(self):
|
||||
if self.k is not None:
|
||||
dmat = np.sort(self.dmat)[:,:self.k]
|
||||
else:
|
||||
dmat = self.dmat
|
||||
if self.fixed:
|
||||
# use max knn distance as bandwidth
|
||||
bandwidth = dmat.max() * self.eps
|
||||
n = len(self.data)
|
||||
self.bandwidth = np.ones((n, 1), 'float') * bandwidth
|
||||
else:
|
||||
# use local max knn distance
|
||||
self.bandwidth = dmat.max(axis=1) * self.eps
|
||||
self.bandwidth.shape = (self.bandwidth.size, 1)
|
||||
|
||||
|
||||
def _kernel_funcs(self, zs):
|
||||
# functions follow Anselin and Rey (2010) table 5.4
|
||||
if self.function == 'triangular':
|
||||
return 1 - zs
|
||||
elif self.function == 'uniform':
|
||||
return np.ones(zi.shape) * 0.5
|
||||
elif self.function == 'quadratic':
|
||||
return (3. / 4) * (1 - zs ** 2)
|
||||
elif self.function == 'quartic':
|
||||
return (15. / 16) * (1 - zs ** 2) ** 2
|
||||
elif self.function == 'gaussian':
|
||||
c = np.pi * 2
|
||||
c = c ** (-0.5)
|
||||
return c * np.exp(-(zs ** 2) / 2.)
|
||||
elif self.function == 'gwr_gaussian':
|
||||
return np.exp(-0.5*(zs)**2)
|
||||
elif self.function == 'bisquare':
|
||||
return (1-(zs)**2)**2
|
||||
elif self.function =='exponential':
|
||||
return np.exp(-zs)
|
||||
else:
|
||||
print('Unsupported kernel function', self.function)
|
208
src/py/crankshaft/crankshaft/regression/gwr/base/search.py
Normal file
208
src/py/crankshaft/crankshaft/regression/gwr/base/search.py
Normal file
@ -0,0 +1,208 @@
|
||||
#Bandwidth optimization methods
|
||||
|
||||
__author__ = "Taylor Oshan"
|
||||
|
||||
import numpy as np
|
||||
|
||||
def golden_section(a, c, delta, function, tol, max_iter, int_score=False):
|
||||
"""
|
||||
Golden section search routine
|
||||
Method: p212, 9.6.4
|
||||
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
|
||||
Geographically weighted regression: the analysis of spatially varying relationships.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
a : float
|
||||
initial max search section value
|
||||
b : float
|
||||
initial min search section value
|
||||
delta : float
|
||||
constant used to determine width of search sections
|
||||
function : function
|
||||
obejective function to be evaluated at different section
|
||||
values
|
||||
int_score : boolean
|
||||
False for float score, True for integer score
|
||||
tol : float
|
||||
tolerance used to determine convergence
|
||||
max_iter : integer
|
||||
maximum iterations if no convergence to tolerance
|
||||
|
||||
Returns
|
||||
-------
|
||||
opt_val : float
|
||||
optimal value
|
||||
opt_score : kernel
|
||||
optimal score
|
||||
output : list of tuples
|
||||
searching history
|
||||
"""
|
||||
b = a + delta * np.abs(c-a)
|
||||
d = c - delta * np.abs(c-a)
|
||||
score = 0.0
|
||||
diff = 1.0e9
|
||||
iters = 0
|
||||
output = []
|
||||
while np.abs(diff) > tol and iters < max_iter:
|
||||
iters += 1
|
||||
if int_score:
|
||||
b = np.round(b)
|
||||
d = np.round(d)
|
||||
|
||||
score_a = function(a)
|
||||
score_b = function(b)
|
||||
score_c = function(c)
|
||||
score_d = function(d)
|
||||
|
||||
if score_b <= score_d:
|
||||
opt_val = b
|
||||
opt_score = score_b
|
||||
c = d
|
||||
d = b
|
||||
b = a + delta * np.abs(c-a)
|
||||
#if int_score:
|
||||
#b = np.round(b)
|
||||
else:
|
||||
opt_val = d
|
||||
opt_score = score_d
|
||||
a = b
|
||||
b = d
|
||||
d = c - delta * np.abs(c-a)
|
||||
#if int_score:
|
||||
#d = np.round(b)
|
||||
|
||||
#if int_score:
|
||||
# opt_val = np.round(opt_val)
|
||||
output.append((opt_val, opt_score))
|
||||
diff = score_b - score_d
|
||||
score = opt_score
|
||||
return np.round(opt_val, 2), opt_score, output
|
||||
|
||||
def equal_interval(l_bound, u_bound, interval, function, int_score=False):
|
||||
"""
|
||||
Interval search, using interval as stepsize
|
||||
|
||||
Parameters
|
||||
----------
|
||||
l_bound : float
|
||||
initial min search section value
|
||||
u_bound : float
|
||||
initial max search section value
|
||||
interval : float
|
||||
constant used to determine width of search sections
|
||||
function : function
|
||||
obejective function to be evaluated at different section
|
||||
values
|
||||
int_score : boolean
|
||||
False for float score, True for integer score
|
||||
|
||||
Returns
|
||||
-------
|
||||
opt_val : float
|
||||
optimal value
|
||||
opt_score : kernel
|
||||
optimal score
|
||||
output : list of tuples
|
||||
searching history
|
||||
"""
|
||||
a = l_bound
|
||||
c = u_bound
|
||||
b = a + interval
|
||||
if int_score:
|
||||
a = np.round(a,0)
|
||||
c = np.round(c,0)
|
||||
b = np.round(b,0)
|
||||
|
||||
output = []
|
||||
|
||||
score_a = function(a)
|
||||
score_c = function(c)
|
||||
|
||||
output.append((a,score_a))
|
||||
output.append((c,score_c))
|
||||
|
||||
if score_a < score_c:
|
||||
opt_val = a
|
||||
opt_score = score_a
|
||||
else:
|
||||
opt_val = c
|
||||
opt_score = score_c
|
||||
|
||||
while b < c:
|
||||
score_b = function(b)
|
||||
|
||||
output.append((b,score_b))
|
||||
|
||||
if score_b < opt_score:
|
||||
opt_val = b
|
||||
opt_score = score_b
|
||||
b = b + interval
|
||||
|
||||
return opt_val, opt_score, output
|
||||
|
||||
|
||||
def flexible_bw(init, y, X, n, k, family, tol, max_iter, rss_score,
|
||||
gwr_func, bw_func, sel_func):
|
||||
if init:
|
||||
bw = sel_func(bw_func(y, X))
|
||||
print bw
|
||||
optim_model = gwr_func(y, X, bw)
|
||||
err = optim_model.resid_response.reshape((-1,1))
|
||||
est = optim_model.params
|
||||
else:
|
||||
model = GLM(y, X, family=self.family, constant=False).fit()
|
||||
err = model.resid_response.reshape((-1,1))
|
||||
est = np.repeat(model.params.T, n, axis=0)
|
||||
|
||||
|
||||
XB = np.multiply(est, X)
|
||||
if rss_score:
|
||||
rss = np.sum((err)**2)
|
||||
iters = 0
|
||||
scores = []
|
||||
delta = 1e6
|
||||
BWs = []
|
||||
VALs = []
|
||||
|
||||
while delta > tol and iters < max_iter:
|
||||
iters += 1
|
||||
new_XB = np.zeros_like(X)
|
||||
bws = []
|
||||
vals = []
|
||||
ests = np.zeros_like(X)
|
||||
f_XB = XB.copy()
|
||||
f_err = err.copy()
|
||||
for i in range(k):
|
||||
temp_y = XB[:,i].reshape((-1,1))
|
||||
temp_y = temp_y + err
|
||||
temp_X = X[:,i].reshape((-1,1))
|
||||
bw_class = bw_func(temp_y, temp_X)
|
||||
bw = sel_func(bw_class)
|
||||
optim_model = gwr_func(temp_y, temp_X, bw)
|
||||
err = optim_model.resid_response.reshape((-1,1))
|
||||
est = optim_model.params.reshape((-1,))
|
||||
|
||||
new_XB[:,i] = np.multiply(est, temp_X.reshape((-1,)))
|
||||
bws.append(bw)
|
||||
ests[:,i] = est
|
||||
vals.append(bw_class.bw[1])
|
||||
|
||||
predy = np.sum(np.multiply(ests, X), axis=1).reshape((-1,1))
|
||||
num = np.sum((new_XB - XB)**2)/n
|
||||
den = np.sum(np.sum(new_XB, axis=1)**2)
|
||||
score = (num/den)**0.5
|
||||
XB = new_XB
|
||||
|
||||
if rss_score:
|
||||
new_rss = np.sum((y - predy)**2)
|
||||
score = np.abs((new_rss - rss)/new_rss)
|
||||
rss = new_rss
|
||||
print score
|
||||
scores.append(score)
|
||||
delta = score
|
||||
BWs.append(bws)
|
||||
VALs.append(vals)
|
||||
|
||||
opt_bws = BWs[-1]
|
||||
return opt_bws, np.array(BWs), np.array(VALs), np.array(scores), f_XB, f_err
|
286
src/py/crankshaft/crankshaft/regression/gwr/base/sel_bw.py
Normal file
286
src/py/crankshaft/crankshaft/regression/gwr/base/sel_bw.py
Normal file
@ -0,0 +1,286 @@
|
||||
# GWR Bandwidth selection class
|
||||
|
||||
#Thinking about removing the search method and just having optimization begin in
|
||||
#class __init__
|
||||
|
||||
#x_glob and offset parameters dont yet do anything; former is for semiparametric
|
||||
#GWR and later is for offset variable for Poisson model
|
||||
|
||||
__author__ = "Taylor Oshan Tayoshan@gmail.com"
|
||||
|
||||
from kernels import *
|
||||
from search import golden_section, equal_interval, flexible_bw
|
||||
from gwr import GWR
|
||||
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial
|
||||
import pysal.spreg.user_output as USER
|
||||
from diagnostics import get_AICc, get_AIC, get_BIC, get_CV
|
||||
from scipy.spatial.distance import pdist, squareform
|
||||
from pysal.common import KDTree
|
||||
import numpy as np
|
||||
|
||||
kernels = {1: fix_gauss, 2: adapt_gauss, 3: fix_bisquare, 4:
|
||||
adapt_bisquare, 5: fix_exp, 6:adapt_exp}
|
||||
getDiag = {'AICc': get_AICc,'AIC':get_AIC, 'BIC': get_BIC, 'CV': get_CV}
|
||||
|
||||
class Sel_BW(object):
|
||||
"""
|
||||
Select bandwidth for kernel
|
||||
|
||||
Methods: p211 - p213, bandwidth selection
|
||||
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
|
||||
Geographically weighted regression: the analysis of spatially varying relationships.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y : array
|
||||
n*1, dependent variable.
|
||||
x_glob : array
|
||||
n*k1, fixed independent variable.
|
||||
x_loc : array
|
||||
n*k2, local independent variable, including constant.
|
||||
coords : list of tuples
|
||||
(x,y) of points used in bandwidth selection
|
||||
family : string
|
||||
GWR model type: 'Gaussian', 'logistic, 'Poisson''
|
||||
offset : array
|
||||
n*1, offset variable for Poisson model
|
||||
kernel : string
|
||||
kernel function: 'gaussian', 'bisquare', 'exponetial'
|
||||
fixed : boolean
|
||||
True for fixed bandwidth and False for adaptive (NN)
|
||||
fb : True for flexible (mutliple covaraite-specific) bandwidths
|
||||
False for a traditional (same for all covariates)
|
||||
bandwdith; defualt is False.
|
||||
constant : boolean
|
||||
True to include intercept (default) in model and False to exclude
|
||||
intercept.
|
||||
|
||||
|
||||
Attributes
|
||||
----------
|
||||
y : array
|
||||
n*1, dependent variable.
|
||||
x_glob : array
|
||||
n*k1, fixed independent variable.
|
||||
x_loc : array
|
||||
n*k2, local independent variable, including constant.
|
||||
coords : list of tuples
|
||||
(x,y) of points used in bandwidth selection
|
||||
family : string
|
||||
GWR model type: 'Gaussian', 'logistic, 'Poisson''
|
||||
kernel : string
|
||||
type of kernel used and wether fixed or adaptive
|
||||
criterion : string
|
||||
bw selection criterion: 'AICc', 'AIC', 'BIC', 'CV'
|
||||
search : string
|
||||
bw search method: 'golden', 'interval'
|
||||
bw_min : float
|
||||
min value used in bandwidth search
|
||||
bw_max : float
|
||||
max value used in bandwidth search
|
||||
interval : float
|
||||
interval increment used in interval search
|
||||
tol : float
|
||||
tolerance used to determine convergence
|
||||
max_iter : integer
|
||||
max interations if no convergence to tol
|
||||
fb : True for flexible (mutliple covaraite-specific) bandwidths
|
||||
False for a traditional (same for all covariates)
|
||||
bandwdith; defualt is False.
|
||||
constant : boolean
|
||||
True to include intercept (default) in model and False to exclude
|
||||
intercept.
|
||||
"""
|
||||
def __init__(self, coords, y, x_loc, x_glob=None, family=Gaussian(),
|
||||
offset=None, kernel='bisquare', fixed=False, fb=False, constant=True):
|
||||
self.coords = coords
|
||||
self.y = y
|
||||
self.x_loc = x_loc
|
||||
if x_glob is not None:
|
||||
self.x_glob = x_glob
|
||||
else:
|
||||
self.x_glob = []
|
||||
self.family=family
|
||||
self.fixed = fixed
|
||||
self.kernel = kernel
|
||||
if offset is None:
|
||||
self.offset = np.ones((len(y), 1))
|
||||
else:
|
||||
self.offset = offset * 1.0
|
||||
self.fb = fb
|
||||
self.constant = constant
|
||||
|
||||
def search(self, search='golden_section', criterion='AICc', bw_min=0.0,
|
||||
bw_max=0.0, interval=0.0, tol=1.0e-6, max_iter=200, init_fb=True,
|
||||
tol_fb=1.0e-5, rss_score=False, max_iter_fb=200):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
criterion : string
|
||||
bw selection criterion: 'AICc', 'AIC', 'BIC', 'CV'
|
||||
search : string
|
||||
bw search method: 'golden', 'interval'
|
||||
bw_min : float
|
||||
min value used in bandwidth search
|
||||
bw_max : float
|
||||
max value used in bandwidth search
|
||||
interval : float
|
||||
interval increment used in interval search
|
||||
tol : float
|
||||
tolerance used to determine convergence
|
||||
max_iter : integer
|
||||
max iterations if no convergence to tol
|
||||
init_fb : True to initialize flexible bandwidth search with
|
||||
esitmates from a traditional GWR and False to
|
||||
initialize flexible bandwidth search with global
|
||||
regression estimates
|
||||
tol_fb : convergence tolerence for the flexible bandwidth
|
||||
backfitting algorithm; a larger tolerance may stop the
|
||||
algorith faster though it may result in a less optimal
|
||||
model
|
||||
max_iter_fb : max iterations if no convergence to tol for flexible
|
||||
bandwidth backfittign algorithm
|
||||
rss_score : True to use the residual sum of sqaures to evaluate
|
||||
each iteration of the flexible bandwidth backfitting
|
||||
routine and False to use a smooth function; default is
|
||||
False
|
||||
|
||||
Returns
|
||||
-------
|
||||
bw : scalar or array
|
||||
optimal bandwidth value or values; returns scalar for
|
||||
fb=False and array for fb=True; ordering of bandwidths
|
||||
matches the ordering of the covariates (columns) of the
|
||||
designs matrix, X
|
||||
"""
|
||||
self.search = search
|
||||
self.criterion = criterion
|
||||
self.bw_min = bw_min
|
||||
self.bw_max = bw_max
|
||||
self.interval = interval
|
||||
self.tol = tol
|
||||
self.max_iter = max_iter
|
||||
self.init_fb = init_fb
|
||||
self.tol_fb = tol_fb
|
||||
self.rss_score = rss_score
|
||||
self.max_iter_fb = max_iter_fb
|
||||
|
||||
|
||||
if self.fixed:
|
||||
if self.kernel == 'gaussian':
|
||||
ktype = 1
|
||||
elif self.kernel == 'bisquare':
|
||||
ktype = 3
|
||||
elif self.kernel == 'exponential':
|
||||
ktype = 5
|
||||
else:
|
||||
raise TypeError('Unsupported kernel function ', self.kernel)
|
||||
else:
|
||||
if self.kernel == 'gaussian':
|
||||
ktype = 2
|
||||
elif self.kernel == 'bisquare':
|
||||
ktype = 4
|
||||
elif self.kernel == 'exponential':
|
||||
ktype = 6
|
||||
else:
|
||||
raise TypeError('Unsupported kernel function ', self.kernel)
|
||||
|
||||
function = lambda bw: getDiag[criterion](
|
||||
GWR(self.coords, self.y, self.x_loc, bw, family=self.family,
|
||||
kernel=self.kernel, fixed=self.fixed, offset=self.offset).fit())
|
||||
|
||||
if ktype % 2 == 0:
|
||||
int_score = True
|
||||
else:
|
||||
int_score = False
|
||||
self.int_score = int_score
|
||||
|
||||
if self.fb:
|
||||
self._fbw()
|
||||
print self.bw[1]
|
||||
self.XB = self.bw[4]
|
||||
self.err = self.bw[5]
|
||||
else:
|
||||
self._bw()
|
||||
|
||||
return self.bw[0]
|
||||
|
||||
def _bw(self):
|
||||
gwr_func = lambda bw: getDiag[self.criterion](
|
||||
GWR(self.coords, self.y, self.x_loc, bw, family=self.family,
|
||||
kernel=self.kernel, fixed=self.fixed, constant=self.constant).fit())
|
||||
if self.search == 'golden_section':
|
||||
a,c = self._init_section(self.x_glob, self.x_loc, self.coords,
|
||||
self.constant)
|
||||
delta = 0.38197 #1 - (np.sqrt(5.0)-1.0)/2.0
|
||||
self.bw = golden_section(a, c, delta, gwr_func, self.tol,
|
||||
self.max_iter, self.int_score)
|
||||
elif self.search == 'interval':
|
||||
self.bw = equal_interval(self.bw_min, self.bw_max, self.interval,
|
||||
gwr_func, self.int_score)
|
||||
else:
|
||||
raise TypeError('Unsupported computational search method ', search)
|
||||
|
||||
def _fbw(self):
|
||||
y = self.y
|
||||
if self.constant:
|
||||
X = USER.check_constant(self.x_loc)
|
||||
else:
|
||||
X = self.x_loc
|
||||
n, k = X.shape
|
||||
family = self.family
|
||||
offset = self.offset
|
||||
kernel = self.kernel
|
||||
fixed = self.fixed
|
||||
coords = self.coords
|
||||
search = self.search
|
||||
criterion = self.criterion
|
||||
bw_min = self.bw_min
|
||||
bw_max = self.bw_max
|
||||
interval = self.interval
|
||||
tol = self.tol
|
||||
max_iter = self.max_iter
|
||||
gwr_func = lambda y, X, bw: GWR(coords, y, X, bw, family=family,
|
||||
kernel=kernel, fixed=fixed, offset=offset, constant=False).fit()
|
||||
bw_func = lambda y, X: Sel_BW(coords, y, X, x_glob=[], family=family,
|
||||
kernel=kernel, fixed=fixed, offset=offset, constant=False)
|
||||
sel_func = lambda bw_func: bw_func.search(search=search,
|
||||
criterion=criterion, bw_min=bw_min, bw_max=bw_max,
|
||||
interval=interval, tol=tol, max_iter=max_iter)
|
||||
self.bw = flexible_bw(self.init_fb, y, X, n, k, family, self.tol_fb,
|
||||
self.max_iter_fb, self.rss_score, gwr_func, bw_func, sel_func)
|
||||
|
||||
|
||||
|
||||
def _init_section(self, x_glob, x_loc, coords, constant):
|
||||
if len(x_glob) > 0:
|
||||
n_glob = x_glob.shape[1]
|
||||
else:
|
||||
n_glob = 0
|
||||
if len(x_loc) > 0:
|
||||
n_loc = x_loc.shape[1]
|
||||
else:
|
||||
n_loc = 0
|
||||
if constant:
|
||||
n_vars = n_glob + n_loc + 1
|
||||
else:
|
||||
n_vars = n_glob + n_loc
|
||||
n = np.array(coords).shape[0]
|
||||
|
||||
if self.int_score:
|
||||
a = 40 + 2 * n_vars
|
||||
c = n
|
||||
else:
|
||||
nn = 40 + 2 * n_vars
|
||||
sq_dists = squareform(pdist(coords))
|
||||
sort_dists = np.sort(sq_dists, axis=1)
|
||||
min_dists = sort_dists[:,nn-1]
|
||||
max_dists = sort_dists[:,-1]
|
||||
a = np.min(min_dists)/2.0
|
||||
c = np.max(max_dists)/2.0
|
||||
|
||||
if a < self.bw_min:
|
||||
a = self.bw_min
|
||||
if c > self.bw_max and self.bw_max > 0:
|
||||
c = self.bw_max
|
||||
return a, c
|
@ -0,0 +1,785 @@
|
||||
"""
|
||||
GWR is tested against results from GWR4
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import pickle as pk
|
||||
from pysal.contrib.gwr.gwr import GWR, FBGWR
|
||||
from pysal.contrib.gwr.diagnostics import get_AICc, get_AIC, get_BIC, get_CV
|
||||
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial
|
||||
import numpy as np
|
||||
import pysal
|
||||
|
||||
class TestGWRGaussian(unittest.TestCase):
|
||||
def setUp(self):
|
||||
data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
|
||||
self.coords = zip(data.by_col('X'), data.by_col('Y'))
|
||||
self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
|
||||
rural = np.array(data.by_col('PctRural')).reshape((-1,1))
|
||||
pov = np.array(data.by_col('PctPov')).reshape((-1,1))
|
||||
black = np.array(data.by_col('PctBlack')).reshape((-1,1))
|
||||
self.X = np.hstack([rural, pov, black])
|
||||
self.BS_F = pysal.open(pysal.examples.get_path('georgia_BS_F_listwise.csv'))
|
||||
self.BS_NN = pysal.open(pysal.examples.get_path('georgia_BS_NN_listwise.csv'))
|
||||
self.GS_F = pysal.open(pysal.examples.get_path('georgia_GS_F_listwise.csv'))
|
||||
self.GS_NN = pysal.open(pysal.examples.get_path('georgia_GS_NN_listwise.csv'))
|
||||
self.FB = pk.load(open(pysal.examples.get_path('FB.p'), 'r'))
|
||||
self.XB = pk.load(open(pysal.examples.get_path('XB.p'), 'r'))
|
||||
self.err = pk.load(open(pysal.examples.get_path('err.p'), 'r'))
|
||||
|
||||
def test_BS_F(self):
|
||||
est_Int = self.BS_F.by_col(' est_Intercept')
|
||||
se_Int = self.BS_F.by_col(' se_Intercept')
|
||||
t_Int = self.BS_F.by_col(' t_Intercept')
|
||||
est_rural = self.BS_F.by_col(' est_PctRural')
|
||||
se_rural = self.BS_F.by_col(' se_PctRural')
|
||||
t_rural = self.BS_F.by_col(' t_PctRural')
|
||||
est_pov = self.BS_F.by_col(' est_PctPov')
|
||||
se_pov = self.BS_F.by_col(' se_PctPov')
|
||||
t_pov = self.BS_F.by_col(' t_PctPov')
|
||||
est_black = self.BS_F.by_col(' est_PctBlack')
|
||||
se_black = self.BS_F.by_col(' se_PctBlack')
|
||||
t_black = self.BS_F.by_col(' t_PctBlack')
|
||||
yhat = self.BS_F.by_col(' yhat')
|
||||
res = np.array(self.BS_F.by_col(' residual'))
|
||||
std_res = np.array(self.BS_F.by_col(' std_residual')).reshape((-1,1))
|
||||
localR2 = np.array(self.BS_F.by_col(' localR2')).reshape((-1,1))
|
||||
inf = np.array(self.BS_F.by_col(' influence')).reshape((-1,1))
|
||||
cooksD = np.array(self.BS_F.by_col(' CooksD')).reshape((-1,1))
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=209267.689, fixed=True)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
CV = get_CV(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 894.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 890.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 944.0)
|
||||
self.assertAlmostEquals(np.round(CV,2), 18.25)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
|
||||
np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
|
||||
np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
|
||||
np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
|
||||
np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
|
||||
np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
|
||||
|
||||
def test_BS_NN(self):
|
||||
est_Int = self.BS_NN.by_col(' est_Intercept')
|
||||
se_Int = self.BS_NN.by_col(' se_Intercept')
|
||||
t_Int = self.BS_NN.by_col(' t_Intercept')
|
||||
est_rural = self.BS_NN.by_col(' est_PctRural')
|
||||
se_rural = self.BS_NN.by_col(' se_PctRural')
|
||||
t_rural = self.BS_NN.by_col(' t_PctRural')
|
||||
est_pov = self.BS_NN.by_col(' est_PctPov')
|
||||
se_pov = self.BS_NN.by_col(' se_PctPov')
|
||||
t_pov = self.BS_NN.by_col(' t_PctPov')
|
||||
est_black = self.BS_NN.by_col(' est_PctBlack')
|
||||
se_black = self.BS_NN.by_col(' se_PctBlack')
|
||||
t_black = self.BS_NN.by_col(' t_PctBlack')
|
||||
yhat = self.BS_NN.by_col(' yhat')
|
||||
res = np.array(self.BS_NN.by_col(' residual'))
|
||||
std_res = np.array(self.BS_NN.by_col(' std_residual')).reshape((-1,1))
|
||||
localR2 = np.array(self.BS_NN.by_col(' localR2')).reshape((-1,1))
|
||||
inf = np.array(self.BS_NN.by_col(' influence')).reshape((-1,1))
|
||||
cooksD = np.array(self.BS_NN.by_col(' CooksD')).reshape((-1,1))
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=90.000, fixed=False)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
CV = get_CV(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 896.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 892.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 941.0)
|
||||
self.assertAlmostEquals(np.around(CV, 2), 19.19)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
|
||||
np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
|
||||
np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
|
||||
np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
|
||||
np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
|
||||
np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
|
||||
|
||||
def test_GS_F(self):
|
||||
est_Int = self.GS_F.by_col(' est_Intercept')
|
||||
se_Int = self.GS_F.by_col(' se_Intercept')
|
||||
t_Int = self.GS_F.by_col(' t_Intercept')
|
||||
est_rural = self.GS_F.by_col(' est_PctRural')
|
||||
se_rural = self.GS_F.by_col(' se_PctRural')
|
||||
t_rural = self.GS_F.by_col(' t_PctRural')
|
||||
est_pov = self.GS_F.by_col(' est_PctPov')
|
||||
se_pov = self.GS_F.by_col(' se_PctPov')
|
||||
t_pov = self.GS_F.by_col(' t_PctPov')
|
||||
est_black = self.GS_F.by_col(' est_PctBlack')
|
||||
se_black = self.GS_F.by_col(' se_PctBlack')
|
||||
t_black = self.GS_F.by_col(' t_PctBlack')
|
||||
yhat = self.GS_F.by_col(' yhat')
|
||||
res = np.array(self.GS_F.by_col(' residual'))
|
||||
std_res = np.array(self.GS_F.by_col(' std_residual')).reshape((-1,1))
|
||||
localR2 = np.array(self.GS_F.by_col(' localR2')).reshape((-1,1))
|
||||
inf = np.array(self.GS_F.by_col(' influence')).reshape((-1,1))
|
||||
cooksD = np.array(self.GS_F.by_col(' CooksD')).reshape((-1,1))
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=87308.298,
|
||||
kernel='gaussian', fixed=True)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
CV = get_CV(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 895.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 890.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 943.0)
|
||||
self.assertAlmostEquals(np.around(CV, 2), 18.21)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
|
||||
np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
|
||||
np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
|
||||
np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
|
||||
np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
|
||||
np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
|
||||
|
||||
def test_GS_NN(self):
|
||||
est_Int = self.GS_NN.by_col(' est_Intercept')
|
||||
se_Int = self.GS_NN.by_col(' se_Intercept')
|
||||
t_Int = self.GS_NN.by_col(' t_Intercept')
|
||||
est_rural = self.GS_NN.by_col(' est_PctRural')
|
||||
se_rural = self.GS_NN.by_col(' se_PctRural')
|
||||
t_rural = self.GS_NN.by_col(' t_PctRural')
|
||||
est_pov = self.GS_NN.by_col(' est_PctPov')
|
||||
se_pov = self.GS_NN.by_col(' se_PctPov')
|
||||
t_pov = self.GS_NN.by_col(' t_PctPov')
|
||||
est_black = self.GS_NN.by_col(' est_PctBlack')
|
||||
se_black = self.GS_NN.by_col(' se_PctBlack')
|
||||
t_black = self.GS_NN.by_col(' t_PctBlack')
|
||||
yhat = self.GS_NN.by_col(' yhat')
|
||||
res = np.array(self.GS_NN.by_col(' residual'))
|
||||
std_res = np.array(self.GS_NN.by_col(' std_residual')).reshape((-1,1))
|
||||
localR2 = np.array(self.GS_NN.by_col(' localR2')).reshape((-1,1))
|
||||
inf = np.array(self.GS_NN.by_col(' influence')).reshape((-1,1))
|
||||
cooksD = np.array(self.GS_NN.by_col(' CooksD')).reshape((-1,1))
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=49.000,
|
||||
kernel='gaussian', fixed=False)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
CV = get_CV(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 896)
|
||||
self.assertAlmostEquals(np.floor(AIC), 894.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 922.0)
|
||||
self.assertAlmostEquals(np.around(CV, 2), 17.91)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
|
||||
np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
|
||||
np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
|
||||
np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
|
||||
np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
|
||||
np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
|
||||
|
||||
def test_FBGWR(self):
|
||||
model = FBGWR(self.coords, self.y, self.X, [157.0, 65.0, 52.0],
|
||||
XB=self.XB, err=self.err, constant=False)
|
||||
rslt = model.fit()
|
||||
|
||||
np.testing.assert_allclose(rslt.predy, self.FB['predy'], atol=1e-07)
|
||||
np.testing.assert_allclose(rslt.params, self.FB['params'], atol=1e-07)
|
||||
np.testing.assert_allclose(rslt.resid_response, self.FB['u'], atol=1e-05)
|
||||
np.testing.assert_almost_equal(rslt.resid_ss, 6339.3497144025841)
|
||||
|
||||
class TestGWRPoisson(unittest.TestCase):
|
||||
def setUp(self):
|
||||
data = pysal.open(pysal.examples.get_path('Tokyomortality.csv'), mode='Ur')
|
||||
self.coords = zip(data.by_col('X_CENTROID'), data.by_col('Y_CENTROID'))
|
||||
self.y = np.array(data.by_col('db2564')).reshape((-1,1))
|
||||
self.off = np.array(data.by_col('eb2564')).reshape((-1,1))
|
||||
OCC = np.array(data.by_col('OCC_TEC')).reshape((-1,1))
|
||||
OWN = np.array(data.by_col('OWNH')).reshape((-1,1))
|
||||
POP = np.array(data.by_col('POP65')).reshape((-1,1))
|
||||
UNEMP = np.array(data.by_col('UNEMP')).reshape((-1,1))
|
||||
self.X = np.hstack([OCC,OWN,POP,UNEMP])
|
||||
self.BS_F = pysal.open(pysal.examples.get_path('tokyo_BS_F_listwise.csv'))
|
||||
self.BS_NN = pysal.open(pysal.examples.get_path('tokyo_BS_NN_listwise.csv'))
|
||||
self.GS_F = pysal.open(pysal.examples.get_path('tokyo_GS_F_listwise.csv'))
|
||||
self.GS_NN = pysal.open(pysal.examples.get_path('tokyo_GS_NN_listwise.csv'))
|
||||
self.BS_NN_OFF = pysal.open(pysal.examples.get_path('tokyo_BS_NN_OFF_listwise.csv'))
|
||||
|
||||
def test_BS_F(self):
|
||||
est_Int = self.BS_F.by_col(' est_Intercept')
|
||||
se_Int = self.BS_F.by_col(' se_Intercept')
|
||||
t_Int = self.BS_F.by_col(' t_Intercept')
|
||||
est_OCC = self.BS_F.by_col(' est_OCC_TEC')
|
||||
se_OCC = self.BS_F.by_col(' se_OCC_TEC')
|
||||
t_OCC = self.BS_F.by_col(' t_OCC_TEC')
|
||||
est_OWN = self.BS_F.by_col(' est_OWNH')
|
||||
se_OWN = self.BS_F.by_col(' se_OWNH')
|
||||
t_OWN = self.BS_F.by_col(' t_OWNH')
|
||||
est_POP = self.BS_F.by_col(' est_POP65')
|
||||
se_POP = self.BS_F.by_col(' se_POP65')
|
||||
t_POP = self.BS_F.by_col(' t_POP65')
|
||||
est_UNEMP = self.BS_F.by_col(' est_UNEMP')
|
||||
se_UNEMP = self.BS_F.by_col(' se_UNEMP')
|
||||
t_UNEMP = self.BS_F.by_col(' t_UNEMP')
|
||||
yhat = self.BS_F.by_col(' yhat')
|
||||
pdev = np.array(self.BS_F.by_col(' localpdev')).reshape((-1,1))
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=26029.625, family=Poisson(),
|
||||
kernel='bisquare', fixed=True)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 13294.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 13247.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 13485.0)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-05)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-03)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-03)
|
||||
np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-03)
|
||||
np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-03)
|
||||
np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
|
||||
np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
|
||||
|
||||
|
||||
def test_BS_NN(self):
|
||||
est_Int = self.BS_NN.by_col(' est_Intercept')
|
||||
se_Int = self.BS_NN.by_col(' se_Intercept')
|
||||
t_Int = self.BS_NN.by_col(' t_Intercept')
|
||||
est_OCC = self.BS_NN.by_col(' est_OCC_TEC')
|
||||
se_OCC = self.BS_NN.by_col(' se_OCC_TEC')
|
||||
t_OCC = self.BS_NN.by_col(' t_OCC_TEC')
|
||||
est_OWN = self.BS_NN.by_col(' est_OWNH')
|
||||
se_OWN = self.BS_NN.by_col(' se_OWNH')
|
||||
t_OWN = self.BS_NN.by_col(' t_OWNH')
|
||||
est_POP = self.BS_NN.by_col(' est_POP65')
|
||||
se_POP = self.BS_NN.by_col(' se_POP65')
|
||||
t_POP = self.BS_NN.by_col(' t_POP65')
|
||||
est_UNEMP = self.BS_NN.by_col(' est_UNEMP')
|
||||
se_UNEMP = self.BS_NN.by_col(' se_UNEMP')
|
||||
t_UNEMP = self.BS_NN.by_col(' t_UNEMP')
|
||||
yhat = self.BS_NN.by_col(' yhat')
|
||||
pdev = np.array(self.BS_NN.by_col(' localpdev')).reshape((-1,1))
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=50, family=Poisson(),
|
||||
kernel='bisquare', fixed=False)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 13285)
|
||||
self.assertAlmostEquals(np.floor(AIC), 13259.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 13442.0)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03)
|
||||
np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-03)
|
||||
np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04)
|
||||
np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
|
||||
|
||||
def test_BS_NN_Offset(self):
|
||||
est_Int = self.BS_NN_OFF.by_col(' est_Intercept')
|
||||
se_Int = self.BS_NN_OFF.by_col(' se_Intercept')
|
||||
t_Int = self.BS_NN_OFF.by_col(' t_Intercept')
|
||||
est_OCC = self.BS_NN_OFF.by_col(' est_OCC_TEC')
|
||||
se_OCC = self.BS_NN_OFF.by_col(' se_OCC_TEC')
|
||||
t_OCC = self.BS_NN_OFF.by_col(' t_OCC_TEC')
|
||||
est_OWN = self.BS_NN_OFF.by_col(' est_OWNH')
|
||||
se_OWN = self.BS_NN_OFF.by_col(' se_OWNH')
|
||||
t_OWN = self.BS_NN_OFF.by_col(' t_OWNH')
|
||||
est_POP = self.BS_NN_OFF.by_col(' est_POP65')
|
||||
se_POP = self.BS_NN_OFF.by_col(' se_POP65')
|
||||
t_POP = self.BS_NN_OFF.by_col(' t_POP65')
|
||||
est_UNEMP = self.BS_NN_OFF.by_col(' est_UNEMP')
|
||||
se_UNEMP = self.BS_NN_OFF.by_col(' se_UNEMP')
|
||||
t_UNEMP = self.BS_NN_OFF.by_col(' t_UNEMP')
|
||||
yhat = self.BS_NN_OFF.by_col(' yhat')
|
||||
pdev = np.array(self.BS_NN_OFF.by_col(' localpdev')).reshape((-1,1))
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=100, offset=self.off, family=Poisson(),
|
||||
kernel='bisquare', fixed=False)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 367.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 361.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 451.0)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-02,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02, atol=1e-02)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-01,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02, atol=1e-02)
|
||||
np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-01,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02, atol=1e-02)
|
||||
np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-01,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-03,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02, atol=1e-02)
|
||||
np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-01,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-04,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-01,
|
||||
atol=1e-02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-03, atol=1e-02)
|
||||
np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-04, atol=1e-02)
|
||||
|
||||
def test_GS_F(self):
|
||||
est_Int = self.GS_F.by_col(' est_Intercept')
|
||||
se_Int = self.GS_F.by_col(' se_Intercept')
|
||||
t_Int = self.GS_F.by_col(' t_Intercept')
|
||||
est_OCC = self.GS_F.by_col(' est_OCC_TEC')
|
||||
se_OCC = self.GS_F.by_col(' se_OCC_TEC')
|
||||
t_OCC = self.GS_F.by_col(' t_OCC_TEC')
|
||||
est_OWN = self.GS_F.by_col(' est_OWNH')
|
||||
se_OWN = self.GS_F.by_col(' se_OWNH')
|
||||
t_OWN = self.GS_F.by_col(' t_OWNH')
|
||||
est_POP = self.GS_F.by_col(' est_POP65')
|
||||
se_POP = self.GS_F.by_col(' se_POP65')
|
||||
t_POP = self.GS_F.by_col(' t_POP65')
|
||||
est_UNEMP = self.GS_F.by_col(' est_UNEMP')
|
||||
se_UNEMP = self.GS_F.by_col(' se_UNEMP')
|
||||
t_UNEMP = self.GS_F.by_col(' t_UNEMP')
|
||||
yhat = self.GS_F.by_col(' yhat')
|
||||
pdev = np.array(self.GS_F.by_col(' localpdev')).reshape((-1,1))
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=8764.474, family=Poisson(),
|
||||
kernel='gaussian', fixed=True)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 11283.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 11211.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 11497.0)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-03)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03)
|
||||
np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-03)
|
||||
np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-02)
|
||||
np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04)
|
||||
np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
|
||||
|
||||
def test_GS_NN(self):
|
||||
est_Int = self.GS_NN.by_col(' est_Intercept')
|
||||
se_Int = self.GS_NN.by_col(' se_Intercept')
|
||||
t_Int = self.GS_NN.by_col(' t_Intercept')
|
||||
est_OCC = self.GS_NN.by_col(' est_OCC_TEC')
|
||||
se_OCC = self.GS_NN.by_col(' se_OCC_TEC')
|
||||
t_OCC = self.GS_NN.by_col(' t_OCC_TEC')
|
||||
est_OWN = self.GS_NN.by_col(' est_OWNH')
|
||||
se_OWN = self.GS_NN.by_col(' se_OWNH')
|
||||
t_OWN = self.GS_NN.by_col(' t_OWNH')
|
||||
est_POP = self.GS_NN.by_col(' est_POP65')
|
||||
se_POP = self.GS_NN.by_col(' se_POP65')
|
||||
t_POP = self.GS_NN.by_col(' t_POP65')
|
||||
est_UNEMP = self.GS_NN.by_col(' est_UNEMP')
|
||||
se_UNEMP = self.GS_NN.by_col(' se_UNEMP')
|
||||
t_UNEMP = self.GS_NN.by_col(' t_UNEMP')
|
||||
yhat = self.GS_NN.by_col(' yhat')
|
||||
pdev = np.array(self.GS_NN.by_col(' localpdev')).reshape((-1,1))
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=50, family=Poisson(),
|
||||
kernel='gaussian', fixed=False)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 21070.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 21069.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 21111.0)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03)
|
||||
np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04)
|
||||
np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
|
||||
np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-02)
|
||||
np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
|
||||
np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04)
|
||||
np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
|
||||
|
||||
class TestGWRBinomial(unittest.TestCase):
|
||||
def setUp(self):
|
||||
data = pysal.open(pysal.examples.get_path('landslides.csv'))
|
||||
self.coords = zip(data.by_col('X'), data.by_col('Y'))
|
||||
self.y = np.array(data.by_col('Landslid')).reshape((-1,1))
|
||||
ELEV = np.array(data.by_col('Elev')).reshape((-1,1))
|
||||
SLOPE = np.array(data.by_col('Slope')).reshape((-1,1))
|
||||
SIN = np.array(data.by_col('SinAspct')).reshape((-1,1))
|
||||
COS = np.array(data.by_col('CosAspct')).reshape((-1,1))
|
||||
SOUTH = np.array(data.by_col('AbsSouth')).reshape((-1,1))
|
||||
DIST = np.array(data.by_col('DistStrm')).reshape((-1,1))
|
||||
self.X = np.hstack([ELEV, SLOPE, SIN, COS, SOUTH, DIST])
|
||||
self.BS_F = pysal.open(pysal.examples.get_path('clearwater_BS_F_listwise.csv'))
|
||||
self.BS_NN = pysal.open(pysal.examples.get_path('clearwater_BS_NN_listwise.csv'))
|
||||
self.GS_F = pysal.open(pysal.examples.get_path('clearwater_GS_F_listwise.csv'))
|
||||
self.GS_NN = pysal.open(pysal.examples.get_path('clearwater_GS_NN_listwise.csv'))
|
||||
|
||||
def test_BS_F(self):
|
||||
est_Int = self.BS_F.by_col(' est_Intercept')
|
||||
se_Int = self.BS_F.by_col(' se_Intercept')
|
||||
t_Int = self.BS_F.by_col(' t_Intercept')
|
||||
est_elev = self.BS_F.by_col(' est_Elev')
|
||||
se_elev = self.BS_F.by_col(' se_Elev')
|
||||
t_elev = self.BS_F.by_col(' t_Elev')
|
||||
est_slope = self.BS_F.by_col(' est_Slope')
|
||||
se_slope = self.BS_F.by_col(' se_Slope')
|
||||
t_slope = self.BS_F.by_col(' t_Slope')
|
||||
est_sin = self.BS_F.by_col(' est_SinAspct')
|
||||
se_sin = self.BS_F.by_col(' se_SinAspct')
|
||||
t_sin = self.BS_F.by_col(' t_SinAspct')
|
||||
est_cos = self.BS_F.by_col(' est_CosAspct')
|
||||
se_cos = self.BS_F.by_col(' se_CosAspct')
|
||||
t_cos = self.BS_F.by_col(' t_CosAspct')
|
||||
est_south = self.BS_F.by_col(' est_AbsSouth')
|
||||
se_south = self.BS_F.by_col(' se_AbsSouth')
|
||||
t_south = self.BS_F.by_col(' t_AbsSouth')
|
||||
est_strm = self.BS_F.by_col(' est_DistStrm')
|
||||
se_strm = self.BS_F.by_col(' se_DistStrm')
|
||||
t_strm = self.BS_F.by_col(' t_DistStrm')
|
||||
yhat = self.BS_F.by_col(' yhat')
|
||||
pdev = np.array(self.BS_F.by_col(' localpdev')).reshape((-1,1))
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=19642.170, family=Binomial(),
|
||||
kernel='bisquare', fixed=True)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 275.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 271.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 349.0)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e02)
|
||||
np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
|
||||
np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-01)
|
||||
#This test fails - likely due to compound rounding errors
|
||||
#Has been tested using statsmodels.family calculations and
|
||||
#code from Jing's python version, which both yield the same
|
||||
#np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
|
||||
|
||||
def test_BS_NN(self):
|
||||
est_Int = self.BS_NN.by_col(' est_Intercept')
|
||||
se_Int = self.BS_NN.by_col(' se_Intercept')
|
||||
t_Int = self.BS_NN.by_col(' t_Intercept')
|
||||
est_elev = self.BS_NN.by_col(' est_Elev')
|
||||
se_elev = self.BS_NN.by_col(' se_Elev')
|
||||
t_elev = self.BS_NN.by_col(' t_Elev')
|
||||
est_slope = self.BS_NN.by_col(' est_Slope')
|
||||
se_slope = self.BS_NN.by_col(' se_Slope')
|
||||
t_slope = self.BS_NN.by_col(' t_Slope')
|
||||
est_sin = self.BS_NN.by_col(' est_SinAspct')
|
||||
se_sin = self.BS_NN.by_col(' se_SinAspct')
|
||||
t_sin = self.BS_NN.by_col(' t_SinAspct')
|
||||
est_cos = self.BS_NN.by_col(' est_CosAspct')
|
||||
se_cos = self.BS_NN.by_col(' se_CosAspct')
|
||||
t_cos = self.BS_NN.by_col(' t_CosAspct')
|
||||
est_south = self.BS_NN.by_col(' est_AbsSouth')
|
||||
se_south = self.BS_NN.by_col(' se_AbsSouth')
|
||||
t_south = self.BS_NN.by_col(' t_AbsSouth')
|
||||
est_strm = self.BS_NN.by_col(' est_DistStrm')
|
||||
se_strm = self.BS_NN.by_col(' se_DistStrm')
|
||||
t_strm = self.BS_NN.by_col(' t_DistStrm')
|
||||
yhat = self.BS_NN.by_col(' yhat')
|
||||
pdev = self.BS_NN.by_col(' localpdev')
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=158, family=Binomial(),
|
||||
kernel='bisquare', fixed=False)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 277.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 271.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 358.0)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e03)
|
||||
np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
|
||||
np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e03)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-01)
|
||||
#This test fails - likely due to compound rounding errors
|
||||
#Has been tested using statsmodels.family calculations and
|
||||
#code from Jing's python version, which both yield the same
|
||||
#np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
|
||||
|
||||
def test_GS_F(self):
|
||||
est_Int = self.GS_F.by_col(' est_Intercept')
|
||||
se_Int = self.GS_F.by_col(' se_Intercept')
|
||||
t_Int = self.GS_F.by_col(' t_Intercept')
|
||||
est_elev = self.GS_F.by_col(' est_Elev')
|
||||
se_elev = self.GS_F.by_col(' se_Elev')
|
||||
t_elev = self.GS_F.by_col(' t_Elev')
|
||||
est_slope = self.GS_F.by_col(' est_Slope')
|
||||
se_slope = self.GS_F.by_col(' se_Slope')
|
||||
t_slope = self.GS_F.by_col(' t_Slope')
|
||||
est_sin = self.GS_F.by_col(' est_SinAspct')
|
||||
se_sin = self.GS_F.by_col(' se_SinAspct')
|
||||
t_sin = self.GS_F.by_col(' t_SinAspct')
|
||||
est_cos = self.GS_F.by_col(' est_CosAspct')
|
||||
se_cos = self.GS_F.by_col(' se_CosAspct')
|
||||
t_cos = self.GS_F.by_col(' t_CosAspct')
|
||||
est_south = self.GS_F.by_col(' est_AbsSouth')
|
||||
se_south = self.GS_F.by_col(' se_AbsSouth')
|
||||
t_south = self.GS_F.by_col(' t_AbsSouth')
|
||||
est_strm = self.GS_F.by_col(' est_DistStrm')
|
||||
se_strm = self.GS_F.by_col(' se_DistStrm')
|
||||
t_strm = self.GS_F.by_col(' t_DistStrm')
|
||||
yhat = self.GS_F.by_col(' yhat')
|
||||
pdev = self.GS_F.by_col(' localpdev')
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=8929.061, family=Binomial(),
|
||||
kernel='gaussian', fixed=True)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 276.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 272.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 341.0)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e02)
|
||||
np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
|
||||
np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-01)
|
||||
#This test fails - likely due to compound rounding errors
|
||||
#Has been tested using statsmodels.family calculations and
|
||||
#code from Jing's python version, which both yield the same
|
||||
#np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
|
||||
|
||||
def test_GS_NN(self):
|
||||
est_Int = self.GS_NN.by_col(' est_Intercept')
|
||||
se_Int = self.GS_NN.by_col(' se_Intercept')
|
||||
t_Int = self.GS_NN.by_col(' t_Intercept')
|
||||
est_elev = self.GS_NN.by_col(' est_Elev')
|
||||
se_elev = self.GS_NN.by_col(' se_Elev')
|
||||
t_elev = self.GS_NN.by_col(' t_Elev')
|
||||
est_slope = self.GS_NN.by_col(' est_Slope')
|
||||
se_slope = self.GS_NN.by_col(' se_Slope')
|
||||
t_slope = self.GS_NN.by_col(' t_Slope')
|
||||
est_sin = self.GS_NN.by_col(' est_SinAspct')
|
||||
se_sin = self.GS_NN.by_col(' se_SinAspct')
|
||||
t_sin = self.GS_NN.by_col(' t_SinAspct')
|
||||
est_cos = self.GS_NN.by_col(' est_CosAspct')
|
||||
se_cos = self.GS_NN.by_col(' se_CosAspct')
|
||||
t_cos = self.GS_NN.by_col(' t_CosAspct')
|
||||
est_south = self.GS_NN.by_col(' est_AbsSouth')
|
||||
se_south = self.GS_NN.by_col(' se_AbsSouth')
|
||||
t_south = self.GS_NN.by_col(' t_AbsSouth')
|
||||
est_strm = self.GS_NN.by_col(' est_DistStrm')
|
||||
se_strm = self.GS_NN.by_col(' se_DistStrm')
|
||||
t_strm = self.GS_NN.by_col(' t_DistStrm')
|
||||
yhat = self.GS_NN.by_col(' yhat')
|
||||
pdev = self.GS_NN.by_col(' localpdev')
|
||||
|
||||
model = GWR(self.coords, self.y, self.X, bw=64, family=Binomial(),
|
||||
kernel='gaussian', fixed=False)
|
||||
rslt = model.fit()
|
||||
|
||||
AICc = get_AICc(rslt)
|
||||
AIC = get_AIC(rslt)
|
||||
BIC = get_BIC(rslt)
|
||||
|
||||
self.assertAlmostEquals(np.floor(AICc), 276.0)
|
||||
self.assertAlmostEquals(np.floor(AIC), 273.0)
|
||||
self.assertAlmostEquals(np.floor(BIC), 331.0)
|
||||
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
|
||||
np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
|
||||
np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
|
||||
np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
|
||||
np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e02)
|
||||
np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
|
||||
np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e02)
|
||||
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-00)
|
||||
#This test fails - likely due to compound rounding errors
|
||||
#Has been tested using statsmodels.family calculations and
|
||||
#code from Jing's python version, which both yield the same
|
||||
#np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,84 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pysal
|
||||
from pysal.contrib.gwr.kernels import *
|
||||
|
||||
PEGP = pysal.examples.get_path
|
||||
|
||||
class TestKernels(unittest.TestCase):
|
||||
def setUp(self):
|
||||
np.random.seed(1234)
|
||||
x = np.arange(1,6)
|
||||
y = np.arange(5,0, -1)
|
||||
np.random.shuffle(x)
|
||||
np.random.shuffle(y)
|
||||
self.coords = np.array(zip(x, y))
|
||||
self.fix_gauss_kern = np.array([
|
||||
[ 1. , 0.38889556, 0.48567179, 0.48567179, 0.89483932],
|
||||
[ 0.38889556, 1. , 0.89483932, 0.64118039, 0.48567179],
|
||||
[ 0.48567179, 0.89483932, 1. , 0.89483932, 0.48567179],
|
||||
[ 0.48567179, 0.64118039, 0.89483932, 1. , 0.38889556],
|
||||
[ 0.89483932, 0.48567179, 0.48567179, 0.38889556, 1. ]])
|
||||
self.adapt_gauss_kern = np.array([
|
||||
[ 1. , 0.52004183, 0.60653072, 0.60653072, 0.92596109],
|
||||
[ 0.34559083, 1. , 0.88249692, 0.60653072, 0.44374738],
|
||||
[ 0.03877423, 0.60653072, 1. , 0.60653072, 0.03877423],
|
||||
[ 0.44374738, 0.60653072, 0.88249692, 1. , 0.34559083],
|
||||
[ 0.92596109, 0.60653072, 0.60653072, 0.52004183, 1. ]])
|
||||
self.fix_bisquare_kern = np.array([
|
||||
[ 1. , 0. , 0. , 0. , 0.60493827],
|
||||
[ 0. , 1. , 0.60493827, 0.01234568, 0. ],
|
||||
[ 0. , 0.60493827, 1. , 0.60493827, 0. ],
|
||||
[ 0. , 0.01234568, 0.60493827, 1. , 0. ],
|
||||
[ 0.60493827, 0. , 0. , 0. , 1. ]])
|
||||
self.adapt_bisquare_kern = np.array([
|
||||
[ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00,
|
||||
3.99999881e-14, 7.15976383e-01],
|
||||
[ 0.00000000e+00, 1.00000000e+00, 5.62500075e-01,
|
||||
3.99999881e-14, 0.00000000e+00],
|
||||
[ 0.00000000e+00, 3.99999881e-14, 1.00000000e+00,
|
||||
3.99999881e-14, 0.00000000e+00],
|
||||
[ 0.00000000e+00, 3.99999881e-14, 5.62500075e-01,
|
||||
1.00000000e+00, 0.00000000e+00],
|
||||
[ 7.15976383e-01, 0.00000000e+00, 3.99999881e-14,
|
||||
0.00000000e+00, 1.00000000e+00]])
|
||||
self.fix_exp_kern = np.array([
|
||||
[ 1. , 0.2529993 , 0.30063739, 0.30063739, 0.62412506],
|
||||
[ 0.2529993 , 1. , 0.62412506, 0.38953209, 0.30063739],
|
||||
[ 0.30063739, 0.62412506, 1. , 0.62412506, 0.30063739],
|
||||
[ 0.30063739, 0.38953209, 0.62412506, 1. , 0.2529993 ],
|
||||
[ 0.62412506, 0.30063739, 0.30063739, 0.2529993 , 1. ]])
|
||||
self.adapt_exp_kern = np.array([
|
||||
[ 1. , 0.31868771, 0.36787948, 0.36787948, 0.67554721],
|
||||
[ 0.23276223, 1. , 0.60653069, 0.36787948, 0.27949951],
|
||||
[ 0.07811997, 0.36787948, 1. , 0.36787948, 0.07811997],
|
||||
[ 0.27949951, 0.36787948, 0.60653069, 1. , 0.23276223],
|
||||
[ 0.67554721, 0.36787948, 0.36787948, 0.31868771, 1. ]])
|
||||
|
||||
def test_fix_gauss(self):
|
||||
kern = fix_gauss(self.coords, 3)
|
||||
np.testing.assert_allclose(kern, self.fix_gauss_kern)
|
||||
|
||||
def test_adapt_gauss(self):
|
||||
kern = adapt_gauss(self.coords, 3)
|
||||
np.testing.assert_allclose(kern, self.adapt_gauss_kern)
|
||||
|
||||
def test_fix_biqsquare(self):
|
||||
kern = fix_bisquare(self.coords, 3)
|
||||
np.testing.assert_allclose(kern, self.fix_bisquare_kern,
|
||||
atol=1e-01)
|
||||
|
||||
def test_adapt_bisqaure(self):
|
||||
kern = adapt_bisquare(self.coords, 3)
|
||||
np.testing.assert_allclose(kern, self.adapt_bisquare_kern, atol=1e-012)
|
||||
|
||||
def test_fix_exp(self):
|
||||
kern = fix_exp(self.coords, 3)
|
||||
np.testing.assert_allclose(kern, self.fix_exp_kern)
|
||||
|
||||
def test_adapt_exp(self):
|
||||
kern = adapt_exp(self.coords, 3)
|
||||
np.testing.assert_allclose(kern, self.adapt_exp_kern)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,139 @@
|
||||
|
||||
"""
|
||||
GWR is tested against results from GWR4
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import pickle as pk
|
||||
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial
|
||||
from pysal.contrib.gwr.sel_bw import Sel_BW
|
||||
import numpy as np
|
||||
import pysal
|
||||
|
||||
class TestSelBW(unittest.TestCase):
|
||||
def setUp(self):
|
||||
data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
|
||||
self.coords = zip(data.by_col('X'), data.by_col('Y'))
|
||||
self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
|
||||
rural = np.array(data.by_col('PctRural')).reshape((-1,1))
|
||||
pov = np.array(data.by_col('PctPov')).reshape((-1,1))
|
||||
black = np.array(data.by_col('PctBlack')).reshape((-1,1))
|
||||
self.X = np.hstack([rural, pov, black])
|
||||
self.XB = pk.load(open(pysal.examples.get_path('XB.p'), 'r'))
|
||||
self.err = pk.load(open(pysal.examples.get_path('err.p'), 'r'))
|
||||
|
||||
def test_golden_fixed_AICc(self):
|
||||
bw1 = 211027.34
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
|
||||
fixed=True).search(criterion='AICc')
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_golden_adapt_AICc(self):
|
||||
bw1 = 93.0
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
|
||||
fixed=False).search(criterion='AICc')
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_golden_fixed_AIC(self):
|
||||
bw1 = 76169.15
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=True).search(criterion='AIC')
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_golden_adapt_AIC(self):
|
||||
bw1 = 50.0
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=False).search(criterion='AIC')
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_golden_fixed_BIC(self):
|
||||
bw1 = 279451.43
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=True).search(criterion='BIC')
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_golden_adapt_BIC(self):
|
||||
bw1 = 62.0
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=False).search(criterion='BIC')
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_golden_fixed_CV(self):
|
||||
bw1 = 130406.67
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=True).search(criterion='CV')
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_golden_adapt_CV(self):
|
||||
bw1 = 68.0
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=False).search(criterion='CV')
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_interval_fixed_AICc(self):
|
||||
bw1 = 211025.0#211027.00
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
|
||||
fixed=True).search(criterion='AICc', search='interval', bw_min=211001.,
|
||||
bw_max=211035.0, interval=2)
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_interval_adapt_AICc(self):
|
||||
bw1 = 93.0
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
|
||||
fixed=False).search(criterion='AICc', search='interval',
|
||||
bw_min=90.0, bw_max=95.0, interval=1)
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_interval_fixed_AIC(self):
|
||||
bw1 = 76175.0#76169.00
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=True).search(criterion='AIC', search='interval',
|
||||
bw_min=76161.0, bw_max=76175.0, interval=1)
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_interval_adapt_AIC(self):
|
||||
bw1 = 40.0#50.0
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=False).search(criterion='AIC', search='interval', bw_min=40.0,
|
||||
bw_max=60.0, interval=2)
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_interval_fixed_BIC(self):
|
||||
bw1 = 279461.0#279451.00
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=True).search(criterion='BIC', search='interval', bw_min=279441.0,
|
||||
bw_max=279461.0, interval=2)
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_interval_adapt_BIC(self):
|
||||
bw1 = 62.0
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=False).search(criterion='BIC', search='interval',
|
||||
bw_min=52.0, bw_max=72.0, interval=2)
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_interval_fixed_CV(self):
|
||||
bw1 = 130400.0#130406.00
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=True).search(criterion='CV', search='interval', bw_min=130400.0,
|
||||
bw_max=130410.0, interval=1)
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_interval_adapt_CV(self):
|
||||
bw1 = 62.0#68.0
|
||||
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
|
||||
fixed=False).search(criterion='CV', search='interval', bw_min=60.0,
|
||||
bw_max=76.0 , interval=2)
|
||||
self.assertAlmostEqual(bw1, bw2)
|
||||
|
||||
def test_FBGWR_AIC(self):
|
||||
bw1 = [157.0, 65.0, 52.0]
|
||||
sel = Sel_BW(self.coords, self.y, self.X, fb=True, kernel='bisquare',
|
||||
constant=False)
|
||||
bw2 = sel.search(tol_fb=1e-03)
|
||||
np.testing.assert_allclose(bw1, bw2)
|
||||
np.testing.assert_allclose(sel.XB, self.XB, atol=1e-05)
|
||||
np.testing.assert_allclose(sel.err, self.err, atol=1e-05)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
42
src/py/crankshaft/crankshaft/regression/gwr/gwr.py
Normal file
42
src/py/crankshaft/crankshaft/regression/gwr/gwr.py
Normal file
@ -0,0 +1,42 @@
|
||||
import numpy as np
|
||||
from base.gwr import GWR
|
||||
from base.sel_bw import Sel_BW
|
||||
|
||||
def gwr(subquery, dep_var, ind_vars, fixed=False, kernel='bisquare'):
|
||||
"""
|
||||
subquery: 'select * from interesting_table'
|
||||
dep_var: 'pctbachelor'
|
||||
ind_vars: ['intercept', 'pctpov', 'pctrural', 'pctblack']
|
||||
fixed: False (kNN) or True ('distance')
|
||||
kernel: 'bisquare' (default), or 'exponential', 'gaussian'
|
||||
"""
|
||||
|
||||
query_result = subquery
|
||||
rowid = np.array(query_result[0]['rowid'])
|
||||
|
||||
x = np.array(query_result[0]['x'])
|
||||
y = np.array(query_result[0]['y'])
|
||||
coords = zip(x,y)
|
||||
|
||||
Y = query_result[0]['dep'].reshape((-1,1))
|
||||
n = Y.shape[0]
|
||||
k = len(ind_vars)
|
||||
X = np.zeros((n, k))
|
||||
|
||||
for attr in range(0,k):
|
||||
attr_name = 'attr' + str(attr+1)
|
||||
X[:, attr] = np.array(query_result[0][attr_name]).flatten()
|
||||
|
||||
bw = Sel_BW(coords, Y, X, fixed=fixed, kernel=kernel).search()
|
||||
model = GWR(coords, Y, X, bw, fixed=fixed, kernel=kernel).fit()
|
||||
|
||||
coefficients = model.params.reshape((-1,))
|
||||
t_vals = model.tvalues.reshape((-1,))
|
||||
stand_errs = model.bse.reshape((-1))
|
||||
predicted = np.repeat(model.predy.reshape((-1,)), k+1)
|
||||
residuals = np.repeat(model.resid_response.reshape((-1,)), k+1)
|
||||
r_squared = np.tile(model.localR2.reshape((-1,)), k+1)
|
||||
rowid = np.tile(rowid, k+1).reshape((-1,))
|
||||
var_name = np.tile(ind_vars, k+1).reshape((-1,))
|
||||
|
||||
return zip(coefficients, stand_errs, t_vals, predicted, residuals, r_squared, rowid, var_name)
|
Loading…
Reference in New Issue
Block a user