add glm/gwr base code; start gwr crankshaft func

This commit is contained in:
Taylor Oshan 2016-11-22 16:10:13 -07:00
parent 538ab9a071
commit 01c9195ea5
27 changed files with 10244 additions and 0 deletions

View File

@ -0,0 +1,444 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Import GLM and pysal\n",
"import os\n",
"import numpy as np\n",
"os.chdir('/Users/toshan/dev/pysal/pysal/contrib/glm')\n",
"from glm import GLM\n",
"import pysal\n",
"import pandas as pd\n",
"import statsmodels.formula.api as smf\n",
"import statsmodels.api as sm\n",
"from family import Gaussian, Binomial, Poisson, QuasiPoisson\n",
"\n",
"from statsmodels.api import families"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Prepare some test data - columbus example\n",
"db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')\n",
"y = np.array(db.by_col(\"HOVAL\"))\n",
"y = np.reshape(y, (49,1))\n",
"X = []\n",
"#X.append(np.ones(len(y)))\n",
"X.append(db.by_col(\"INC\"))\n",
"X.append(db.by_col(\"CRIME\"))\n",
"X = np.array(X).T"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 46.42818268]\n",
" [ 0.62898397]\n",
" [ -0.48488854]]\n"
]
}
],
"source": [
"#First fit pysal OLS model\n",
"from pysal.spreg import ols\n",
"OLS = ols.OLS(y, X)\n",
"print OLS.betas"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'family.Gaussian'>\n",
"<class 'family.Gaussian'>\n",
"<class 'family.Gaussian'>\n",
"[ 46.42818268 0.62898397 -0.48488854]\n",
"[ 46.42818268 0.62898397 -0.48488854]\n"
]
}
],
"source": [
"#Then fit Gaussian GLM\n",
"\n",
"#create Gaussian GLM model object\n",
"model = GLM(y, X, Gaussian())\n",
"model\n",
"\n",
"#Fit model to estimate coefficients and return GLMResults object\n",
"results = model.fit()\n",
"\n",
"#Check coefficients - R betas [46.4282, 0.6290, -0.4849]\n",
"print results.params\n",
"\n",
"# Gaussian GLM results from statsmodels\n",
"sm_model = smf.GLM(y, sm.add_constant(X), family=families.Gaussian())\n",
"sm_results = sm_model.fit()\n",
"print sm_results.params"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 2\n",
"<class 'family.Gaussian'>\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"<class 'family.Gaussian'>\n",
"<class 'family.Gaussian'>\n",
"<class 'family.Gaussian'>\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n"
]
}
],
"source": [
"print results.df_model, sm_results.df_model\n",
"print np.allclose(results.aic, sm_results.aic)\n",
"print np.allclose(results.bic, sm_results.bic)\n",
"print np.allclose(results.deviance, sm_results.deviance)\n",
"print np.allclose(results.df_model, sm_results.df_model)\n",
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
"print np.allclose(results.llf, sm_results.llf)\n",
"print np.allclose(results.mu, sm_results.mu)\n",
"print np.allclose(results.n, sm_results.nobs)\n",
"print np.allclose(results.null, sm_results.null)\n",
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
"print np.allclose(results.params, sm_results.params)\n",
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
"print np.allclose(results.scale, sm_results.scale)\n",
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
"print np.allclose(results.bse, sm_results.bse)\n",
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
"print np.allclose(results.tvalues, sm_results.tvalues)\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'family.Poisson'>\n",
"<class 'family.Poisson'>\n",
"<class 'family.Poisson'>\n",
"[ 3.92159085 0.01183491 -0.01371397]\n",
"[ 3.92159085 0.01183491 -0.01371397]\n"
]
}
],
"source": [
"#Now fit a Poisson GLM \n",
"\n",
"poisson_y = np.round(y).astype(int)\n",
"\n",
"#create Poisson GLM model object\n",
"model = GLM(poisson_y, X, Poisson())\n",
"model\n",
"\n",
"#Fit model to estimate coefficients and return GLMResults object\n",
"results = model.fit()\n",
"\n",
"#Check coefficients - R betas [3.91926, 0.01198, -0.01371]\n",
"print results.params.T\n",
"\n",
"# Poisson GLM results from statsmodels\n",
"sm_results = smf.GLM(poisson_y, sm.add_constant(X), family=families.Poisson()).fit()\n",
"print sm_results.params"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'family.Poisson'>\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"<class 'family.Poisson'>\n",
"<class 'family.Poisson'>\n",
"<class 'family.Poisson'>\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"[ 0.13049161 0.00511599 0.00193769] [ 0.13049161 0.00511599 0.00193769]\n"
]
}
],
"source": [
"print np.allclose(results.aic, sm_results.aic)\n",
"print np.allclose(results.bic, sm_results.bic)\n",
"print np.allclose(results.deviance, sm_results.deviance)\n",
"print np.allclose(results.df_model, sm_results.df_model)\n",
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
"print np.allclose(results.llf, sm_results.llf)\n",
"print np.allclose(results.mu, sm_results.mu)\n",
"print np.allclose(results.n, sm_results.nobs)\n",
"print np.allclose(results.null, sm_results.null)\n",
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
"print np.allclose(results.params, sm_results.params)\n",
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
"print np.allclose(results.scale, sm_results.scale)\n",
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
"print np.allclose(results.bse, sm_results.bse)\n",
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
"print np.allclose(results.tvalues, sm_results.tvalues)\n",
"print results.bse, sm_results.bse"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-5.33638276 0.0287754 ]\n",
"[-5.33638276 0.0287754 ]\n"
]
}
],
"source": [
"#Now fit a binomial GLM\n",
"londonhp = pd.read_csv('/Users/toshan/projects/londonhp.csv')\n",
"#londonhp = pd.read_csv('/Users/qszhao/Dropbox/pysal/pysal/contrib/gwr/londonhp.csv')\n",
"y = londonhp['BATH2'].values\n",
"y = np.reshape(y, (316,1))\n",
"X = londonhp['FLOORSZ'].values\n",
"X = np.reshape(X, (316,1))\n",
"\n",
"#create logistic GLM model object\n",
"model = GLM(y, X, Binomial())\n",
"model\n",
"\n",
"#Fit model to estimate coefficients and return GLMResults object\n",
"results = model.fit()\n",
"\n",
"#Check coefficients - R betas [-5.33638, 0.02878]\n",
"print results.params.T\n",
"\n",
"# Logistic GLM results from statsmodels\n",
"sm_results = smf.GLM(y, sm.add_constant(X), family=families.Binomial()).fit()\n",
"print sm_results.params"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 1\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n",
"True\n"
]
}
],
"source": [
"print results.df_model, sm_results.df_model\n",
"print np.allclose(results.aic, sm_results.aic)\n",
"print np.allclose(results.bic, sm_results.bic)\n",
"print np.allclose(results.deviance, sm_results.deviance)\n",
"print np.allclose(results.df_model, sm_results.df_model)\n",
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
"print np.allclose(results.llf, sm_results.llf)\n",
"print np.allclose(results.mu, sm_results.mu)\n",
"print np.allclose(results.n, sm_results.nobs)\n",
"print np.allclose(results.null, sm_results.null)\n",
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
"print np.allclose(results.params, sm_results.params)\n",
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
"print np.allclose(results.scale, sm_results.scale)\n",
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
"print np.allclose(results.bse, sm_results.bse)\n",
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
"print np.allclose(results.tvalues, sm_results.tvalues)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'family.QuasiPoisson'>\n",
"<class 'family.QuasiPoisson'>\n",
"<class 'family.QuasiPoisson'>\n"
]
}
],
"source": [
"#create QUasiPoisson GLM model object\n",
"model = GLM(poisson_y, X, QuasiPoisson())\n",
"model\n",
"\n",
"#Fit model to estimate coefficients and return GLMResults object\n",
"results = model.fit()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,4 @@
import glm
import family
import utils
import iwls

View File

@ -0,0 +1,959 @@
from __future__ import print_function
import numpy as np
from scipy import stats
from utils import cache_readonly
class Results(object):
"""
Class to contain model results
Parameters
----------
model : class instance
the previously specified model instance
params : array
parameter estimates from the fit model
"""
def __init__(self, model, params, **kwd):
self.__dict__.update(kwd)
self.initialize(model, params, **kwd)
self._data_attr = []
def initialize(self, model, params, **kwd):
self.params = params
self.model = model
if hasattr(model, 'k_constant'):
self.k_constant = model.k_constant
def predict(self, exog=None, transform=True, *args, **kwargs):
"""
Call self.model.predict with self.params as the first argument.
Parameters
----------
exog : array-like, optional
The values for which you want to predict.
transform : bool, optional
If the model was fit via a formula, do you want to pass
exog through the formula. Default is True. E.g., if you fit
a model y ~ log(x1) + log(x2), and transform is True, then
you can pass a data structure that contains x1 and x2 in
their original form. Otherwise, you'd need to log the data
first.
args, kwargs :
Some models can take additional arguments or keywords, see the
predict method of the model for the details.
Returns
-------
prediction : ndarray or pandas.Series
See self.model.predict
"""
if transform and hasattr(self.model, 'formula') and exog is not None:
from patsy import dmatrix
exog = dmatrix(self.model.data.design_info.builder,
exog)
if exog is not None:
exog = np.asarray(exog)
if exog.ndim == 1 and (self.model.exog.ndim == 1 or
self.model.exog.shape[1] == 1):
exog = exog[:, None]
exog = np.atleast_2d(exog) # needed in count model shape[1]
return self.model.predict(self.params, exog, *args, **kwargs)
#TODO: public method?
class LikelihoodModelResults(Results):
"""
Class to contain results from likelihood models
Parameters
-----------
model : LikelihoodModel instance or subclass instance
LikelihoodModelResults holds a reference to the model that is fit.
params : 1d array_like
parameter estimates from estimated model
normalized_cov_params : 2d array
Normalized (before scaling) covariance of params. (dot(X.T,X))**-1
scale : float
For (some subset of models) scale will typically be the
mean square error from the estimated model (sigma^2)
Returns
-------
**Attributes**
mle_retvals : dict
Contains the values returned from the chosen optimization method if
full_output is True during the fit. Available only if the model
is fit by maximum likelihood. See notes below for the output from
the different methods.
mle_settings : dict
Contains the arguments passed to the chosen optimization method.
Available if the model is fit by maximum likelihood. See
LikelihoodModel.fit for more information.
model : model instance
LikelihoodResults contains a reference to the model that is fit.
params : ndarray
The parameters estimated for the model.
scale : float
The scaling factor of the model given during instantiation.
tvalues : array
The t-values of the standard errors.
Notes
-----
The covariance of params is given by scale times normalized_cov_params.
Return values by solver if full_output is True during fit:
'newton'
fopt : float
The value of the (negative) loglikelihood at its
minimum.
iterations : int
Number of iterations performed.
score : ndarray
The score vector at the optimum.
Hessian : ndarray
The Hessian at the optimum.
warnflag : int
1 if maxiter is exceeded. 0 if successful convergence.
converged : bool
True: converged. False: did not converge.
allvecs : list
List of solutions at each iteration.
'nm'
fopt : float
The value of the (negative) loglikelihood at its
minimum.
iterations : int
Number of iterations performed.
warnflag : int
1: Maximum number of function evaluations made.
2: Maximum number of iterations reached.
converged : bool
True: converged. False: did not converge.
allvecs : list
List of solutions at each iteration.
'bfgs'
fopt : float
Value of the (negative) loglikelihood at its minimum.
gopt : float
Value of gradient at minimum, which should be near 0.
Hinv : ndarray
value of the inverse Hessian matrix at minimum. Note
that this is just an approximation and will often be
different from the value of the analytic Hessian.
fcalls : int
Number of calls to loglike.
gcalls : int
Number of calls to gradient/score.
warnflag : int
1: Maximum number of iterations exceeded. 2: Gradient
and/or function calls are not changing.
converged : bool
True: converged. False: did not converge.
allvecs : list
Results at each iteration.
'lbfgs'
fopt : float
Value of the (negative) loglikelihood at its minimum.
gopt : float
Value of gradient at minimum, which should be near 0.
fcalls : int
Number of calls to loglike.
warnflag : int
Warning flag:
- 0 if converged
- 1 if too many function evaluations or too many iterations
- 2 if stopped for another reason
converged : bool
True: converged. False: did not converge.
'powell'
fopt : float
Value of the (negative) loglikelihood at its minimum.
direc : ndarray
Current direction set.
iterations : int
Number of iterations performed.
fcalls : int
Number of calls to loglike.
warnflag : int
1: Maximum number of function evaluations. 2: Maximum number
of iterations.
converged : bool
True : converged. False: did not converge.
allvecs : list
Results at each iteration.
'cg'
fopt : float
Value of the (negative) loglikelihood at its minimum.
fcalls : int
Number of calls to loglike.
gcalls : int
Number of calls to gradient/score.
warnflag : int
1: Maximum number of iterations exceeded. 2: Gradient and/
or function calls not changing.
converged : bool
True: converged. False: did not converge.
allvecs : list
Results at each iteration.
'ncg'
fopt : float
Value of the (negative) loglikelihood at its minimum.
fcalls : int
Number of calls to loglike.
gcalls : int
Number of calls to gradient/score.
hcalls : int
Number of calls to hessian.
warnflag : int
1: Maximum number of iterations exceeded.
converged : bool
True: converged. False: did not converge.
allvecs : list
Results at each iteration.
"""
# by default we use normal distribution
# can be overwritten by instances or subclasses
use_t = False
def __init__(self, model, params, normalized_cov_params=None, scale=1.,
**kwargs):
super(LikelihoodModelResults, self).__init__(model, params)
self.normalized_cov_params = normalized_cov_params
self.scale = scale
# robust covariance
# We put cov_type in kwargs so subclasses can decide in fit whether to
# use this generic implementation
if 'use_t' in kwargs:
use_t = kwargs['use_t']
if use_t is not None:
self.use_t = use_t
if 'cov_type' in kwargs:
cov_type = kwargs.get('cov_type', 'nonrobust')
cov_kwds = kwargs.get('cov_kwds', {})
if cov_type == 'nonrobust':
self.cov_type = 'nonrobust'
self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
'covariance matrix of the errors is correctly ' +
'specified.'}
else:
from statsmodels.base.covtype import get_robustcov_results
if cov_kwds is None:
cov_kwds = {}
use_t = self.use_t
# TODO: we shouldn't need use_t in get_robustcov_results
get_robustcov_results(self, cov_type=cov_type, use_self=True,
use_t=use_t, **cov_kwds)
def normalized_cov_params(self):
raise NotImplementedError
def _get_robustcov_results(self, cov_type='nonrobust', use_self=True,
use_t=None, **cov_kwds):
from statsmodels.base.covtype import get_robustcov_results
if cov_kwds is None:
cov_kwds = {}
if cov_type == 'nonrobust':
self.cov_type = 'nonrobust'
self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
'covariance matrix of the errors is correctly ' +
'specified.'}
else:
# TODO: we shouldn't need use_t in get_robustcov_results
get_robustcov_results(self, cov_type=cov_type, use_self=True,
use_t=use_t, **cov_kwds)
@cache_readonly
def llf(self):
return self.model.loglike(self.params)
@cache_readonly
def bse(self):
return np.sqrt(np.diag(self.cov_params()))
@cache_readonly
def tvalues(self):
"""
Return the t-statistic for a given parameter estimate.
"""
return self.params / self.bse
@cache_readonly
def pvalues(self):
if self.use_t:
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
return stats.t.sf(np.abs(self.tvalues), df_resid)*2
else:
return stats.norm.sf(np.abs(self.tvalues))*2
def cov_params(self, r_matrix=None, column=None, scale=None, cov_p=None,
other=None):
"""
Returns the variance/covariance matrix.
The variance/covariance matrix can be of a linear contrast
of the estimates of params or all params multiplied by scale which
will usually be an estimate of sigma^2. Scale is assumed to be
a scalar.
Parameters
----------
r_matrix : array-like
Can be 1d, or 2d. Can be used alone or with other.
column : array-like, optional
Must be used on its own. Can be 0d or 1d see below.
scale : float, optional
Can be specified or not. Default is None, which means that
the scale argument is taken from the model.
other : array-like, optional
Can be used when r_matrix is specified.
Returns
-------
cov : ndarray
covariance matrix of the parameter estimates or of linear
combination of parameter estimates. See Notes.
Notes
-----
(The below are assumed to be in matrix notation.)
If no argument is specified returns the covariance matrix of a model
``(scale)*(X.T X)^(-1)``
If contrast is specified it pre and post-multiplies as follows
``(scale) * r_matrix (X.T X)^(-1) r_matrix.T``
If contrast and other are specified returns
``(scale) * r_matrix (X.T X)^(-1) other.T``
If column is specified returns
``(scale) * (X.T X)^(-1)[column,column]`` if column is 0d
OR
``(scale) * (X.T X)^(-1)[column][:,column]`` if column is 1d
"""
if (hasattr(self, 'mle_settings') and
self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
dot_fun = nan_dot
else:
dot_fun = np.dot
if (cov_p is None and self.normalized_cov_params is None and
not hasattr(self, 'cov_params_default')):
raise ValueError('need covariance of parameters for computing '
'(unnormalized) covariances')
if column is not None and (r_matrix is not None or other is not None):
raise ValueError('Column should be specified without other '
'arguments.')
if other is not None and r_matrix is None:
raise ValueError('other can only be specified with r_matrix')
if cov_p is None:
if hasattr(self, 'cov_params_default'):
cov_p = self.cov_params_default
else:
if scale is None:
scale = self.scale
cov_p = self.normalized_cov_params * scale
if column is not None:
column = np.asarray(column)
if column.shape == ():
return cov_p[column, column]
else:
#return cov_p[column][:, column]
return cov_p[column[:, None], column]
elif r_matrix is not None:
r_matrix = np.asarray(r_matrix)
if r_matrix.shape == ():
raise ValueError("r_matrix should be 1d or 2d")
if other is None:
other = r_matrix
else:
other = np.asarray(other)
tmp = dot_fun(r_matrix, dot_fun(cov_p, np.transpose(other)))
return tmp
else: # if r_matrix is None and column is None:
return cov_p
#TODO: make sure this works as needed for GLMs
def t_test(self, r_matrix, cov_p=None, scale=None,
use_t=None):
"""
Compute a t-test for a each linear hypothesis of the form Rb = q
Parameters
----------
r_matrix : array-like, str, tuple
- array : If an array is given, a p x k 2d array or length k 1d
array specifying the linear restrictions. It is assumed
that the linear combination is equal to zero.
- str : The full hypotheses to test can be given as a string.
See the examples.
- tuple : A tuple of arrays in the form (R, q). If q is given,
can be either a scalar or a length p row vector.
cov_p : array-like, optional
An alternative estimate for the parameter covariance matrix.
If None is given, self.normalized_cov_params is used.
scale : float, optional
An optional `scale` to use. Default is the scale specified
by the model fit.
use_t : bool, optional
If use_t is None, then the default of the model is used.
If use_t is True, then the p-values are based on the t
distribution.
If use_t is False, then the p-values are based on the normal
distribution.
Returns
-------
res : ContrastResults instance
The results for the test are attributes of this results instance.
The available results have the same elements as the parameter table
in `summary()`.
Examples
--------
>>> import numpy as np
>>> import statsmodels.api as sm
>>> data = sm.datasets.longley.load()
>>> data.exog = sm.add_constant(data.exog)
>>> results = sm.OLS(data.endog, data.exog).fit()
>>> r = np.zeros_like(results.params)
>>> r[5:] = [1,-1]
>>> print(r)
[ 0. 0. 0. 0. 0. 1. -1.]
r tests that the coefficients on the 5th and 6th independent
variable are the same.
>>> T_test = results.t_test(r)
>>> print(T_test)
<T contrast: effect=-1829.2025687192481, sd=455.39079425193762,
t=-4.0167754636411717, p=0.0015163772380899498, df_denom=9>
>>> T_test.effect
-1829.2025687192481
>>> T_test.sd
455.39079425193762
>>> T_test.tvalue
-4.0167754636411717
>>> T_test.pvalue
0.0015163772380899498
Alternatively, you can specify the hypothesis tests using a string
>>> from statsmodels.formula.api import ols
>>> dta = sm.datasets.longley.load_pandas().data
>>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
>>> results = ols(formula, dta).fit()
>>> hypotheses = 'GNPDEFL = GNP, UNEMP = 2, YEAR/1829 = 1'
>>> t_test = results.t_test(hypotheses)
>>> print(t_test)
See Also
---------
tvalues : individual t statistics
f_test : for F tests
patsy.DesignInfo.linear_constraint
"""
from patsy import DesignInfo
names = self.model.data.param_names
LC = DesignInfo(names).linear_constraint(r_matrix)
r_matrix, q_matrix = LC.coefs, LC.constants
num_ttests = r_matrix.shape[0]
num_params = r_matrix.shape[1]
if (cov_p is None and self.normalized_cov_params is None and
not hasattr(self, 'cov_params_default')):
raise ValueError('Need covariance of parameters for computing '
'T statistics')
if num_params != self.params.shape[0]:
raise ValueError('r_matrix and params are not aligned')
if q_matrix is None:
q_matrix = np.zeros(num_ttests)
else:
q_matrix = np.asarray(q_matrix)
q_matrix = q_matrix.squeeze()
if q_matrix.size > 1:
if q_matrix.shape[0] != num_ttests:
raise ValueError("r_matrix and q_matrix must have the same "
"number of rows")
if use_t is None:
#switch to use_t false if undefined
use_t = (hasattr(self, 'use_t') and self.use_t)
_t = _sd = None
_effect = np.dot(r_matrix, self.params)
# nan_dot multiplies with the convention nan * 0 = 0
# Perform the test
if num_ttests > 1:
_sd = np.sqrt(np.diag(self.cov_params(
r_matrix=r_matrix, cov_p=cov_p)))
else:
_sd = np.sqrt(self.cov_params(r_matrix=r_matrix, cov_p=cov_p))
_t = (_effect - q_matrix) * recipr(_sd)
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
if use_t:
return ContrastResults(effect=_effect, t=_t, sd=_sd,
df_denom=df_resid)
else:
return ContrastResults(effect=_effect, statistic=_t, sd=_sd,
df_denom=df_resid,
distribution='norm')
def f_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None):
"""
Compute the F-test for a joint linear hypothesis.
This is a special case of `wald_test` that always uses the F
distribution.
Parameters
----------
r_matrix : array-like, str, or tuple
- array : An r x k array where r is the number of restrictions to
test and k is the number of regressors. It is assumed
that the linear combination is equal to zero.
- str : The full hypotheses to test can be given as a string.
See the examples.
- tuple : A tuple of arrays in the form (R, q), ``q`` can be
either a scalar or a length k row vector.
cov_p : array-like, optional
An alternative estimate for the parameter covariance matrix.
If None is given, self.normalized_cov_params is used.
scale : float, optional
Default is 1.0 for no scaling.
invcov : array-like, optional
A q x q array to specify an inverse covariance matrix based on a
restrictions matrix.
Returns
-------
res : ContrastResults instance
The results for the test are attributes of this results instance.
Examples
--------
>>> import numpy as np
>>> import statsmodels.api as sm
>>> data = sm.datasets.longley.load()
>>> data.exog = sm.add_constant(data.exog)
>>> results = sm.OLS(data.endog, data.exog).fit()
>>> A = np.identity(len(results.params))
>>> A = A[1:,:]
This tests that each coefficient is jointly statistically
significantly different from zero.
>>> print(results.f_test(A))
<F contrast: F=330.28533923463488, p=4.98403052872e-10,
df_denom=9, df_num=6>
Compare this to
>>> results.fvalue
330.2853392346658
>>> results.f_pvalue
4.98403096572e-10
>>> B = np.array(([0,0,1,-1,0,0,0],[0,0,0,0,0,1,-1]))
This tests that the coefficient on the 2nd and 3rd regressors are
equal and jointly that the coefficient on the 5th and 6th regressors
are equal.
>>> print(results.f_test(B))
<F contrast: F=9.740461873303655, p=0.00560528853174, df_denom=9,
df_num=2>
Alternatively, you can specify the hypothesis tests using a string
>>> from statsmodels.datasets import longley
>>> from statsmodels.formula.api import ols
>>> dta = longley.load_pandas().data
>>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
>>> results = ols(formula, dta).fit()
>>> hypotheses = '(GNPDEFL = GNP), (UNEMP = 2), (YEAR/1829 = 1)'
>>> f_test = results.f_test(hypotheses)
>>> print(f_test)
See Also
--------
statsmodels.stats.contrast.ContrastResults
wald_test
t_test
patsy.DesignInfo.linear_constraint
Notes
-----
The matrix `r_matrix` is assumed to be non-singular. More precisely,
r_matrix (pX pX.T) r_matrix.T
is assumed invertible. Here, pX is the generalized inverse of the
design matrix of the model. There can be problems in non-OLS models
where the rank of the covariance of the noise is not full.
"""
res = self.wald_test(r_matrix, cov_p=cov_p, scale=scale,
invcov=invcov, use_f=True)
return res
#TODO: untested for GLMs?
def wald_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None,
use_f=None):
"""
Compute a Wald-test for a joint linear hypothesis.
Parameters
----------
r_matrix : array-like, str, or tuple
- array : An r x k array where r is the number of restrictions to
test and k is the number of regressors. It is assumed that the
linear combination is equal to zero.
- str : The full hypotheses to test can be given as a string.
See the examples.
- tuple : A tuple of arrays in the form (R, q), ``q`` can be
either a scalar or a length p row vector.
cov_p : array-like, optional
An alternative estimate for the parameter covariance matrix.
If None is given, self.normalized_cov_params is used.
scale : float, optional
Default is 1.0 for no scaling.
invcov : array-like, optional
A q x q array to specify an inverse covariance matrix based on a
restrictions matrix.
use_f : bool
If True, then the F-distribution is used. If False, then the
asymptotic distribution, chisquare is used. If use_f is None, then
the F distribution is used if the model specifies that use_t is True.
The test statistic is proportionally adjusted for the distribution
by the number of constraints in the hypothesis.
Returns
-------
res : ContrastResults instance
The results for the test are attributes of this results instance.
See also
--------
statsmodels.stats.contrast.ContrastResults
f_test
t_test
patsy.DesignInfo.linear_constraint
Notes
-----
The matrix `r_matrix` is assumed to be non-singular. More precisely,
r_matrix (pX pX.T) r_matrix.T
is assumed invertible. Here, pX is the generalized inverse of the
design matrix of the model. There can be problems in non-OLS models
where the rank of the covariance of the noise is not full.
"""
if use_f is None:
#switch to use_t false if undefined
use_f = (hasattr(self, 'use_t') and self.use_t)
from patsy import DesignInfo
names = self.model.data.param_names
LC = DesignInfo(names).linear_constraint(r_matrix)
r_matrix, q_matrix = LC.coefs, LC.constants
if (self.normalized_cov_params is None and cov_p is None and
invcov is None and not hasattr(self, 'cov_params_default')):
raise ValueError('need covariance of parameters for computing '
'F statistics')
cparams = np.dot(r_matrix, self.params[:, None])
J = float(r_matrix.shape[0]) # number of restrictions
if q_matrix is None:
q_matrix = np.zeros(J)
else:
q_matrix = np.asarray(q_matrix)
if q_matrix.ndim == 1:
q_matrix = q_matrix[:, None]
if q_matrix.shape[0] != J:
raise ValueError("r_matrix and q_matrix must have the same "
"number of rows")
Rbq = cparams - q_matrix
if invcov is None:
cov_p = self.cov_params(r_matrix=r_matrix, cov_p=cov_p)
if np.isnan(cov_p).max():
raise ValueError("r_matrix performs f_test for using "
"dimensions that are asymptotically "
"non-normal")
invcov = np.linalg.inv(cov_p)
if (hasattr(self, 'mle_settings') and
self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
F = nan_dot(nan_dot(Rbq.T, invcov), Rbq)
else:
F = np.dot(np.dot(Rbq.T, invcov), Rbq)
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
if use_f:
F /= J
return ContrastResults(F=F, df_denom=df_resid,
df_num=invcov.shape[0])
else:
return ContrastResults(chi2=F, df_denom=J, statistic=F,
distribution='chi2', distargs=(J,))
def wald_test_terms(self, skip_single=False, extra_constraints=None,
combine_terms=None):
"""
Compute a sequence of Wald tests for terms over multiple columns
This computes joined Wald tests for the hypothesis that all
coefficients corresponding to a `term` are zero.
`Terms` are defined by the underlying formula or by string matching.
Parameters
----------
skip_single : boolean
If true, then terms that consist only of a single column and,
therefore, refers only to a single parameter is skipped.
If false, then all terms are included.
extra_constraints : ndarray
not tested yet
combine_terms : None or list of strings
Each string in this list is matched to the name of the terms or
the name of the exogenous variables. All columns whose name
includes that string are combined in one joint test.
Returns
-------
test_result : result instance
The result instance contains `table` which is a pandas DataFrame
with the test results: test statistic, degrees of freedom and
pvalues.
Examples
--------
>>> res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
data).fit()
>>> res_ols.wald_test_terms()
<class 'statsmodels.stats.contrast.WaldTestResults'>
F P>F df constraint df denom
Intercept 279.754525 2.37985521351e-22 1 51
C(Duration, Sum) 5.367071 0.0245738436636 1 51
C(Weight, Sum) 12.432445 3.99943118767e-05 2 51
C(Duration, Sum):C(Weight, Sum) 0.176002 0.83912310946 2 51
>>> res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)",
data).fit(cov_type='HC0')
>>> wt = res_poi.wald_test_terms(skip_single=False,
combine_terms=['Duration', 'Weight'])
>>> print(wt)
chi2 P>chi2 df constraint
Intercept 15.695625 7.43960374424e-05 1
C(Weight) 16.132616 0.000313940174705 2
C(Duration) 1.009147 0.315107378931 1
C(Weight):C(Duration) 0.216694 0.897315972824 2
Duration 11.187849 0.010752286833 3
Weight 30.263368 4.32586407145e-06 4
"""
# lazy import
from collections import defaultdict
result = self
if extra_constraints is None:
extra_constraints = []
if combine_terms is None:
combine_terms = []
design_info = getattr(result.model.data.orig_exog, 'design_info', None)
if design_info is None and extra_constraints is None:
raise ValueError('no constraints, nothing to do')
identity = np.eye(len(result.params))
constraints = []
combined = defaultdict(list)
if design_info is not None:
for term in design_info.terms:
cols = design_info.slice(term)
name = term.name()
constraint_matrix = identity[cols]
# check if in combined
for cname in combine_terms:
if cname in name:
combined[cname].append(constraint_matrix)
k_constraint = constraint_matrix.shape[0]
if skip_single:
if k_constraint == 1:
continue
constraints.append((name, constraint_matrix))
combined_constraints = []
for cname in combine_terms:
combined_constraints.append((cname, np.vstack(combined[cname])))
else:
# check by exog/params names if there is no formula info
for col, name in enumerate(result.model.exog_names):
constraint_matrix = identity[col]
# check if in combined
for cname in combine_terms:
if cname in name:
combined[cname].append(constraint_matrix)
if skip_single:
continue
constraints.append((name, constraint_matrix))
combined_constraints = []
for cname in combine_terms:
combined_constraints.append((cname, np.vstack(combined[cname])))
use_t = result.use_t
distribution = ['chi2', 'F'][use_t]
res_wald = []
index = []
for name, constraint in constraints + combined_constraints + extra_constraints:
wt = result.wald_test(constraint)
row = [wt.statistic.item(), wt.pvalue, constraint.shape[0]]
if use_t:
row.append(wt.df_denom)
res_wald.append(row)
index.append(name)
# distribution nerutral names
col_names = ['statistic', 'pvalue', 'df_constraint']
if use_t:
col_names.append('df_denom')
# TODO: maybe move DataFrame creation to results class
from pandas import DataFrame
table = DataFrame(res_wald, index=index, columns=col_names)
res = WaldTestResults(None, distribution, None, table=table)
# TODO: remove temp again, added for testing
res.temp = constraints + combined_constraints + extra_constraints
return res
def conf_int(self, alpha=.05, cols=None, method='default'):
"""
Returns the confidence interval of the fitted parameters.
Parameters
----------
alpha : float, optional
The significance level for the confidence interval.
ie., The default `alpha` = .05 returns a 95% confidence interval.
cols : array-like, optional
`cols` specifies which confidence intervals to return
method : string
Not Implemented Yet
Method to estimate the confidence_interval.
"Default" : uses self.bse which is based on inverse Hessian for MLE
"hjjh" :
"jac" :
"boot-bse"
"boot_quant"
"profile"
Returns
--------
conf_int : array
Each row contains [lower, upper] limits of the confidence interval
for the corresponding parameter. The first column contains all
lower, the second column contains all upper limits.
Examples
--------
>>> import statsmodels.api as sm
>>> data = sm.datasets.longley.load()
>>> data.exog = sm.add_constant(data.exog)
>>> results = sm.OLS(data.endog, data.exog).fit()
>>> results.conf_int()
array([[-5496529.48322745, -1467987.78596704],
[ -177.02903529, 207.15277984],
[ -0.1115811 , 0.03994274],
[ -3.12506664, -0.91539297],
[ -1.5179487 , -0.54850503],
[ -0.56251721, 0.460309 ],
[ 798.7875153 , 2859.51541392]])
>>> results.conf_int(cols=(2,3))
array([[-0.1115811 , 0.03994274],
[-3.12506664, -0.91539297]])
Notes
-----
The confidence interval is based on the standard normal distribution.
Models wish to use a different distribution should overwrite this
method.
"""
bse = self.bse
if self.use_t:
dist = stats.t
df_resid = getattr(self, 'df_resid_inference', self.df_resid)
q = dist.ppf(1 - alpha / 2, df_resid)
else:
dist = stats.norm
q = dist.ppf(1 - alpha / 2)
if cols is None:
lower = self.params - q * bse
upper = self.params + q * bse
else:
cols = np.asarray(cols)
lower = self.params[cols] - q * bse[cols]
upper = self.params[cols] + q * bse[cols]
return np.asarray(lzip(lower, upper))
def save(self, fname, remove_data=False):
'''
save a pickle of this instance
Parameters
----------
fname : string or filehandle
fname can be a string to a file path or filename, or a filehandle.
remove_data : bool
If False (default), then the instance is pickled without changes.
If True, then all arrays with length nobs are set to None before
pickling. See the remove_data method.
In some cases not all arrays will be set to None.
Notes
-----
If remove_data is true and the model result does not implement a
remove_data method then this will raise an exception.
'''
from statsmodels.iolib.smpickle import save_pickle
if remove_data:
self.remove_data()
save_pickle(self, fname)
@classmethod
def load(cls, fname):
'''
load a pickle, (class method)
Parameters
----------
fname : string or filehandle
fname can be a string to a file path or filename, or a filehandle.
Returns
-------
unpickled instance
'''
from statsmodels.iolib.smpickle import load_pickle
return load_pickle(fname)
def remove_data(self):
'''remove data arrays, all nobs arrays from result and model
This reduces the size of the instance, so it can be pickled with less
memory. Currently tested for use with predict from an unpickled
results and model instance.
.. warning:: Since data and some intermediate results have been removed
calculating new statistics that require them will raise exceptions.
The exception will occur the first time an attribute is accessed
that has been set to None.
Not fully tested for time series models, tsa, and might delete too much
for prediction or not all that would be possible.
The list of arrays to delete is maintained as an attribute of the
result and model instance, except for cached values. These lists could
be changed before calling remove_data.
'''
def wipe(obj, att):
#get to last element in attribute path
p = att.split('.')
att_ = p.pop(-1)
try:
obj_ = reduce(getattr, [obj] + p)
#print(repr(obj), repr(att))
#print(hasattr(obj_, att_))
if hasattr(obj_, att_):
#print('removing3', att_)
setattr(obj_, att_, None)
except AttributeError:
pass
model_attr = ['model.' + i for i in self.model._data_attr]
for att in self._data_attr + model_attr:
#print('removing', att)
wipe(self, att)
data_in_cache = getattr(self, 'data_in_cache', [])
data_in_cache += ['fittedvalues', 'resid', 'wresid']
for key in data_in_cache:
try:
self._cache[key] = None
except (AttributeError, KeyError):
pass
def lzip(*args, **kwargs):
return list(zip(*args, **kwargs))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,326 @@
import numpy as np
import numpy.linalg as la
from pysal.spreg.utils import RegressionPropsY, spdot
import pysal.spreg.user_output as USER
from utils import cache_readonly
from base import LikelihoodModelResults
import family
from iwls import iwls
__all__ = ['GLM']
class GLM(RegressionPropsY):
"""
Generalised linear models. Can currently estimate Guassian, Poisson and
Logisitc regression coefficients. GLM object prepares model input and fit
method performs estimation which then returns a GLMResults object.
Parameters
----------
y : array
n*1, dependent variable.
X : array
n*k, independent variable, exlcuding the constant.
family : string
Model type: 'Gaussian', 'Poisson', 'Binomial'
Attributes
----------
y : array
n*1, dependent variable.
X : array
n*k, independent variable, including constant.
family : string
Model type: 'Gaussian', 'Poisson', 'logistic'
n : integer
Number of observations
k : integer
Number of independent variables
df_model : float
k-1, where k is the number of variables (including
intercept)
df_residual : float
observations minus variables (n-k)
mean_y : float
Mean of y
std_y : float
Standard deviation of y
fit_params : dict
Parameters passed into fit method to define estimation
routine.
normalized_cov_params : array
k*k, approximates [X.T*X]-1
"""
def __init__(self, y, X, family=family.Gaussian(), constant=True):
"""
Initialize class
"""
self.n = USER.check_arrays(y, X)
USER.check_y(y, self.n)
self.y = y
if constant:
self.X = USER.check_constant(X)
else:
self.X = X
self.family = family
self.k = self.X.shape[1]
self.fit_params = {}
def fit(self, ini_betas=None, tol=1.0e-6, max_iter=200, solve='iwls'):
"""
Method that fits a model with a particular estimation routine.
Parameters
----------
ini_betas : array
k*1, initial coefficient values, including constant.
Default is None, which calculates initial values during
estimation.
tol: float
Tolerence for estimation convergence.
max_iter : integer
Maximum number of iterations if convergence not
achieved.
solve :string
Technique to solve MLE equations.
'iwls' = iteratively (re)weighted least squares (default)
"""
self.fit_params['ini_betas'] = ini_betas
self.fit_params['tol'] = tol
self.fit_params['max_iter'] = max_iter
self.fit_params['solve']=solve
if solve.lower() == 'iwls':
params, predy, w, n_iter = iwls(self.y, self.X, self.family,
ini_betas=ini_betas, tol=tol, max_iter=max_iter)
self.fit_params['n_iter'] = n_iter
return GLMResults(self, params.flatten(), predy, w)
@cache_readonly
def df_model(self):
return self.X.shape[1] - 1
@cache_readonly
def df_resid(self):
return self.n - self.df_model - 1
class GLMResults(LikelihoodModelResults):
"""
Results of estimated GLM and diagnostics.
Parameters
----------
model : GLM object
Pointer to GLM object with estimation parameters.
params : array
k*1, estimared coefficients
mu : array
n*1, predicted y values.
w : array
n*1, final weight used for iwls
Attributes
----------
model : GLM Object
Points to GLM object for which parameters have been
estimated.
y : array
n*1, dependent variable.
x : array
n*k, independent variable, including constant.
family : string
Model type: 'Gaussian', 'Poisson', 'Logistic'
n : integer
Number of observations
k : integer
Number of independent variables
df_model : float
k-1, where k is the number of variables (including
intercept)
df_residual : float
observations minus variables (n-k)
fit_params : dict
parameters passed into fit method to define estimation
routine.
scale : float
sigma squared used for subsequent computations.
params : array
n*k, estimared beta coefficients
w : array
n*1, final weight values of x
mu : array
n*1, predicted value of y (i.e., fittedvalues)
cov_params : array
Variance covariance matrix (kxk) of betas which has been
appropriately scaled by sigma-squared
bse : array
k*1, standard errors of betas
pvalues : array
k*1, two-tailed pvalues of parameters
tvalues : array
k*1, the tvalues of the standard errors
null : array
n*1, predicted values of y for null model
deviance : float
value of the deviance function evalued at params;
see family.py for distribution-specific deviance
null_deviance : float
value of the deviance function for the model fit with
a constant as the only regressor
llf : float
value of the loglikelihood function evalued at params;
see family.py for distribution-specific loglikelihoods
llnull : float
value of log-likelihood function evaluated at null
aic : float
AIC
bic : float
BIC
D2 : float
percent deviance explained
adj_D2 : float
adjusted percent deviance explained
pseudo_R2 : float
McFadden's pseudo R2 (coefficient of determination)
adj_pseudoR2 : float
adjusted McFadden's pseudo R2
resid_response : array
response residuals; defined as y-mu
resid_pearson : array
Pearson residuals; defined as (y-mu)/sqrt(VAR(mu))
where VAR is the distribution specific variance
function; see family.py and varfuncs.py for more information.
resid_working : array
Working residuals; the working residuals are defined as
resid_response/link'(mu); see links.py for the
derivatives of the link functions.
resid_anscombe : array
Anscombe residuals; see family.py for
distribution-specific Anscombe residuals.
resid_deviance : array
deviance residuals; see family.py for
distribution-specific deviance residuals.
pearson_chi2 : float
chi-Squared statistic is defined as the sum
of the squares of the Pearson residuals
normalized_cov_params : array
k*k, approximates [X.T*X]-1
"""
def __init__(self, model, params, mu, w):
self.model = model
self.n = model.n
self.y = model.y.T.flatten()
self.X = model.X
self.k = model.k
self.family = model.family
self.fit_params = model.fit_params
self.params = params
self.w = w
self.mu = mu.flatten()
self._cache = {}
@cache_readonly
def df_model(self):
return self.model.df_model
@cache_readonly
def df_resid(self):
return self.model.df_resid
@cache_readonly
def normalized_cov_params(self):
return la.inv(spdot(self.w.T, self.w))
@cache_readonly
def resid_response(self):
return (self.y-self.mu)
@cache_readonly
def resid_pearson(self):
return ((self.y-self.mu) /
np.sqrt(self.family.variance(self.mu)))
@cache_readonly
def resid_working(self):
return (self.resid_response / self.family.link.deriv(self.mu))
@cache_readonly
def resid_anscombe(self):
return (self.family.resid_anscombe(self.y, self.mu))
@cache_readonly
def resid_deviance(self):
return (self.family.resid_dev(self.y, self.mu))
@cache_readonly
def pearson_chi2(self):
chisq = (self.y - self.mu)**2 / self.family.variance(self.mu)
chisqsum = np.sum(chisq)
return chisqsum
@cache_readonly
def null(self):
y = np.reshape(self.y, (-1,1))
model = self.model
X = np.ones((len(y), 1))
null_mod = GLM(y, X, family=self.family, constant=False)
return null_mod.fit().mu
@cache_readonly
def scale(self):
if isinstance(self.family, (family.Binomial, family.Poisson)):
return 1.
else:
return (((np.power(self.resid_response, 2) /
self.family.variance(self.mu))).sum() /
(self.df_resid))
@cache_readonly
def deviance(self):
return self.family.deviance(self.y, self.mu)
@cache_readonly
def null_deviance(self):
return self.family.deviance(self.y, self.null)
@cache_readonly
def llnull(self):
return self.family.loglike(self.y, self.null, scale=self.scale)
@cache_readonly
def llf(self):
return self.family.loglike(self.y, self.mu, scale=self.scale)
@cache_readonly
def aic(self):
if isinstance(self.family, family.QuasiPoisson):
return np.nan
else:
return -2 * self.llf + 2*(self.df_model+1)
@cache_readonly
def bic(self):
return (self.deviance -
(self.model.n - self.df_model - 1) *
np.log(self.model.n))
@cache_readonly
def D2(self):
return 1 - (self.deviance / self.null_deviance)
@cache_readonly
def adj_D2(self):
return 1.0 - (float(self.n) - 1.0)/(float(self.n) - float(self.k)) * (1.0-self.D2)
@cache_readonly
def pseudoR2(self):
return 1 - (self.llf/self.llnull)
@cache_readonly
def adj_pseudoR2(self):
return 1 - ((self.llf-self.k)/self.llnull)

View File

@ -0,0 +1,84 @@
import numpy as np
import numpy.linalg as la
from scipy import sparse as sp
from scipy.sparse import linalg as spla
from pysal.spreg.utils import spdot, spmultiply
from family import Binomial, Poisson
def _compute_betas(y, x):
"""
compute MLE coefficients using iwls routine
Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
Geographically weighted regression: the analysis of spatially varying relationships.
"""
xT = x.T
xtx = spdot(xT, x)
xtx_inv = la.inv(xtx)
xtx_inv = sp.csr_matrix(xtx_inv)
xTy = spdot(xT, y, array_out=False)
betas = spdot(xtx_inv, xTy)
return betas
def _compute_betas_gwr(y, x, wi):
"""
compute MLE coefficients using iwls routine
Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
Geographically weighted regression: the analysis of spatially varying relationships.
"""
xT = (x * wi).T
xtx = np.dot(xT, x)
xtx_inv = la.inv(xtx)
xtx_inv_xt = np.dot(xtx_inv, xT)
betas = np.dot(xtx_inv_xt, y)
return betas, xtx_inv_xt
def iwls(y, x, family, offset=1.0, ini_betas=None, tol=1.0e-8, max_iter=200, wi=None):
"""
Iteratively re-weighted least squares estimation routine
"""
n_iter = 0
diff = 1.0e6
if ini_betas is None:
betas = np.zeros((x.shape[1], 1), np.float)
else:
betas = ini_betas
if isinstance(family, Binomial):
y = family.link._clean(y)
if isinstance(family, Poisson):
y_off = y/offset
y_off = family.starting_mu(y_off)
v = family.predict(y_off)
mu = family.starting_mu(y)
else:
mu = family.starting_mu(y)
v = family.predict(mu)
while diff > tol and n_iter < max_iter:
n_iter += 1
w = family.weights(mu)
z = v + (family.link.deriv(mu)*(y-mu))
w = np.sqrt(w)
if type(x) != np.ndarray:
w = sp.csr_matrix(w)
z = sp.csr_matrix(z)
wx = spmultiply(x, w, array_out=False)
wz = spmultiply(z, w, array_out=False)
if wi is None:
n_betas = _compute_betas(wz, wx)
else:
n_betas, xtx_inv_xt = _compute_betas_gwr(wz, wx, wi)
v = spdot(x, n_betas)
mu = family.fitted(v)
if isinstance(family, Poisson):
mu = mu * offset
diff = min(abs(n_betas-betas))
betas = n_betas
if wi is None:
return betas, mu, wx, n_iter
else:
return betas, mu, v, w, z, xtx_inv_xt, n_iter

View File

@ -0,0 +1,953 @@
'''
Defines the link functions to be used with GLM and GEE families.
'''
import numpy as np
import scipy.stats
FLOAT_EPS = np.finfo(float).eps
class Link(object):
"""
A generic link function for one-parameter exponential family.
`Link` does nothing, but lays out the methods expected of any subclass.
"""
def __call__(self, p):
"""
Return the value of the link function. This is just a placeholder.
Parameters
----------
p : array-like
Probabilities
Returns
-------
g(p) : array-like
The value of the link function g(p) = z
"""
return NotImplementedError
def inverse(self, z):
"""
Inverse of the link function. Just a placeholder.
Parameters
----------
z : array-like
`z` is usually the linear predictor of the transformed variable
in the IRLS algorithm for GLM.
Returns
-------
g^(-1)(z) : array
The value of the inverse of the link function g^(-1)(z) = p
"""
return NotImplementedError
def deriv(self, p):
"""
Derivative of the link function g'(p). Just a placeholder.
Parameters
----------
p : array-like
Returns
-------
g'(p) : array
The value of the derivative of the link function g'(p)
"""
return NotImplementedError
def deriv2(self, p):
"""Second derivative of the link function g''(p)
implemented through numerical differentiation
"""
from statsmodels.tools.numdiff import approx_fprime_cs
# TODO: workaround proplem with numdiff for 1d
return np.diag(approx_fprime_cs(p, self.deriv))
def inverse_deriv(self, z):
"""
Derivative of the inverse link function g^(-1)(z).
Notes
-----
This reference implementation gives the correct result but is
inefficient, so it can be overriden in subclasses.
Parameters
----------
z : array-like
`z` is usually the linear predictor for a GLM or GEE model.
Returns
-------
g'^(-1)(z) : array
The value of the derivative of the inverse of the link function
"""
return 1 / self.deriv(self.inverse(z))
class Logit(Link):
"""
The logit transform
Notes
-----
call and derivative use a private method _clean to make trim p by
machine epsilon so that p is in (0,1)
Alias of Logit:
logit = Logit()
"""
def _clean(self, p):
"""
Clip logistic values to range (eps, 1-eps)
Parameters
-----------
p : array-like
Probabilities
Returns
--------
pclip : array
Clipped probabilities
"""
return np.clip(p, FLOAT_EPS, 1. - FLOAT_EPS)
def __call__(self, p):
"""
The logit transform
Parameters
----------
p : array-like
Probabilities
Returns
-------
z : array
Logit transform of `p`
Notes
-----
g(p) = log(p / (1 - p))
"""
p = self._clean(p)
return np.log(p / (1. - p))
def inverse(self, z):
"""
Inverse of the logit transform
Parameters
----------
z : array-like
The value of the logit transform at `p`
Returns
-------
p : array
Probabilities
Notes
-----
g^(-1)(z) = exp(z)/(1+exp(z))
"""
z = np.asarray(z)
t = np.exp(-z)
return 1. / (1. + t)
def deriv(self, p):
"""
Derivative of the logit transform
Parameters
----------
p: array-like
Probabilities
Returns
-------
g'(p) : array
Value of the derivative of logit transform at `p`
Notes
-----
g'(p) = 1 / (p * (1 - p))
Alias for `Logit`:
logit = Logit()
"""
p = self._clean(p)
return 1. / (p * (1 - p))
def inverse_deriv(self, z):
"""
Derivative of the inverse of the logit transform
Parameters
----------
z : array-like
`z` is usually the linear predictor for a GLM or GEE model.
Returns
-------
g'^(-1)(z) : array
The value of the derivative of the inverse of the logit function
"""
t = np.exp(z)
return t/(1 + t)**2
def deriv2(self, p):
"""
Second derivative of the logit function.
Parameters
----------
p : array-like
probabilities
Returns
-------
g''(z) : array
The value of the second derivative of the logit function
"""
v = p * (1 - p)
return (2*p - 1) / v**2
class logit(Logit):
pass
class Power(Link):
"""
The power transform
Parameters
----------
power : float
The exponent of the power transform
Notes
-----
Aliases of Power:
inverse = Power(power=-1)
sqrt = Power(power=.5)
inverse_squared = Power(power=-2.)
identity = Power(power=1.)
"""
def __init__(self, power=1.):
self.power = power
def __call__(self, p):
"""
Power transform link function
Parameters
----------
p : array-like
Mean parameters
Returns
-------
z : array-like
Power transform of x
Notes
-----
g(p) = x**self.power
"""
z = np.power(p, self.power)
return z
def inverse(self, z):
"""
Inverse of the power transform link function
Parameters
----------
`z` : array-like
Value of the transformed mean parameters at `p`
Returns
-------
`p` : array
Mean parameters
Notes
-----
g^(-1)(z`) = `z`**(1/`power`)
"""
p = np.power(z, 1. / self.power)
return p
def deriv(self, p):
"""
Derivative of the power transform
Parameters
----------
p : array-like
Mean parameters
Returns
--------
g'(p) : array
Derivative of power transform of `p`
Notes
-----
g'(`p`) = `power` * `p`**(`power` - 1)
"""
return self.power * np.power(p, self.power - 1)
def deriv2(self, p):
"""
Second derivative of the power transform
Parameters
----------
p : array-like
Mean parameters
Returns
--------
g''(p) : array
Second derivative of the power transform of `p`
Notes
-----
g''(`p`) = `power` * (`power` - 1) * `p`**(`power` - 2)
"""
return self.power * (self.power - 1) * np.power(p, self.power - 2)
def inverse_deriv(self, z):
"""
Derivative of the inverse of the power transform
Parameters
----------
z : array-like
`z` is usually the linear predictor for a GLM or GEE model.
Returns
-------
g^(-1)'(z) : array
The value of the derivative of the inverse of the power transform
function
"""
return np.power(z, (1 - self.power)/self.power) / self.power
class inverse_power(Power):
"""
The inverse transform
Notes
-----
g(p) = 1/p
Alias of statsmodels.family.links.Power(power=-1.)
"""
def __init__(self):
super(inverse_power, self).__init__(power=-1.)
class sqrt(Power):
"""
The square-root transform
Notes
-----
g(`p`) = sqrt(`p`)
Alias of statsmodels.family.links.Power(power=.5)
"""
def __init__(self):
super(sqrt, self).__init__(power=.5)
class inverse_squared(Power):
"""
The inverse squared transform
Notes
-----
g(`p`) = 1/(`p`\ \*\*2)
Alias of statsmodels.family.links.Power(power=2.)
"""
def __init__(self):
super(inverse_squared, self).__init__(power=-2.)
class identity(Power):
"""
The identity transform
Notes
-----
g(`p`) = `p`
Alias of statsmodels.family.links.Power(power=1.)
"""
def __init__(self):
super(identity, self).__init__(power=1.)
class Log(Link):
"""
The log transform
Notes
-----
call and derivative call a private method _clean to trim the data by
machine epsilon so that p is in (0,1). log is an alias of Log.
"""
def _clean(self, x):
return np.clip(x, FLOAT_EPS, np.inf)
def __call__(self, p, **extra):
"""
Log transform link function
Parameters
----------
x : array-like
Mean parameters
Returns
-------
z : array
log(x)
Notes
-----
g(p) = log(p)
"""
x = self._clean(p)
return np.log(x)
def inverse(self, z):
"""
Inverse of log transform link function
Parameters
----------
z : array
The inverse of the link function at `p`
Returns
-------
p : array
The mean probabilities given the value of the inverse `z`
Notes
-----
g^{-1}(z) = exp(z)
"""
return np.exp(z)
def deriv(self, p):
"""
Derivative of log transform link function
Parameters
----------
p : array-like
Mean parameters
Returns
-------
g'(p) : array
derivative of log transform of x
Notes
-----
g'(x) = 1/x
"""
p = self._clean(p)
return 1. / p
def deriv2(self, p):
"""
Second derivative of the log transform link function
Parameters
----------
p : array-like
Mean parameters
Returns
-------
g''(p) : array
Second derivative of log transform of x
Notes
-----
g''(x) = -1/x^2
"""
p = self._clean(p)
return -1. / p**2
def inverse_deriv(self, z):
"""
Derivative of the inverse of the log transform link function
Parameters
----------
z : array
The inverse of the link function at `p`
Returns
-------
g^(-1)'(z) : array
The value of the derivative of the inverse of the log function,
the exponential function
"""
return np.exp(z)
class log(Log):
"""
The log transform
Notes
-----
log is a an alias of Log.
"""
pass
# TODO: the CDFLink is untested
class CDFLink(Logit):
"""
The use the CDF of a scipy.stats distribution
CDFLink is a subclass of logit in order to use its _clean method
for the link and its derivative.
Parameters
----------
dbn : scipy.stats distribution
Default is dbn=scipy.stats.norm
Notes
-----
The CDF link is untested.
"""
def __init__(self, dbn=scipy.stats.norm):
self.dbn = dbn
def __call__(self, p):
"""
CDF link function
Parameters
----------
p : array-like
Mean parameters
Returns
-------
z : array
(ppf) inverse of CDF transform of p
Notes
-----
g(`p`) = `dbn`.ppf(`p`)
"""
p = self._clean(p)
return self.dbn.ppf(p)
def inverse(self, z):
"""
The inverse of the CDF link
Parameters
----------
z : array-like
The value of the inverse of the link function at `p`
Returns
-------
p : array
Mean probabilities. The value of the inverse of CDF link of `z`
Notes
-----
g^(-1)(`z`) = `dbn`.cdf(`z`)
"""
return self.dbn.cdf(z)
def deriv(self, p):
"""
Derivative of CDF link
Parameters
----------
p : array-like
mean parameters
Returns
-------
g'(p) : array
The derivative of CDF transform at `p`
Notes
-----
g'(`p`) = 1./ `dbn`.pdf(`dbn`.ppf(`p`))
"""
p = self._clean(p)
return 1. / self.dbn.pdf(self.dbn.ppf(p))
def deriv2(self, p):
"""
Second derivative of the link function g''(p)
implemented through numerical differentiation
"""
from statsmodels.tools.numdiff import approx_fprime
p = np.atleast_1d(p)
# Note: special function for norm.ppf does not support complex
return np.diag(approx_fprime(p, self.deriv, centered=True))
def inverse_deriv(self, z):
"""
Derivative of the inverse of the CDF transformation link function
Parameters
----------
z : array
The inverse of the link function at `p`
Returns
-------
g^(-1)'(z) : array
The value of the derivative of the inverse of the logit function
"""
return 1/self.deriv(self.inverse(z))
class probit(CDFLink):
"""
The probit (standard normal CDF) transform
Notes
--------
g(p) = scipy.stats.norm.ppf(p)
probit is an alias of CDFLink.
"""
pass
class cauchy(CDFLink):
"""
The Cauchy (standard Cauchy CDF) transform
Notes
-----
g(p) = scipy.stats.cauchy.ppf(p)
cauchy is an alias of CDFLink with dbn=scipy.stats.cauchy
"""
def __init__(self):
super(cauchy, self).__init__(dbn=scipy.stats.cauchy)
def deriv2(self, p):
"""
Second derivative of the Cauchy link function.
Parameters
----------
p: array-like
Probabilities
Returns
-------
g''(p) : array
Value of the second derivative of Cauchy link function at `p`
"""
a = np.pi * (p - 0.5)
d2 = 2 * np.pi**2 * np.sin(a) / np.cos(a)**3
return d2
class CLogLog(Logit):
"""
The complementary log-log transform
CLogLog inherits from Logit in order to have access to its _clean method
for the link and its derivative.
Notes
-----
CLogLog is untested.
"""
def __call__(self, p):
"""
C-Log-Log transform link function
Parameters
----------
p : array
Mean parameters
Returns
-------
z : array
The CLogLog transform of `p`
Notes
-----
g(p) = log(-log(1-p))
"""
p = self._clean(p)
return np.log(-np.log(1 - p))
def inverse(self, z):
"""
Inverse of C-Log-Log transform link function
Parameters
----------
z : array-like
The value of the inverse of the CLogLog link function at `p`
Returns
-------
p : array
Mean parameters
Notes
-----
g^(-1)(`z`) = 1-exp(-exp(`z`))
"""
return 1 - np.exp(-np.exp(z))
def deriv(self, p):
"""
Derivative of C-Log-Log transform link function
Parameters
----------
p : array-like
Mean parameters
Returns
-------
g'(p) : array
The derivative of the CLogLog transform link function
Notes
-----
g'(p) = - 1 / ((p-1)*log(1-p))
"""
p = self._clean(p)
return 1. / ((p - 1) * (np.log(1 - p)))
def deriv2(self, p):
"""
Second derivative of the C-Log-Log ink function
Parameters
----------
p : array-like
Mean parameters
Returns
-------
g''(p) : array
The second derivative of the CLogLog link function
"""
p = self._clean(p)
fl = np.log(1 - p)
d2 = -1 / ((1 - p)**2 * fl)
d2 *= 1 + 1 / fl
return d2
def inverse_deriv(self, z):
"""
Derivative of the inverse of the C-Log-Log transform link function
Parameters
----------
z : array-like
The value of the inverse of the CLogLog link function at `p`
Returns
-------
g^(-1)'(z) : array
The derivative of the inverse of the CLogLog link function
"""
return np.exp(z - np.exp(z))
class cloglog(CLogLog):
"""
The CLogLog transform link function.
Notes
-----
g(`p`) = log(-log(1-`p`))
cloglog is an alias for CLogLog
cloglog = CLogLog()
"""
pass
class NegativeBinomial(object):
'''
The negative binomial link function
Parameters
----------
alpha : float, optional
Alpha is the ancillary parameter of the Negative Binomial link
function. It is assumed to be nonstochastic. The default value is 1.
Permissible values are usually assumed to be in (.01, 2).
'''
def __init__(self, alpha=1.):
self.alpha = alpha
def _clean(self, x):
return np.clip(x, FLOAT_EPS, np.inf)
def __call__(self, p):
'''
Negative Binomial transform link function
Parameters
----------
p : array-like
Mean parameters
Returns
-------
z : array
The negative binomial transform of `p`
Notes
-----
g(p) = log(p/(p + 1/alpha))
'''
p = self._clean(p)
return np.log(p/(p + 1/self.alpha))
def inverse(self, z):
'''
Inverse of the negative binomial transform
Parameters
-----------
z : array-like
The value of the inverse of the negative binomial link at `p`.
Returns
-------
p : array
Mean parameters
Notes
-----
g^(-1)(z) = exp(z)/(alpha*(1-exp(z)))
'''
return -1/(self.alpha * (1 - np.exp(-z)))
def deriv(self, p):
'''
Derivative of the negative binomial transform
Parameters
----------
p : array-like
Mean parameters
Returns
-------
g'(p) : array
The derivative of the negative binomial transform link function
Notes
-----
g'(x) = 1/(x+alpha*x^2)
'''
return 1/(p + self.alpha * p**2)
def deriv2(self,p):
'''
Second derivative of the negative binomial link function.
Parameters
----------
p : array-like
Mean parameters
Returns
-------
g''(p) : array
The second derivative of the negative binomial transform link
function
Notes
-----
g''(x) = -(1+2*alpha*x)/(x+alpha*x^2)^2
'''
numer = -(1 + 2 * self.alpha * p)
denom = (p + self.alpha * p**2)**2
return numer / denom
def inverse_deriv(self, z):
'''
Derivative of the inverse of the negative binomial transform
Parameters
-----------
z : array-like
Usually the linear predictor for a GLM or GEE model
Returns
-------
g^(-1)'(z) : array
The value of the derivative of the inverse of the negative
binomial link
'''
t = np.exp(z)
return t / (self.alpha * (1-t)**2)
class nbinom(NegativeBinomial):
"""
The negative binomial link function.
Notes
-----
g(p) = log(p/(p + 1/alpha))
nbinom is an alias of NegativeBinomial.
nbinom = NegativeBinomial(alpha=1.)
"""
pass

View File

@ -0,0 +1,993 @@
"""
Tests for generalized linear models. Majority of code either directly borrowed
or closely adapted from statsmodels package. Model results verfiied using glm
function in R and GLM function in statsmodels.
"""
__author__ = 'Taylor Oshan tayoshan@gmail.com'
from pysal.contrib.glm.glm import GLM
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial, QuasiPoisson
import numpy as np
import pysal
import unittest
import math
class TestGaussian(unittest.TestCase):
"""
Tests for Poisson GLM
"""
def setUp(self):
db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')
y = np.array(db.by_col("HOVAL"))
self.y = np.reshape(y, (49,1))
X = []
X.append(db.by_col("INC"))
X.append(db.by_col("CRIME"))
self.X = np.array(X).T
def testIWLS(self):
model = GLM(self.y, self.X, family=Gaussian())
results = model.fit()
self.assertEqual(results.n, 49)
self.assertEqual(results.df_model, 2)
self.assertEqual(results.df_resid, 46)
self.assertEqual(results.aic, 408.73548964604873)
self.assertEqual(results.bic, 10467.991340493107)
self.assertEqual(results.deviance, 10647.015074206196)
self.assertEqual(results.llf, -201.36774482302437)
self.assertEqual(results.null_deviance, 16367.794631703124)
self.assertEqual(results.scale, 231.45684943926514)
np.testing.assert_allclose(results.params, [ 46.42818268, 0.62898397,
-0.48488854])
np.testing.assert_allclose(results.bse, [ 13.19175703, 0.53591045,
0.18267291])
np.testing.assert_allclose(results.cov_params(),
[[ 1.74022453e+02, -6.52060364e+00, -2.15109867e+00],
[ -6.52060364e+00, 2.87200008e-01, 6.80956787e-02],
[ -2.15109867e+00, 6.80956787e-02, 3.33693910e-02]])
np.testing.assert_allclose(results.tvalues, [ 3.51948437, 1.17367365,
-2.65440864])
np.testing.assert_allclose(results.pvalues, [ 0.00043239, 0.24052577,
0.00794475], atol=1.0e-8)
np.testing.assert_allclose(results.conf_int(),
[[ 20.57281401, 72.28355135],
[ -0.42138121, 1.67934915],
[ -0.84292086, -0.12685622]])
np.testing.assert_allclose(results.normalized_cov_params,
[[ 7.51857004e-01, -2.81720055e-02, -9.29373521e-03],
[ -2.81720055e-02, 1.24083607e-03, 2.94204638e-04],
[ -9.29373521e-03, 2.94204638e-04, 1.44171110e-04]])
np.testing.assert_allclose(results.mu,
[ 51.08752105, 50.66601521, 41.61367567, 33.53969014,
28.90638232, 43.87074227, 51.64910882, 34.92671563,
42.69267622, 38.49449134, 20.92815471, 25.25228436,
29.78223486, 25.02403635, 29.07959539, 24.63352275,
34.71372149, 33.40443052, 27.29864225, 65.86219802,
33.69854751, 37.44976435, 50.01304928, 36.81219959,
22.02674837, 31.64775955, 27.63563294, 23.7697291 ,
22.43119725, 21.76987089, 48.51169321, 49.05891819,
32.31656426, 44.20550354, 35.49244888, 51.27811308,
36.55047181, 27.37048914, 48.78812922, 57.31744163,
51.22914162, 54.70515578, 37.06622277, 44.5075759 ,
41.24328983, 49.93821824, 44.85644299, 40.93838609, 47.32045464])
self.assertEqual(results.pearson_chi2, 10647.015074206196)
np.testing.assert_allclose(results.resid_response,
[ 29.37948195, -6.09901421, -15.26367567, -0.33968914,
-5.68138232, -15.12074227, 23.35089118, 2.19828437,
9.90732178, 57.90551066, -1.22815371, -5.35228436,
11.91776614, 17.87596565, -11.07959539, -5.83352375,
7.03627851, 26.59556948, 3.30135775, 15.40479998,
-13.72354751, -6.99976335, -2.28004728, 16.38780141,
-4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
10.06880275, 0.73012911, -16.71169421, -8.75891919,
-8.71656426, -15.75550254, -8.49244888, -14.97811408,
6.74952719, -4.67048814, -9.18813122, 4.63255937,
-9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
-13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564])
np.testing.assert_allclose(results.resid_working,
[ 29.37948195, -6.09901421, -15.26367567, -0.33968914,
-5.68138232, -15.12074227, 23.35089118, 2.19828437,
9.90732178, 57.90551066, -1.22815371, -5.35228436,
11.91776614, 17.87596565, -11.07959539, -5.83352375,
7.03627851, 26.59556948, 3.30135775, 15.40479998,
-13.72354751, -6.99976335, -2.28004728, 16.38780141,
-4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
10.06880275, 0.73012911, -16.71169421, -8.75891919,
-8.71656426, -15.75550254, -8.49244888, -14.97811408,
6.74952719, -4.67048814, -9.18813122, 4.63255937,
-9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
-13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564])
np.testing.assert_allclose(results.resid_pearson,
[ 29.37948195, -6.09901421, -15.26367567, -0.33968914,
-5.68138232, -15.12074227, 23.35089118, 2.19828437,
9.90732178, 57.90551066, -1.22815371, -5.35228436,
11.91776614, 17.87596565, -11.07959539, -5.83352375,
7.03627851, 26.59556948, 3.30135775, 15.40479998,
-13.72354751, -6.99976335, -2.28004728, 16.38780141,
-4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
10.06880275, 0.73012911, -16.71169421, -8.75891919,
-8.71656426, -15.75550254, -8.49244888, -14.97811408,
6.74952719, -4.67048814, -9.18813122, 4.63255937,
-9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
-13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564])
np.testing.assert_allclose(results.resid_anscombe,
[ 29.37948195, -6.09901421, -15.26367567, -0.33968914,
-5.68138232, -15.12074227, 23.35089118, 2.19828437,
9.90732178, 57.90551066, -1.22815371, -5.35228436,
11.91776614, 17.87596565, -11.07959539, -5.83352375,
7.03627851, 26.59556948, 3.30135775, 15.40479998,
-13.72354751, -6.99976335, -2.28004728, 16.38780141,
-4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
10.06880275, 0.73012911, -16.71169421, -8.75891919,
-8.71656426, -15.75550254, -8.49244888, -14.97811408,
6.74952719, -4.67048814, -9.18813122, 4.63255937,
-9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
-13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564])
np.testing.assert_allclose(results.resid_deviance,
[ 29.37948195, -6.09901421, -15.26367567, -0.33968914,
-5.68138232, -15.12074227, 23.35089118, 2.19828437,
9.90732178, 57.90551066, -1.22815371, -5.35228436,
11.91776614, 17.87596565, -11.07959539, -5.83352375,
7.03627851, 26.59556948, 3.30135775, 15.40479998,
-13.72354751, -6.99976335, -2.28004728, 16.38780141,
-4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
10.06880275, 0.73012911, -16.71169421, -8.75891919,
-8.71656426, -15.75550254, -8.49244888, -14.97811408,
6.74952719, -4.67048814, -9.18813122, 4.63255937,
-9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
-13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564])
np.testing.assert_allclose(results.null,
[ 38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447,
38.43622447, 38.43622447, 38.43622447, 38.43622447, 38.43622447])
self.assertAlmostEqual(results.D2, .349514377851)
self.assertAlmostEqual(results.adj_D2, 0.32123239427957673)
class TestPoisson(unittest.TestCase):
def setUp(self):
db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')
y = np.array(db.by_col("HOVAL"))
y = np.reshape(y, (49,1))
self.y = np.round(y).astype(int)
X = []
X.append(db.by_col("INC"))
X.append(db.by_col("CRIME"))
self.X = np.array(X).T
def testIWLS(self):
model = GLM(self.y, self.X, family=Poisson())
results = model.fit()
self.assertEqual(results.n, 49)
self.assertEqual(results.df_model, 2)
self.assertEqual(results.df_resid, 46)
self.assertAlmostEqual(results.aic, 500.85184179938756)
self.assertAlmostEqual(results.bic, 51.436404535087661)
self.assertAlmostEqual(results.deviance, 230.46013824817649)
self.assertAlmostEqual(results.llf, -247.42592089969378)
self.assertAlmostEqual(results.null_deviance, 376.97293610347361)
self.assertEqual(results.scale, 1.0)
np.testing.assert_allclose(results.params, [ 3.92159085, 0.01183491,
-0.01371397], atol=1.0e-8)
np.testing.assert_allclose(results.bse, [ 0.13049161, 0.00511599,
0.00193769], atol=1.0e-8)
np.testing.assert_allclose(results.cov_params(),
[[ 1.70280610e-02, -6.18628383e-04, -2.21386966e-04],
[ -6.18628383e-04, 2.61733917e-05, 6.77496445e-06],
[ -2.21386966e-04, 6.77496445e-06, 3.75463502e-06]])
np.testing.assert_allclose(results.tvalues, [ 30.0524361 , 2.31331634,
-7.07748998])
np.testing.assert_allclose(results.pvalues, [ 2.02901657e-198,
2.07052532e-002, 1.46788805e-012])
np.testing.assert_allclose(results.conf_int(),
[[ 3.66583199e+00, 4.17734972e+00],
[ 1.80774841e-03, 2.18620753e-02],
[ -1.75117666e-02, -9.91616901e-03]])
np.testing.assert_allclose(results.normalized_cov_params,
[[ 1.70280610e-02, -6.18628383e-04, -2.21386966e-04],
[ -6.18628383e-04, 2.61733917e-05, 6.77496445e-06],
[ -2.21386966e-04, 6.77496445e-06, 3.75463502e-06]])
np.testing.assert_allclose(results.mu,
[ 51.26831574, 50.15022766, 40.06142973, 34.13799739,
28.76119226, 42.6836241 , 55.64593703, 34.08277997,
40.90389582, 37.19727958, 23.47459217, 26.12384057,
29.78303507, 25.96888223, 29.14073823, 26.04369592,
34.18996367, 32.28924005, 27.42284396, 72.69207879,
33.05316347, 36.52276972, 49.2551479 , 35.33439632,
24.07252457, 31.67153709, 27.81699478, 25.38021219,
24.31759259, 23.13586161, 48.40724678, 48.57969818,
31.92596006, 43.3679231 , 34.32925819, 51.78908089,
34.49778584, 27.56236198, 48.34273194, 57.50829097,
50.66038226, 54.68701352, 35.77103116, 43.21886784,
40.07615759, 49.98658004, 43.13352883, 40.28520774, 46.28910294])
self.assertAlmostEqual(results.pearson_chi2, 264.62262932090221)
np.testing.assert_allclose(results.resid_response,
[ 28.73168426, -5.15022766, -14.06142973, -1.13799739,
-5.76119226, -13.6836241 , 19.35406297, 2.91722003,
12.09610418, 58.80272042, -3.47459217, -6.12384057,
12.21696493, 17.03111777, -11.14073823, -7.04369592,
7.81003633, 27.71075995, 3.57715604, 8.30792121,
-13.05316347, -6.52276972, -1.2551479 , 17.66560368,
-6.07252457, -11.67153709, 6.18300522, -2.38021219,
7.68240741, -1.13586161, -16.40724678, -8.57969818,
-7.92596006, -15.3679231 , -7.32925819, -15.78908089,
8.50221416, -4.56236198, -8.34273194, 4.49170903,
-8.66038226, -10.68701352, -9.77103116, -9.21886784,
-12.07615759, 26.01341996, -1.13352883, -13.28520774, -10.28910294])
np.testing.assert_allclose(results.resid_working,
[ 1473.02506034, -258.28508941, -563.32097891, -38.84895192,
-165.69875817, -584.06666725, 1076.97496919, 99.42696848,
494.77778514, 2187.30123163, -81.56463405, -159.97823479,
363.858295 , 442.27909165, -324.64933645, -183.44387481,
267.02485844, 894.75938 , 98.09579187, 603.9200634 ,
-431.44834594, -238.2296165 , -61.82249568, 624.20344168,
-146.18099686, -369.65551968, 171.99262399, -60.41029031,
186.81765356, -26.27913713, -794.22964417, -416.79914795,
-253.04388425, -666.47490701, -251.6079969 , -817.70198717,
293.30756327, -125.74947222, -403.31045369, 258.31051005,
-438.73827602, -584.440853 , -349.51985996, -398.42903071,
-483.96599444, 1300.32189904, -48.89309853, -535.19735391,
-476.27334527])
np.testing.assert_allclose(results.resid_pearson,
[ 4.01269878, -0.72726045, -2.221602 , -0.19477008, -1.07425881,
-2.09445239, 2.59451042, 0.49969118, 1.89131202, 9.64143836,
-0.71714142, -1.19813392, 2.23861212, 3.34207756, -2.0637814 ,
-1.3802231 , 1.33568403, 4.87662684, 0.68309584, 0.97442591,
-2.27043598, -1.07931992, -0.17884182, 2.97186889, -1.23768025,
-2.07392709, 1.1723155 , -0.47246327, 1.55789092, -0.23614708,
-2.35819937, -1.23096188, -1.40274877, -2.33362391, -1.25091503,
-2.19400568, 1.44755952, -0.8690235 , -1.19989348, 0.59230634,
-1.21675413, -1.44515442, -1.63370888, -1.40229988, -1.90759306,
3.67934693, -0.17259375, -2.09312684, -1.51230062])
np.testing.assert_allclose(results.resid_anscombe,
[ 3.70889134, -0.74031295, -2.37729865, -0.19586855, -1.11374751,
-2.22611959, 2.46352013, 0.49282126, 1.80857757, 8.06444452,
-0.73610811, -1.25061371, 2.10820431, 3.05467547, -2.22437611,
-1.45136173, 1.28939698, 4.35942058, 0.66904552, 0.95674923,
-2.45438937, -1.11429881, -0.17961012, 2.76715848, -1.29658591,
-2.22816691, 1.13269136, -0.48017382, 1.48562248, -0.23812278,
-2.51664399, -1.2703721 , -1.4683091 , -2.49907536, -1.30026484,
-2.32398309, 1.39380683, -0.89495368, -1.23735395, 0.58485202,
-1.25435224, -1.4968484 , -1.71888038, -1.45756652, -2.01906267,
3.41729922, -0.17335867, -2.22921828, -1.57470549])
np.testing.assert_allclose(results.resid_deviance,
[ 3.70529668, -0.74027329, -2.37536322, -0.19586751, -1.11349765,
-2.22466106, 2.46246446, 0.4928057 , 1.80799655, 8.02696525,
-0.73602255, -1.25021555, 2.10699958, 3.05084608, -2.22214376,
-1.45072221, 1.28913747, 4.35106213, 0.6689982 , 0.95669662,
-2.45171913, -1.11410444, -0.17960956, 2.76494217, -1.29609865,
-2.22612429, 1.13247453, -0.48015254, 1.48508549, -0.23812 ,
-2.51476072, -1.27015583, -1.46777697, -2.49699318, -1.29992892,
-2.32263069, 1.39348459, -0.89482132, -1.23715363, 0.58483655,
-1.25415329, -1.49653039, -1.7181055 , -1.45719072, -2.01791949,
3.41437156, -0.1733581 , -2.22765605, -1.57426046])
np.testing.assert_allclose(results.null,
[ 38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143, 38.42857143])
self.assertAlmostEqual(results.D2, .388656011675)
self.assertAlmostEqual(results.adj_D2, 0.36207583826952761)#.375648692774)
def testQuasi(self):
model = GLM(self.y, self.X, family=QuasiPoisson())
results = model.fit()
self.assertEqual(results.n, 49)
self.assertEqual(results.df_model, 2)
self.assertEqual(results.df_resid, 46)
self.assertTrue(math.isnan(results.aic))
self.assertAlmostEqual(results.bic, 51.436404535087661)
self.assertAlmostEqual(results.deviance, 230.46013824817649)
self.assertTrue(math.isnan(results.llf))
self.assertAlmostEqual(results.null_deviance, 376.97293610347361)
self.assertAlmostEqual(results.scale, 5.7526658548022223)
np.testing.assert_allclose(results.params, [ 3.92159085, 0.01183491,
-0.01371397], atol=1.0e-8)
np.testing.assert_allclose(results.bse, [ 0.31298042, 0.01227057,
0.00464749], atol=1.0e-8)
np.testing.assert_allclose(results.cov_params(),
[[ 9.79567451e-02, -3.55876238e-03, -1.27356524e-03],
[ -3.55876238e-03, 1.50566777e-04, 3.89741067e-05],
[ -1.27356524e-03, 3.89741067e-05, 2.15991606e-05]])
np.testing.assert_allclose(results.tvalues, [ 12.52982796, 0.96449604,
-2.95083339])
np.testing.assert_allclose(results.pvalues, [ 5.12737770e-36,
3.34797291e-01, 3.16917819e-03])
np.testing.assert_allclose(results.conf_int(),
[[ 3.3081605 , 4.53502121],
[-0.01221495, 0.03588478],
[-0.02282288, -0.00460506]], atol=1.0e-8)
np.testing.assert_allclose(results.normalized_cov_params,
[[ 1.70280610e-02, -6.18628383e-04, -2.21386966e-04],
[ -6.18628383e-04, 2.61733917e-05, 6.77496445e-06],
[ -2.21386966e-04, 6.77496445e-06, 3.75463502e-06]])
np.testing.assert_allclose(results.mu,
[ 51.26831574, 50.15022766, 40.06142973, 34.13799739,
28.76119226, 42.6836241 , 55.64593703, 34.08277997,
40.90389582, 37.19727958, 23.47459217, 26.12384057,
29.78303507, 25.96888223, 29.14073823, 26.04369592,
34.18996367, 32.28924005, 27.42284396, 72.69207879,
33.05316347, 36.52276972, 49.2551479 , 35.33439632,
24.07252457, 31.67153709, 27.81699478, 25.38021219,
24.31759259, 23.13586161, 48.40724678, 48.57969818,
31.92596006, 43.3679231 , 34.32925819, 51.78908089,
34.49778584, 27.56236198, 48.34273194, 57.50829097,
50.66038226, 54.68701352, 35.77103116, 43.21886784,
40.07615759, 49.98658004, 43.13352883, 40.28520774, 46.28910294])
self.assertAlmostEqual(results.pearson_chi2, 264.62262932090221)
np.testing.assert_allclose(results.resid_response,
[ 28.73168426, -5.15022766, -14.06142973, -1.13799739,
-5.76119226, -13.6836241 , 19.35406297, 2.91722003,
12.09610418, 58.80272042, -3.47459217, -6.12384057,
12.21696493, 17.03111777, -11.14073823, -7.04369592,
7.81003633, 27.71075995, 3.57715604, 8.30792121,
-13.05316347, -6.52276972, -1.2551479 , 17.66560368,
-6.07252457, -11.67153709, 6.18300522, -2.38021219,
7.68240741, -1.13586161, -16.40724678, -8.57969818,
-7.92596006, -15.3679231 , -7.32925819, -15.78908089,
8.50221416, -4.56236198, -8.34273194, 4.49170903,
-8.66038226, -10.68701352, -9.77103116, -9.21886784,
-12.07615759, 26.01341996, -1.13352883, -13.28520774, -10.28910294])
np.testing.assert_allclose(results.resid_working,
[ 1473.02506034, -258.28508941, -563.32097891, -38.84895192,
-165.69875817, -584.06666725, 1076.97496919, 99.42696848,
494.77778514, 2187.30123163, -81.56463405, -159.97823479,
363.858295 , 442.27909165, -324.64933645, -183.44387481,
267.02485844, 894.75938 , 98.09579187, 603.9200634 ,
-431.44834594, -238.2296165 , -61.82249568, 624.20344168,
-146.18099686, -369.65551968, 171.99262399, -60.41029031,
186.81765356, -26.27913713, -794.22964417, -416.79914795,
-253.04388425, -666.47490701, -251.6079969 , -817.70198717,
293.30756327, -125.74947222, -403.31045369, 258.31051005,
-438.73827602, -584.440853 , -349.51985996, -398.42903071,
-483.96599444, 1300.32189904, -48.89309853, -535.19735391,
-476.27334527])
np.testing.assert_allclose(results.resid_pearson,
[ 4.01269878, -0.72726045, -2.221602 , -0.19477008, -1.07425881,
-2.09445239, 2.59451042, 0.49969118, 1.89131202, 9.64143836,
-0.71714142, -1.19813392, 2.23861212, 3.34207756, -2.0637814 ,
-1.3802231 , 1.33568403, 4.87662684, 0.68309584, 0.97442591,
-2.27043598, -1.07931992, -0.17884182, 2.97186889, -1.23768025,
-2.07392709, 1.1723155 , -0.47246327, 1.55789092, -0.23614708,
-2.35819937, -1.23096188, -1.40274877, -2.33362391, -1.25091503,
-2.19400568, 1.44755952, -0.8690235 , -1.19989348, 0.59230634,
-1.21675413, -1.44515442, -1.63370888, -1.40229988, -1.90759306,
3.67934693, -0.17259375, -2.09312684, -1.51230062])
np.testing.assert_allclose(results.resid_anscombe,
[ 3.70889134, -0.74031295, -2.37729865, -0.19586855, -1.11374751,
-2.22611959, 2.46352013, 0.49282126, 1.80857757, 8.06444452,
-0.73610811, -1.25061371, 2.10820431, 3.05467547, -2.22437611,
-1.45136173, 1.28939698, 4.35942058, 0.66904552, 0.95674923,
-2.45438937, -1.11429881, -0.17961012, 2.76715848, -1.29658591,
-2.22816691, 1.13269136, -0.48017382, 1.48562248, -0.23812278,
-2.51664399, -1.2703721 , -1.4683091 , -2.49907536, -1.30026484,
-2.32398309, 1.39380683, -0.89495368, -1.23735395, 0.58485202,
-1.25435224, -1.4968484 , -1.71888038, -1.45756652, -2.01906267,
3.41729922, -0.17335867, -2.22921828, -1.57470549])
np.testing.assert_allclose(results.resid_deviance,
[ 3.70529668, -0.74027329, -2.37536322, -0.19586751, -1.11349765,
-2.22466106, 2.46246446, 0.4928057 , 1.80799655, 8.02696525,
-0.73602255, -1.25021555, 2.10699958, 3.05084608, -2.22214376,
-1.45072221, 1.28913747, 4.35106213, 0.6689982 , 0.95669662,
-2.45171913, -1.11410444, -0.17960956, 2.76494217, -1.29609865,
-2.22612429, 1.13247453, -0.48015254, 1.48508549, -0.23812 ,
-2.51476072, -1.27015583, -1.46777697, -2.49699318, -1.29992892,
-2.32263069, 1.39348459, -0.89482132, -1.23715363, 0.58483655,
-1.25415329, -1.49653039, -1.7181055 , -1.45719072, -2.01791949,
3.41437156, -0.1733581 , -2.22765605, -1.57426046])
np.testing.assert_allclose(results.null,
[ 38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143,
38.42857143, 38.42857143, 38.42857143, 38.42857143, 38.42857143])
self.assertAlmostEqual(results.D2, .388656011675)
self.assertAlmostEqual(results.adj_D2, 0.36207583826952761)
class TestBinomial(unittest.TestCase):
def setUp(self):
#London house price data
#y: 'BATH2'
y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
self.y = y.reshape((316,1))
#X: 'FLOORSZ'
X = np.array([ 77, 75, 64, 95, 107, 100, 81, 151, 98, 260, 171, 161, 91,
80, 50, 85, 52, 69, 60, 84, 155, 97, 69, 126, 90, 43,
51, 41, 140, 80, 52, 86, 66, 60, 40, 155, 138, 97, 115,
148, 206, 60, 53, 96, 88, 160, 31, 43, 154, 60, 131, 60,
46, 61, 125, 150, 76, 92, 96, 100, 105, 72, 48, 41, 72,
65, 60, 65, 98, 33, 144, 111, 91, 108, 38, 48, 95, 63,
98, 129, 108, 51, 131, 66, 48, 127, 76, 68, 52, 64, 57,
121, 67, 76, 112, 96, 90, 53, 93, 64, 97, 58, 44, 157,
53, 70, 71, 167, 47, 70, 96, 77, 75, 71, 67, 47, 71,
90, 69, 64, 65, 95, 60, 60, 65, 54, 121, 105, 50, 85,
69, 69, 62, 65, 93, 93, 70, 62, 155, 68, 117, 80, 80,
75, 98, 114, 86, 70, 50, 51, 163, 124, 59, 95, 51, 63,
85, 53, 46, 102, 114, 83, 47, 40, 63, 123, 100, 63, 110,
79, 98, 99, 120, 52, 48, 37, 81, 30, 88, 50, 35, 116,
67, 45, 80, 86, 109, 59, 75, 60, 71, 141, 121, 50, 168,
90, 51, 133, 75, 133, 127, 37, 68, 105, 61, 123, 151, 110,
77, 220, 94, 77, 70, 100, 98, 126, 55, 105, 60, 176, 104,
68, 62, 70, 48, 102, 80, 97, 66, 80, 102, 160, 55, 60,
71, 125, 85, 85, 190, 137, 48, 41, 42, 51, 57, 60, 114,
88, 84, 108, 66, 85, 42, 98, 90, 127, 100, 55, 76, 82,
63, 80, 71, 76, 121, 109, 92, 160, 109, 185, 100, 90, 90,
86, 88, 95, 116, 135, 61, 74, 60, 235, 76, 66, 100, 49,
50, 37, 100, 88, 90, 52, 95, 81, 79, 96, 75, 91, 86,
83, 180, 108, 80, 96, 49, 117, 117, 86, 46, 66, 95, 57,
120, 137, 68, 240])
self.X = X.reshape((316,1))
def testIWLS(self):
model = GLM(self.y, self.X, family=Binomial())
results = model.fit()
self.assertEqual(results.n, 316)
self.assertEqual(results.df_model, 1)
self.assertEqual(results.df_resid, 314)
self.assertEqual(results.aic, 155.19347530342466)
self.assertEqual(results.bic, -1656.1095797628657)
self.assertEqual(results.deviance, 151.19347530342466)
self.assertEqual(results.llf, -75.596737651712331)
self.assertEqual(results.null_deviance, 189.16038985881212)
self.assertEqual(results.scale, 1.0)
np.testing.assert_allclose(results.params, [-5.33638276, 0.0287754 ])
np.testing.assert_allclose(results.bse, [ 0.64499904, 0.00518312],
atol=1.0e-8)
np.testing.assert_allclose(results.cov_params(),
[[ 4.16023762e-01, -3.14338457e-03],
[ -3.14338457e-03, 2.68646833e-05]])
np.testing.assert_allclose(results.tvalues, [-8.27347396, 5.55175826])
np.testing.assert_allclose(results.pvalues, [ 1.30111233e-16,
2.82810512e-08])
np.testing.assert_allclose(results.conf_int(),
[[-6.60055765, -4.07220787],
[ 0.01861668, 0.03893412]], atol=1.0e-8)
np.testing.assert_allclose(results.normalized_cov_params,
[[ 4.16023762e-01, -3.14338457e-03],
[ -3.14338457e-03, 2.68646833e-05]])
np.testing.assert_allclose(results.mu,
[ 0.04226237, 0.03999333, 0.02946178, 0.0689636 , 0.09471181,
0.07879431, 0.04717464, 0.27065598, 0.07471691, 0.89522144,
0.39752487, 0.33102718, 0.06192993, 0.04589793, 0.01988679,
0.0526265 , 0.02104007, 0.03386636, 0.02634295, 0.05121018,
0.29396682, 0.07275173, 0.03386636, 0.15307528, 0.06027915,
0.01631789, 0.02045547, 0.01541937, 0.2128508 , 0.04589793,
0.02104007, 0.05407977, 0.0311527 , 0.02634295, 0.01498855,
0.29396682, 0.20336776, 0.07275173, 0.11637537, 0.25395607,
0.64367488, 0.02634295, 0.02164101, 0.07083428, 0.05710047,
0.32468619, 0.01160845, 0.01631789, 0.28803008, 0.02634295,
0.17267234, 0.02634295, 0.01776301, 0.02709115, 0.14938186,
0.26501331, 0.04111287, 0.06362285, 0.07083428, 0.07879431,
0.08989109, 0.03680743, 0.0187955 , 0.01541937, 0.03680743,
0.03029581, 0.02634295, 0.03029581, 0.07471691, 0.01228768,
0.23277197, 0.10505173, 0.06192993, 0.09720799, 0.01416217,
0.0187955 , 0.0689636 , 0.02865003, 0.07471691, 0.16460503,
0.09720799, 0.02045547, 0.17267234, 0.0311527 , 0.0187955 ,
0.15684317, 0.04111287, 0.03293737, 0.02104007, 0.02946178,
0.02421701, 0.1353385 , 0.03203302, 0.04111287, 0.10778798,
0.07083428, 0.06027915, 0.02164101, 0.06535882, 0.02946178,
0.07275173, 0.02490638, 0.01678627, 0.30605146, 0.02164101,
0.03482061, 0.03580075, 0.37030921, 0.0182721 , 0.03482061,
0.07083428, 0.04226237, 0.03999333, 0.03580075, 0.03203302,
0.0182721 , 0.03580075, 0.06027915, 0.03386636, 0.02946178,
0.03029581, 0.0689636 , 0.02634295, 0.02634295, 0.03029581,
0.02225873, 0.1353385 , 0.08989109, 0.01988679, 0.0526265 ,
0.03386636, 0.03386636, 0.02786 , 0.03029581, 0.06535882,
0.06535882, 0.03482061, 0.02786 , 0.29396682, 0.03293737,
0.12242534, 0.04589793, 0.04589793, 0.03999333, 0.07471691,
0.11344884, 0.05407977, 0.03482061, 0.01988679, 0.02045547,
0.34389327, 0.14576223, 0.02561486, 0.0689636 , 0.02045547,
0.02865003, 0.0526265 , 0.02164101, 0.01776301, 0.08307425,
0.11344884, 0.04982997, 0.0182721 , 0.01498855, 0.02865003,
0.14221564, 0.07879431, 0.02865003, 0.10237696, 0.04465416,
0.07471691, 0.07673078, 0.13200634, 0.02104007, 0.0187955 ,
0.01376599, 0.04717464, 0.01128289, 0.05710047, 0.01988679,
0.01300612, 0.11936722, 0.03203302, 0.01726786, 0.04589793,
0.05407977, 0.09976271, 0.02561486, 0.03999333, 0.02634295,
0.03580075, 0.21771181, 0.1353385 , 0.01988679, 0.37704374,
0.06027915, 0.02045547, 0.18104935, 0.03999333, 0.18104935,
0.15684317, 0.01376599, 0.03293737, 0.08989109, 0.02709115,
0.14221564, 0.27065598, 0.10237696, 0.04226237, 0.72991785,
0.06713876, 0.04226237, 0.03482061, 0.07879431, 0.07471691,
0.15307528, 0.02289366, 0.08989109, 0.02634295, 0.43243779,
0.08756457, 0.03293737, 0.02786 , 0.03482061, 0.0187955 ,
0.08307425, 0.04589793, 0.07275173, 0.0311527 , 0.04589793,
0.08307425, 0.32468619, 0.02289366, 0.02634295, 0.03580075,
0.14938186, 0.0526265 , 0.0526265 , 0.53268924, 0.19874565,
0.0187955 , 0.01541937, 0.01586237, 0.02045547, 0.02421701,
0.02634295, 0.11344884, 0.05710047, 0.05121018, 0.09720799,
0.0311527 , 0.0526265 , 0.01586237, 0.07471691, 0.06027915,
0.15684317, 0.07879431, 0.02289366, 0.04111287, 0.04848506,
0.02865003, 0.04589793, 0.03580075, 0.04111287, 0.1353385 ,
0.09976271, 0.06362285, 0.32468619, 0.09976271, 0.49676673,
0.07879431, 0.06027915, 0.06027915, 0.05407977, 0.05710047,
0.0689636 , 0.11936722, 0.18973955, 0.02709115, 0.03890304,
0.02634295, 0.80625182, 0.04111287, 0.0311527 , 0.07879431,
0.0193336 , 0.01988679, 0.01376599, 0.07879431, 0.05710047,
0.06027915, 0.02104007, 0.0689636 , 0.04717464, 0.04465416,
0.07083428, 0.03999333, 0.06192993, 0.05407977, 0.04982997,
0.46087756, 0.09720799, 0.04589793, 0.07083428, 0.0193336 ,
0.12242534, 0.12242534, 0.05407977, 0.01776301, 0.0311527 ,
0.0689636 , 0.02421701, 0.13200634, 0.19874565, 0.03293737,
0.82774282], atol=1.0e-8)
self.assertAlmostEqual(results.pearson_chi2, 271.21110541713801)
np.testing.assert_allclose(results.resid_response,
[-0.04226237, -0.03999333, -0.02946178, -0.0689636 , -0.09471181,
-0.07879431, -0.04717464, -0.27065598, -0.07471691, 0.10477856,
-0.39752487, 0.66897282, -0.06192993, -0.04589793, -0.01988679,
-0.0526265 , -0.02104007, -0.03386636, -0.02634295, -0.05121018,
-0.29396682, 0.92724827, -0.03386636, -0.15307528, -0.06027915,
-0.01631789, -0.02045547, -0.01541937, -0.2128508 , -0.04589793,
-0.02104007, -0.05407977, -0.0311527 , -0.02634295, -0.01498855,
-0.29396682, 0.79663224, -0.07275173, -0.11637537, 0.74604393,
-0.64367488, -0.02634295, -0.02164101, -0.07083428, -0.05710047,
-0.32468619, -0.01160845, -0.01631789, -0.28803008, -0.02634295,
-0.17267234, -0.02634295, -0.01776301, -0.02709115, 0.85061814,
0.73498669, -0.04111287, -0.06362285, -0.07083428, -0.07879431,
0.91010891, -0.03680743, -0.0187955 , -0.01541937, -0.03680743,
-0.03029581, -0.02634295, -0.03029581, -0.07471691, -0.01228768,
0.76722803, -0.10505173, -0.06192993, -0.09720799, -0.01416217,
-0.0187955 , -0.0689636 , -0.02865003, -0.07471691, -0.16460503,
-0.09720799, -0.02045547, 0.82732766, -0.0311527 , -0.0187955 ,
-0.15684317, -0.04111287, -0.03293737, -0.02104007, -0.02946178,
-0.02421701, -0.1353385 , -0.03203302, -0.04111287, -0.10778798,
-0.07083428, -0.06027915, -0.02164101, -0.06535882, -0.02946178,
-0.07275173, -0.02490638, -0.01678627, -0.30605146, -0.02164101,
-0.03482061, -0.03580075, 0.62969079, -0.0182721 , -0.03482061,
-0.07083428, -0.04226237, -0.03999333, -0.03580075, -0.03203302,
-0.0182721 , -0.03580075, -0.06027915, -0.03386636, -0.02946178,
-0.03029581, -0.0689636 , -0.02634295, -0.02634295, -0.03029581,
-0.02225873, -0.1353385 , -0.08989109, -0.01988679, -0.0526265 ,
-0.03386636, -0.03386636, -0.02786 , -0.03029581, -0.06535882,
-0.06535882, -0.03482061, -0.02786 , -0.29396682, -0.03293737,
-0.12242534, -0.04589793, -0.04589793, -0.03999333, -0.07471691,
-0.11344884, -0.05407977, -0.03482061, -0.01988679, -0.02045547,
0.65610673, 0.85423777, -0.02561486, -0.0689636 , -0.02045547,
-0.02865003, -0.0526265 , -0.02164101, -0.01776301, -0.08307425,
-0.11344884, -0.04982997, -0.0182721 , -0.01498855, -0.02865003,
-0.14221564, -0.07879431, -0.02865003, -0.10237696, -0.04465416,
-0.07471691, -0.07673078, -0.13200634, -0.02104007, -0.0187955 ,
-0.01376599, -0.04717464, -0.01128289, 0.94289953, -0.01988679,
-0.01300612, -0.11936722, -0.03203302, -0.01726786, -0.04589793,
-0.05407977, -0.09976271, -0.02561486, -0.03999333, -0.02634295,
-0.03580075, -0.21771181, 0.8646615 , -0.01988679, 0.62295626,
-0.06027915, -0.02045547, -0.18104935, 0.96000667, -0.18104935,
-0.15684317, -0.01376599, -0.03293737, -0.08989109, -0.02709115,
-0.14221564, 0.72934402, -0.10237696, -0.04226237, -0.72991785,
-0.06713876, -0.04226237, -0.03482061, -0.07879431, -0.07471691,
-0.15307528, 0.97710634, 0.91010891, -0.02634295, -0.43243779,
-0.08756457, -0.03293737, -0.02786 , -0.03482061, -0.0187955 ,
0.91692575, -0.04589793, -0.07275173, -0.0311527 , -0.04589793,
-0.08307425, 0.67531381, -0.02289366, -0.02634295, -0.03580075,
-0.14938186, -0.0526265 , -0.0526265 , 0.46731076, -0.19874565,
-0.0187955 , -0.01541937, -0.01586237, -0.02045547, -0.02421701,
-0.02634295, -0.11344884, -0.05710047, -0.05121018, -0.09720799,
0.9688473 , -0.0526265 , -0.01586237, -0.07471691, -0.06027915,
-0.15684317, -0.07879431, -0.02289366, -0.04111287, -0.04848506,
-0.02865003, -0.04589793, -0.03580075, -0.04111287, -0.1353385 ,
-0.09976271, -0.06362285, 0.67531381, -0.09976271, -0.49676673,
-0.07879431, -0.06027915, -0.06027915, -0.05407977, -0.05710047,
-0.0689636 , -0.11936722, -0.18973955, -0.02709115, -0.03890304,
-0.02634295, 0.19374818, -0.04111287, -0.0311527 , -0.07879431,
-0.0193336 , -0.01988679, -0.01376599, -0.07879431, 0.94289953,
-0.06027915, -0.02104007, -0.0689636 , -0.04717464, -0.04465416,
0.92916572, -0.03999333, -0.06192993, -0.05407977, -0.04982997,
-0.46087756, -0.09720799, -0.04589793, -0.07083428, -0.0193336 ,
-0.12242534, -0.12242534, -0.05407977, -0.01776301, -0.0311527 ,
-0.0689636 , -0.02421701, -0.13200634, -0.19874565, -0.03293737,
-0.82774282], atol=1.0e-8)
np.testing.assert_allclose(results.resid_working,
[ -1.71062283e-03, -1.53549840e-03, -8.42423701e-04,
-4.42798906e-03, -8.12073047e-03, -5.71934606e-03,
-2.12046213e-03, -5.34278480e-02, -5.16550074e-03,
9.82823035e-03, -9.52067472e-02, 1.48142818e-01,
-3.59779501e-03, -2.00993083e-03, -3.87619325e-04,
-2.62379729e-03, -4.33370579e-04, -1.10808799e-03,
-6.75670103e-04, -2.48818484e-03, -6.10129090e-02,
6.25511612e-02, -1.10808799e-03, -1.98451739e-02,
-3.41454749e-03, -2.61928659e-04, -4.09867263e-04,
-2.34090923e-04, -3.56621577e-02, -2.00993083e-03,
-4.33370579e-04, -2.76645832e-03, -9.40257152e-04,
-6.75670103e-04, -2.21289369e-04, -6.10129090e-02,
1.29061842e-01, -4.90775251e-03, -1.19671283e-02,
1.41347263e-01, -1.47631680e-01, -6.75670103e-04,
-4.58198217e-04, -4.66208406e-03, -3.07429001e-03,
-7.11923401e-02, -1.33191898e-04, -2.61928659e-04,
-5.90659690e-02, -6.75670103e-04, -2.46673839e-02,
-6.75670103e-04, -3.09919962e-04, -7.14047519e-04,
1.08085429e-01, 1.43161630e-01, -1.62077632e-03,
-3.79032977e-03, -4.66208406e-03, -5.71934606e-03,
7.44566288e-02, -1.30492035e-03, -3.46630910e-04,
-2.34090923e-04, -1.30492035e-03, -8.90029618e-04,
-6.75670103e-04, -8.90029618e-04, -5.16550074e-03,
-1.49131762e-04, 1.37018624e-01, -9.87652847e-03,
-3.59779501e-03, -8.53083698e-03, -1.97726627e-04,
-3.46630910e-04, -4.42798906e-03, -7.97307494e-04,
-5.16550074e-03, -2.26348718e-02, -8.53083698e-03,
-4.09867263e-04, 1.18189219e-01, -9.40257152e-04,
-3.46630910e-04, -2.07414715e-02, -1.62077632e-03,
-1.04913757e-03, -4.33370579e-04, -8.42423701e-04,
-5.72261321e-04, -1.58375811e-02, -9.93244730e-04,
-1.62077632e-03, -1.03659408e-02, -4.66208406e-03,
-3.41454749e-03, -4.58198217e-04, -3.99257703e-03,
-8.42423701e-04, -4.90775251e-03, -6.04877746e-04,
-2.77048947e-04, -6.50004229e-02, -4.58198217e-04,
-1.17025566e-03, -1.23580799e-03, 1.46831486e-01,
-3.27769165e-04, -1.17025566e-03, -4.66208406e-03,
-1.71062283e-03, -1.53549840e-03, -1.23580799e-03,
-9.93244730e-04, -3.27769165e-04, -1.23580799e-03,
-3.41454749e-03, -1.10808799e-03, -8.42423701e-04,
-8.90029618e-04, -4.42798906e-03, -6.75670103e-04,
-6.75670103e-04, -8.90029618e-04, -4.84422741e-04,
-1.58375811e-02, -7.35405096e-03, -3.87619325e-04,
-2.62379729e-03, -1.10808799e-03, -1.10808799e-03,
-7.54555329e-04, -8.90029618e-04, -3.99257703e-03,
-3.99257703e-03, -1.17025566e-03, -7.54555329e-04,
-6.10129090e-02, -1.04913757e-03, -1.31530576e-02,
-2.00993083e-03, -2.00993083e-03, -1.53549840e-03,
-5.16550074e-03, -1.14104800e-02, -2.76645832e-03,
-1.17025566e-03, -3.87619325e-04, -4.09867263e-04,
1.48037813e-01, 1.06365931e-01, -6.39314594e-04,
-4.42798906e-03, -4.09867263e-04, -7.97307494e-04,
-2.62379729e-03, -4.58198217e-04, -3.09919962e-04,
-6.32800839e-03, -1.14104800e-02, -2.35929680e-03,
-3.27769165e-04, -2.21289369e-04, -7.97307494e-04,
-1.73489362e-02, -5.71934606e-03, -7.97307494e-04,
-9.40802551e-03, -1.90495384e-03, -5.16550074e-03,
-5.43585191e-03, -1.51253748e-02, -4.33370579e-04,
-3.46630910e-04, -1.86893696e-04, -2.12046213e-03,
-1.25867293e-04, 5.07657192e-02, -3.87619325e-04,
-1.66959104e-04, -1.25477263e-02, -9.93244730e-04,
-2.93030065e-04, -2.00993083e-03, -2.76645832e-03,
-8.95970087e-03, -6.39314594e-04, -1.53549840e-03,
-6.75670103e-04, -1.23580799e-03, -3.70792339e-02,
1.01184411e-01, -3.87619325e-04, 1.46321062e-01,
-3.41454749e-03, -4.09867263e-04, -2.68442736e-02,
3.68583645e-02, -2.68442736e-02, -2.07414715e-02,
-1.86893696e-04, -1.04913757e-03, -7.35405096e-03,
-7.14047519e-04, -1.73489362e-02, 1.43973473e-01,
-9.40802551e-03, -1.71062283e-03, -1.43894386e-01,
-4.20497779e-03, -1.71062283e-03, -1.17025566e-03,
-5.71934606e-03, -5.16550074e-03, -1.98451739e-02,
2.18574168e-02, 7.44566288e-02, -6.75670103e-04,
-1.06135519e-01, -6.99614755e-03, -1.04913757e-03,
-7.54555329e-04, -1.17025566e-03, -3.46630910e-04,
6.98449121e-02, -2.00993083e-03, -4.90775251e-03,
-9.40257152e-04, -2.00993083e-03, -6.32800839e-03,
1.48072729e-01, -5.12120512e-04, -6.75670103e-04,
-1.23580799e-03, -1.89814939e-02, -2.62379729e-03,
-2.62379729e-03, 1.16328328e-01, -3.16494123e-02,
-3.46630910e-04, -2.34090923e-04, -2.47623705e-04,
-4.09867263e-04, -5.72261321e-04, -6.75670103e-04,
-1.14104800e-02, -3.07429001e-03, -2.48818484e-03,
-8.53083698e-03, 2.92419496e-02, -2.62379729e-03,
-2.47623705e-04, -5.16550074e-03, -3.41454749e-03,
-2.07414715e-02, -5.71934606e-03, -5.12120512e-04,
-1.62077632e-03, -2.23682205e-03, -7.97307494e-04,
-2.00993083e-03, -1.23580799e-03, -1.62077632e-03,
-1.58375811e-02, -8.95970087e-03, -3.79032977e-03,
1.48072729e-01, -8.95970087e-03, -1.24186489e-01,
-5.71934606e-03, -3.41454749e-03, -3.41454749e-03,
-2.76645832e-03, -3.07429001e-03, -4.42798906e-03,
-1.25477263e-02, -2.91702648e-02, -7.14047519e-04,
-1.45456868e-03, -6.75670103e-04, 3.02653681e-02,
-1.62077632e-03, -9.40257152e-04, -5.71934606e-03,
-3.66561274e-04, -3.87619325e-04, -1.86893696e-04,
-5.71934606e-03, 5.07657192e-02, -3.41454749e-03,
-4.33370579e-04, -4.42798906e-03, -2.12046213e-03,
-1.90495384e-03, 6.11546973e-02, -1.53549840e-03,
-3.59779501e-03, -2.76645832e-03, -2.35929680e-03,
-1.14513988e-01, -8.53083698e-03, -2.00993083e-03,
-4.66208406e-03, -3.66561274e-04, -1.31530576e-02,
-1.31530576e-02, -2.76645832e-03, -3.09919962e-04,
-9.40257152e-04, -4.42798906e-03, -5.72261321e-04,
-1.51253748e-02, -3.16494123e-02, -1.04913757e-03,
-1.18023417e-01])
np.testing.assert_allclose(results.resid_pearson,
[-0.21006498, -0.20410641, -0.17423009, -0.27216147, -0.3234511 ,
-0.29246179, -0.22250903, -0.60917574, -0.28416602, 0.3421141 ,
-0.81229277, 1.42158361, -0.25694055, -0.21933056, -0.142444 ,
-0.23569027, -0.14660243, -0.18722578, -0.16448609, -0.2323235 ,
-0.64526275, 3.57006696, -0.18722578, -0.42513819, -0.25327023,
-0.12879668, -0.14450826, -0.12514332, -0.5200069 , -0.21933056,
-0.14660243, -0.23910582, -0.17931646, -0.16448609, -0.12335569,
-0.64526275, 1.97919183, -0.28010679, -0.36290807, 1.71396874,
-1.3440334 , -0.16448609, -0.14872695, -0.27610555, -0.24608613,
-0.69339243, -0.1083734 , -0.12879668, -0.63604537, -0.16448609,
-0.45684893, -0.16448609, -0.13447767, -0.16686977, 2.3862634 ,
1.66535145, -0.20706426, -0.26066405, -0.27610555, -0.29246179,
3.18191348, -0.19548397, -0.13840353, -0.12514332, -0.19548397,
-0.17675498, -0.16448609, -0.17675498, -0.28416602, -0.11153719,
1.81550268, -0.34261205, -0.25694055, -0.32813846, -0.11985666,
-0.13840353, -0.27216147, -0.17174127, -0.28416602, -0.44389026,
-0.32813846, -0.14450826, 2.18890738, -0.17931646, -0.13840353,
-0.43129917, -0.20706426, -0.18455132, -0.14660243, -0.17423009,
-0.1575374 , -0.39562855, -0.18191506, -0.20706426, -0.34757708,
-0.27610555, -0.25327023, -0.14872695, -0.26444152, -0.17423009,
-0.28010679, -0.15982038, -0.13066317, -0.66410018, -0.14872695,
-0.189939 , -0.19269154, 1.30401147, -0.13642648, -0.189939 ,
-0.27610555, -0.21006498, -0.20410641, -0.19269154, -0.18191506,
-0.13642648, -0.19269154, -0.25327023, -0.18722578, -0.17423009,
-0.17675498, -0.27216147, -0.16448609, -0.16448609, -0.17675498,
-0.15088226, -0.39562855, -0.3142763 , -0.142444 , -0.23569027,
-0.18722578, -0.18722578, -0.169288 , -0.17675498, -0.26444152,
-0.26444152, -0.189939 , -0.169288 , -0.64526275, -0.18455132,
-0.3735026 , -0.21933056, -0.21933056, -0.20410641, -0.28416602,
-0.35772404, -0.23910582, -0.189939 , -0.142444 , -0.14450826,
1.38125991, 2.42084442, -0.16213645, -0.27216147, -0.14450826,
-0.17174127, -0.23569027, -0.14872695, -0.13447767, -0.30099975,
-0.35772404, -0.22900483, -0.13642648, -0.12335569, -0.17174127,
-0.4071783 , -0.29246179, -0.17174127, -0.33771794, -0.21619749,
-0.28416602, -0.28828407, -0.38997712, -0.14660243, -0.13840353,
-0.11814455, -0.22250903, -0.10682532, 4.06361781, -0.142444 ,
-0.11479334, -0.36816723, -0.18191506, -0.1325567 , -0.21933056,
-0.23910582, -0.33289374, -0.16213645, -0.20410641, -0.16448609,
-0.19269154, -0.52754269, 2.52762346, -0.142444 , 1.28538406,
-0.25327023, -0.14450826, -0.47018591, 4.89940505, -0.47018591,
-0.43129917, -0.11814455, -0.18455132, -0.3142763 , -0.16686977,
-0.4071783 , 1.64156241, -0.33771794, -0.21006498, -1.6439517 ,
-0.26827373, -0.21006498, -0.189939 , -0.29246179, -0.28416602,
-0.42513819, 6.53301013, 3.18191348, -0.16448609, -0.87288109,
-0.30978696, -0.18455132, -0.169288 , -0.189939 , -0.13840353,
3.32226189, -0.21933056, -0.28010679, -0.17931646, -0.21933056,
-0.30099975, 1.44218477, -0.1530688 , -0.16448609, -0.19269154,
-0.41906522, -0.23569027, -0.23569027, 0.93662539, -0.4980393 ,
-0.13840353, -0.12514332, -0.12695686, -0.14450826, -0.1575374 ,
-0.16448609, -0.35772404, -0.24608613, -0.2323235 , -0.32813846,
5.57673284, -0.23569027, -0.12695686, -0.28416602, -0.25327023,
-0.43129917, -0.29246179, -0.1530688 , -0.20706426, -0.22573357,
-0.17174127, -0.21933056, -0.19269154, -0.20706426, -0.39562855,
-0.33289374, -0.26066405, 1.44218477, -0.33289374, -0.99355423,
-0.29246179, -0.25327023, -0.25327023, -0.23910582, -0.24608613,
-0.27216147, -0.36816723, -0.48391225, -0.16686977, -0.20119082,
-0.16448609, 0.49021146, -0.20706426, -0.17931646, -0.29246179,
-0.14040923, -0.142444 , -0.11814455, -0.29246179, 4.06361781,
-0.25327023, -0.14660243, -0.27216147, -0.22250903, -0.21619749,
3.6218033 , -0.20410641, -0.25694055, -0.23910582, -0.22900483,
-0.92458976, -0.32813846, -0.21933056, -0.27610555, -0.14040923,
-0.3735026 , -0.3735026 , -0.23910582, -0.13447767, -0.17931646,
-0.27216147, -0.1575374 , -0.38997712, -0.4980393 , -0.18455132,
-2.19209332])
np.testing.assert_allclose(results.resid_anscombe,
[-0.31237627, -0.3036605 , -0.25978208, -0.40240831, -0.47552289,
-0.43149255, -0.33053793, -0.85617194, -0.41962951, 0.50181328,
-1.0954382 , 1.66940149, -0.38048321, -0.3259044 , -0.21280762,
-0.34971301, -0.21896842, -0.27890356, -0.2454118 , -0.34482158,
-0.90063409, 2.80452413, -0.27890356, -0.61652596, -0.37518169,
-0.19255932, -0.2158664 , -0.18713159, -0.74270558, -0.3259044 ,
-0.21896842, -0.35467084, -0.2672722 , -0.2454118 , -0.18447466,
-0.90063409, 2.05763941, -0.41381347, -0.53089521, 1.88552083,
-1.60654218, -0.2454118 , -0.22211425, -0.40807333, -0.3647888 ,
-0.95861559, -0.16218047, -0.19255932, -0.88935802, -0.2454118 ,
-0.65930821, -0.2454118 , -0.20099345, -0.24892975, 2.28774016,
1.85167195, -0.30798858, -0.38585584, -0.40807333, -0.43149255,
2.65398426, -0.2910267 , -0.20681747, -0.18713159, -0.2910267 ,
-0.26350118, -0.2454118 , -0.26350118, -0.41962951, -0.16689207,
1.95381191, -0.50251231, -0.38048321, -0.48214234, -0.17927213,
-0.20681747, -0.40240831, -0.25611424, -0.41962951, -0.64189694,
-0.48214234, -0.2158664 , 2.18071204, -0.2672722 , -0.20681747,
-0.62488429, -0.30798858, -0.27497271, -0.21896842, -0.25978208,
-0.23514749, -0.57618899, -0.27109582, -0.30798858, -0.50947546,
-0.40807333, -0.37518169, -0.22211425, -0.39130036, -0.25978208,
-0.41381347, -0.2385213 , -0.19533116, -0.92350689, -0.22211425,
-0.28288904, -0.28692985, 1.5730846 , -0.20388497, -0.28288904,
-0.40807333, -0.31237627, -0.3036605 , -0.28692985, -0.27109582,
-0.20388497, -0.28692985, -0.37518169, -0.27890356, -0.25978208,
-0.26350118, -0.40240831, -0.2454118 , -0.2454118 , -0.26350118,
-0.22530448, -0.57618899, -0.46253505, -0.21280762, -0.34971301,
-0.27890356, -0.27890356, -0.25249702, -0.26350118, -0.39130036,
-0.39130036, -0.28288904, -0.25249702, -0.90063409, -0.27497271,
-0.5456246 , -0.3259044 , -0.3259044 , -0.3036605 , -0.41962951,
-0.52366614, -0.35467084, -0.28288904, -0.21280762, -0.2158664 ,
1.63703418, 2.30570989, -0.24194253, -0.40240831, -0.2158664 ,
-0.25611424, -0.34971301, -0.22211425, -0.20099345, -0.44366892,
-0.52366614, -0.33999576, -0.20388497, -0.18447466, -0.25611424,
-0.59203547, -0.43149255, -0.25611424, -0.49563627, -0.32133344,
-0.41962951, -0.42552227, -0.56840788, -0.21896842, -0.20681747,
-0.17672552, -0.33053793, -0.15987433, 2.9768074 , -0.21280762,
-0.17173916, -0.53821445, -0.27109582, -0.19814236, -0.3259044 ,
-0.35467084, -0.48884654, -0.24194253, -0.3036605 , -0.2454118 ,
-0.28692985, -0.75249089, 2.35983933, -0.21280762, 1.55726719,
-0.37518169, -0.2158664 , -0.67712261, 3.23165236, -0.67712261,
-0.62488429, -0.17672552, -0.27497271, -0.46253505, -0.24892975,
-0.59203547, 1.83482464, -0.49563627, -0.31237627, -1.83652534,
-0.39681759, -0.31237627, -0.28288904, -0.43149255, -0.41962951,
-0.61652596, 3.63983609, 2.65398426, -0.2454118 , -1.16171662,
-0.45616505, -0.27497271, -0.25249702, -0.28288904, -0.20681747,
2.71015945, -0.3259044 , -0.41381347, -0.2672722 , -0.3259044 ,
-0.44366892, 1.68567947, -0.22853969, -0.2454118 , -0.28692985,
-0.60826548, -0.34971301, -0.34971301, 1.2290223 , -0.71397735,
-0.20681747, -0.18713159, -0.1898263 , -0.2158664 , -0.23514749,
-0.2454118 , -0.52366614, -0.3647888 , -0.34482158, -0.48214234,
3.41271513, -0.34971301, -0.1898263 , -0.41962951, -0.37518169,
-0.62488429, -0.43149255, -0.22853969, -0.30798858, -0.3352348 ,
-0.25611424, -0.3259044 , -0.28692985, -0.30798858, -0.57618899,
-0.48884654, -0.38585584, 1.68567947, -0.48884654, -1.28709718,
-0.43149255, -0.37518169, -0.37518169, -0.35467084, -0.3647888 ,
-0.40240831, -0.53821445, -0.69534436, -0.24892975, -0.29939131,
-0.2454118 , 0.70366797, -0.30798858, -0.2672722 , -0.43149255,
-0.2097915 , -0.21280762, -0.17672552, -0.43149255, 2.9768074 ,
-0.37518169, -0.21896842, -0.40240831, -0.33053793, -0.32133344,
2.82351017, -0.3036605 , -0.38048321, -0.35467084, -0.33999576,
-1.21650102, -0.48214234, -0.3259044 , -0.40807333, -0.2097915 ,
-0.5456246 , -0.5456246 , -0.35467084, -0.20099345, -0.2672722 ,
-0.40240831, -0.23514749, -0.56840788, -0.71397735, -0.27497271,
-2.18250381])
np.testing.assert_allclose(results.resid_deviance,
[-0.29387552, -0.2857098 , -0.24455876, -0.37803944, -0.44609851,
-0.40514674, -0.31088148, -0.79449324, -0.39409528, 0.47049798,
-1.00668653, 1.48698001, -0.35757692, -0.30654405, -0.20043547,
-0.32882173, -0.20622595, -0.26249995, -0.23106769, -0.32424676,
-0.83437766, 2.28941155, -0.26249995, -0.57644334, -0.35262564,
-0.18139734, -0.20331052, -0.17629229, -0.69186337, -0.30654405,
-0.20622595, -0.33345774, -0.251588 , -0.23106769, -0.17379306,
-0.83437766, 1.78479093, -0.38867448, -0.4974393 , 1.65565332,
-1.43660134, -0.23106769, -0.20918228, -0.38332275, -0.34291558,
-0.88609006, -0.15281596, -0.18139734, -0.82428104, -0.23106769,
-0.61571821, -0.23106769, -0.18932865, -0.234371 , 1.94999969,
1.62970871, -0.2897651 , -0.36259328, -0.38332275, -0.40514674,
2.19506559, -0.27386827, -0.19480442, -0.17629229, -0.27386827,
-0.24804925, -0.23106769, -0.24804925, -0.39409528, -0.15725009,
1.7074519 , -0.47114617, -0.35757692, -0.4522457 , -0.16889886,
-0.19480442, -0.37803944, -0.24111595, -0.39409528, -0.59975102,
-0.4522457 , -0.20331052, 1.87422489, -0.251588 , -0.19480442,
-0.5841272 , -0.2897651 , -0.25881274, -0.20622595, -0.24455876,
-0.22142749, -0.53929061, -0.25517563, -0.2897651 , -0.47760126,
-0.38332275, -0.35262564, -0.20918228, -0.36767536, -0.24455876,
-0.38867448, -0.2245965 , -0.18400413, -0.85481866, -0.20918228,
-0.26623785, -0.27002708, 1.40955093, -0.19204738, -0.26623785,
-0.38332275, -0.29387552, -0.2857098 , -0.27002708, -0.25517563,
-0.19204738, -0.27002708, -0.35262564, -0.26249995, -0.24455876,
-0.24804925, -0.37803944, -0.23106769, -0.23106769, -0.24804925,
-0.21218006, -0.53929061, -0.43402996, -0.20043547, -0.32882173,
-0.26249995, -0.26249995, -0.23772023, -0.24804925, -0.36767536,
-0.36767536, -0.26623785, -0.23772023, -0.83437766, -0.25881274,
-0.51106408, -0.30654405, -0.30654405, -0.2857098 , -0.39409528,
-0.49074728, -0.33345774, -0.26623785, -0.20043547, -0.20331052,
1.46111186, 1.96253843, -0.22780971, -0.37803944, -0.20331052,
-0.24111595, -0.32882173, -0.20918228, -0.18932865, -0.41648237,
-0.49074728, -0.31973217, -0.19204738, -0.17379306, -0.24111595,
-0.55389988, -0.40514674, -0.24111595, -0.46476893, -0.30226435,
-0.39409528, -0.39958581, -0.53211065, -0.20622595, -0.19480442,
-0.16650295, -0.31088148, -0.15064545, 2.39288231, -0.20043547,
-0.16181126, -0.5042114 , -0.25517563, -0.18664773, -0.30654405,
-0.33345774, -0.45846897, -0.22780971, -0.2857098 , -0.23106769,
-0.27002708, -0.7007597 , 1.99998811, -0.20043547, 1.39670618,
-0.35262564, -0.20331052, -0.63203077, 2.53733821, -0.63203077,
-0.5841272 , -0.16650295, -0.25881274, -0.43402996, -0.234371 ,
-0.55389988, 1.61672923, -0.46476893, -0.29387552, -1.61804148,
-0.37282386, -0.29387552, -0.26623785, -0.40514674, -0.39409528,
-0.57644334, 2.74841605, 2.19506559, -0.23106769, -1.06433539,
-0.42810736, -0.25881274, -0.23772023, -0.26623785, -0.19480442,
2.23070414, -0.30654405, -0.38867448, -0.251588 , -0.30654405,
-0.41648237, 1.49993075, -0.21521982, -0.23106769, -0.27002708,
-0.5688444 , -0.32882173, -0.32882173, 1.12233423, -0.66569789,
-0.19480442, -0.17629229, -0.17882689, -0.20331052, -0.22142749,
-0.23106769, -0.49074728, -0.34291558, -0.32424676, -0.4522457 ,
2.63395309, -0.32882173, -0.17882689, -0.39409528, -0.35262564,
-0.5841272 , -0.40514674, -0.21521982, -0.2897651 , -0.3152773 ,
-0.24111595, -0.30654405, -0.27002708, -0.2897651 , -0.53929061,
-0.45846897, -0.36259328, 1.49993075, -0.45846897, -1.17192274,
-0.40514674, -0.35262564, -0.35262564, -0.33345774, -0.34291558,
-0.37803944, -0.5042114 , -0.64869028, -0.234371 , -0.28170899,
-0.23106769, 0.65629132, -0.2897651 , -0.251588 , -0.40514674,
-0.19760028, -0.20043547, -0.16650295, -0.40514674, 2.39288231,
-0.35262564, -0.20622595, -0.37803944, -0.31088148, -0.30226435,
2.30104857, -0.2857098 , -0.35757692, -0.33345774, -0.31973217,
-1.11158678, -0.4522457 , -0.30654405, -0.38332275, -0.19760028,
-0.51106408, -0.51106408, -0.33345774, -0.18932865, -0.251588 ,
-0.37803944, -0.22142749, -0.53211065, -0.66569789, -0.25881274,
-1.87550882])
np.testing.assert_allclose(results.null,
[ 0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
0.08860759])
self.assertAlmostEqual(results.D2, .200712816165)
self.assertAlmostEqual(results.adj_D2, 0.19816731557930456)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,350 @@
from __future__ import absolute_import, print_function
import numpy as np
import warnings
def _bit_length_26(x):
if x == 0:
return 0
elif x == 1:
return 1
else:
return len(bin(x)) - 2
try:
from scipy.lib._version import NumpyVersion
except ImportError:
import re
string_types = basestring
class NumpyVersion():
"""Parse and compare numpy version strings.
Numpy has the following versioning scheme (numbers given are examples; they
can be >9) in principle):
- Released version: '1.8.0', '1.8.1', etc.
- Alpha: '1.8.0a1', '1.8.0a2', etc.
- Beta: '1.8.0b1', '1.8.0b2', etc.
- Release candidates: '1.8.0rc1', '1.8.0rc2', etc.
- Development versions: '1.8.0.dev-f1234afa' (git commit hash appended)
- Development versions after a1: '1.8.0a1.dev-f1234afa',
'1.8.0b2.dev-f1234afa',
'1.8.1rc1.dev-f1234afa', etc.
- Development versions (no git hash available): '1.8.0.dev-Unknown'
Comparing needs to be done against a valid version string or other
`NumpyVersion` instance.
Parameters
----------
vstring : str
Numpy version string (``np.__version__``).
Notes
-----
All dev versions of the same (pre-)release compare equal.
Examples
--------
>>> from scipy.lib._version import NumpyVersion
>>> if NumpyVersion(np.__version__) < '1.7.0':
... print('skip')
skip
>>> NumpyVersion('1.7') # raises ValueError, add ".0"
"""
def __init__(self, vstring):
self.vstring = vstring
ver_main = re.match(r'\d[.]\d+[.]\d+', vstring)
if not ver_main:
raise ValueError("Not a valid numpy version string")
self.version = ver_main.group()
self.major, self.minor, self.bugfix = [int(x) for x in
self.version.split('.')]
if len(vstring) == ver_main.end():
self.pre_release = 'final'
else:
alpha = re.match(r'a\d', vstring[ver_main.end():])
beta = re.match(r'b\d', vstring[ver_main.end():])
rc = re.match(r'rc\d', vstring[ver_main.end():])
pre_rel = [m for m in [alpha, beta, rc] if m is not None]
if pre_rel:
self.pre_release = pre_rel[0].group()
else:
self.pre_release = ''
self.is_devversion = bool(re.search(r'.dev-', vstring))
def _compare_version(self, other):
"""Compare major.minor.bugfix"""
if self.major == other.major:
if self.minor == other.minor:
if self.bugfix == other.bugfix:
vercmp = 0
elif self.bugfix > other.bugfix:
vercmp = 1
else:
vercmp = -1
elif self.minor > other.minor:
vercmp = 1
else:
vercmp = -1
elif self.major > other.major:
vercmp = 1
else:
vercmp = -1
return vercmp
def _compare_pre_release(self, other):
"""Compare alpha/beta/rc/final."""
if self.pre_release == other.pre_release:
vercmp = 0
elif self.pre_release == 'final':
vercmp = 1
elif other.pre_release == 'final':
vercmp = -1
elif self.pre_release > other.pre_release:
vercmp = 1
else:
vercmp = -1
return vercmp
def _compare(self, other):
if not isinstance(other, (string_types, NumpyVersion)):
raise ValueError("Invalid object to compare with NumpyVersion.")
if isinstance(other, string_types):
other = NumpyVersion(other)
vercmp = self._compare_version(other)
if vercmp == 0:
# Same x.y.z version, check for alpha/beta/rc
vercmp = self._compare_pre_release(other)
if vercmp == 0:
# Same version and same pre-release, check if dev version
if self.is_devversion is other.is_devversion:
vercmp = 0
elif self.is_devversion:
vercmp = -1
else:
vercmp = 1
return vercmp
def __lt__(self, other):
return self._compare(other) < 0
def __le__(self, other):
return self._compare(other) <= 0
def __eq__(self, other):
return self._compare(other) == 0
def __ne__(self, other):
return self._compare(other) != 0
def __gt__(self, other):
return self._compare(other) > 0
def __ge__(self, other):
return self._compare(other) >= 0
def __repr(self):
return "NumpyVersion(%s)" % self.vstring
def _next_regular(target):
"""
Find the next regular number greater than or equal to target.
Regular numbers are composites of the prime factors 2, 3, and 5.
Also known as 5-smooth numbers or Hamming numbers, these are the optimal
size for inputs to FFTPACK.
Target must be a positive integer.
"""
if target <= 6:
return target
# Quickly check if it's already a power of 2
if not (target & (target - 1)):
return target
match = float('inf') # Anything found will be smaller
p5 = 1
while p5 < target:
p35 = p5
while p35 < target:
# Ceiling integer division, avoiding conversion to float
# (quotient = ceil(target / p35))
quotient = -(-target // p35)
# Quickly find next power of 2 >= quotient
try:
p2 = 2 ** ((quotient - 1).bit_length())
except AttributeError:
# Fallback for Python <2.7
p2 = 2 ** _bit_length_26(quotient - 1)
N = p2 * p35
if N == target:
return N
elif N < match:
match = N
p35 *= 3
if p35 == target:
return p35
if p35 < match:
match = p35
p5 *= 5
if p5 == target:
return p5
if p5 < match:
match = p5
return match
if NumpyVersion(np.__version__) >= '1.7.1':
np_matrix_rank = np.linalg.matrix_rank
else:
def np_matrix_rank(M, tol=None):
"""
Return matrix rank of array using SVD method
Rank of the array is the number of SVD singular values of the array that are
greater than `tol`.
Parameters
----------
M : {(M,), (M, N)} array_like
array of <=2 dimensions
tol : {None, float}, optional
threshold below which SVD values are considered zero. If `tol` is
None, and ``S`` is an array with singular values for `M`, and
``eps`` is the epsilon value for datatype of ``S``, then `tol` is
set to ``S.max() * max(M.shape) * eps``.
Notes
-----
The default threshold to detect rank deficiency is a test on the magnitude
of the singular values of `M`. By default, we identify singular values less
than ``S.max() * max(M.shape) * eps`` as indicating rank deficiency (with
the symbols defined above). This is the algorithm MATLAB uses [1]. It also
appears in *Numerical recipes* in the discussion of SVD solutions for linear
least squares [2].
This default threshold is designed to detect rank deficiency accounting for
the numerical errors of the SVD computation. Imagine that there is a column
in `M` that is an exact (in floating point) linear combination of other
columns in `M`. Computing the SVD on `M` will not produce a singular value
exactly equal to 0 in general: any difference of the smallest SVD value from
0 will be caused by numerical imprecision in the calculation of the SVD.
Our threshold for small SVD values takes this numerical imprecision into
account, and the default threshold will detect such numerical rank
deficiency. The threshold may declare a matrix `M` rank deficient even if
the linear combination of some columns of `M` is not exactly equal to
another column of `M` but only numerically very close to another column of
`M`.
We chose our default threshold because it is in wide use. Other thresholds
are possible. For example, elsewhere in the 2007 edition of *Numerical
recipes* there is an alternative threshold of ``S.max() *
np.finfo(M.dtype).eps / 2. * np.sqrt(m + n + 1.)``. The authors describe
this threshold as being based on "expected roundoff error" (p 71).
The thresholds above deal with floating point roundoff error in the
calculation of the SVD. However, you may have more information about the
sources of error in `M` that would make you consider other tolerance values
to detect *effective* rank deficiency. The most useful measure of the
tolerance depends on the operations you intend to use on your matrix. For
example, if your data come from uncertain measurements with uncertainties
greater than floating point epsilon, choosing a tolerance near that
uncertainty may be preferable. The tolerance may be absolute if the
uncertainties are absolute rather than relative.
References
----------
.. [1] MATLAB reference documention, "Rank"
http://www.mathworks.com/help/techdoc/ref/rank.html
.. [2] W. H. Press, S. A. Teukolsky, W. T. Vetterling and B. P. Flannery,
"Numerical Recipes (3rd edition)", Cambridge University Press, 2007,
page 795.
Examples
--------
>>> from numpy.linalg import matrix_rank
>>> matrix_rank(np.eye(4)) # Full rank matrix
4
>>> I=np.eye(4); I[-1,-1] = 0. # rank deficient matrix
>>> matrix_rank(I)
3
>>> matrix_rank(np.ones((4,))) # 1 dimension - rank 1 unless all 0
1
>>> matrix_rank(np.zeros((4,)))
0
"""
M = np.asarray(M)
if M.ndim > 2:
raise TypeError('array should have 2 or fewer dimensions')
if M.ndim < 2:
return int(not all(M == 0))
S = np.linalg.svd(M, compute_uv=False)
if tol is None:
tol = S.max() * max(M.shape) * np.finfo(S.dtype).eps
return np.sum(S > tol)
class CacheWriteWarning(UserWarning):
pass
class CachedAttribute(object):
def __init__(self, func, cachename=None, resetlist=None):
self.fget = func
self.name = func.__name__
self.cachename = cachename or '_cache'
self.resetlist = resetlist or ()
def __get__(self, obj, type=None):
if obj is None:
return self.fget
# Get the cache or set a default one if needed
_cachename = self.cachename
_cache = getattr(obj, _cachename, None)
if _cache is None:
setattr(obj, _cachename, resettable_cache())
_cache = getattr(obj, _cachename)
# Get the name of the attribute to set and cache
name = self.name
_cachedval = _cache.get(name, None)
# print("[_cachedval=%s]" % _cachedval)
if _cachedval is None:
# Call the "fget" function
_cachedval = self.fget(obj)
# Set the attribute in obj
# print("Setting %s in cache to %s" % (name, _cachedval))
try:
_cache[name] = _cachedval
except KeyError:
setattr(_cache, name, _cachedval)
# Update the reset list if needed (and possible)
resetlist = self.resetlist
if resetlist is not ():
try:
_cache._resetdict[name] = self.resetlist
except AttributeError:
pass
# else:
# print("Reading %s from cache (%s)" % (name, _cachedval))
return _cachedval
def __set__(self, obj, value):
errmsg = "The attribute '%s' cannot be overwritten" % self.name
warnings.warn(errmsg, CacheWriteWarning)
class _cache_readonly(object):
"""
Decorator for CachedAttribute
"""
def __init__(self, cachename=None, resetlist=None):
self.func = None
self.cachename = cachename
self.resetlist = resetlist or None
def __call__(self, func):
return CachedAttribute(func,
cachename=self.cachename,
resetlist=self.resetlist)
cache_readonly = _cache_readonly()

View File

@ -0,0 +1,284 @@
"""
Variance functions for use with the link functions in statsmodels.family.links
"""
__docformat__ = 'restructuredtext'
import numpy as np
FLOAT_EPS = np.finfo(float).eps
class VarianceFunction(object):
"""
Relates the variance of a random variable to its mean. Defaults to 1.
Methods
-------
call
Returns an array of ones that is the same shape as `mu`
Notes
-----
After a variance function is initialized, its call method can be used.
Alias for VarianceFunction:
constant = VarianceFunction()
See also
--------
statsmodels.family.family
"""
def __call__(self, mu):
"""
Default variance function
Parameters
-----------
mu : array-like
mean parameters
Returns
-------
v : array
ones(mu.shape)
"""
mu = np.asarray(mu)
return np.ones(mu.shape, np.float64)
def deriv(self, mu):
"""
Derivative of the variance function v'(mu)
"""
from statsmodels.tools.numdiff import approx_fprime_cs
# TODO: diag workaround proplem with numdiff for 1d
return np.diag(approx_fprime_cs(mu, self))
constant = VarianceFunction()
constant.__doc__ = """
The call method of constant returns a constant variance, i.e., a vector of ones.
constant is an alias of VarianceFunction()
"""
class Power(object):
"""
Power variance function
Parameters
----------
power : float
exponent used in power variance function
Methods
-------
call
Returns the power variance
Formulas
--------
V(mu) = numpy.fabs(mu)**power
Notes
-----
Aliases for Power:
mu = Power()
mu_squared = Power(power=2)
mu_cubed = Power(power=3)
"""
def __init__(self, power=1.):
self.power = power
def __call__(self, mu):
"""
Power variance function
Parameters
----------
mu : array-like
mean parameters
Returns
-------
variance : array
numpy.fabs(mu)**self.power
"""
return np.power(np.fabs(mu), self.power)
def deriv(self, mu):
"""
Derivative of the variance function v'(mu)
"""
from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime
#return approx_fprime_cs(mu, self) # TODO fix breaks in `fabs
# TODO: diag is workaround problem with numdiff for 1d
return np.diag(approx_fprime(mu, self))
mu = Power()
mu.__doc__ = """
Returns np.fabs(mu)
Notes
-----
This is an alias of Power()
"""
mu_squared = Power(power=2)
mu_squared.__doc__ = """
Returns np.fabs(mu)**2
Notes
-----
This is an alias of statsmodels.family.links.Power(power=2)
"""
mu_cubed = Power(power=3)
mu_cubed.__doc__ = """
Returns np.fabs(mu)**3
Notes
-----
This is an alias of statsmodels.family.links.Power(power=3)
"""
class Binomial(object):
"""
Binomial variance function
Parameters
----------
n : int, optional
The number of trials for a binomial variable. The default is 1 for
p in (0,1)
Methods
-------
call
Returns the binomial variance
Formulas
--------
V(mu) = p * (1 - p) * n
where p = mu / n
Notes
-----
Alias for Binomial:
binary = Binomial()
A private method _clean trims the data by machine epsilon so that p is
in (0,1)
"""
def __init__(self, n=1):
self.n = n
def _clean(self, p):
return np.clip(p, FLOAT_EPS, 1 - FLOAT_EPS)
def __call__(self, mu):
"""
Binomial variance function
Parameters
-----------
mu : array-like
mean parameters
Returns
-------
variance : array
variance = mu/n * (1 - mu/n) * self.n
"""
p = self._clean(mu / self.n)
return p * (1 - p) * self.n
#TODO: inherit from super
def deriv(self, mu):
"""
Derivative of the variance function v'(mu)
"""
from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime
# TODO: diag workaround proplem with numdiff for 1d
return np.diag(approx_fprime_cs(mu, self))
binary = Binomial()
binary.__doc__ = """
The binomial variance function for n = 1
Notes
-----
This is an alias of Binomial(n=1)
"""
class NegativeBinomial(object):
'''
Negative binomial variance function
Parameters
----------
alpha : float
The ancillary parameter for the negative binomial variance function.
`alpha` is assumed to be nonstochastic. The default is 1.
Methods
-------
call
Returns the negative binomial variance
Formulas
--------
V(mu) = mu + alpha*mu**2
Notes
-----
Alias for NegativeBinomial:
nbinom = NegativeBinomial()
A private method _clean trims the data by machine epsilon so that p is
in (0,inf)
'''
def __init__(self, alpha=1.):
self.alpha = alpha
def _clean(self, p):
return np.clip(p, FLOAT_EPS, np.inf)
def __call__(self, mu):
"""
Negative binomial variance function
Parameters
----------
mu : array-like
mean parameters
Returns
-------
variance : array
variance = mu + alpha*mu**2
"""
p = self._clean(mu)
return p + self.alpha*p**2
def deriv(self, mu):
"""
Derivative of the negative binomial variance function.
"""
p = self._clean(mu)
return 1 + 2 * self.alpha * p
nbinom = NegativeBinomial()
nbinom.__doc__ = """
Negative Binomial variance function.
Notes
-----
This is an alias of NegativeBinomial(alpha=1.)
"""

View File

@ -0,0 +1,169 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import sys\n",
"import numpy as np\n",
"import pandas as pd\n",
"import pysal as ps\n",
"sys.path.append('/Users/toshan/Dropbox/GWR/PyGWRJing/PyGWR/')\n",
"from M_FBGWR_May2016 import FBGWR\n",
"from M_GWGLM import GWGLM\n",
"from M_selection import Band_Sel\n",
"import scipy"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"path = ps.examples.get_path('GData_utm.csv')\n",
"shp = pd.read_csv(path)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Prep data into design matrix and coordinates\n",
"\n",
"#Dependent variable\n",
"y = shp.PctBach.reshape((-1,1))\n",
"\n",
"#Design matrix - covariates - intercept added automatically\n",
"pov = shp.PctPov.reshape((-1,1))\n",
"rural = shp.PctRural.reshape((-1,1))\n",
"blk = shp.PctBlack.reshape((-1,1))\n",
"X = np.hstack([pov, rural, blk])\n",
"labels = ['Intercept', 'PctPov', 'PctRural', 'PctBlack']\n",
"\n",
"#Coordinates for calibration points\n",
"u = shp.X\n",
"v = shp.Y\n",
"coords = zip(u,v)\n",
"\n",
"coords_dict = {}\n",
"for i, x in enumerate(coords):\n",
" coords_dict[i] = x"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(60.0, <Kernel.GWR_W object at 0x10b73a450>, [(89.0, 1088.4132720309442), (73.0, 1082.7676645259439), (62.0, 1079.8414440181946), (62.0, 1079.8414440181946), (62.0, 1079.8414440181946), (60.0, 1079.6668177916372), (60.0, 1079.6668177916372), (60.0, 1079.6668177916372)])\n",
"CPU times: user 1.75 s, sys: 80.7 ms, total: 1.83 s\n",
"Wall time: 1.75 s\n"
]
}
],
"source": [
"%%time\n",
"print Band_Sel(y, None, X, coords_dict)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from pysal.contrib.gwr.sel_bw import Sel_BW\n",
"from pysal.contrib.gwr.gwr import GWR"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"60.0\n",
"CPU times: user 1.1 s, sys: 3.5 ms, total: 1.11 s\n",
"Wall time: 1.1 s\n"
]
}
],
"source": [
"%%time\n",
"print Sel_BW(coords, y, X, [], kernel='bisquare', constant=False).search()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%time\n",
"results = FBGWR(y, X, coords_dict, tolFB=1e-03)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"%%time\n",
"sel = Sel_BW(coords, y, X, [], kernel='bisquare', fb=True, constant=False)\n",
"results = sel.search(tol_fb=1e-03)"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [fbgwr]",
"language": "python",
"name": "Python [fbgwr]"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 0
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 1
}

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,571 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pysal as ps\n",
"from pysal.weights.Distance import Kernel\n",
"import sys\n",
"sys.path.append('/Users/toshan/dev/pysal/pysal/weights')\n",
"from Distance import Kernel as kn\n",
"from util import full2W as f2w"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"n = 5 #number of observations\n",
"m = 3 #number of calibration points"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"x = np.random.randint(1,1000, n)\n",
"y = np.random.randint(1,1000, n)\n",
"D1 = zip(x,y)\n",
"W1 = kn(D1)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"x = np.random.randint(1,1000, m)\n",
"y = np.random.randint(1,1000, m)\n",
"D2 = zip(x,y)\n",
"W2 = kn(D2)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0.39894228, 0.1009935 , 0.05619733, 0.04896355, 0.32650695,\n",
" 0.37879836, 0.31411809, 0.1073076 ],\n",
" [ 0.1009935 , 0.39894228, 0.36195557, 0.36626279, 0.07720723,\n",
" 0.11754135, 0.03669346, 0.33721825],\n",
" [ 0.05619733, 0.36195557, 0.39894228, 0.39108403, 0.0345001 ,\n",
" 0.06151089, 0.02103471, 0.24420603],\n",
" [ 0.04896355, 0.36626279, 0.39108403, 0.39894228, 0.0335092 ,\n",
" 0.05713611, 0.01604658, 0.26976648],\n",
" [ 0.32650695, 0.07720723, 0.0345001 , 0.0335092 , 0.39894228,\n",
" 0.37224696, 0.18985479, 0.11774827],\n",
" [ 0.37879836, 0.11754135, 0.06151089, 0.05713611, 0.37224696,\n",
" 0.39894228, 0.24197075, 0.14519262],\n",
" [ 0.31411809, 0.03669346, 0.02103471, 0.01604658, 0.18985479,\n",
" 0.24197075, 0.39894228, 0.03119528],\n",
" [ 0.1073076 , 0.33721825, 0.24420603, 0.26976648, 0.11774827,\n",
" 0.14519262, 0.03119528, 0.39894228]])"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"D3 = np.vstack([D1,D2])\n",
"W3 = Kernel(D3, function='gaussian')\n",
"W3 = kn(D3, function='gaussian', truncate=False)\n",
"\n",
"W3.full()[0]"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n"
]
}
],
"source": [
"\n",
"coord_ids = np.arange(n)\n",
"points_ids = np.arange(n, n+m)\n",
"all_ids = np.arange(n+m)\n",
"dists = np.zeros((m,n))\n",
"\n",
"\n",
"for i in all_ids:\n",
" if i in points_ids:\n",
" for j in coord_ids:\n",
" if j in W3[j].keys():\n",
" if i >= n:\n",
" print 'a'\n",
" dists[i-n][j] = W3.full()[0][i][j]\n",
" elif j >= m:\n",
" print 'b'\n",
" dists[i][j-m] = W3.full()[0][i][j]\n",
" elif (i >= n) & (j >= m):\n",
" print 'c'\n",
" dists[i-n][j-m] = W3.full()[0][i][j]\n",
" else:\n",
" print 'd'\n",
" dists[i][j] = W3.full()[0][i][j]\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0.37879836, 0.11754135, 0.06151089, 0.05713611, 0.37224696],\n",
" [ 0.31411809, 0.03669346, 0.02103471, 0.01604658, 0.18985479],\n",
" [ 0.1073076 , 0.33721825, 0.24420603, 0.26976648, 0.11774827]])"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dists"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "ValueError",
"evalue": "3 is not in list",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-22-3036666a721b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf2w\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdists\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m//anaconda/lib/python2.7/site-packages/pysal/weights/weights.pyc\u001b[0m in \u001b[0;36mfull\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 953\u001b[0m \"\"\"\n\u001b[0;32m--> 954\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 955\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 956\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtowsp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m//anaconda/lib/python2.7/site-packages/pysal/weights/util.pyc\u001b[0m in \u001b[0;36mfull\u001b[0;34m(w)\u001b[0m\n\u001b[1;32m 711\u001b[0m \u001b[0mw_i\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweights\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 712\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwij\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_i\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw_i\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 713\u001b[0;31m \u001b[0mc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 714\u001b[0m \u001b[0mwfull\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwij\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mwfull\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: 3 is not in list"
]
}
],
"source": [
"w = f2w(dists)\n",
"w.full()[0]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"new_neighbs = {}\n",
"new_weights = {}\n",
"for each in W2.neighbors:\n",
" three = set(W3.neighbors[each])\n",
" two = set(W2.neighbors[each])\n",
" new_neighbs[each] = list(three.difference(two))\n",
" new_weights[each] = {}\n",
" for weight in new_neighbs[each]:\n",
" new_weights[each][weight] = W3[each][weight]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'\\nfor ids in all_ids:\\n if ids in points_ids:\\n _all = set(W3.neighbors[ids])\\n points = set(W2.neighbors[ids])\\n new_neighbs[ids] = list(_all.difference(points))\\n new_weights[ids] = {}\\n for weight in new_neighbs[ids]:\\n new_weights[ids][weight] = W3[ids][weight]\\n\\n else:\\n new_weights[ids] = {}\\n'"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coord_ids = W1.id_order\n",
"points_ids = W2.id_order\n",
"all_ids = W3.id_order\n",
"dists = np.zeros((2,3))\n",
"\n",
"'''\n",
"for ids in all_ids:\n",
" if ids in points_ids:\n",
" _all = set(W3.neighbors[ids])\n",
" points = set(W2.neighbors[ids])\n",
" new_neighbs[ids] = list(_all.difference(points))\n",
" new_weights[ids] = {}\n",
" for weight in new_neighbs[ids]:\n",
" new_weights[ids][weight] = W3[ids][weight]\n",
"\n",
" else:\n",
" new_weights[ids] = {}\n",
"''' \n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3 0\n",
"a\n",
"3 1\n",
"a\n",
"3 2\n",
"a\n",
"4 0\n",
"a\n",
"4 1\n",
"a\n",
"4 2\n",
"a\n",
"5 0\n",
"a\n",
"5 1\n",
"a\n",
"5 2\n",
"a\n",
"6 0\n",
"a\n",
"6 1\n",
"a\n",
"6 2\n",
"a\n",
"7 0\n",
"a\n",
"7 1\n",
"a\n",
"7 2\n",
"a\n"
]
}
],
"source": [
"n = 3 #number of observations\n",
"m = 2 #number of \n",
"for i in all_ids:\n",
" if i in points_ids:\n",
" for j in coord_ids:\n",
" if j in W3[j].keys():\n",
" print i,j\n",
" if i >= n:\n",
" print 'a'\n",
" dists[i-n][j] = W3.full()[0][i][j]\n",
" elif j >= m:\n",
" print 'b'\n",
" dists[i][j-m] = W3.full()[0][i][j]\n",
" elif (i >= n) & (j >= m):\n",
" print 'c'\n",
" dists[i-n][j-m] = W3.full()[0][i][j]\n",
" else:\n",
" print 'd'\n",
" dists[i][j] = W3.full()[0][i][j]\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0. , 0.45118883, 0.85404129],\n",
" [ 0.27754126, 0.40233088, 0.20596816],\n",
" [ 0. , 0.37941649, 0.6032733 ],\n",
" [ 0. , 0. , 0. ],\n",
" [ 0.35446728, 0.73305802, 0.50022967]])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dists"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[0, 1]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"points_ids"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"W4 = ps.W(new_neighbs, new_weights)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{0: [3, 4, 5, 6, 7], 1: [3, 4, 5, 6, 7], 2: [3, 4, 5, 6, 7]}"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"W4.neighbors"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{0: {3: -1.0482874068379293,\n",
" 4: 0.36696681575222334,\n",
" 5: 0.678090925231672,\n",
" 6: 0.308555499289517,\n",
" 7: -0.620566108018012},\n",
" 1: {3: 0.5865615659374835,\n",
" 4: -0.8123595700046831,\n",
" 5: -0.5633467635255329,\n",
" 6: -1.1845906645769202,\n",
" 7: 0.42019589403802593},\n",
" 2: {3: 0.8005291815217084,\n",
" 4: -1.212624893235362,\n",
" 5: -0.9337024333901489,\n",
" 6: -1.4259607135415542,\n",
" 7: 0.009238162970930053}}"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"W4.weights"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[0, 1, 2]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"W4.id_order"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "ValueError",
"evalue": "3 is not in list",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-29-bea43da3bca2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mW4\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m//anaconda/lib/python2.7/site-packages/pysal/weights/weights.pyc\u001b[0m in \u001b[0;36mfull\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 953\u001b[0m \"\"\"\n\u001b[0;32m--> 954\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfull\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 955\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 956\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtowsp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m//anaconda/lib/python2.7/site-packages/pysal/weights/util.pyc\u001b[0m in \u001b[0;36mfull\u001b[0;34m(w)\u001b[0m\n\u001b[1;32m 711\u001b[0m \u001b[0mw_i\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweights\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 712\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwij\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_i\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw_i\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 713\u001b[0;31m \u001b[0mc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 714\u001b[0m \u001b[0mwfull\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwij\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mwfull\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: 3 is not in list"
]
}
],
"source": [
"W4.full()[0]"
]
},
{
"cell_type": "code",
"execution_count": 244,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{1: 1}"
]
},
"execution_count": 244,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"W4."
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([5])"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.arange(5,6)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [Root]",
"language": "python",
"name": "Python [Root]"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,6 @@
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,4 @@
import gwr
import sel_bw
import diagnostics
import kernels

View File

@ -0,0 +1,81 @@
"""
Diagnostics for estimated gwr modesl
"""
__author__ = "Taylor Oshan tayoshan@gmail.com"
import numpy as np
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial
def get_AICc(gwr):
"""
Get AICc value
Gaussian: p61, (2.33), Fotheringham, Brunsdon and Charlton (2002)
GWGLM: AICc=AIC+2k(k+1)/(n-k-1), Nakaya et al. (2005): p2704, (36)
"""
n = gwr.n
k = gwr.tr_S
if isinstance(gwr.family, Gaussian):
aicc = -2.0*gwr.llf + 2.0*n*(k + 1.0)/(n-k-2.0)
elif isinstance(gwr.family, (Poisson, Binomial)):
aicc = get_AIC(gwr) + 2.0 * k * (k+1.0) / (n - k - 1.0)
return aicc
def get_AIC(gwr):
"""
Get AIC calue
Gaussian: p96, (4.22), Fotheringham, Brunsdon and Charlton (2002)
GWGLM: AIC(G)=D(G) + 2K(G), where D and K denote the deviance and the effective
number of parameters in the model with bandwidth G, respectively.
"""
k = gwr.tr_S
#deviance = -2*log-likelihood
y = gwr.y
mu = gwr.mu
if isinstance(gwr.family, Gaussian):
aic = -2.0 * gwr.llf + 2.0 * (k+1)
elif isinstance(gwr.family, (Poisson, Binomial)):
aic = np.sum(gwr.family.resid_dev(y, mu)**2) + 2.0 * k
return aic
def get_BIC(gwr):
"""
Get BIC value
Gaussian: p61 (2.34), Fotheringham, Brunsdon and Charlton (2002)
BIC = -2log(L)+klog(n)
GWGLM: BIC = dev + tr_S * log(n)
"""
n = gwr.n # (scalar) number of observations
k = gwr.tr_S
y = gwr.y
mu = gwr.mu
if isinstance(gwr.family, Gaussian):
bic = -2.0 * gwr.llf + (k+1) * np.log(n)
elif isinstance(gwr.family, (Poisson, Binomial)):
bic = np.sum(gwr.family.resid_dev(y, mu)**2) + k * np.log(n)
return bic
def get_CV(gwr):
"""
Get CV value
Gaussian only
Methods: p60, (2.31) or p212 (9.4)
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
Geographically weighted regression: the analysis of spatially varying relationships.
Modification: sum of residual squared is divided by n according to GWR4 results
"""
aa = gwr.resid_response.reshape((-1,1))/(1.0-gwr.influ)
cv = np.sum(aa**2)/gwr.n
return cv

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,120 @@
# GWR kernel function specifications
__author__ = "Taylor Oshan tayoshan@gmail.com"
#from pysal.weights.Distance import Kernel
import scipy
from scipy.spatial.kdtree import KDTree
import numpy as np
#adaptive specifications should be parameterized with nn-1 to match original gwr
#implementation. That is, pysal counts self neighbors with knn automatically.
def fix_gauss(points, bw):
w = _Kernel(points, function='gwr_gaussian', bandwidth=bw,
truncate=False)
return w.kernel
def adapt_gauss(points, nn):
w = _Kernel(points, fixed=False, k=nn-1, function='gwr_gaussian',
truncate=False)
return w.kernel
def fix_bisquare(points, bw):
w = _Kernel(points, function='bisquare', bandwidth=bw)
return w.kernel
def adapt_bisquare(points, nn):
w = _Kernel(points, fixed=False, k=nn-1, function='bisquare')
return w.kernel
def fix_exp(points, bw):
w = _Kernel(points, function='exponential', bandwidth=bw,
truncate=False)
return w.kernel
def adapt_exp(points, nn):
w = _Kernel(points, fixed=False, k=nn-1, function='exponential',
truncate=False)
return w.kernel
from scipy.spatial.distance import cdist
class _Kernel(object):
"""
"""
def __init__(self, data, bandwidth=None, fixed=True, k=None,
function='triangular', eps=1.0000001, ids=None, truncate=True): #Added truncate flag
if issubclass(type(data), scipy.spatial.KDTree):
self.kdt = data
self.data = self.kdt.data
data = self.data
else:
self.data = data
self.kdt = KDTree(self.data)
if k is not None:
self.k = int(k) + 1
else:
self.k = k
self.dmat = cdist(self.data, self.data)
self.function = function.lower()
self.fixed = fixed
self.eps = eps
self.trunc = truncate
if bandwidth:
try:
bandwidth = np.array(bandwidth)
bandwidth.shape = (len(bandwidth), 1)
except:
bandwidth = np.ones((len(data), 1), 'float') * bandwidth
self.bandwidth = bandwidth
else:
self._set_bw()
self.kernel = self._kernel_funcs(self.dmat/self.bandwidth)
if self.trunc:
mask = np.repeat(self.bandwidth, len(self.bandwidth), axis=1)
kernel_mask = self._kernel_funcs(1.0/mask)
self.kernel[(self.dmat >= mask)] = 0
def _set_bw(self):
if self.k is not None:
dmat = np.sort(self.dmat)[:,:self.k]
else:
dmat = self.dmat
if self.fixed:
# use max knn distance as bandwidth
bandwidth = dmat.max() * self.eps
n = len(self.data)
self.bandwidth = np.ones((n, 1), 'float') * bandwidth
else:
# use local max knn distance
self.bandwidth = dmat.max(axis=1) * self.eps
self.bandwidth.shape = (self.bandwidth.size, 1)
def _kernel_funcs(self, zs):
# functions follow Anselin and Rey (2010) table 5.4
if self.function == 'triangular':
return 1 - zs
elif self.function == 'uniform':
return np.ones(zi.shape) * 0.5
elif self.function == 'quadratic':
return (3. / 4) * (1 - zs ** 2)
elif self.function == 'quartic':
return (15. / 16) * (1 - zs ** 2) ** 2
elif self.function == 'gaussian':
c = np.pi * 2
c = c ** (-0.5)
return c * np.exp(-(zs ** 2) / 2.)
elif self.function == 'gwr_gaussian':
return np.exp(-0.5*(zs)**2)
elif self.function == 'bisquare':
return (1-(zs)**2)**2
elif self.function =='exponential':
return np.exp(-zs)
else:
print('Unsupported kernel function', self.function)

View File

@ -0,0 +1,208 @@
#Bandwidth optimization methods
__author__ = "Taylor Oshan"
import numpy as np
def golden_section(a, c, delta, function, tol, max_iter, int_score=False):
"""
Golden section search routine
Method: p212, 9.6.4
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
Geographically weighted regression: the analysis of spatially varying relationships.
Parameters
----------
a : float
initial max search section value
b : float
initial min search section value
delta : float
constant used to determine width of search sections
function : function
obejective function to be evaluated at different section
values
int_score : boolean
False for float score, True for integer score
tol : float
tolerance used to determine convergence
max_iter : integer
maximum iterations if no convergence to tolerance
Returns
-------
opt_val : float
optimal value
opt_score : kernel
optimal score
output : list of tuples
searching history
"""
b = a + delta * np.abs(c-a)
d = c - delta * np.abs(c-a)
score = 0.0
diff = 1.0e9
iters = 0
output = []
while np.abs(diff) > tol and iters < max_iter:
iters += 1
if int_score:
b = np.round(b)
d = np.round(d)
score_a = function(a)
score_b = function(b)
score_c = function(c)
score_d = function(d)
if score_b <= score_d:
opt_val = b
opt_score = score_b
c = d
d = b
b = a + delta * np.abs(c-a)
#if int_score:
#b = np.round(b)
else:
opt_val = d
opt_score = score_d
a = b
b = d
d = c - delta * np.abs(c-a)
#if int_score:
#d = np.round(b)
#if int_score:
# opt_val = np.round(opt_val)
output.append((opt_val, opt_score))
diff = score_b - score_d
score = opt_score
return np.round(opt_val, 2), opt_score, output
def equal_interval(l_bound, u_bound, interval, function, int_score=False):
"""
Interval search, using interval as stepsize
Parameters
----------
l_bound : float
initial min search section value
u_bound : float
initial max search section value
interval : float
constant used to determine width of search sections
function : function
obejective function to be evaluated at different section
values
int_score : boolean
False for float score, True for integer score
Returns
-------
opt_val : float
optimal value
opt_score : kernel
optimal score
output : list of tuples
searching history
"""
a = l_bound
c = u_bound
b = a + interval
if int_score:
a = np.round(a,0)
c = np.round(c,0)
b = np.round(b,0)
output = []
score_a = function(a)
score_c = function(c)
output.append((a,score_a))
output.append((c,score_c))
if score_a < score_c:
opt_val = a
opt_score = score_a
else:
opt_val = c
opt_score = score_c
while b < c:
score_b = function(b)
output.append((b,score_b))
if score_b < opt_score:
opt_val = b
opt_score = score_b
b = b + interval
return opt_val, opt_score, output
def flexible_bw(init, y, X, n, k, family, tol, max_iter, rss_score,
gwr_func, bw_func, sel_func):
if init:
bw = sel_func(bw_func(y, X))
print bw
optim_model = gwr_func(y, X, bw)
err = optim_model.resid_response.reshape((-1,1))
est = optim_model.params
else:
model = GLM(y, X, family=self.family, constant=False).fit()
err = model.resid_response.reshape((-1,1))
est = np.repeat(model.params.T, n, axis=0)
XB = np.multiply(est, X)
if rss_score:
rss = np.sum((err)**2)
iters = 0
scores = []
delta = 1e6
BWs = []
VALs = []
while delta > tol and iters < max_iter:
iters += 1
new_XB = np.zeros_like(X)
bws = []
vals = []
ests = np.zeros_like(X)
f_XB = XB.copy()
f_err = err.copy()
for i in range(k):
temp_y = XB[:,i].reshape((-1,1))
temp_y = temp_y + err
temp_X = X[:,i].reshape((-1,1))
bw_class = bw_func(temp_y, temp_X)
bw = sel_func(bw_class)
optim_model = gwr_func(temp_y, temp_X, bw)
err = optim_model.resid_response.reshape((-1,1))
est = optim_model.params.reshape((-1,))
new_XB[:,i] = np.multiply(est, temp_X.reshape((-1,)))
bws.append(bw)
ests[:,i] = est
vals.append(bw_class.bw[1])
predy = np.sum(np.multiply(ests, X), axis=1).reshape((-1,1))
num = np.sum((new_XB - XB)**2)/n
den = np.sum(np.sum(new_XB, axis=1)**2)
score = (num/den)**0.5
XB = new_XB
if rss_score:
new_rss = np.sum((y - predy)**2)
score = np.abs((new_rss - rss)/new_rss)
rss = new_rss
print score
scores.append(score)
delta = score
BWs.append(bws)
VALs.append(vals)
opt_bws = BWs[-1]
return opt_bws, np.array(BWs), np.array(VALs), np.array(scores), f_XB, f_err

View File

@ -0,0 +1,286 @@
# GWR Bandwidth selection class
#Thinking about removing the search method and just having optimization begin in
#class __init__
#x_glob and offset parameters dont yet do anything; former is for semiparametric
#GWR and later is for offset variable for Poisson model
__author__ = "Taylor Oshan Tayoshan@gmail.com"
from kernels import *
from search import golden_section, equal_interval, flexible_bw
from gwr import GWR
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial
import pysal.spreg.user_output as USER
from diagnostics import get_AICc, get_AIC, get_BIC, get_CV
from scipy.spatial.distance import pdist, squareform
from pysal.common import KDTree
import numpy as np
kernels = {1: fix_gauss, 2: adapt_gauss, 3: fix_bisquare, 4:
adapt_bisquare, 5: fix_exp, 6:adapt_exp}
getDiag = {'AICc': get_AICc,'AIC':get_AIC, 'BIC': get_BIC, 'CV': get_CV}
class Sel_BW(object):
"""
Select bandwidth for kernel
Methods: p211 - p213, bandwidth selection
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
Geographically weighted regression: the analysis of spatially varying relationships.
Parameters
----------
y : array
n*1, dependent variable.
x_glob : array
n*k1, fixed independent variable.
x_loc : array
n*k2, local independent variable, including constant.
coords : list of tuples
(x,y) of points used in bandwidth selection
family : string
GWR model type: 'Gaussian', 'logistic, 'Poisson''
offset : array
n*1, offset variable for Poisson model
kernel : string
kernel function: 'gaussian', 'bisquare', 'exponetial'
fixed : boolean
True for fixed bandwidth and False for adaptive (NN)
fb : True for flexible (mutliple covaraite-specific) bandwidths
False for a traditional (same for all covariates)
bandwdith; defualt is False.
constant : boolean
True to include intercept (default) in model and False to exclude
intercept.
Attributes
----------
y : array
n*1, dependent variable.
x_glob : array
n*k1, fixed independent variable.
x_loc : array
n*k2, local independent variable, including constant.
coords : list of tuples
(x,y) of points used in bandwidth selection
family : string
GWR model type: 'Gaussian', 'logistic, 'Poisson''
kernel : string
type of kernel used and wether fixed or adaptive
criterion : string
bw selection criterion: 'AICc', 'AIC', 'BIC', 'CV'
search : string
bw search method: 'golden', 'interval'
bw_min : float
min value used in bandwidth search
bw_max : float
max value used in bandwidth search
interval : float
interval increment used in interval search
tol : float
tolerance used to determine convergence
max_iter : integer
max interations if no convergence to tol
fb : True for flexible (mutliple covaraite-specific) bandwidths
False for a traditional (same for all covariates)
bandwdith; defualt is False.
constant : boolean
True to include intercept (default) in model and False to exclude
intercept.
"""
def __init__(self, coords, y, x_loc, x_glob=None, family=Gaussian(),
offset=None, kernel='bisquare', fixed=False, fb=False, constant=True):
self.coords = coords
self.y = y
self.x_loc = x_loc
if x_glob is not None:
self.x_glob = x_glob
else:
self.x_glob = []
self.family=family
self.fixed = fixed
self.kernel = kernel
if offset is None:
self.offset = np.ones((len(y), 1))
else:
self.offset = offset * 1.0
self.fb = fb
self.constant = constant
def search(self, search='golden_section', criterion='AICc', bw_min=0.0,
bw_max=0.0, interval=0.0, tol=1.0e-6, max_iter=200, init_fb=True,
tol_fb=1.0e-5, rss_score=False, max_iter_fb=200):
"""
Parameters
----------
criterion : string
bw selection criterion: 'AICc', 'AIC', 'BIC', 'CV'
search : string
bw search method: 'golden', 'interval'
bw_min : float
min value used in bandwidth search
bw_max : float
max value used in bandwidth search
interval : float
interval increment used in interval search
tol : float
tolerance used to determine convergence
max_iter : integer
max iterations if no convergence to tol
init_fb : True to initialize flexible bandwidth search with
esitmates from a traditional GWR and False to
initialize flexible bandwidth search with global
regression estimates
tol_fb : convergence tolerence for the flexible bandwidth
backfitting algorithm; a larger tolerance may stop the
algorith faster though it may result in a less optimal
model
max_iter_fb : max iterations if no convergence to tol for flexible
bandwidth backfittign algorithm
rss_score : True to use the residual sum of sqaures to evaluate
each iteration of the flexible bandwidth backfitting
routine and False to use a smooth function; default is
False
Returns
-------
bw : scalar or array
optimal bandwidth value or values; returns scalar for
fb=False and array for fb=True; ordering of bandwidths
matches the ordering of the covariates (columns) of the
designs matrix, X
"""
self.search = search
self.criterion = criterion
self.bw_min = bw_min
self.bw_max = bw_max
self.interval = interval
self.tol = tol
self.max_iter = max_iter
self.init_fb = init_fb
self.tol_fb = tol_fb
self.rss_score = rss_score
self.max_iter_fb = max_iter_fb
if self.fixed:
if self.kernel == 'gaussian':
ktype = 1
elif self.kernel == 'bisquare':
ktype = 3
elif self.kernel == 'exponential':
ktype = 5
else:
raise TypeError('Unsupported kernel function ', self.kernel)
else:
if self.kernel == 'gaussian':
ktype = 2
elif self.kernel == 'bisquare':
ktype = 4
elif self.kernel == 'exponential':
ktype = 6
else:
raise TypeError('Unsupported kernel function ', self.kernel)
function = lambda bw: getDiag[criterion](
GWR(self.coords, self.y, self.x_loc, bw, family=self.family,
kernel=self.kernel, fixed=self.fixed, offset=self.offset).fit())
if ktype % 2 == 0:
int_score = True
else:
int_score = False
self.int_score = int_score
if self.fb:
self._fbw()
print self.bw[1]
self.XB = self.bw[4]
self.err = self.bw[5]
else:
self._bw()
return self.bw[0]
def _bw(self):
gwr_func = lambda bw: getDiag[self.criterion](
GWR(self.coords, self.y, self.x_loc, bw, family=self.family,
kernel=self.kernel, fixed=self.fixed, constant=self.constant).fit())
if self.search == 'golden_section':
a,c = self._init_section(self.x_glob, self.x_loc, self.coords,
self.constant)
delta = 0.38197 #1 - (np.sqrt(5.0)-1.0)/2.0
self.bw = golden_section(a, c, delta, gwr_func, self.tol,
self.max_iter, self.int_score)
elif self.search == 'interval':
self.bw = equal_interval(self.bw_min, self.bw_max, self.interval,
gwr_func, self.int_score)
else:
raise TypeError('Unsupported computational search method ', search)
def _fbw(self):
y = self.y
if self.constant:
X = USER.check_constant(self.x_loc)
else:
X = self.x_loc
n, k = X.shape
family = self.family
offset = self.offset
kernel = self.kernel
fixed = self.fixed
coords = self.coords
search = self.search
criterion = self.criterion
bw_min = self.bw_min
bw_max = self.bw_max
interval = self.interval
tol = self.tol
max_iter = self.max_iter
gwr_func = lambda y, X, bw: GWR(coords, y, X, bw, family=family,
kernel=kernel, fixed=fixed, offset=offset, constant=False).fit()
bw_func = lambda y, X: Sel_BW(coords, y, X, x_glob=[], family=family,
kernel=kernel, fixed=fixed, offset=offset, constant=False)
sel_func = lambda bw_func: bw_func.search(search=search,
criterion=criterion, bw_min=bw_min, bw_max=bw_max,
interval=interval, tol=tol, max_iter=max_iter)
self.bw = flexible_bw(self.init_fb, y, X, n, k, family, self.tol_fb,
self.max_iter_fb, self.rss_score, gwr_func, bw_func, sel_func)
def _init_section(self, x_glob, x_loc, coords, constant):
if len(x_glob) > 0:
n_glob = x_glob.shape[1]
else:
n_glob = 0
if len(x_loc) > 0:
n_loc = x_loc.shape[1]
else:
n_loc = 0
if constant:
n_vars = n_glob + n_loc + 1
else:
n_vars = n_glob + n_loc
n = np.array(coords).shape[0]
if self.int_score:
a = 40 + 2 * n_vars
c = n
else:
nn = 40 + 2 * n_vars
sq_dists = squareform(pdist(coords))
sort_dists = np.sort(sq_dists, axis=1)
min_dists = sort_dists[:,nn-1]
max_dists = sort_dists[:,-1]
a = np.min(min_dists)/2.0
c = np.max(max_dists)/2.0
if a < self.bw_min:
a = self.bw_min
if c > self.bw_max and self.bw_max > 0:
c = self.bw_max
return a, c

View File

@ -0,0 +1,785 @@
"""
GWR is tested against results from GWR4
"""
import unittest
import pickle as pk
from pysal.contrib.gwr.gwr import GWR, FBGWR
from pysal.contrib.gwr.diagnostics import get_AICc, get_AIC, get_BIC, get_CV
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial
import numpy as np
import pysal
class TestGWRGaussian(unittest.TestCase):
def setUp(self):
data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
self.coords = zip(data.by_col('X'), data.by_col('Y'))
self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
rural = np.array(data.by_col('PctRural')).reshape((-1,1))
pov = np.array(data.by_col('PctPov')).reshape((-1,1))
black = np.array(data.by_col('PctBlack')).reshape((-1,1))
self.X = np.hstack([rural, pov, black])
self.BS_F = pysal.open(pysal.examples.get_path('georgia_BS_F_listwise.csv'))
self.BS_NN = pysal.open(pysal.examples.get_path('georgia_BS_NN_listwise.csv'))
self.GS_F = pysal.open(pysal.examples.get_path('georgia_GS_F_listwise.csv'))
self.GS_NN = pysal.open(pysal.examples.get_path('georgia_GS_NN_listwise.csv'))
self.FB = pk.load(open(pysal.examples.get_path('FB.p'), 'r'))
self.XB = pk.load(open(pysal.examples.get_path('XB.p'), 'r'))
self.err = pk.load(open(pysal.examples.get_path('err.p'), 'r'))
def test_BS_F(self):
est_Int = self.BS_F.by_col(' est_Intercept')
se_Int = self.BS_F.by_col(' se_Intercept')
t_Int = self.BS_F.by_col(' t_Intercept')
est_rural = self.BS_F.by_col(' est_PctRural')
se_rural = self.BS_F.by_col(' se_PctRural')
t_rural = self.BS_F.by_col(' t_PctRural')
est_pov = self.BS_F.by_col(' est_PctPov')
se_pov = self.BS_F.by_col(' se_PctPov')
t_pov = self.BS_F.by_col(' t_PctPov')
est_black = self.BS_F.by_col(' est_PctBlack')
se_black = self.BS_F.by_col(' se_PctBlack')
t_black = self.BS_F.by_col(' t_PctBlack')
yhat = self.BS_F.by_col(' yhat')
res = np.array(self.BS_F.by_col(' residual'))
std_res = np.array(self.BS_F.by_col(' std_residual')).reshape((-1,1))
localR2 = np.array(self.BS_F.by_col(' localR2')).reshape((-1,1))
inf = np.array(self.BS_F.by_col(' influence')).reshape((-1,1))
cooksD = np.array(self.BS_F.by_col(' CooksD')).reshape((-1,1))
model = GWR(self.coords, self.y, self.X, bw=209267.689, fixed=True)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
CV = get_CV(rslt)
self.assertAlmostEquals(np.floor(AICc), 894.0)
self.assertAlmostEquals(np.floor(AIC), 890.0)
self.assertAlmostEquals(np.floor(BIC), 944.0)
self.assertAlmostEquals(np.round(CV,2), 18.25)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
def test_BS_NN(self):
est_Int = self.BS_NN.by_col(' est_Intercept')
se_Int = self.BS_NN.by_col(' se_Intercept')
t_Int = self.BS_NN.by_col(' t_Intercept')
est_rural = self.BS_NN.by_col(' est_PctRural')
se_rural = self.BS_NN.by_col(' se_PctRural')
t_rural = self.BS_NN.by_col(' t_PctRural')
est_pov = self.BS_NN.by_col(' est_PctPov')
se_pov = self.BS_NN.by_col(' se_PctPov')
t_pov = self.BS_NN.by_col(' t_PctPov')
est_black = self.BS_NN.by_col(' est_PctBlack')
se_black = self.BS_NN.by_col(' se_PctBlack')
t_black = self.BS_NN.by_col(' t_PctBlack')
yhat = self.BS_NN.by_col(' yhat')
res = np.array(self.BS_NN.by_col(' residual'))
std_res = np.array(self.BS_NN.by_col(' std_residual')).reshape((-1,1))
localR2 = np.array(self.BS_NN.by_col(' localR2')).reshape((-1,1))
inf = np.array(self.BS_NN.by_col(' influence')).reshape((-1,1))
cooksD = np.array(self.BS_NN.by_col(' CooksD')).reshape((-1,1))
model = GWR(self.coords, self.y, self.X, bw=90.000, fixed=False)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
CV = get_CV(rslt)
self.assertAlmostEquals(np.floor(AICc), 896.0)
self.assertAlmostEquals(np.floor(AIC), 892.0)
self.assertAlmostEquals(np.floor(BIC), 941.0)
self.assertAlmostEquals(np.around(CV, 2), 19.19)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
def test_GS_F(self):
est_Int = self.GS_F.by_col(' est_Intercept')
se_Int = self.GS_F.by_col(' se_Intercept')
t_Int = self.GS_F.by_col(' t_Intercept')
est_rural = self.GS_F.by_col(' est_PctRural')
se_rural = self.GS_F.by_col(' se_PctRural')
t_rural = self.GS_F.by_col(' t_PctRural')
est_pov = self.GS_F.by_col(' est_PctPov')
se_pov = self.GS_F.by_col(' se_PctPov')
t_pov = self.GS_F.by_col(' t_PctPov')
est_black = self.GS_F.by_col(' est_PctBlack')
se_black = self.GS_F.by_col(' se_PctBlack')
t_black = self.GS_F.by_col(' t_PctBlack')
yhat = self.GS_F.by_col(' yhat')
res = np.array(self.GS_F.by_col(' residual'))
std_res = np.array(self.GS_F.by_col(' std_residual')).reshape((-1,1))
localR2 = np.array(self.GS_F.by_col(' localR2')).reshape((-1,1))
inf = np.array(self.GS_F.by_col(' influence')).reshape((-1,1))
cooksD = np.array(self.GS_F.by_col(' CooksD')).reshape((-1,1))
model = GWR(self.coords, self.y, self.X, bw=87308.298,
kernel='gaussian', fixed=True)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
CV = get_CV(rslt)
self.assertAlmostEquals(np.floor(AICc), 895.0)
self.assertAlmostEquals(np.floor(AIC), 890.0)
self.assertAlmostEquals(np.floor(BIC), 943.0)
self.assertAlmostEquals(np.around(CV, 2), 18.21)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
def test_GS_NN(self):
est_Int = self.GS_NN.by_col(' est_Intercept')
se_Int = self.GS_NN.by_col(' se_Intercept')
t_Int = self.GS_NN.by_col(' t_Intercept')
est_rural = self.GS_NN.by_col(' est_PctRural')
se_rural = self.GS_NN.by_col(' se_PctRural')
t_rural = self.GS_NN.by_col(' t_PctRural')
est_pov = self.GS_NN.by_col(' est_PctPov')
se_pov = self.GS_NN.by_col(' se_PctPov')
t_pov = self.GS_NN.by_col(' t_PctPov')
est_black = self.GS_NN.by_col(' est_PctBlack')
se_black = self.GS_NN.by_col(' se_PctBlack')
t_black = self.GS_NN.by_col(' t_PctBlack')
yhat = self.GS_NN.by_col(' yhat')
res = np.array(self.GS_NN.by_col(' residual'))
std_res = np.array(self.GS_NN.by_col(' std_residual')).reshape((-1,1))
localR2 = np.array(self.GS_NN.by_col(' localR2')).reshape((-1,1))
inf = np.array(self.GS_NN.by_col(' influence')).reshape((-1,1))
cooksD = np.array(self.GS_NN.by_col(' CooksD')).reshape((-1,1))
model = GWR(self.coords, self.y, self.X, bw=49.000,
kernel='gaussian', fixed=False)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
CV = get_CV(rslt)
self.assertAlmostEquals(np.floor(AICc), 896)
self.assertAlmostEquals(np.floor(AIC), 894.0)
self.assertAlmostEquals(np.floor(BIC), 922.0)
self.assertAlmostEquals(np.around(CV, 2), 17.91)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
def test_FBGWR(self):
model = FBGWR(self.coords, self.y, self.X, [157.0, 65.0, 52.0],
XB=self.XB, err=self.err, constant=False)
rslt = model.fit()
np.testing.assert_allclose(rslt.predy, self.FB['predy'], atol=1e-07)
np.testing.assert_allclose(rslt.params, self.FB['params'], atol=1e-07)
np.testing.assert_allclose(rslt.resid_response, self.FB['u'], atol=1e-05)
np.testing.assert_almost_equal(rslt.resid_ss, 6339.3497144025841)
class TestGWRPoisson(unittest.TestCase):
def setUp(self):
data = pysal.open(pysal.examples.get_path('Tokyomortality.csv'), mode='Ur')
self.coords = zip(data.by_col('X_CENTROID'), data.by_col('Y_CENTROID'))
self.y = np.array(data.by_col('db2564')).reshape((-1,1))
self.off = np.array(data.by_col('eb2564')).reshape((-1,1))
OCC = np.array(data.by_col('OCC_TEC')).reshape((-1,1))
OWN = np.array(data.by_col('OWNH')).reshape((-1,1))
POP = np.array(data.by_col('POP65')).reshape((-1,1))
UNEMP = np.array(data.by_col('UNEMP')).reshape((-1,1))
self.X = np.hstack([OCC,OWN,POP,UNEMP])
self.BS_F = pysal.open(pysal.examples.get_path('tokyo_BS_F_listwise.csv'))
self.BS_NN = pysal.open(pysal.examples.get_path('tokyo_BS_NN_listwise.csv'))
self.GS_F = pysal.open(pysal.examples.get_path('tokyo_GS_F_listwise.csv'))
self.GS_NN = pysal.open(pysal.examples.get_path('tokyo_GS_NN_listwise.csv'))
self.BS_NN_OFF = pysal.open(pysal.examples.get_path('tokyo_BS_NN_OFF_listwise.csv'))
def test_BS_F(self):
est_Int = self.BS_F.by_col(' est_Intercept')
se_Int = self.BS_F.by_col(' se_Intercept')
t_Int = self.BS_F.by_col(' t_Intercept')
est_OCC = self.BS_F.by_col(' est_OCC_TEC')
se_OCC = self.BS_F.by_col(' se_OCC_TEC')
t_OCC = self.BS_F.by_col(' t_OCC_TEC')
est_OWN = self.BS_F.by_col(' est_OWNH')
se_OWN = self.BS_F.by_col(' se_OWNH')
t_OWN = self.BS_F.by_col(' t_OWNH')
est_POP = self.BS_F.by_col(' est_POP65')
se_POP = self.BS_F.by_col(' se_POP65')
t_POP = self.BS_F.by_col(' t_POP65')
est_UNEMP = self.BS_F.by_col(' est_UNEMP')
se_UNEMP = self.BS_F.by_col(' se_UNEMP')
t_UNEMP = self.BS_F.by_col(' t_UNEMP')
yhat = self.BS_F.by_col(' yhat')
pdev = np.array(self.BS_F.by_col(' localpdev')).reshape((-1,1))
model = GWR(self.coords, self.y, self.X, bw=26029.625, family=Poisson(),
kernel='bisquare', fixed=True)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
self.assertAlmostEquals(np.floor(AICc), 13294.0)
self.assertAlmostEquals(np.floor(AIC), 13247.0)
self.assertAlmostEquals(np.floor(BIC), 13485.0)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-05)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-03)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-03)
np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-04)
np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04)
np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-03)
np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-03)
np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-04)
np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-04)
np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
def test_BS_NN(self):
est_Int = self.BS_NN.by_col(' est_Intercept')
se_Int = self.BS_NN.by_col(' se_Intercept')
t_Int = self.BS_NN.by_col(' t_Intercept')
est_OCC = self.BS_NN.by_col(' est_OCC_TEC')
se_OCC = self.BS_NN.by_col(' se_OCC_TEC')
t_OCC = self.BS_NN.by_col(' t_OCC_TEC')
est_OWN = self.BS_NN.by_col(' est_OWNH')
se_OWN = self.BS_NN.by_col(' se_OWNH')
t_OWN = self.BS_NN.by_col(' t_OWNH')
est_POP = self.BS_NN.by_col(' est_POP65')
se_POP = self.BS_NN.by_col(' se_POP65')
t_POP = self.BS_NN.by_col(' t_POP65')
est_UNEMP = self.BS_NN.by_col(' est_UNEMP')
se_UNEMP = self.BS_NN.by_col(' se_UNEMP')
t_UNEMP = self.BS_NN.by_col(' t_UNEMP')
yhat = self.BS_NN.by_col(' yhat')
pdev = np.array(self.BS_NN.by_col(' localpdev')).reshape((-1,1))
model = GWR(self.coords, self.y, self.X, bw=50, family=Poisson(),
kernel='bisquare', fixed=False)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
self.assertAlmostEquals(np.floor(AICc), 13285)
self.assertAlmostEquals(np.floor(AIC), 13259.0)
self.assertAlmostEquals(np.floor(BIC), 13442.0)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03)
np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04)
np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02)
np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-02)
np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-03)
np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-04)
np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04)
np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
def test_BS_NN_Offset(self):
est_Int = self.BS_NN_OFF.by_col(' est_Intercept')
se_Int = self.BS_NN_OFF.by_col(' se_Intercept')
t_Int = self.BS_NN_OFF.by_col(' t_Intercept')
est_OCC = self.BS_NN_OFF.by_col(' est_OCC_TEC')
se_OCC = self.BS_NN_OFF.by_col(' se_OCC_TEC')
t_OCC = self.BS_NN_OFF.by_col(' t_OCC_TEC')
est_OWN = self.BS_NN_OFF.by_col(' est_OWNH')
se_OWN = self.BS_NN_OFF.by_col(' se_OWNH')
t_OWN = self.BS_NN_OFF.by_col(' t_OWNH')
est_POP = self.BS_NN_OFF.by_col(' est_POP65')
se_POP = self.BS_NN_OFF.by_col(' se_POP65')
t_POP = self.BS_NN_OFF.by_col(' t_POP65')
est_UNEMP = self.BS_NN_OFF.by_col(' est_UNEMP')
se_UNEMP = self.BS_NN_OFF.by_col(' se_UNEMP')
t_UNEMP = self.BS_NN_OFF.by_col(' t_UNEMP')
yhat = self.BS_NN_OFF.by_col(' yhat')
pdev = np.array(self.BS_NN_OFF.by_col(' localpdev')).reshape((-1,1))
model = GWR(self.coords, self.y, self.X, bw=100, offset=self.off, family=Poisson(),
kernel='bisquare', fixed=False)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
self.assertAlmostEquals(np.floor(AICc), 367.0)
self.assertAlmostEquals(np.floor(AIC), 361.0)
self.assertAlmostEquals(np.floor(BIC), 451.0)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-02,
atol=1e-02)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02, atol=1e-02)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-01,
atol=1e-02)
np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03,
atol=1e-02)
np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02, atol=1e-02)
np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-01,
atol=1e-02)
np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04,
atol=1e-02)
np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02, atol=1e-02)
np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-01,
atol=1e-02)
np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-03,
atol=1e-02)
np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02, atol=1e-02)
np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-01,
atol=1e-02)
np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-04,
atol=1e-02)
np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02,
atol=1e-02)
np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-01,
atol=1e-02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-03, atol=1e-02)
np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-04, atol=1e-02)
def test_GS_F(self):
est_Int = self.GS_F.by_col(' est_Intercept')
se_Int = self.GS_F.by_col(' se_Intercept')
t_Int = self.GS_F.by_col(' t_Intercept')
est_OCC = self.GS_F.by_col(' est_OCC_TEC')
se_OCC = self.GS_F.by_col(' se_OCC_TEC')
t_OCC = self.GS_F.by_col(' t_OCC_TEC')
est_OWN = self.GS_F.by_col(' est_OWNH')
se_OWN = self.GS_F.by_col(' se_OWNH')
t_OWN = self.GS_F.by_col(' t_OWNH')
est_POP = self.GS_F.by_col(' est_POP65')
se_POP = self.GS_F.by_col(' se_POP65')
t_POP = self.GS_F.by_col(' t_POP65')
est_UNEMP = self.GS_F.by_col(' est_UNEMP')
se_UNEMP = self.GS_F.by_col(' se_UNEMP')
t_UNEMP = self.GS_F.by_col(' t_UNEMP')
yhat = self.GS_F.by_col(' yhat')
pdev = np.array(self.GS_F.by_col(' localpdev')).reshape((-1,1))
model = GWR(self.coords, self.y, self.X, bw=8764.474, family=Poisson(),
kernel='gaussian', fixed=True)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
self.assertAlmostEquals(np.floor(AICc), 11283.0)
self.assertAlmostEquals(np.floor(AIC), 11211.0)
self.assertAlmostEquals(np.floor(BIC), 11497.0)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-03)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03)
np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-03)
np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02)
np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-02)
np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-02)
np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-02)
np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04)
np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
def test_GS_NN(self):
est_Int = self.GS_NN.by_col(' est_Intercept')
se_Int = self.GS_NN.by_col(' se_Intercept')
t_Int = self.GS_NN.by_col(' t_Intercept')
est_OCC = self.GS_NN.by_col(' est_OCC_TEC')
se_OCC = self.GS_NN.by_col(' se_OCC_TEC')
t_OCC = self.GS_NN.by_col(' t_OCC_TEC')
est_OWN = self.GS_NN.by_col(' est_OWNH')
se_OWN = self.GS_NN.by_col(' se_OWNH')
t_OWN = self.GS_NN.by_col(' t_OWNH')
est_POP = self.GS_NN.by_col(' est_POP65')
se_POP = self.GS_NN.by_col(' se_POP65')
t_POP = self.GS_NN.by_col(' t_POP65')
est_UNEMP = self.GS_NN.by_col(' est_UNEMP')
se_UNEMP = self.GS_NN.by_col(' se_UNEMP')
t_UNEMP = self.GS_NN.by_col(' t_UNEMP')
yhat = self.GS_NN.by_col(' yhat')
pdev = np.array(self.GS_NN.by_col(' localpdev')).reshape((-1,1))
model = GWR(self.coords, self.y, self.X, bw=50, family=Poisson(),
kernel='gaussian', fixed=False)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
self.assertAlmostEquals(np.floor(AICc), 21070.0)
self.assertAlmostEquals(np.floor(AIC), 21069.0)
self.assertAlmostEquals(np.floor(BIC), 21111.0)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03)
np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04)
np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02)
np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-02)
np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-02)
np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-02)
np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04)
np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
class TestGWRBinomial(unittest.TestCase):
def setUp(self):
data = pysal.open(pysal.examples.get_path('landslides.csv'))
self.coords = zip(data.by_col('X'), data.by_col('Y'))
self.y = np.array(data.by_col('Landslid')).reshape((-1,1))
ELEV = np.array(data.by_col('Elev')).reshape((-1,1))
SLOPE = np.array(data.by_col('Slope')).reshape((-1,1))
SIN = np.array(data.by_col('SinAspct')).reshape((-1,1))
COS = np.array(data.by_col('CosAspct')).reshape((-1,1))
SOUTH = np.array(data.by_col('AbsSouth')).reshape((-1,1))
DIST = np.array(data.by_col('DistStrm')).reshape((-1,1))
self.X = np.hstack([ELEV, SLOPE, SIN, COS, SOUTH, DIST])
self.BS_F = pysal.open(pysal.examples.get_path('clearwater_BS_F_listwise.csv'))
self.BS_NN = pysal.open(pysal.examples.get_path('clearwater_BS_NN_listwise.csv'))
self.GS_F = pysal.open(pysal.examples.get_path('clearwater_GS_F_listwise.csv'))
self.GS_NN = pysal.open(pysal.examples.get_path('clearwater_GS_NN_listwise.csv'))
def test_BS_F(self):
est_Int = self.BS_F.by_col(' est_Intercept')
se_Int = self.BS_F.by_col(' se_Intercept')
t_Int = self.BS_F.by_col(' t_Intercept')
est_elev = self.BS_F.by_col(' est_Elev')
se_elev = self.BS_F.by_col(' se_Elev')
t_elev = self.BS_F.by_col(' t_Elev')
est_slope = self.BS_F.by_col(' est_Slope')
se_slope = self.BS_F.by_col(' se_Slope')
t_slope = self.BS_F.by_col(' t_Slope')
est_sin = self.BS_F.by_col(' est_SinAspct')
se_sin = self.BS_F.by_col(' se_SinAspct')
t_sin = self.BS_F.by_col(' t_SinAspct')
est_cos = self.BS_F.by_col(' est_CosAspct')
se_cos = self.BS_F.by_col(' se_CosAspct')
t_cos = self.BS_F.by_col(' t_CosAspct')
est_south = self.BS_F.by_col(' est_AbsSouth')
se_south = self.BS_F.by_col(' se_AbsSouth')
t_south = self.BS_F.by_col(' t_AbsSouth')
est_strm = self.BS_F.by_col(' est_DistStrm')
se_strm = self.BS_F.by_col(' se_DistStrm')
t_strm = self.BS_F.by_col(' t_DistStrm')
yhat = self.BS_F.by_col(' yhat')
pdev = np.array(self.BS_F.by_col(' localpdev')).reshape((-1,1))
model = GWR(self.coords, self.y, self.X, bw=19642.170, family=Binomial(),
kernel='bisquare', fixed=True)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
self.assertAlmostEquals(np.floor(AICc), 275.0)
self.assertAlmostEquals(np.floor(AIC), 271.0)
self.assertAlmostEquals(np.floor(BIC), 349.0)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e02)
np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-01)
#This test fails - likely due to compound rounding errors
#Has been tested using statsmodels.family calculations and
#code from Jing's python version, which both yield the same
#np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
def test_BS_NN(self):
est_Int = self.BS_NN.by_col(' est_Intercept')
se_Int = self.BS_NN.by_col(' se_Intercept')
t_Int = self.BS_NN.by_col(' t_Intercept')
est_elev = self.BS_NN.by_col(' est_Elev')
se_elev = self.BS_NN.by_col(' se_Elev')
t_elev = self.BS_NN.by_col(' t_Elev')
est_slope = self.BS_NN.by_col(' est_Slope')
se_slope = self.BS_NN.by_col(' se_Slope')
t_slope = self.BS_NN.by_col(' t_Slope')
est_sin = self.BS_NN.by_col(' est_SinAspct')
se_sin = self.BS_NN.by_col(' se_SinAspct')
t_sin = self.BS_NN.by_col(' t_SinAspct')
est_cos = self.BS_NN.by_col(' est_CosAspct')
se_cos = self.BS_NN.by_col(' se_CosAspct')
t_cos = self.BS_NN.by_col(' t_CosAspct')
est_south = self.BS_NN.by_col(' est_AbsSouth')
se_south = self.BS_NN.by_col(' se_AbsSouth')
t_south = self.BS_NN.by_col(' t_AbsSouth')
est_strm = self.BS_NN.by_col(' est_DistStrm')
se_strm = self.BS_NN.by_col(' se_DistStrm')
t_strm = self.BS_NN.by_col(' t_DistStrm')
yhat = self.BS_NN.by_col(' yhat')
pdev = self.BS_NN.by_col(' localpdev')
model = GWR(self.coords, self.y, self.X, bw=158, family=Binomial(),
kernel='bisquare', fixed=False)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
self.assertAlmostEquals(np.floor(AICc), 277.0)
self.assertAlmostEquals(np.floor(AIC), 271.0)
self.assertAlmostEquals(np.floor(BIC), 358.0)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e03)
np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e03)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-01)
#This test fails - likely due to compound rounding errors
#Has been tested using statsmodels.family calculations and
#code from Jing's python version, which both yield the same
#np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
def test_GS_F(self):
est_Int = self.GS_F.by_col(' est_Intercept')
se_Int = self.GS_F.by_col(' se_Intercept')
t_Int = self.GS_F.by_col(' t_Intercept')
est_elev = self.GS_F.by_col(' est_Elev')
se_elev = self.GS_F.by_col(' se_Elev')
t_elev = self.GS_F.by_col(' t_Elev')
est_slope = self.GS_F.by_col(' est_Slope')
se_slope = self.GS_F.by_col(' se_Slope')
t_slope = self.GS_F.by_col(' t_Slope')
est_sin = self.GS_F.by_col(' est_SinAspct')
se_sin = self.GS_F.by_col(' se_SinAspct')
t_sin = self.GS_F.by_col(' t_SinAspct')
est_cos = self.GS_F.by_col(' est_CosAspct')
se_cos = self.GS_F.by_col(' se_CosAspct')
t_cos = self.GS_F.by_col(' t_CosAspct')
est_south = self.GS_F.by_col(' est_AbsSouth')
se_south = self.GS_F.by_col(' se_AbsSouth')
t_south = self.GS_F.by_col(' t_AbsSouth')
est_strm = self.GS_F.by_col(' est_DistStrm')
se_strm = self.GS_F.by_col(' se_DistStrm')
t_strm = self.GS_F.by_col(' t_DistStrm')
yhat = self.GS_F.by_col(' yhat')
pdev = self.GS_F.by_col(' localpdev')
model = GWR(self.coords, self.y, self.X, bw=8929.061, family=Binomial(),
kernel='gaussian', fixed=True)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
self.assertAlmostEquals(np.floor(AICc), 276.0)
self.assertAlmostEquals(np.floor(AIC), 272.0)
self.assertAlmostEquals(np.floor(BIC), 341.0)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e02)
np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-01)
#This test fails - likely due to compound rounding errors
#Has been tested using statsmodels.family calculations and
#code from Jing's python version, which both yield the same
#np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
def test_GS_NN(self):
est_Int = self.GS_NN.by_col(' est_Intercept')
se_Int = self.GS_NN.by_col(' se_Intercept')
t_Int = self.GS_NN.by_col(' t_Intercept')
est_elev = self.GS_NN.by_col(' est_Elev')
se_elev = self.GS_NN.by_col(' se_Elev')
t_elev = self.GS_NN.by_col(' t_Elev')
est_slope = self.GS_NN.by_col(' est_Slope')
se_slope = self.GS_NN.by_col(' se_Slope')
t_slope = self.GS_NN.by_col(' t_Slope')
est_sin = self.GS_NN.by_col(' est_SinAspct')
se_sin = self.GS_NN.by_col(' se_SinAspct')
t_sin = self.GS_NN.by_col(' t_SinAspct')
est_cos = self.GS_NN.by_col(' est_CosAspct')
se_cos = self.GS_NN.by_col(' se_CosAspct')
t_cos = self.GS_NN.by_col(' t_CosAspct')
est_south = self.GS_NN.by_col(' est_AbsSouth')
se_south = self.GS_NN.by_col(' se_AbsSouth')
t_south = self.GS_NN.by_col(' t_AbsSouth')
est_strm = self.GS_NN.by_col(' est_DistStrm')
se_strm = self.GS_NN.by_col(' se_DistStrm')
t_strm = self.GS_NN.by_col(' t_DistStrm')
yhat = self.GS_NN.by_col(' yhat')
pdev = self.GS_NN.by_col(' localpdev')
model = GWR(self.coords, self.y, self.X, bw=64, family=Binomial(),
kernel='gaussian', fixed=False)
rslt = model.fit()
AICc = get_AICc(rslt)
AIC = get_AIC(rslt)
BIC = get_BIC(rslt)
self.assertAlmostEquals(np.floor(AICc), 276.0)
self.assertAlmostEquals(np.floor(AIC), 273.0)
self.assertAlmostEquals(np.floor(BIC), 331.0)
np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e02)
np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e02)
np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-00)
#This test fails - likely due to compound rounding errors
#Has been tested using statsmodels.family calculations and
#code from Jing's python version, which both yield the same
#np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,84 @@
import unittest
import numpy as np
import pysal
from pysal.contrib.gwr.kernels import *
PEGP = pysal.examples.get_path
class TestKernels(unittest.TestCase):
def setUp(self):
np.random.seed(1234)
x = np.arange(1,6)
y = np.arange(5,0, -1)
np.random.shuffle(x)
np.random.shuffle(y)
self.coords = np.array(zip(x, y))
self.fix_gauss_kern = np.array([
[ 1. , 0.38889556, 0.48567179, 0.48567179, 0.89483932],
[ 0.38889556, 1. , 0.89483932, 0.64118039, 0.48567179],
[ 0.48567179, 0.89483932, 1. , 0.89483932, 0.48567179],
[ 0.48567179, 0.64118039, 0.89483932, 1. , 0.38889556],
[ 0.89483932, 0.48567179, 0.48567179, 0.38889556, 1. ]])
self.adapt_gauss_kern = np.array([
[ 1. , 0.52004183, 0.60653072, 0.60653072, 0.92596109],
[ 0.34559083, 1. , 0.88249692, 0.60653072, 0.44374738],
[ 0.03877423, 0.60653072, 1. , 0.60653072, 0.03877423],
[ 0.44374738, 0.60653072, 0.88249692, 1. , 0.34559083],
[ 0.92596109, 0.60653072, 0.60653072, 0.52004183, 1. ]])
self.fix_bisquare_kern = np.array([
[ 1. , 0. , 0. , 0. , 0.60493827],
[ 0. , 1. , 0.60493827, 0.01234568, 0. ],
[ 0. , 0.60493827, 1. , 0.60493827, 0. ],
[ 0. , 0.01234568, 0.60493827, 1. , 0. ],
[ 0.60493827, 0. , 0. , 0. , 1. ]])
self.adapt_bisquare_kern = np.array([
[ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00,
3.99999881e-14, 7.15976383e-01],
[ 0.00000000e+00, 1.00000000e+00, 5.62500075e-01,
3.99999881e-14, 0.00000000e+00],
[ 0.00000000e+00, 3.99999881e-14, 1.00000000e+00,
3.99999881e-14, 0.00000000e+00],
[ 0.00000000e+00, 3.99999881e-14, 5.62500075e-01,
1.00000000e+00, 0.00000000e+00],
[ 7.15976383e-01, 0.00000000e+00, 3.99999881e-14,
0.00000000e+00, 1.00000000e+00]])
self.fix_exp_kern = np.array([
[ 1. , 0.2529993 , 0.30063739, 0.30063739, 0.62412506],
[ 0.2529993 , 1. , 0.62412506, 0.38953209, 0.30063739],
[ 0.30063739, 0.62412506, 1. , 0.62412506, 0.30063739],
[ 0.30063739, 0.38953209, 0.62412506, 1. , 0.2529993 ],
[ 0.62412506, 0.30063739, 0.30063739, 0.2529993 , 1. ]])
self.adapt_exp_kern = np.array([
[ 1. , 0.31868771, 0.36787948, 0.36787948, 0.67554721],
[ 0.23276223, 1. , 0.60653069, 0.36787948, 0.27949951],
[ 0.07811997, 0.36787948, 1. , 0.36787948, 0.07811997],
[ 0.27949951, 0.36787948, 0.60653069, 1. , 0.23276223],
[ 0.67554721, 0.36787948, 0.36787948, 0.31868771, 1. ]])
def test_fix_gauss(self):
kern = fix_gauss(self.coords, 3)
np.testing.assert_allclose(kern, self.fix_gauss_kern)
def test_adapt_gauss(self):
kern = adapt_gauss(self.coords, 3)
np.testing.assert_allclose(kern, self.adapt_gauss_kern)
def test_fix_biqsquare(self):
kern = fix_bisquare(self.coords, 3)
np.testing.assert_allclose(kern, self.fix_bisquare_kern,
atol=1e-01)
def test_adapt_bisqaure(self):
kern = adapt_bisquare(self.coords, 3)
np.testing.assert_allclose(kern, self.adapt_bisquare_kern, atol=1e-012)
def test_fix_exp(self):
kern = fix_exp(self.coords, 3)
np.testing.assert_allclose(kern, self.fix_exp_kern)
def test_adapt_exp(self):
kern = adapt_exp(self.coords, 3)
np.testing.assert_allclose(kern, self.adapt_exp_kern)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,139 @@
"""
GWR is tested against results from GWR4
"""
import unittest
import pickle as pk
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial
from pysal.contrib.gwr.sel_bw import Sel_BW
import numpy as np
import pysal
class TestSelBW(unittest.TestCase):
def setUp(self):
data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
self.coords = zip(data.by_col('X'), data.by_col('Y'))
self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
rural = np.array(data.by_col('PctRural')).reshape((-1,1))
pov = np.array(data.by_col('PctPov')).reshape((-1,1))
black = np.array(data.by_col('PctBlack')).reshape((-1,1))
self.X = np.hstack([rural, pov, black])
self.XB = pk.load(open(pysal.examples.get_path('XB.p'), 'r'))
self.err = pk.load(open(pysal.examples.get_path('err.p'), 'r'))
def test_golden_fixed_AICc(self):
bw1 = 211027.34
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
fixed=True).search(criterion='AICc')
self.assertAlmostEqual(bw1, bw2)
def test_golden_adapt_AICc(self):
bw1 = 93.0
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
fixed=False).search(criterion='AICc')
self.assertAlmostEqual(bw1, bw2)
def test_golden_fixed_AIC(self):
bw1 = 76169.15
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=True).search(criterion='AIC')
self.assertAlmostEqual(bw1, bw2)
def test_golden_adapt_AIC(self):
bw1 = 50.0
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=False).search(criterion='AIC')
self.assertAlmostEqual(bw1, bw2)
def test_golden_fixed_BIC(self):
bw1 = 279451.43
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=True).search(criterion='BIC')
self.assertAlmostEqual(bw1, bw2)
def test_golden_adapt_BIC(self):
bw1 = 62.0
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=False).search(criterion='BIC')
self.assertAlmostEqual(bw1, bw2)
def test_golden_fixed_CV(self):
bw1 = 130406.67
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=True).search(criterion='CV')
self.assertAlmostEqual(bw1, bw2)
def test_golden_adapt_CV(self):
bw1 = 68.0
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=False).search(criterion='CV')
self.assertAlmostEqual(bw1, bw2)
def test_interval_fixed_AICc(self):
bw1 = 211025.0#211027.00
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
fixed=True).search(criterion='AICc', search='interval', bw_min=211001.,
bw_max=211035.0, interval=2)
self.assertAlmostEqual(bw1, bw2)
def test_interval_adapt_AICc(self):
bw1 = 93.0
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
fixed=False).search(criterion='AICc', search='interval',
bw_min=90.0, bw_max=95.0, interval=1)
self.assertAlmostEqual(bw1, bw2)
def test_interval_fixed_AIC(self):
bw1 = 76175.0#76169.00
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=True).search(criterion='AIC', search='interval',
bw_min=76161.0, bw_max=76175.0, interval=1)
self.assertAlmostEqual(bw1, bw2)
def test_interval_adapt_AIC(self):
bw1 = 40.0#50.0
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=False).search(criterion='AIC', search='interval', bw_min=40.0,
bw_max=60.0, interval=2)
self.assertAlmostEqual(bw1, bw2)
def test_interval_fixed_BIC(self):
bw1 = 279461.0#279451.00
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=True).search(criterion='BIC', search='interval', bw_min=279441.0,
bw_max=279461.0, interval=2)
self.assertAlmostEqual(bw1, bw2)
def test_interval_adapt_BIC(self):
bw1 = 62.0
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=False).search(criterion='BIC', search='interval',
bw_min=52.0, bw_max=72.0, interval=2)
self.assertAlmostEqual(bw1, bw2)
def test_interval_fixed_CV(self):
bw1 = 130400.0#130406.00
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=True).search(criterion='CV', search='interval', bw_min=130400.0,
bw_max=130410.0, interval=1)
self.assertAlmostEqual(bw1, bw2)
def test_interval_adapt_CV(self):
bw1 = 62.0#68.0
bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
fixed=False).search(criterion='CV', search='interval', bw_min=60.0,
bw_max=76.0 , interval=2)
self.assertAlmostEqual(bw1, bw2)
def test_FBGWR_AIC(self):
bw1 = [157.0, 65.0, 52.0]
sel = Sel_BW(self.coords, self.y, self.X, fb=True, kernel='bisquare',
constant=False)
bw2 = sel.search(tol_fb=1e-03)
np.testing.assert_allclose(bw1, bw2)
np.testing.assert_allclose(sel.XB, self.XB, atol=1e-05)
np.testing.assert_allclose(sel.err, self.err, atol=1e-05)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,42 @@
import numpy as np
from base.gwr import GWR
from base.sel_bw import Sel_BW
def gwr(subquery, dep_var, ind_vars, fixed=False, kernel='bisquare'):
"""
subquery: 'select * from interesting_table'
dep_var: 'pctbachelor'
ind_vars: ['intercept', 'pctpov', 'pctrural', 'pctblack']
fixed: False (kNN) or True ('distance')
kernel: 'bisquare' (default), or 'exponential', 'gaussian'
"""
query_result = subquery
rowid = np.array(query_result[0]['rowid'])
x = np.array(query_result[0]['x'])
y = np.array(query_result[0]['y'])
coords = zip(x,y)
Y = query_result[0]['dep'].reshape((-1,1))
n = Y.shape[0]
k = len(ind_vars)
X = np.zeros((n, k))
for attr in range(0,k):
attr_name = 'attr' + str(attr+1)
X[:, attr] = np.array(query_result[0][attr_name]).flatten()
bw = Sel_BW(coords, Y, X, fixed=fixed, kernel=kernel).search()
model = GWR(coords, Y, X, bw, fixed=fixed, kernel=kernel).fit()
coefficients = model.params.reshape((-1,))
t_vals = model.tvalues.reshape((-1,))
stand_errs = model.bse.reshape((-1))
predicted = np.repeat(model.predy.reshape((-1,)), k+1)
residuals = np.repeat(model.resid_response.reshape((-1,)), k+1)
r_squared = np.tile(model.localR2.reshape((-1,)), k+1)
rowid = np.tile(rowid, k+1).reshape((-1,))
var_name = np.tile(ind_vars, k+1).reshape((-1,))
return zip(coefficients, stand_errs, t_vals, predicted, residuals, r_squared, rowid, var_name)