adds release files for v0.8.1
This commit is contained in:
parent
1531350bfe
commit
5ee795dc11
4
NEWS.md
4
NEWS.md
@ -1,3 +1,7 @@
|
||||
0.8.1 (2018-03-12)
|
||||
------------------
|
||||
* Adds improperly added version files
|
||||
|
||||
0.8.0 (2018-03-12)
|
||||
------------------
|
||||
* Adds `CDB_MoransILocal*` functions that return spatial lag [#202](https://github.com/CartoDB/crankshaft/pull/202)
|
||||
|
2307
release/crankshaft--0.8.0--0.8.1.sql
Normal file
2307
release/crankshaft--0.8.0--0.8.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
2307
release/crankshaft--0.8.1.sql
Normal file
2307
release/crankshaft--0.8.1.sql
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB Spatial Analysis extension'
|
||||
default_version = '0.8.0'
|
||||
default_version = '0.8.1'
|
||||
requires = 'plpythonu, postgis'
|
||||
superuser = true
|
||||
schema = cdb_crankshaft
|
||||
|
Binary file not shown.
7
release/python/0.8.1/crankshaft/crankshaft/__init__.py
Normal file
7
release/python/0.8.1/crankshaft/crankshaft/__init__.py
Normal file
@ -0,0 +1,7 @@
|
||||
"""Import all modules"""
|
||||
import crankshaft.random_seeds
|
||||
import crankshaft.clustering
|
||||
import crankshaft.space_time_dynamics
|
||||
import crankshaft.segmentation
|
||||
import crankshaft.regression
|
||||
import analysis_data_provider
|
@ -0,0 +1,98 @@
|
||||
"""class for fetching data"""
|
||||
import plpy
|
||||
import pysal_utils as pu
|
||||
|
||||
NULL_VALUE_ERROR = ('No usable data passed to analysis. Check your input rows '
|
||||
'for null values and fill in appropriately.')
|
||||
|
||||
|
||||
def verify_data(func):
    """Decorator that validates the result of a data-fetching callable.

    The wrapped callable is invoked with the caller's arguments:

    * a truthy result is returned unchanged;
    * a falsy result (for example an empty result set) is reported through
      ``plpy.error`` with ``NULL_VALUE_ERROR``;
    * any exception raised inside the ``try`` block is reported through
      ``plpy.error`` as ``'Analysis failed: <message>'``.

    ``functools.wraps`` keeps the wrapped callable's ``__name__`` and
    ``__doc__`` on the returned wrapper.
    """
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        """Error checking"""
        try:
            data = func(*args, **kwargs)
            if not data:
                plpy.error(NULL_VALUE_ERROR)
            else:
                return data
        except Exception as err:
            plpy.error('Analysis failed: {}'.format(err))

        # Fallback: only reached when plpy.error returns instead of raising
        return []

    return wrapper
|
||||
|
||||
|
||||
class AnalysisDataProvider(object):
    """Data source used by the analysis classes.

    Each method builds a SQL string, runs it with ``plpy.execute`` and
    returns the result through the ``verify_data`` decorator.
    """

    @verify_data
    def get_getis(self, w_type, params):
        """fetch data for getis ord's g"""
        return plpy.execute(pu.construct_neighbor_query(w_type, params))

    @verify_data
    def get_markov(self, w_type, params):
        """fetch data for spatial markov"""
        return plpy.execute(pu.construct_neighbor_query(w_type, params))

    @verify_data
    def get_moran(self, w_type, params):
        """fetch data for moran's i analyses"""
        return plpy.execute(pu.construct_neighbor_query(w_type, params))

    @verify_data
    def get_nonspatial_kmeans(self, params):
        """
        Fetch data for non-spatial k-means.

        Inputs - a dict (params) with the following keys:
            colnames: a (text) list of column names (e.g.,
                `['andy', 'cookie']`)
            id_col: the name of the id column (e.g., `'cartodb_id'`)
            subquery: the subquery for exposing the data (e.g.,
                SELECT * FROM favorite_things)
        Output:
            The result of running the aggregate query, for consumption
            within `KMeans().nonspatial`. The query selects one array per
            requested column (aliased `arr_col1`, `arr_col2`, ...) plus the
            array of ids (aliased `rowid`).
        """
        column_aggs = []
        for position, colname in enumerate(params['colnames']):
            column_aggs.append(
                'array_agg({0}) As arr_col{1}'.format(colname, position + 1))

        sql_template = '''
            SELECT {cols}, array_agg({id_col}) As rowid
            FROM ({subquery}) As a
        '''
        sql = sql_template.format(cols=', '.join(column_aggs),
                                  id_col=params['id_col'],
                                  subquery=params['subquery'])
        return plpy.execute(sql.strip())

    @verify_data
    def get_spatial_kmeans(self, params):
        """fetch data for spatial kmeans"""
        sql_template = '''
            SELECT
              array_agg("{id_col}" ORDER BY "{id_col}") as ids,
              array_agg(ST_X("{geom_col}") ORDER BY "{id_col}") As xs,
              array_agg(ST_Y("{geom_col}") ORDER BY "{id_col}") As ys
            FROM ({subquery}) As a
            WHERE "{geom_col}" IS NOT NULL
        '''
        return plpy.execute(sql_template.format(**params))

    @verify_data
    def get_gwr(self, params):
        """fetch data for gwr analysis"""
        return plpy.execute(pu.gwr_query(params))

    @verify_data
    def get_gwr_predict(self, params):
        """fetch data for gwr predict"""
        return plpy.execute(pu.gwr_predict_query(params))
|
@ -0,0 +1,76 @@
|
||||
"""
|
||||
Based on the Weiszfeld algorithm:
|
||||
https://en.wikipedia.org/wiki/Geometric_median
|
||||
"""
|
||||
|
||||
|
||||
# import plpy
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
|
||||
|
||||
def median_center(tablename, geom_col, num_iters=50, tolerance=0.001):
    """Estimate the median center of the points in a table.

    Iterates the Weiszfeld update starting from the mean of the points.

    Args:
        tablename: table (or other FROM-able expression) to read from.
        geom_col: name of the geometry column; its ST_X / ST_Y values are
            used as coordinates.
        num_iters: maximum number of update passes.
        tolerance: stop early once the center moves less than this amount
            between two consecutive passes.

    Returns:
        numpy array holding the estimated center coordinates.

    Note:
        When the data cannot be fetched (for example when ``plpy`` is not
        importable because the code runs outside PostgreSQL), the function
        prints 'No plpy' and runs on randomly generated sample points.
    """
    query = '''
        SELECT array_agg(ST_X({geom_col})) As x_coords,
               array_agg(ST_Y({geom_col})) As y_coords
          FROM {tablename}
    '''.format(geom_col=geom_col, tablename=tablename)

    try:
        # plpy is imported here because the module-level import is
        # commented out in this file
        import plpy
        resp = plpy.execute(query)
        # results are indexed row first, then column name
        data = np.vstack((resp[0]['x_coords'],
                          resp[0]['y_coords'])).T

        plpy.notice('coords: %s' % str(data))
    except Exception:
        # best-effort fallback kept from the original implementation
        print('No plpy')
        data = np.array([[1.2 * np.random.random() + 10.,
                          1.1 * (np.random.random() - 1.) + 3.]
                         for i in range(1, 100)])

    # initialize 'median center' to be the mean
    coords_center_temp = data.mean(axis=0)

    print('temp_center: %s' % str(coords_center_temp))

    for i in range(num_iters):
        old_coords_center = coords_center_temp.copy()
        denom = denominator(coords_center_temp, data)
        coords_center_temp = np.sum(
            [point * numerator(coords_center_temp, point) for point in data],
            axis=0)
        coords_center_temp = coords_center_temp / denom

        shift = np.linalg.norm(old_coords_center - coords_center_temp)

        print("Pass #%d" % i)
        print("max, min of data: %0.4f, %0.4f" % (data.max(), data.min()))
        print('temp_center: %s' % str(coords_center_temp))
        print("Change in center: %0.4f" % shift)
        print("Center coords: %s" % str(coords_center_temp))
        print("Objective Function: %0.4f" % obj_func(coords_center_temp, data))

        # honor the tolerance argument: stop once the update has converged
        if shift < tolerance:
            break

    return coords_center_temp
|
||||
|
||||
|
||||
def obj_func(center_coords, data):
    """Objective minimised by the geometric median.

    Args:
        center_coords: candidate center, one value per dimension.
        data: array with one point per row.

    Returns:
        Sum of the Euclidean distances from every point to the center.
    """
    # one distance per row; a single matrix norm would instead give the
    # square root of the summed squared distances
    distances = np.linalg.norm(np.atleast_2d(data) - center_coords, axis=1)
    return np.sum(distances)
|
||||
|
||||
|
||||
def numerator(center_coords, data_i):
    """Weight of one point: reciprocal of its distance to the center.

    Args:
        center_coords: current center estimate.
        data_i: coordinates of a single point.
    """
    distance = np.linalg.norm(center_coords - data_i)
    return np.reciprocal(distance)
|
||||
|
||||
|
||||
def denominator(center_coords, data):
    """Normalising term of the Weiszfeld update.

    Args:
        center_coords: current center estimate.
        data: array with one point per row.

    Returns:
        Sum over all points of the reciprocal distance to the center.
        A point that coincides with the center contributes an infinite
        term, exactly as ``numerator`` does for that point.
    """
    distances = np.linalg.norm(np.atleast_2d(data) - center_coords, axis=1)
    return np.sum(np.reciprocal(distances))
|
@ -0,0 +1,4 @@
|
||||
"""Import all functions for clustering"""
|
||||
from moran import *
|
||||
from kmeans import *
|
||||
from getis import *
|
@ -0,0 +1,50 @@
|
||||
"""
|
||||
Getis-Ord's G geostatistics (hotspot/coldspot analysis)
|
||||
"""
|
||||
|
||||
import pysal as ps
|
||||
from collections import OrderedDict
|
||||
|
||||
# crankshaft modules
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
|
||||
class Getis(object):
    """Getis-Ord's G* analysis backed by a pluggable data provider.

    When no ``data_provider`` is passed, an ``AnalysisDataProvider`` is
    created.
    """

    def __init__(self, data_provider=None):
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None else data_provider)

    def getis_ord(self, subquery, attr,
                  w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Getis-Ord's G*
        Implementation building neighbors with a PostGIS database and PySAL's
          Getis-Ord's G* hotspot/coldspot module.
        Andy Eschbacher

        Returns the zipped ``z_sim``, ``p_sim`` and ``p_z_sim`` values of
        PySAL's ``G_Local`` result together with the id order of the
        weights object.
        """

        # rows whose attribute is null are dropped by the query, so with
        # kNN the neighbors that remain may be farther away

        query_params = OrderedDict()
        query_params["id_col"] = id_col
        query_params["attr1"] = attr
        query_params["geom_col"] = geom_col
        query_params["subquery"] = subquery
        query_params["num_ngbrs"] = num_ngbrs

        rows = self.data_provider.get_getis(w_type, query_params)
        values = pu.get_attributes(rows)

        # PySAL weight object built from the neighbor lists
        weights = pu.get_weight(rows, w_type, num_ngbrs)

        # Getis-Ord's G* z- and p-values
        g_local = ps.esda.getisord.G_Local(values, weights,
                                           star=True,
                                           permutations=permutations)

        return zip(g_local.z_sim, g_local.p_sim, g_local.p_z_sim,
                   weights.id_order)
|
113
release/python/0.8.1/crankshaft/crankshaft/clustering/kmeans.py
Normal file
113
release/python/0.8.1/crankshaft/crankshaft/clustering/kmeans.py
Normal file
@ -0,0 +1,113 @@
|
||||
from sklearn.cluster import KMeans
|
||||
import numpy as np
|
||||
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
|
||||
class Kmeans(object):
    """K-means clustering (spatial and non-spatial) on queried data.

    When no ``data_provider`` is passed, an ``AnalysisDataProvider`` is
    created.
    """

    def __init__(self, data_provider=None):
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None else data_provider)

    def spatial(self, query, no_clusters, no_init=20):
        """
        find centers based on clusters of latitude/longitude pairs
        query: SQL query that has a WGS84 geometry (the_geom)

        Returns the zipped (id, cluster label) pairs.
        """
        fetch_params = {"subquery": query,
                        "geom_col": "the_geom",
                        "id_col": "cartodb_id"}

        row = self.data_provider.get_spatial_kmeans(fetch_params)[0]

        # the provider returns one row holding parallel arrays
        xs = row['xs']
        ys = row['ys']
        ids = row['ids']

        model = KMeans(n_clusters=no_clusters, n_init=no_init)
        cluster_labels = model.fit_predict(zip(xs, ys))
        return zip(ids, cluster_labels)

    def nonspatial(self, subquery, colnames, no_clusters=5,
                   standardize=True, id_col='cartodb_id'):
        """
        Arguments:
            subquery (string): A SQL query to retrieve the data required to
                               do the k-means clustering analysis, like so:
                               SELECT * FROM iris_flower_data
            colnames (list): a list of the column names which contain the data
                             of interest, like so: ['sepal_width',
                                                    'petal_width',
                                                    'sepal_length',
                                                    'petal_length']
            no_clusters (int): number of clusters (greater than zero)
            standardize (bool): when true, the columns are passed through
                                `_scale_data` before clustering
            id_col (string): name of the input id_column

        Returns:
            A list of tuples with the following columns:
            cluster labels: a label for the cluster that the row belongs to
            centers: center of the cluster that this row belongs to
                     (a JSON string keyed by column name)
            silhouettes: silhouette measure for this value
            inertia: the fitted model's `inertia_`, repeated for every row
            rowid: row that these values belong to (corresponds to the value in
                   `id_col`)
        """
        import json
        from sklearn import metrics

        fetch_params = {
            "colnames": colnames,
            "subquery": subquery,
            "id_col": id_col
        }

        data = self.data_provider.get_nonspatial_kmeans(fetch_params)

        # feature matrix used for clustering
        cluster_columns = _extract_columns(data)
        if standardize:
            cluster_columns = _scale_data(cluster_columns)

        kmeans = KMeans(n_clusters=no_clusters,
                        random_state=0).fit(cluster_columns)

        # center of the cluster each row was assigned to
        row_centers = kmeans.cluster_centers_[kmeans.labels_]
        centers = [json.dumps(dict(zip(colnames, center)))
                   for center in row_centers]

        silhouettes = metrics.silhouette_samples(cluster_columns,
                                                 kmeans.labels_,
                                                 metric='sqeuclidean')

        inertias = [kmeans.inertia_] * kmeans.labels_.shape[0]

        return zip(kmeans.labels_,
                   centers,
                   silhouettes,
                   inertias,
                   data[0]['rowid'])
|
||||
|
||||
|
||||
# -- Preprocessing steps
|
||||
|
||||
def _extract_columns(data):
|
||||
"""
|
||||
Extract the features from the query and pack them into a NumPy array
|
||||
data (list of dicts): result of the kmeans request
|
||||
"""
|
||||
# number of columns minus rowid column
|
||||
n_cols = len(data[0]) - 1
|
||||
return np.array([data[0]['arr_col{0}'.format(i+1)]
|
||||
for i in xrange(n_cols)],
|
||||
dtype=float).T
|
||||
|
||||
|
||||
def _scale_data(features):
    """
    Scale all input columns to center on 0 with a standard deviation of 1

    features (numpy matrix): feature matrix handed to scikit-learn's
        StandardScaler.
        NOTE(review): the previous docstring gave the shape as
        (n_features, n_samples), but the caller passes the transposed array
        from `_extract_columns` -- confirm the intended orientation.
    """
    from sklearn.preprocessing import StandardScaler
    return StandardScaler().fit_transform(features)
|
341
release/python/0.8.1/crankshaft/crankshaft/clustering/moran.py
Normal file
341
release/python/0.8.1/crankshaft/crankshaft/clustering/moran.py
Normal file
@ -0,0 +1,341 @@
|
||||
"""
|
||||
Moran's I geostatistics (global clustering & outliers presence)
|
||||
Functionality relies on a combination of `PySAL
|
||||
<http://pysal.readthedocs.io/en/latest/>`__ and the data provider provided in
|
||||
the class instantiation (which defaults to PostgreSQL's plpy module's `database
|
||||
access functions <https://www.postgresql.org/docs/10/static/plpython.html>`__).
|
||||
"""
|
||||
|
||||
from collections import OrderedDict
|
||||
import pysal as ps
|
||||
|
||||
# crankshaft module
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# High level interface ---------------------------------------
|
||||
|
||||
|
||||
class Moran(object):
    """Class for calculation of Moran's I statistics (global, local, and
    their rate variants)

    Parameters:
      data_provider (:obj:`AnalysisDataProvider`): Class for fetching data.
        See the `crankshaft.analysis_data_provider` module for more
        information. When omitted, an ``AnalysisDataProvider`` is created.

    Arguments shared by the methods of this class:
      subquery (str): Query to give access to the data needed. This query
        must give access to the analyzed column(s), ``geom_col``, and
        ``id_col``.
      w_type (str): Type of spatial weight. Must be one of `knn`
        or `queen`. See `PySAL documentation
        <http://pysal.readthedocs.io/en/latest/users/tutorials/weights.html>`__
        for more information.
      num_ngbrs (int): If using `knn` for ``w_type``, this
        specifies the number of neighbors to be used to define the spatial
        neighborhoods.
      permutations (int): Number of permutations for performing
        conditional randomization to find the p-value. Higher numbers
        take longer to produce results.
      geom_col (str): Name of the geometry column in the dataset for
        finding the spatial neighborhoods.
      id_col (str): Row index for each value. Usually the database index.
    """

    def __init__(self, data_provider=None):
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None else data_provider)

    def global_stat(self, subquery, attr_name,
                    w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I (global)
        Implementation building neighbors with a PostGIS database and Moran's I
         core clusters with PySAL.

        Args:
          attr_name (str): Column name of data to analyze
          (remaining arguments: see the class docstring)

        Returns:
          A single zipped pair built from the ``I`` and ``EI`` attributes
          of PySAL's ``Moran`` result.
        """
        query_params = OrderedDict()
        query_params["id_col"] = id_col
        query_params["attr1"] = attr_name
        query_params["geom_col"] = geom_col
        query_params["subquery"] = subquery
        query_params["num_ngbrs"] = num_ngbrs

        rows = self.data_provider.get_moran(w_type, query_params)

        # attribute values, then the spatial weights
        values = pu.get_attributes(rows)
        weights = pu.get_weight(rows, w_type, num_ngbrs)

        # global Moran's I
        stat = ps.esda.moran.Moran(values, weights,
                                   permutations=permutations)

        return zip([stat.I], [stat.EI])

    def local_stat(self, subquery, attr,
                   w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I (local)

        Args:
          attr (str): Column name of data to analyze
          (remaining arguments: see the class docstring)

        Returns:
          list of tuples: Where each tuple consists of the following values:
            - quadrants classification (one of `HH`, `HL`, `LL`, or `LH`)
            - p-value
            - spatial lag
            - standardized spatial lag (centered on the mean, normalized by the
              standard deviation)
            - original value
            - standardized value
            - Moran's I statistic
            - original row index
        """

        # rows whose attribute is null are dropped by the query, so the
        # neighbors that remain may be farther away

        query_params = OrderedDict()
        query_params["id_col"] = id_col
        query_params["attr1"] = attr
        query_params["geom_col"] = geom_col
        query_params["subquery"] = subquery
        query_params["num_ngbrs"] = num_ngbrs

        rows = self.data_provider.get_moran(w_type, query_params)

        values = pu.get_attributes(rows)
        weights = pu.get_weight(rows, w_type, num_ngbrs)

        # LISA values
        lisa = ps.esda.moran.Moran_Local(values, weights,
                                         permutations=permutations)

        # quadrant label for each geometry
        quadrants = quad_position(lisa.q)

        # spatial lag of the raw and of the standardized values
        raw_lag = ps.weights.spatial_lag.lag_spatial(weights, lisa.y)
        std_lag = ps.weights.spatial_lag.lag_spatial(weights, lisa.z)

        return zip(quadrants, lisa.p_sim, raw_lag, std_lag,
                   lisa.y, lisa.z, lisa.Is, weights.id_order)

    def global_rate_stat(self, subquery, numerator, denominator,
                         w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Rate (global)

        Args:
          numerator (str): Column name of numerator to analyze
          denominator (str): Column name of the denominator
          (remaining arguments: see the class docstring)

        Returns:
          A single zipped pair built from the ``I`` and ``EI`` attributes
          of PySAL's ``Moran_Rate`` result.
        """
        # NOTE(review): the columns are keyed attr1/attr2 here but
        # numerator/denominator in local_rate_stat -- confirm the
        # difference is intended.
        query_params = OrderedDict()
        query_params["id_col"] = id_col
        query_params["attr1"] = numerator
        query_params["attr2"] = denominator
        query_params["geom_col"] = geom_col
        query_params["subquery"] = subquery
        query_params["num_ngbrs"] = num_ngbrs

        rows = self.data_provider.get_moran(w_type, query_params)

        # attribute values
        numer_vals = pu.get_attributes(rows, 1)
        denom_vals = pu.get_attributes(rows, 2)

        weights = pu.get_weight(rows, w_type, num_ngbrs)

        # global Moran's I on the rate
        stat = ps.esda.moran.Moran_Rate(numer_vals, denom_vals, weights,
                                        permutations=permutations)

        return zip([stat.I], [stat.EI])

    def local_rate_stat(self, subquery, numerator, denominator,
                        w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Local Rate

        Args:
          numerator (str): Column name of numerator to analyze
          denominator (str): Column name of the denominator
          (remaining arguments: see the class docstring)

        Returns:
          list of tuples: Where each tuple consists of the following values:
            - quadrants classification (one of `HH`, `HL`, `LL`, or `LH`)
            - p-value
            - spatial lag
            - standardized spatial lag (centered on the mean, normalized by the
              standard deviation)
            - original value (roughly numerator divided by denominator)
            - standardized value
            - Moran's I statistic
            - original row index
        """
        # rows whose values are null are dropped by the query, so the
        # neighbors that remain may be farther away

        query_params = OrderedDict()
        query_params["id_col"] = id_col
        query_params["numerator"] = numerator
        query_params["denominator"] = denominator
        query_params["geom_col"] = geom_col
        query_params["subquery"] = subquery
        query_params["num_ngbrs"] = num_ngbrs

        rows = self.data_provider.get_moran(w_type, query_params)

        # attribute values
        numer_vals = pu.get_attributes(rows, 1)
        denom_vals = pu.get_attributes(rows, 2)

        weights = pu.get_weight(rows, w_type, num_ngbrs)

        # LISA values
        lisa = ps.esda.moran.Moran_Local_Rate(numer_vals, denom_vals, weights,
                                              permutations=permutations)

        # quadrant label for each geometry
        quadrants = quad_position(lisa.q)

        # spatial lag of the raw and of the standardized values
        raw_lag = ps.weights.spatial_lag.lag_spatial(weights, lisa.y)
        std_lag = ps.weights.spatial_lag.lag_spatial(weights, lisa.z)

        return zip(quadrants, lisa.p_sim, raw_lag, std_lag,
                   lisa.y, lisa.z, lisa.Is, weights.id_order)

    def local_bivariate_stat(self, subquery, attr1, attr2,
                             permutations, geom_col, id_col,
                             w_type, num_ngbrs):
        """
            Moran's I (local) Bivariate (untested)

            Returns the zipped Moran's I values, quadrant labels, p-values
            and the id order of the weights object.
        """

        query_params = OrderedDict()
        query_params["id_col"] = id_col
        query_params["attr1"] = attr1
        query_params["attr2"] = attr2
        query_params["geom_col"] = geom_col
        query_params["subquery"] = subquery
        query_params["num_ngbrs"] = num_ngbrs

        rows = self.data_provider.get_moran(w_type, query_params)

        # attribute values
        first_vals = pu.get_attributes(rows, 1)
        second_vals = pu.get_attributes(rows, 2)

        # spatial weights
        weights = pu.get_weight(rows, w_type, num_ngbrs)

        # LISA values
        lisa = ps.esda.moran.Moran_Local_BV(first_vals, second_vals, weights,
                                            permutations=permutations)

        # quadrant label for each geometry
        quadrants = quad_position(lisa.q)

        return zip(lisa.Is, quadrants, lisa.p_sim, weights.id_order)
|
||||
|
||||
# Low level functions ----------------------------------------
|
||||
|
||||
|
||||
def map_quads(coord):
    """
    Translate a quadrant number into its Moran's I label
      (1 -> HH, 2 -> LH, 3 -> LL, 4 -> HL)
      Args:
        coord (int): quadrant of a specific measurement
      Returns:
        classification (one of 'HH', 'LH', 'LL', or 'HL'), or None when
        the number is not one of the four quadrants
    """
    for number, label in ((1, 'HH'), (2, 'LH'), (3, 'LL'), (4, 'HL')):
        if coord == number:
            return label
    return None
|
||||
|
||||
|
||||
def quad_position(quads):
    """
    Label every quadrant number in a sequence

    Args:
      quads (:obj:`numpy.ndarray`): an array of quads classified by
        1-4 (PySAL default)
    Returns:
      list: the quads classified by 'HH', 'LL', etc.
    """
    return list(map(map_quads, quads))
|
@ -0,0 +1,2 @@
|
||||
"""Import all functions for pysal_utils"""
|
||||
from crankshaft.pysal_utils.pysal_utils import *
|
@ -0,0 +1,251 @@
|
||||
"""
|
||||
Utilities module for generic PySAL functionality, mainly centered on
|
||||
translating queries into numpy arrays or PySAL weights objects
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
|
||||
|
||||
def construct_neighbor_query(w_type, query_vals):
    """Return query (a string) used for finding neighbors
        @param w_type text: type of neighbors to calculate ('knn' or 'queen');
                            anything other than 'knn' (case-insensitive)
                            selects the queen query
        @param query_vals dict: values used to construct the query
    """
    build_query = knn if w_type.lower() == 'knn' else queen
    return build_query(query_vals)
|
||||
|
||||
|
||||
# Build weight object
|
||||
def get_weight(query_res, w_type='knn', num_ngbrs=5):
    """
        Construct PySAL weight from return value of query
        @param query_res dict-like: query results with attributes and
                                    neighbors; every row is read through
                                    its 'id' and 'neighbors' keys
        @param w_type: accepted but not used by this function
        @param num_ngbrs: accepted but not used by this function
        @return: PySAL ``W`` object with its transform set to 'r'
    """

    neighbors = {x['id']: x['neighbors'] for x in query_res}
    # function-call form prints the same text on Python 2 and also parses
    # on Python 3
    print('len of neighbors: %d' % len(neighbors))

    built_weight = ps.W(neighbors)
    built_weight.transform = 'r'

    return built_weight
|
||||
|
||||
|
||||
def query_attr_select(params, table_ref=True):
    """
        Build the attribute portion of a SELECT statement.
        Attributes are emitted in the iteration order of `params`.
        @param params: dict of information used in query (column names,
                       table name, etc.)
        @param table_ref: when true, plain columns are prefixed with `i.`
          Example:
          OrderedDict([('numerator', 'price'),
                       ('denominator', 'sq_meters'),
                       ('subquery', 'SELECT * FROM interesting_data')])
          Output:
            'i."price"::numeric As attr1, i."sq_meters"::numeric As attr2, '
    """
    template = "\"%(col)s\"::numeric As attr%(alias_num)s, "
    if table_ref:
        template = "i." + template

    if 'time_cols' in params or 'ind_vars' in params:
        # markov or gwr analysis: the column names arrive as a list
        if 'time_cols' in params:
            columns = params['time_cols']
        else:
            columns = params['ind_vars']

        if 'ind_vars' in params:
            # gwr columns are aggregated and never carry the table prefix
            template = "array_agg(\"%(col)s\"::numeric) As attr%(alias_num)s, "
    else:
        # moran's analysis: every key that is not bookkeeping names a column
        reserved = ('id_col', 'geom_col', 'subquery', 'num_ngbrs')
        columns = [params[key] for key in params if key not in reserved]

    pieces = [template % {"col": column, "alias_num": number}
              for number, column in enumerate(columns, 1)]
    return "".join(pieces)
|
||||
|
||||
|
||||
def query_attr_where(params, table_ref=True):
    """
      Construct where conditions when building neighbors query
      Create portion of WHERE clauses for weeding out NULL-valued geometries
      Input: dict of params:
          {'subquery': ...,
           'numerator': 'data1',
           'denominator': 'data2',
           '': ...}
      Output:
        'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL
         AND idx_replace."data2" <> 0'
      Input:
      {'subquery': ...,
       'time_cols': ['time1', 'time2', 'time3'],
       'etc': ...}
      Output: 'idx_replace."time1" IS NOT NULL AND idx_replace."time2" IS NOT
        NULL AND idx_replace."time3" IS NOT NULL'

      @param table_ref: when true every column is prefixed with the
        `idx_replace.` placeholder (callers substitute the table alias);
        when false no prefix is emitted on any clause, including the
        non-zero denominator clause.
    """
    prefix = "idx_replace." if table_ref else ""
    not_null = prefix + "\"%s\" IS NOT NULL"

    if ('time_cols' in params) or ('ind_vars' in params):
        # markov or gwr where clauses: column names arrive as a list
        attrs = (params['time_cols'] if 'time_cols' in params
                 else params['ind_vars'])
        clauses = [not_null % attr for attr in attrs]
    else:
        # moran where clauses: every non-bookkeeping key names a column
        attrs = [k for k in params
                 if k not in ('id_col', 'geom_col', 'subquery',
                              'num_ngbrs')]
        clauses = [not_null % params[attr] for attr in attrs]

        if 'denominator' in attrs:
            # rate analyses also exclude zero denominators; the clause uses
            # the same prefix as the others so it honors table_ref
            clauses.append(
                prefix + "\"%s\" <> 0" % params['denominator'])

    return " AND ".join(clauses)
|
||||
|
||||
|
||||
def knn(params):
    """SQL query for k-nearest neighbors.
        @param params: dict of values to fill template
    """
    select_part = query_attr_select(params, table_ref=True)
    where_part = query_attr_where(params, table_ref=True)

    # the where clause is emitted once per table alias (i and j)
    replacements = {
        "attr_select": select_part,
        "attr_where_i": where_part.replace("idx_replace", "i"),
        "attr_where_j": where_part.replace("idx_replace", "j"),
    }

    template = '''
        SELECT
          i."{id_col}" As id,
          %(attr_select)s
          (SELECT ARRAY(SELECT j."{id_col}"
                        FROM ({subquery}) As j
                        WHERE i."{id_col}" <> j."{id_col}" AND
                              %(attr_where_j)s AND
                              j."{geom_col}" IS NOT NULL
                        ORDER BY j."{geom_col}" <-> i."{geom_col}" ASC
                        LIMIT {num_ngbrs})) As neighbors
        FROM ({subquery}) As i
        WHERE %(attr_where_i)s AND i."{geom_col}" IS NOT NULL
        ORDER BY i."{id_col}" ASC;
    '''

    # first the %-style attribute fragments, then the {}-style identifiers
    with_attributes = template % replacements
    return with_attributes.format(**params)
|
||||
|
||||
|
||||
# SQL query for finding queens neighbors (all contiguous polygons)
|
||||
def queen(params):
    """SQL query for queen neighbors.
        @param params dict: information to fill query
    """
    select_part = query_attr_select(params)
    where_part = query_attr_where(params)

    # the where clause is emitted once per table alias (i and j)
    replacements = {
        "attr_select": select_part,
        "attr_where_i": where_part.replace("idx_replace", "i"),
        "attr_where_j": where_part.replace("idx_replace", "j"),
    }

    template = '''
        SELECT
          i."{id_col}" As id,
          %(attr_select)s
          (SELECT ARRAY(SELECT j."{id_col}"
           FROM ({subquery}) As j
           WHERE i."{id_col}" <> j."{id_col}" AND
                 ST_Touches(i."{geom_col}", j."{geom_col}") AND
                 %(attr_where_j)s)) As neighbors
        FROM ({subquery}) As i
        WHERE
            %(attr_where_i)s
        ORDER BY i."{id_col}" ASC;
    '''

    # first the %-style attribute fragments, then the {}-style identifiers
    with_attributes = template % replacements
    return with_attributes.format(**params)
|
||||
|
||||
|
||||
def gwr_query(params):
    """
    Build the SQL query that gathers the inputs for GWR.

    The query aggregates, over the rows of ``subquery``, the centroid x/y
    coordinates of the geometry column, the dependent variable, the
    independent-variable fragment produced by ``query_attr_select`` and
    the row ids. Rows with a NULL dependent variable are excluded.

    @param params: dict of values to fill the query template. The template
        itself reads the keys ``geom_col``, ``dep_var``, ``id_col`` and
        ``subquery``.
    @return: the filled-in SQL query string, stripped of surrounding
        whitespace
    """
    ind_vars_select = query_attr_select(params, table_ref=None)
    ind_vars_where = query_attr_where(params, table_ref=None)

    template = '''
      SELECT
        array_agg(ST_X(ST_Centroid("{geom_col}"))) As x,
        array_agg(ST_Y(ST_Centroid("{geom_col}"))) As y,
        array_agg("{dep_var}") As dep_var,
        %(ind_vars_select)s
        array_agg("{id_col}") As rowid
      FROM ({subquery}) As q
      WHERE
        "{dep_var}" IS NOT NULL AND
        %(ind_vars_where)s
      '''

    # %-formatting first for the variable fragments, str.format afterwards
    # for the remaining {placeholders}.
    with_variables = template % {"ind_vars_select": ind_vars_select,
                                 "ind_vars_where": ind_vars_where}
    return with_variables.format(**params).strip()
|
||||
|
||||
|
||||
def gwr_predict_query(params):
    """
    Build the SQL query that gathers the inputs for GWR prediction.

    The query aggregates, over the rows of ``subquery``, the centroid x/y
    coordinates of the geometry column, the dependent variable, the
    independent-variable fragment produced by ``query_attr_select`` and
    the row ids. Only the ``query_attr_where`` fragment filters the rows;
    NULL dependent-variable values are not excluded here.

    @param params: dict of values to fill the query template. The template
        itself reads the keys ``geom_col``, ``dep_var``, ``id_col`` and
        ``subquery``.
    @return: the filled-in SQL query string, stripped of surrounding
        whitespace
    """
    ind_vars_select = query_attr_select(params, table_ref=None)
    ind_vars_where = query_attr_where(params, table_ref=None)

    # NOTE(review): unlike gwr_query, the column identifiers below are not
    # wrapped in double quotes -- confirm whether that is intentional.
    template = '''
      SELECT
        array_agg(ST_X(ST_Centroid({geom_col}))) As x,
        array_agg(ST_Y(ST_Centroid({geom_col}))) As y,
        array_agg({dep_var}) As dep_var,
        %(ind_vars_select)s
        array_agg({id_col}) As rowid
      FROM ({subquery}) As q
      WHERE
        %(ind_vars_where)s
      '''

    # %-formatting first for the variable fragments, str.format afterwards
    # for the remaining {placeholders}.
    with_variables = template % {"ind_vars_select": ind_vars_select,
                                 "ind_vars_where": ind_vars_where}
    return with_variables.format(**params).strip()
|
||||
# to add more weight methods open a ticket or pull request
|
||||
|
||||
|
||||
def get_attributes(query_res, attr_num=1):
    """
    Extract one attribute from every row of a query result as floats.

    @param query_res: query results with attributes and neighbors; each
        row is indexed with the key ``'attr' + str(attr_num)``
    @param attr_num: attribute number (1, 2, ...)
    @return: numpy array of dtype float holding that attribute for every
        row, in the order of ``query_res``
    """
    attr_key = 'attr' + str(attr_num)
    # ``np.float`` was only an alias of the builtin ``float`` and was
    # removed in NumPy 1.24; the builtin selects the same dtype on every
    # NumPy version.
    return np.array([row[attr_key] for row in query_res], dtype=float)
|
12
release/python/0.8.1/crankshaft/crankshaft/random_seeds.py
Normal file
12
release/python/0.8.1/crankshaft/crankshaft/random_seeds.py
Normal file
@ -0,0 +1,12 @@
|
||||
"""Random seed generator used for non-deterministic functions in crankshaft"""
|
||||
import random
|
||||
import numpy
|
||||
|
||||
|
||||
def set_random_seeds(value):
    """
    Seed every RNG (Random Number Generator) used internally.

    Both the standard-library ``random`` module and ``numpy.random`` are
    seeded with the same ``value``.
    """
    for seed_rng in (random.seed, numpy.random.seed):
        seed_rng(value)
|
@ -0,0 +1,3 @@
|
||||
from crankshaft.regression.gwr import *
|
||||
from crankshaft.regression.glm import *
|
||||
from crankshaft.regression.gwr_cs import *
|
@ -0,0 +1,444 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Import GLM and pysal\n",
|
||||
"import os\n",
|
||||
"import numpy as np\n",
|
||||
"os.chdir('/Users/toshan/dev/pysal/pysal/contrib/glm')\n",
|
||||
"from glm import GLM\n",
|
||||
"import pysal\n",
|
||||
"import pandas as pd\n",
|
||||
"import statsmodels.formula.api as smf\n",
|
||||
"import statsmodels.api as sm\n",
|
||||
"from family import Gaussian, Binomial, Poisson, QuasiPoisson\n",
|
||||
"\n",
|
||||
"from statsmodels.api import families"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Prepare some test data - columbus example\n",
|
||||
"db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')\n",
|
||||
"y = np.array(db.by_col(\"HOVAL\"))\n",
|
||||
"y = np.reshape(y, (49,1))\n",
|
||||
"X = []\n",
|
||||
"#X.append(np.ones(len(y)))\n",
|
||||
"X.append(db.by_col(\"INC\"))\n",
|
||||
"X.append(db.by_col(\"CRIME\"))\n",
|
||||
"X = np.array(X).T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[ 46.42818268]\n",
|
||||
" [ 0.62898397]\n",
|
||||
" [ -0.48488854]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#First fit pysal OLS model\n",
|
||||
"from pysal.spreg import ols\n",
|
||||
"OLS = ols.OLS(y, X)\n",
|
||||
"print OLS.betas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"[ 46.42818268 0.62898397 -0.48488854]\n",
|
||||
"[ 46.42818268 0.62898397 -0.48488854]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#Then fit Gaussian GLM\n",
|
||||
"\n",
|
||||
"#create Gaussian GLM model object\n",
|
||||
"model = GLM(y, X, Gaussian())\n",
|
||||
"model\n",
|
||||
"\n",
|
||||
"#Fit model to estimate coefficients and return GLMResults object\n",
|
||||
"results = model.fit()\n",
|
||||
"\n",
|
||||
"#Check coefficients - R betas [46.4282, 0.6290, -0.4849]\n",
|
||||
"print results.params\n",
|
||||
"\n",
|
||||
"# Gaussian GLM results from statsmodels\n",
|
||||
"sm_model = smf.GLM(y, sm.add_constant(X), family=families.Gaussian())\n",
|
||||
"sm_results = sm_model.fit()\n",
|
||||
"print sm_results.params"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2 2\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"<class 'family.Gaussian'>\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print results.df_model, sm_results.df_model\n",
|
||||
"print np.allclose(results.aic, sm_results.aic)\n",
|
||||
"print np.allclose(results.bic, sm_results.bic)\n",
|
||||
"print np.allclose(results.deviance, sm_results.deviance)\n",
|
||||
"print np.allclose(results.df_model, sm_results.df_model)\n",
|
||||
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
|
||||
"print np.allclose(results.llf, sm_results.llf)\n",
|
||||
"print np.allclose(results.mu, sm_results.mu)\n",
|
||||
"print np.allclose(results.n, sm_results.nobs)\n",
|
||||
"print np.allclose(results.null, sm_results.null)\n",
|
||||
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
|
||||
"print np.allclose(results.params, sm_results.params)\n",
|
||||
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
|
||||
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
|
||||
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
|
||||
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
|
||||
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
|
||||
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
|
||||
"print np.allclose(results.scale, sm_results.scale)\n",
|
||||
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
|
||||
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
|
||||
"print np.allclose(results.bse, sm_results.bse)\n",
|
||||
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
|
||||
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
|
||||
"print np.allclose(results.tvalues, sm_results.tvalues)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'family.Poisson'>\n",
|
||||
"<class 'family.Poisson'>\n",
|
||||
"<class 'family.Poisson'>\n",
|
||||
"[ 3.92159085 0.01183491 -0.01371397]\n",
|
||||
"[ 3.92159085 0.01183491 -0.01371397]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#Now fit a Poisson GLM \n",
|
||||
"\n",
|
||||
"poisson_y = np.round(y).astype(int)\n",
|
||||
"\n",
|
||||
"#create Poisson GLM model object\n",
|
||||
"model = GLM(poisson_y, X, Poisson())\n",
|
||||
"model\n",
|
||||
"\n",
|
||||
"#Fit model to estimate coefficients and return GLMResults object\n",
|
||||
"results = model.fit()\n",
|
||||
"\n",
|
||||
"#Check coefficients - R betas [3.91926, 0.01198, -0.01371]\n",
|
||||
"print results.params.T\n",
|
||||
"\n",
|
||||
"# Poisson GLM results from statsmodels\n",
|
||||
"sm_results = smf.GLM(poisson_y, sm.add_constant(X), family=families.Poisson()).fit()\n",
|
||||
"print sm_results.params"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'family.Poisson'>\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"<class 'family.Poisson'>\n",
|
||||
"<class 'family.Poisson'>\n",
|
||||
"<class 'family.Poisson'>\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"[ 0.13049161 0.00511599 0.00193769] [ 0.13049161 0.00511599 0.00193769]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print np.allclose(results.aic, sm_results.aic)\n",
|
||||
"print np.allclose(results.bic, sm_results.bic)\n",
|
||||
"print np.allclose(results.deviance, sm_results.deviance)\n",
|
||||
"print np.allclose(results.df_model, sm_results.df_model)\n",
|
||||
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
|
||||
"print np.allclose(results.llf, sm_results.llf)\n",
|
||||
"print np.allclose(results.mu, sm_results.mu)\n",
|
||||
"print np.allclose(results.n, sm_results.nobs)\n",
|
||||
"print np.allclose(results.null, sm_results.null)\n",
|
||||
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
|
||||
"print np.allclose(results.params, sm_results.params)\n",
|
||||
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
|
||||
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
|
||||
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
|
||||
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
|
||||
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
|
||||
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
|
||||
"print np.allclose(results.scale, sm_results.scale)\n",
|
||||
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
|
||||
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
|
||||
"print np.allclose(results.bse, sm_results.bse)\n",
|
||||
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
|
||||
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
|
||||
"print np.allclose(results.tvalues, sm_results.tvalues)\n",
|
||||
"print results.bse, sm_results.bse"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[-5.33638276 0.0287754 ]\n",
|
||||
"[-5.33638276 0.0287754 ]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#Now fit a binomial GLM\n",
|
||||
"londonhp = pd.read_csv('/Users/toshan/projects/londonhp.csv')\n",
|
||||
"#londonhp = pd.read_csv('/Users/qszhao/Dropbox/pysal/pysal/contrib/gwr/londonhp.csv')\n",
|
||||
"y = londonhp['BATH2'].values\n",
|
||||
"y = np.reshape(y, (316,1))\n",
|
||||
"X = londonhp['FLOORSZ'].values\n",
|
||||
"X = np.reshape(X, (316,1))\n",
|
||||
"\n",
|
||||
"#create logistic GLM model object\n",
|
||||
"model = GLM(y, X, Binomial())\n",
|
||||
"model\n",
|
||||
"\n",
|
||||
"#Fit model to estimate coefficients and return GLMResults object\n",
|
||||
"results = model.fit()\n",
|
||||
"\n",
|
||||
"#Check coefficients - R betas [-5.33638, 0.02878]\n",
|
||||
"print results.params.T\n",
|
||||
"\n",
|
||||
"# Logistic GLM results from statsmodels\n",
|
||||
"sm_results = smf.GLM(y, sm.add_constant(X), family=families.Binomial()).fit()\n",
|
||||
"print sm_results.params"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1 1\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n",
|
||||
"True\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print results.df_model, sm_results.df_model\n",
|
||||
"print np.allclose(results.aic, sm_results.aic)\n",
|
||||
"print np.allclose(results.bic, sm_results.bic)\n",
|
||||
"print np.allclose(results.deviance, sm_results.deviance)\n",
|
||||
"print np.allclose(results.df_model, sm_results.df_model)\n",
|
||||
"print np.allclose(results.df_resid, sm_results.df_resid)\n",
|
||||
"print np.allclose(results.llf, sm_results.llf)\n",
|
||||
"print np.allclose(results.mu, sm_results.mu)\n",
|
||||
"print np.allclose(results.n, sm_results.nobs)\n",
|
||||
"print np.allclose(results.null, sm_results.null)\n",
|
||||
"print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
|
||||
"print np.allclose(results.params, sm_results.params)\n",
|
||||
"print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
|
||||
"print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
|
||||
"print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
|
||||
"print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
|
||||
"print np.allclose(results.resid_response, sm_results.resid_response)\n",
|
||||
"print np.allclose(results.resid_working, sm_results.resid_working)\n",
|
||||
"print np.allclose(results.scale, sm_results.scale)\n",
|
||||
"print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
|
||||
"print np.allclose(results.cov_params(), sm_results.cov_params())\n",
|
||||
"print np.allclose(results.bse, sm_results.bse)\n",
|
||||
"print np.allclose(results.conf_int(), sm_results.conf_int())\n",
|
||||
"print np.allclose(results.pvalues, sm_results.pvalues)\n",
|
||||
"print np.allclose(results.tvalues, sm_results.tvalues)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'family.QuasiPoisson'>\n",
|
||||
"<class 'family.QuasiPoisson'>\n",
|
||||
"<class 'family.QuasiPoisson'>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#create QUasiPoisson GLM model object\n",
|
||||
"model = GLM(poisson_y, X, QuasiPoisson())\n",
|
||||
"model\n",
|
||||
"\n",
|
||||
"#Fit model to estimate coefficients and return GLMResults object\n",
|
||||
"results = model.fit()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 2",
|
||||
"language": "python",
|
||||
"name": "python2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
@ -0,0 +1,4 @@
|
||||
import glm
|
||||
import family
|
||||
import utils
|
||||
import iwls
|
@ -0,0 +1,959 @@
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
from utils import cache_readonly
|
||||
|
||||
class Results(object):
    """
    Container for the outcome of a fitted model.

    Parameters
    ----------
    model : class instance
        the previously specified model instance
    params : array
        parameter estimates from the fit model
    """

    def __init__(self, model, params, **kwd):
        # Extra keyword arguments become instance attributes before the
        # core attributes are assigned by ``initialize``.
        self.__dict__.update(kwd)
        self.initialize(model, params, **kwd)
        self._data_attr = []

    def initialize(self, model, params, **kwd):
        """Store the parameters, the model and, if present, k_constant."""
        self.params = params
        self.model = model
        if hasattr(model, 'k_constant'):
            self.k_constant = model.k_constant

    def predict(self, exog=None, transform=True, *args, **kwargs):
        """
        Call self.model.predict with self.params as the first argument.

        Parameters
        ----------
        exog : array-like, optional
            The values for which you want to predict.
        transform : bool, optional
            If the model was fit via a formula, do you want to pass
            exog through the formula. Default is True. E.g., if you fit
            a model y ~ log(x1) + log(x2), and transform is True, then
            you can pass a data structure that contains x1 and x2 in
            their original form. Otherwise, you'd need to log the data
            first.
        args, kwargs :
            Some models can take additional arguments or keywords, see the
            predict method of the model for the details.

        Returns
        -------
        prediction : ndarray or pandas.Series
            See self.model.predict
        """
        if transform and hasattr(self.model, 'formula') and exog is not None:
            from patsy import dmatrix
            builder = self.model.data.design_info.builder
            exog = dmatrix(builder, exog)

        if exog is not None:
            exog = np.asarray(exog)
            if exog.ndim == 1:
                # A 1-d input is a column of observations when the model
                # was fit on a single regressor.
                fitted_exog = self.model.exog
                if fitted_exog.ndim == 1 or fitted_exog.shape[1] == 1:
                    exog = exog[:, None]
            exog = np.atleast_2d(exog)  # needed in count model shape[1]

        return self.model.predict(self.params, exog, *args, **kwargs)
|
||||
|
||||
|
||||
#TODO: public method?
|
||||
class LikelihoodModelResults(Results):
|
||||
"""
|
||||
Class to contain results from likelihood models
|
||||
Parameters
|
||||
-----------
|
||||
model : LikelihoodModel instance or subclass instance
|
||||
LikelihoodModelResults holds a reference to the model that is fit.
|
||||
params : 1d array_like
|
||||
parameter estimates from estimated model
|
||||
normalized_cov_params : 2d array
|
||||
Normalized (before scaling) covariance of params. (dot(X.T,X))**-1
|
||||
scale : float
|
||||
For (some subset of models) scale will typically be the
|
||||
mean square error from the estimated model (sigma^2)
|
||||
Returns
|
||||
-------
|
||||
**Attributes**
|
||||
mle_retvals : dict
|
||||
Contains the values returned from the chosen optimization method if
|
||||
full_output is True during the fit. Available only if the model
|
||||
is fit by maximum likelihood. See notes below for the output from
|
||||
the different methods.
|
||||
mle_settings : dict
|
||||
Contains the arguments passed to the chosen optimization method.
|
||||
Available if the model is fit by maximum likelihood. See
|
||||
LikelihoodModel.fit for more information.
|
||||
model : model instance
|
||||
LikelihoodResults contains a reference to the model that is fit.
|
||||
params : ndarray
|
||||
The parameters estimated for the model.
|
||||
scale : float
|
||||
The scaling factor of the model given during instantiation.
|
||||
tvalues : array
|
||||
The t-values of the standard errors.
|
||||
Notes
|
||||
-----
|
||||
The covariance of params is given by scale times normalized_cov_params.
|
||||
Return values by solver if full_output is True during fit:
|
||||
'newton'
|
||||
fopt : float
|
||||
The value of the (negative) loglikelihood at its
|
||||
minimum.
|
||||
iterations : int
|
||||
Number of iterations performed.
|
||||
score : ndarray
|
||||
The score vector at the optimum.
|
||||
Hessian : ndarray
|
||||
The Hessian at the optimum.
|
||||
warnflag : int
|
||||
1 if maxiter is exceeded. 0 if successful convergence.
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
allvecs : list
|
||||
List of solutions at each iteration.
|
||||
'nm'
|
||||
fopt : float
|
||||
The value of the (negative) loglikelihood at its
|
||||
minimum.
|
||||
iterations : int
|
||||
Number of iterations performed.
|
||||
warnflag : int
|
||||
1: Maximum number of function evaluations made.
|
||||
2: Maximum number of iterations reached.
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
allvecs : list
|
||||
List of solutions at each iteration.
|
||||
'bfgs'
|
||||
fopt : float
|
||||
Value of the (negative) loglikelihood at its minimum.
|
||||
gopt : float
|
||||
Value of gradient at minimum, which should be near 0.
|
||||
Hinv : ndarray
|
||||
value of the inverse Hessian matrix at minimum. Note
|
||||
that this is just an approximation and will often be
|
||||
different from the value of the analytic Hessian.
|
||||
fcalls : int
|
||||
Number of calls to loglike.
|
||||
gcalls : int
|
||||
Number of calls to gradient/score.
|
||||
warnflag : int
|
||||
1: Maximum number of iterations exceeded. 2: Gradient
|
||||
and/or function calls are not changing.
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
allvecs : list
|
||||
Results at each iteration.
|
||||
'lbfgs'
|
||||
fopt : float
|
||||
Value of the (negative) loglikelihood at its minimum.
|
||||
gopt : float
|
||||
Value of gradient at minimum, which should be near 0.
|
||||
fcalls : int
|
||||
Number of calls to loglike.
|
||||
warnflag : int
|
||||
Warning flag:
|
||||
- 0 if converged
|
||||
- 1 if too many function evaluations or too many iterations
|
||||
- 2 if stopped for another reason
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
'powell'
|
||||
fopt : float
|
||||
Value of the (negative) loglikelihood at its minimum.
|
||||
direc : ndarray
|
||||
Current direction set.
|
||||
iterations : int
|
||||
Number of iterations performed.
|
||||
fcalls : int
|
||||
Number of calls to loglike.
|
||||
warnflag : int
|
||||
1: Maximum number of function evaluations. 2: Maximum number
|
||||
of iterations.
|
||||
converged : bool
|
||||
True : converged. False: did not converge.
|
||||
allvecs : list
|
||||
Results at each iteration.
|
||||
'cg'
|
||||
fopt : float
|
||||
Value of the (negative) loglikelihood at its minimum.
|
||||
fcalls : int
|
||||
Number of calls to loglike.
|
||||
gcalls : int
|
||||
Number of calls to gradient/score.
|
||||
warnflag : int
|
||||
1: Maximum number of iterations exceeded. 2: Gradient and/
|
||||
or function calls not changing.
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
allvecs : list
|
||||
Results at each iteration.
|
||||
'ncg'
|
||||
fopt : float
|
||||
Value of the (negative) loglikelihood at its minimum.
|
||||
fcalls : int
|
||||
Number of calls to loglike.
|
||||
gcalls : int
|
||||
Number of calls to gradient/score.
|
||||
hcalls : int
|
||||
Number of calls to hessian.
|
||||
warnflag : int
|
||||
1: Maximum number of iterations exceeded.
|
||||
converged : bool
|
||||
True: converged. False: did not converge.
|
||||
allvecs : list
|
||||
Results at each iteration.
|
||||
"""
|
||||
|
||||
# by default we use normal distribution
|
||||
# can be overwritten by instances or subclasses
|
||||
use_t = False
|
||||
|
||||
def __init__(self, model, params, normalized_cov_params=None, scale=1.,
             **kwargs):
    """
    Store the fit outputs and, when requested, the covariance type.

    Recognised keyword arguments are ``use_t``, ``cov_type`` and
    ``cov_kwds``; the covariance handling only runs when ``cov_type`` is
    passed explicitly.
    """
    super(LikelihoodModelResults, self).__init__(model, params)
    self.normalized_cov_params = normalized_cov_params
    self.scale = scale

    # An explicit, non-None use_t overrides the class-level default.
    use_t = kwargs.get('use_t')
    if use_t is not None:
        self.use_t = use_t

    # robust covariance
    # We put cov_type in kwargs so subclasses can decide in fit whether to
    # use this generic implementation
    if 'cov_type' not in kwargs:
        return

    cov_type = kwargs['cov_type']
    cov_kwds = kwargs.get('cov_kwds', {})

    if cov_type == 'nonrobust':
        self.cov_type = 'nonrobust'
        self.cov_kwds = {'description': ('Standard Errors assume that the '
                                         'covariance matrix of the errors '
                                         'is correctly specified.')}
        return

    from statsmodels.base.covtype import get_robustcov_results
    if cov_kwds is None:
        cov_kwds = {}
    # TODO: we shouldn't need use_t in get_robustcov_results
    get_robustcov_results(self, cov_type=cov_type, use_self=True,
                          use_t=self.use_t, **cov_kwds)
|
||||
|
||||
|
||||
def normalized_cov_params(self):
    """Placeholder that always raises NotImplementedError."""
    raise NotImplementedError()
|
||||
|
||||
|
||||
def _get_robustcov_results(self, cov_type='nonrobust', use_self=True,
|
||||
use_t=None, **cov_kwds):
|
||||
from statsmodels.base.covtype import get_robustcov_results
|
||||
if cov_kwds is None:
|
||||
cov_kwds = {}
|
||||
|
||||
if cov_type == 'nonrobust':
|
||||
self.cov_type = 'nonrobust'
|
||||
self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
|
||||
'covariance matrix of the errors is correctly ' +
|
||||
'specified.'}
|
||||
else:
|
||||
# TODO: we shouldn't need use_t in get_robustcov_results
|
||||
get_robustcov_results(self, cov_type=cov_type, use_self=True,
|
||||
use_t=use_t, **cov_kwds)
|
||||
|
||||
@cache_readonly
def llf(self):
    """Value of the model's ``loglike`` at the stored parameters."""
    loglike = self.model.loglike
    return loglike(self.params)
|
||||
|
||||
@cache_readonly
def bse(self):
    """Square roots of the diagonal of ``cov_params()``."""
    variances = np.diag(self.cov_params())
    return np.sqrt(variances)
|
||||
|
||||
@cache_readonly
def tvalues(self):
    """
    Return the t-statistic for a given parameter estimate.

    Computed as the parameter estimates divided by ``bse``.
    """
    estimates = self.params
    std_errors = self.bse
    return estimates / std_errors
|
||||
|
||||
@cache_readonly
def pvalues(self):
    """
    Two-sided p-values of ``tvalues``.

    Uses the t distribution when ``use_t`` is set (with
    ``df_resid_inference`` as degrees of freedom when that attribute
    exists, ``df_resid`` otherwise) and the normal distribution if not.
    """
    if not self.use_t:
        return stats.norm.sf(np.abs(self.tvalues))*2

    df_resid = getattr(self, 'df_resid_inference', self.df_resid)
    return stats.t.sf(np.abs(self.tvalues), df_resid)*2
|
||||
|
||||
|
||||
def cov_params(self, r_matrix=None, column=None, scale=None, cov_p=None,
               other=None):
    """
    Returns the variance/covariance matrix.

    The variance/covariance matrix can be of a linear contrast
    of the estimates of params or all params multiplied by scale which
    will usually be an estimate of sigma^2.  Scale is assumed to be
    a scalar.

    Parameters
    ----------
    r_matrix : array-like
        Can be 1d, or 2d.  Can be used alone or with other.
    column : array-like, optional
        Must be used on its own.  Can be 0d or 1d see below.
    scale : float, optional
        Can be specified or not.  Default is None, which means that
        the scale argument is taken from the model.
    cov_p : array-like, optional
        Covariance matrix to use instead of the one derived from
        ``cov_params_default`` or ``normalized_cov_params * scale``.
    other : array-like, optional
        Can be used when r_matrix is specified.

    Returns
    -------
    cov : ndarray
        covariance matrix of the parameter estimates or of linear
        combination of parameter estimates. See Notes.

    Raises
    ------
    ValueError
        If no covariance is available, if ``column`` is combined with
        ``r_matrix`` or ``other``, if ``other`` is given without
        ``r_matrix``, or if ``r_matrix`` is 0d.

    Notes
    -----
    (The below are assumed to be in matrix notation.)

    If no argument is specified returns the covariance matrix of a model
    ``(scale)*(X.T X)^(-1)``

    If contrast is specified it pre and post-multiplies as follows
    ``(scale) * r_matrix (X.T X)^(-1) r_matrix.T``

    If contrast and other are specified returns
    ``(scale) * r_matrix (X.T X)^(-1) other.T``

    If column is specified returns
    ``(scale) * (X.T X)^(-1)[column,column]`` if column is 0d

    OR

    ``(scale) * (X.T X)^(-1)[column][:,column]`` if column is 1d
    """
    l1_fit = (hasattr(self, 'mle_settings') and
              self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp'])
    dot_fun = nan_dot if l1_fit else np.dot

    no_covariance = (cov_p is None and
                     self.normalized_cov_params is None and
                     not hasattr(self, 'cov_params_default'))
    if no_covariance:
        raise ValueError('need covariance of parameters for computing '
                         '(unnormalized) covariances')
    if column is not None and (r_matrix is not None or other is not None):
        raise ValueError('Column should be specified without other '
                         'arguments.')
    if other is not None and r_matrix is None:
        raise ValueError('other can only be specified with r_matrix')

    if cov_p is None:
        if hasattr(self, 'cov_params_default'):
            cov_p = self.cov_params_default
        else:
            if scale is None:
                scale = self.scale
            cov_p = self.normalized_cov_params * scale

    if column is not None:
        column = np.asarray(column)
        if column.shape == ():
            return cov_p[column, column]
        return cov_p[column[:, None], column]

    if r_matrix is None:
        return cov_p

    r_matrix = np.asarray(r_matrix)
    if r_matrix.shape == ():
        raise ValueError("r_matrix should be 1d or 2d")
    other = r_matrix if other is None else np.asarray(other)
    return dot_fun(r_matrix, dot_fun(cov_p, np.transpose(other)))
|
||||
|
||||
#TODO: make sure this works as needed for GLMs
|
||||
def t_test(self, r_matrix, cov_p=None, scale=None,
           use_t=None):
    """
    Compute a t-test for each linear hypothesis of the form Rb = q

    Parameters
    ----------
    r_matrix : array-like, str, tuple
        - array : If an array is given, a p x k 2d array or length k 1d
          array specifying the linear restrictions. It is assumed
          that the linear combination is equal to zero.
        - str : The full hypotheses to test can be given as a string.
          See the examples.
        - tuple : A tuple of arrays in the form (R, q). If q is given,
          can be either a scalar or a length p row vector.
    cov_p : array-like, optional
        An alternative estimate for the parameter covariance matrix.
        If None is given, self.normalized_cov_params is used.
    scale : float, optional
        An optional `scale` to use.  Default is the scale specified
        by the model fit. (Not referenced by the body of this method.)
    use_t : bool, optional
        If use_t is None, then the default of the model is used.
        If use_t is True, then the p-values are based on the t
        distribution.
        If use_t is False, then the p-values are based on the normal
        distribution.

    Returns
    -------
    res : ContrastResults instance
        The results for the test are attributes of this results instance.
        The available results have the same elements as the parameter table
        in `summary()`.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> results = sm.OLS(data.endog, data.exog).fit()
    >>> r = np.zeros_like(results.params)
    >>> r[5:] = [1,-1]
    >>> print(r)
    [ 0.  0.  0.  0.  0.  1. -1.]

    r tests that the coefficients on the 5th and 6th independent
    variable are the same.

    >>> T_test = results.t_test(r)
    >>> print(T_test)
    <T contrast: effect=-1829.2025687192481, sd=455.39079425193762,
    t=-4.0167754636411717, p=0.0015163772380899498, df_denom=9>
    >>> T_test.effect
    -1829.2025687192481
    >>> T_test.sd
    455.39079425193762
    >>> T_test.tvalue
    -4.0167754636411717
    >>> T_test.pvalue
    0.0015163772380899498

    Alternatively, you can specify the hypothesis tests using a string

    >>> from statsmodels.formula.api import ols
    >>> dta = sm.datasets.longley.load_pandas().data
    >>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
    >>> results = ols(formula, dta).fit()
    >>> hypotheses = 'GNPDEFL = GNP, UNEMP = 2, YEAR/1829 = 1'
    >>> t_test = results.t_test(hypotheses)
    >>> print(t_test)

    See Also
    ---------
    tvalues : individual t statistics
    f_test : for F tests
    patsy.DesignInfo.linear_constraint
    """
    from patsy import DesignInfo
    param_names = self.model.data.param_names
    constraint = DesignInfo(param_names).linear_constraint(r_matrix)
    r_matrix = constraint.coefs
    q_matrix = constraint.constants
    num_ttests = r_matrix.shape[0]
    num_params = r_matrix.shape[1]

    no_covariance = (cov_p is None and
                     self.normalized_cov_params is None and
                     not hasattr(self, 'cov_params_default'))
    if no_covariance:
        raise ValueError('Need covariance of parameters for computing '
                         'T statistics')
    if num_params != self.params.shape[0]:
        raise ValueError('r_matrix and params are not aligned')

    if q_matrix is None:
        q_matrix = np.zeros(num_ttests)
    else:
        q_matrix = np.asarray(q_matrix).squeeze()
    if q_matrix.size > 1 and q_matrix.shape[0] != num_ttests:
        raise ValueError("r_matrix and q_matrix must have the same "
                         "number of rows")

    if use_t is None:
        # switch to use_t false if undefined
        use_t = (hasattr(self, 'use_t') and self.use_t)

    effect = np.dot(r_matrix, self.params)

    # Perform the test: with several hypotheses only the diagonal of the
    # contrast covariance is needed for the standard deviations.
    contrast_cov = self.cov_params(r_matrix=r_matrix, cov_p=cov_p)
    if num_ttests > 1:
        contrast_cov = np.diag(contrast_cov)
    std_dev = np.sqrt(contrast_cov)
    t_stat = (effect - q_matrix) * recipr(std_dev)

    df_resid = getattr(self, 'df_resid_inference', self.df_resid)

    if use_t:
        return ContrastResults(effect=effect, t=t_stat, sd=std_dev,
                               df_denom=df_resid)
    return ContrastResults(effect=effect, statistic=t_stat, sd=std_dev,
                           df_denom=df_resid,
                           distribution='norm')
|
||||
|
||||
def f_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None):
    """
    Test a joint linear hypothesis with an F statistic.

    This simply delegates to `wald_test` with ``use_f=True``, so the F
    distribution is always requested.

    Parameters
    ----------
    r_matrix : array-like, str, or tuple
        Specification of the restrictions to test:

        - array : an r x k array, where r is the number of restrictions
          and k is the number of regressors. The linear combination is
          assumed to be equal to zero.
        - str : the full hypotheses given as a string (see Examples).
        - tuple : arrays in the form (R, q); ``q`` can be either a
          scalar or a length k row vector.
    cov_p : array-like, optional
        An alternative estimate for the parameter covariance matrix.
        If None is given, self.normalized_cov_params is used.
    scale : float, optional
        Default is 1.0 for no scaling.
    invcov : array-like, optional
        A q x q array to specify an inverse covariance matrix based on
        a restrictions matrix.

    Returns
    -------
    res : ContrastResults instance
        Whatever `wald_test` returns; the results of the test are
        attributes of this instance.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> results = sm.OLS(data.endog, data.exog).fit()
    >>> A = np.identity(len(results.params))
    >>> A = A[1:,:]

    This tests that each coefficient is jointly statistically
    significantly different from zero.

    >>> print(results.f_test(A))
    <F contrast: F=330.28533923463488, p=4.98403052872e-10,
     df_denom=9, df_num=6>

    Two equality restrictions tested jointly:

    >>> B = np.array(([0,0,1,-1,0,0,0],[0,0,0,0,0,1,-1]))
    >>> print(results.f_test(B))
    <F contrast: F=9.740461873303655, p=0.00560528853174, df_denom=9,
     df_num=2>

    The hypotheses can also be written as a string:

    >>> from statsmodels.datasets import longley
    >>> from statsmodels.formula.api import ols
    >>> dta = longley.load_pandas().data
    >>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
    >>> results = ols(formula, dta).fit()
    >>> hypotheses = '(GNPDEFL = GNP), (UNEMP = 2), (YEAR/1829 = 1)'
    >>> f_test = results.f_test(hypotheses)
    >>> print(f_test)

    See Also
    --------
    statsmodels.stats.contrast.ContrastResults
    wald_test
    t_test
    patsy.DesignInfo.linear_constraint

    Notes
    -----
    The matrix `r_matrix` is assumed to be non-singular. More precisely,

    r_matrix (pX pX.T) r_matrix.T

    is assumed invertible. Here, pX is the generalized inverse of the
    design matrix of the model. There can be problems in non-OLS models
    where the rank of the covariance of the noise is not full.
    """
    return self.wald_test(r_matrix, cov_p=cov_p, scale=scale,
                          invcov=invcov, use_f=True)
|
||||
|
||||
#TODO: untested for GLMs?
|
||||
def wald_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None,
              use_f=None):
    """
    Compute a Wald-test for a joint linear hypothesis.

    Parameters
    ----------
    r_matrix : array-like, str, or tuple
        - array : An r x k array where r is the number of restrictions to
          test and k is the number of regressors. It is assumed that the
          linear combination is equal to zero.
        - str : The full hypotheses to test can be given as a string.
        - tuple : A tuple of arrays in the form (R, q), ``q`` can be
          either a scalar or a length p row vector.
    cov_p : array-like, optional
        An alternative estimate for the parameter covariance matrix.
        If None is given, self.normalized_cov_params is used.
    scale : float, optional
        Default is 1.0 for no scaling. Accepted for interface
        compatibility; this method body does not read it.
    invcov : array-like, optional
        A q x q array to specify an inverse covariance matrix based on a
        restrictions matrix. When given, `cov_params` is not called.
    use_f : bool
        If True, then the F-distribution is used. If False, then the
        asymptotic distribution, chisquare is used. If use_f is None, then
        the F distribution is used if the model specifies that use_t is
        True. The test statistic is proportionally adjusted for the
        distribution by the number of constraints in the hypothesis.

    Returns
    -------
    res : ContrastResults instance
        The results for the test are attributes of this results instance.

    Raises
    ------
    ValueError
        If no parameter covariance is available, if a 1-D constant
        vector does not have one entry per restriction, or if the
        restricted covariance contains NaN.

    See also
    --------
    statsmodels.stats.contrast.ContrastResults
    f_test
    t_test
    patsy.DesignInfo.linear_constraint

    Notes
    -----
    The matrix `r_matrix` is assumed to be non-singular. More precisely,

    r_matrix (pX pX.T) r_matrix.T

    is assumed invertible. Here, pX is the generalized inverse of the
    design matrix of the model. There can be problems in non-OLS models
    where the rank of the covariance of the noise is not full.
    """
    if use_f is None:
        # fall back to the model's use_t flag; False when it is undefined
        use_f = (hasattr(self, 'use_t') and self.use_t)

    from patsy import DesignInfo
    names = self.model.data.param_names
    LC = DesignInfo(names).linear_constraint(r_matrix)
    r_matrix, q_matrix = LC.coefs, LC.constants

    if (self.normalized_cov_params is None and cov_p is None and
            invcov is None and not hasattr(self, 'cov_params_default')):
        raise ValueError('need covariance of parameters for computing '
                         'F statistics')

    cparams = np.dot(r_matrix, self.params[:, None])
    n_restrictions = r_matrix.shape[0]
    # kept as a float because it is used as a divisor and passed on as
    # degrees of freedom below
    J = float(n_restrictions)
    if q_matrix is None:
        # array shapes must be integers; a float shape raises TypeError
        q_matrix = np.zeros(n_restrictions)
    else:
        q_matrix = np.asarray(q_matrix)
    if q_matrix.ndim == 1:
        # turn the constants into a column so they line up with cparams
        q_matrix = q_matrix[:, None]
        if q_matrix.shape[0] != J:
            raise ValueError("r_matrix and q_matrix must have the same "
                             "number of rows")
    Rbq = cparams - q_matrix
    if invcov is None:
        cov_p = self.cov_params(r_matrix=r_matrix, cov_p=cov_p)
        if np.isnan(cov_p).max():
            raise ValueError("r_matrix performs f_test for using "
                             "dimensions that are asymptotically "
                             "non-normal")
        invcov = np.linalg.inv(cov_p)

    if (hasattr(self, 'mle_settings') and
            self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
        # l1-regularised fits go through nan_dot instead of np.dot
        F = nan_dot(nan_dot(Rbq.T, invcov), Rbq)
    else:
        F = np.dot(np.dot(Rbq.T, invcov), Rbq)

    df_resid = getattr(self, 'df_resid_inference', self.df_resid)
    if use_f:
        F /= J
        return ContrastResults(F=F, df_denom=df_resid,
                               df_num=invcov.shape[0])
    else:
        return ContrastResults(chi2=F, df_denom=J, statistic=F,
                               distribution='chi2', distargs=(J,))
|
||||
|
||||
|
||||
def wald_test_terms(self, skip_single=False, extra_constraints=None,
                    combine_terms=None):
    """
    Compute a sequence of Wald tests for terms over multiple columns

    This computes joined Wald tests for the hypothesis that all
    coefficients corresponding to a `term` are zero.

    `Terms` are defined by the underlying formula or by string matching.

    Parameters
    ----------
    skip_single : boolean
        If true, then terms that consist only of a single column and,
        therefore, refer only to a single parameter are skipped.
        If false, then all terms are included.
    extra_constraints : list, optional
        not tested yet. The code concatenates it to the list of
        ``(name, constraint_matrix)`` pairs, so it presumably has to be
        a list of such pairs -- confirm before relying on it.
    combine_terms : None or list of strings
        Each string in this list is matched to the name of the terms or
        the name of the exogenous variables. All columns whose name
        includes that string are combined in one joint test.

    Returns
    -------
    test_result : result instance
        The result instance contains `table` which is a pandas DataFrame
        with the test results: test statistic, degrees of freedom and
        pvalues.

    Examples
    --------
    >>> res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                      data).fit()
    >>> res_ols.wald_test_terms()
    <class 'statsmodels.stats.contrast.WaldTestResults'>
                                              F                P>F  df constraint  df denom
    Intercept                        279.754525  2.37985521351e-22              1        51
    C(Duration, Sum)                   5.367071    0.0245738436636              1        51
    C(Weight, Sum)                    12.432445  3.99943118767e-05              2        51
    C(Duration, Sum):C(Weight, Sum)    0.176002      0.83912310946              2        51

    >>> res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)",
                                       data).fit(cov_type='HC0')
    >>> wt = res_poi.wald_test_terms(skip_single=False,
                                     combine_terms=['Duration', 'Weight'])
    >>> print(wt)
                                chi2             P>chi2  df constraint
    Intercept              15.695625  7.43960374424e-05              1
    C(Weight)              16.132616  0.000313940174705              2
    C(Duration)             1.009147     0.315107378931              1
    C(Weight):C(Duration)   0.216694     0.897315972824              2
    Duration               11.187849     0.010752286833              3
    Weight                 30.263368  4.32586407145e-06              4
    """
    # lazy import
    from collections import defaultdict

    result = self
    if extra_constraints is None:
        extra_constraints = []
    if combine_terms is None:
        combine_terms = []
    design_info = getattr(result.model.data.orig_exog, 'design_info', None)

    identity = np.eye(len(result.params))
    if design_info is not None:
        # one block of identity rows per formula term
        named_blocks = [(term.name(), identity[design_info.slice(term)])
                        for term in design_info.terms]
    else:
        # no formula info: fall back to one constraint per exog column.
        # atleast_2d keeps each constraint as a 1 x k matrix so that
        # shape[0] below is the number of restrictions (1), not k.
        named_blocks = [(name, np.atleast_2d(identity[col]))
                        for col, name in enumerate(result.model.exog_names)]

    constraints = []
    combined = defaultdict(list)
    for name, constraint_matrix in named_blocks:
        # collect the rows that belong to each requested combined test
        for cname in combine_terms:
            if cname in name:
                combined[cname].append(constraint_matrix)

        if skip_single and constraint_matrix.shape[0] == 1:
            continue

        constraints.append((name, constraint_matrix))

    combined_constraints = [(cname, np.vstack(combined[cname]))
                            for cname in combine_terms]

    use_t = result.use_t
    distribution = 'F' if use_t else 'chi2'

    all_constraints = constraints + combined_constraints + extra_constraints
    res_wald = []
    index = []
    for name, constraint in all_constraints:
        wt = result.wald_test(constraint)
        row = [wt.statistic.item(), wt.pvalue, constraint.shape[0]]
        if use_t:
            row.append(wt.df_denom)
        res_wald.append(row)
        index.append(name)

    # distribution neutral names
    col_names = ['statistic', 'pvalue', 'df_constraint']
    if use_t:
        col_names.append('df_denom')
    # TODO: maybe move DataFrame creation to results class
    from pandas import DataFrame
    table = DataFrame(res_wald, index=index, columns=col_names)
    res = WaldTestResults(None, distribution, None, table=table)
    # TODO: remove temp again, added for testing
    res.temp = all_constraints
    return res
|
||||
|
||||
|
||||
def conf_int(self, alpha=.05, cols=None, method='default'):
    """
    Return confidence intervals for the fitted parameters.

    Parameters
    ----------
    alpha : float, optional
        The significance level for the confidence interval, i.e. the
        default `alpha` = .05 returns a 95% confidence interval.
    cols : array-like, optional
        Indices of the parameters for which intervals are returned.
        If None, intervals for all parameters are returned.
    method : string
        Not implemented yet; the argument is accepted but not read.

    Returns
    -------
    conf_int : array
        Each row contains [lower, upper] limits of the confidence
        interval for the corresponding parameter. The first column
        contains all lower, the second column all upper limits.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> results = sm.OLS(data.endog, data.exog).fit()
    >>> results.conf_int(cols=(2,3))
    array([[-0.1115811 ,  0.03994274],
           [-3.12506664, -0.91539297]])

    Notes
    -----
    The interval is ``params +/- q * bse``. The quantile ``q`` comes
    from the t distribution (with ``df_resid_inference`` if present,
    else ``df_resid``, degrees of freedom) when ``self.use_t`` is
    truthy, and from the standard normal distribution otherwise.
    """
    std_err = self.bse
    upper_tail = 1 - alpha / 2

    if self.use_t:
        dof = getattr(self, 'df_resid_inference', self.df_resid)
        quantile = stats.t.ppf(upper_tail, dof)
    else:
        quantile = stats.norm.ppf(upper_tail)

    if cols is None:
        center = self.params
        half_width = quantile * std_err
    else:
        cols = np.asarray(cols)
        center = self.params[cols]
        half_width = quantile * std_err[cols]

    lower = center - half_width
    upper = center + half_width
    # pair up the limits row by row: [[lower_0, upper_0], ...]
    return np.asarray(list(zip(lower, upper)))
|
||||
|
||||
def save(self, fname, remove_data=False):
    """
    Pickle this instance to `fname`.

    Parameters
    ----------
    fname : string or filehandle
        fname can be a string to a file path or filename, or a
        filehandle.
    remove_data : bool
        If False (default), then the instance is pickled without
        changes. If True, `self.remove_data()` is called first, which
        sets arrays with length nobs to None before pickling; in some
        cases not all arrays will be set to None.

    Notes
    -----
    If remove_data is true and the model result does not implement a
    remove_data method then this will raise an exception.
    """
    from statsmodels.iolib import smpickle

    # strip the data in place first when asked to; this modifies self
    if remove_data:
        self.remove_data()

    smpickle.save_pickle(self, fname)
|
||||
|
||||
@classmethod
def load(cls, fname):
    """
    Load a pickled instance (class method).

    Parameters
    ----------
    fname : string or filehandle
        fname can be a string to a file path or filename, or a
        filehandle.

    Returns
    -------
    unpickled instance
    """
    from statsmodels.iolib import smpickle

    return smpickle.load_pickle(fname)
|
||||
|
||||
def remove_data(self):
    """
    Remove data arrays, all nobs arrays, from result and model.

    This reduces the size of the instance, so it can be pickled with
    less memory. Currently tested for use with predict from an unpickled
    results and model instance.

    .. warning:: Since data and some intermediate results have been
       removed, calculating new statistics that require them will raise
       exceptions. The exception will occur the first time an attribute
       is accessed that has been set to None.

    Not fully tested for time series models, tsa, and might delete too
    much for prediction or not all that would be possible.

    The attributes to wipe are read from ``self._data_attr`` and
    ``self.model._data_attr``; cached values named in
    ``self.data_in_cache`` (if present) plus 'fittedvalues', 'resid' and
    'wresid' are set to None in ``self._cache``. These lists can be
    changed before calling remove_data; they are not modified here.
    """
    def wipe(obj, att):
        # walk the dotted path down to the object owning the last name
        parents = att.split('.')
        leaf = parents.pop(-1)
        try:
            owner = obj
            for part in parents:
                owner = getattr(owner, part)
            if hasattr(owner, leaf):
                setattr(owner, leaf, None)
        except AttributeError:
            # a missing intermediate attribute means nothing to wipe
            pass

    model_attr = ['model.' + i for i in self.model._data_attr]
    for att in self._data_attr + model_attr:
        wipe(self, att)

    # build a new list: `+=` on the attribute itself would append the
    # default keys to the shared list again on every call
    data_in_cache = (list(getattr(self, 'data_in_cache', [])) +
                     ['fittedvalues', 'resid', 'wresid'])
    for key in data_in_cache:
        try:
            self._cache[key] = None
        except (AttributeError, KeyError):
            pass
|
||||
|
||||
def lzip(*args, **kwargs):
    """Return the result of ``zip(*args, **kwargs)`` as a list."""
    zipped = zip(*args, **kwargs)
    return list(zipped)
|
1845
release/python/0.8.1/crankshaft/crankshaft/regression/glm/family.py
Normal file
1845
release/python/0.8.1/crankshaft/crankshaft/regression/glm/family.py
Normal file
File diff suppressed because it is too large
Load Diff
326
release/python/0.8.1/crankshaft/crankshaft/regression/glm/glm.py
Normal file
326
release/python/0.8.1/crankshaft/crankshaft/regression/glm/glm.py
Normal file
@ -0,0 +1,326 @@
|
||||
|
||||
import numpy as np
|
||||
import numpy.linalg as la
|
||||
from pysal.spreg.utils import RegressionPropsY, spdot
|
||||
import pysal.spreg.user_output as USER
|
||||
from utils import cache_readonly
|
||||
from base import LikelihoodModelResults
|
||||
import family
|
||||
from iwls import iwls
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = ['GLM']
|
||||
|
||||
class GLM(RegressionPropsY):
    """
    Generalised linear models. Can currently estimate Gaussian, Poisson
    and Logistic regression coefficients. The GLM object prepares the
    model input, and the fit method performs the estimation and returns
    a GLMResults object.

    Parameters
    ----------
    y             : array
                    n*1, dependent variable.
    X             : array
                    n*k, independent variable, excluding the constant.
    family        : family object
                    Distribution family of the model; defaults to
                    ``family.Gaussian()``.
    constant      : boolean
                    If True (default), X is passed through
                    ``USER.check_constant`` before being stored;
                    presumably this adds the constant column -- confirm
                    against pysal. If False, X is stored unchanged.

    Attributes
    ----------
    y             : array
                    n*1, dependent variable.
    X             : array
                    n*k, independent variable, including constant.
    family        : family object
                    The family passed at construction.
    n             : integer
                    Number of observations
    k             : integer
                    Number of columns of the stored X
    df_model      : float
                    k-1, where k is the number of variables (including
                    intercept)
    df_resid      : float
                    n - df_model - 1
    mean_y        : float
                    Mean of y (not defined in this class; presumably
                    provided by RegressionPropsY)
    std_y         : float
                    Standard deviation of y (not defined in this class;
                    presumably provided by RegressionPropsY)
    fit_params    : dict
                    Parameters passed into fit method to define
                    estimation routine.
    """
    def __init__(self, y, X, family=family.Gaussian(), constant=True):
        """
        Validate the inputs and store the model definition.
        """
        self.n = USER.check_arrays(y, X)
        USER.check_y(y, self.n)
        self.y = y
        if constant:
            self.X = USER.check_constant(X)
        else:
            self.X = X
        self.family = family
        self.k = self.X.shape[1]
        self.fit_params = {}

    def fit(self, ini_betas=None, tol=1.0e-6, max_iter=200, solve='iwls'):
        """
        Method that fits a model with a particular estimation routine.

        Parameters
        ----------
        ini_betas     : array
                        k*1, initial coefficient values, including
                        constant. Default is None, which calculates
                        initial values during estimation.
        tol           : float
                        Tolerance for estimation convergence.
        max_iter      : integer
                        Maximum number of iterations if convergence not
                        achieved.
        solve         : string
                        Technique to solve MLE equations.
                        'iwls' = iteratively (re)weighted least squares
                        (default); matched case-insensitively.

        Returns
        -------
        GLMResults
            Results object built from the estimated parameters.

        Raises
        ------
        ValueError
            If `solve` names anything other than 'iwls'.
        """
        # reject unknown solvers up front instead of failing later on
        # estimation results that were never assigned
        if solve.lower() != 'iwls':
            raise ValueError("Unsupported solve method %r; only 'iwls' "
                             "is implemented" % (solve,))

        self.fit_params['ini_betas'] = ini_betas
        self.fit_params['tol'] = tol
        self.fit_params['max_iter'] = max_iter
        self.fit_params['solve'] = solve

        params, predy, w, n_iter = iwls(self.y, self.X, self.family,
                                        ini_betas=ini_betas, tol=tol,
                                        max_iter=max_iter)
        self.fit_params['n_iter'] = n_iter
        return GLMResults(self, params.flatten(), predy, w)

    @cache_readonly
    def df_model(self):
        """Number of columns of X minus one."""
        return self.X.shape[1] - 1

    @cache_readonly
    def df_resid(self):
        """Observations minus model degrees of freedom minus one."""
        return self.n - self.df_model - 1
|
||||
|
||||
class GLMResults(LikelihoodModelResults):
    """
    Results of estimated GLM and diagnostics.

    Parameters
    ----------
    model         : GLM object
                    Pointer to GLM object with estimation parameters.
    params        : array
                    k*1, estimated coefficients
    mu            : array
                    n*1, predicted y values.
    w             : array
                    n*1, final weight used for iwls

    Attributes
    ----------
    model         : GLM Object
                    Points to GLM object for which parameters have been
                    estimated.
    y             : array
                    flattened copy of the model's dependent variable.
    X             : array
                    n*k, independent variable, including constant.
    family        : family object
                    Taken from the model.
    n             : integer
                    Number of observations
    k             : integer
                    Number of independent variables
    df_model      : float
                    Same as ``model.df_model``
    df_resid      : float
                    Same as ``model.df_resid``
    fit_params    : dict
                    parameters passed into fit method to define
                    estimation routine.
    scale         : float
                    1.0 for Binomial and Poisson families; otherwise
                    the sum of squared response residuals divided by
                    the variance function, over df_resid.
    params        : array
                    estimated beta coefficients
    w             : array
                    final weight values as passed to the constructor
    mu            : array
                    flattened predicted value of y (i.e., fittedvalues)
    bse           : array
                    k*1, standard errors of betas (not defined in this
                    class; presumably inherited)
    pvalues       : array
                    k*1, two-tailed pvalues of parameters (presumably
                    inherited)
    tvalues       : array
                    k*1, the tvalues of the standard errors (presumably
                    inherited)
    null          : array
                    n*1, predicted values of y for null model
    deviance      : float
                    value of the deviance function evaluated at params;
                    see family.py for distribution-specific deviance
    null_deviance : float
                    value of the deviance function for the model fit
                    with a constant as the only regressor
    llf           : float
                    value of the loglikelihood function evaluated at
                    params; see family.py for distribution-specific
                    loglikelihoods
    llnull        : float
                    value of log-likelihood function evaluated at null
    aic           : float
                    AIC; NaN for the QuasiPoisson family
    bic           : float
                    BIC
    D2            : float
                    percent deviance explained
    adj_D2        : float
                    adjusted percent deviance explained
    pseudoR2      : float
                    McFadden's pseudo R2 (coefficient of determination)
    adj_pseudoR2  : float
                    adjusted McFadden's pseudo R2
    resid_response : array
                    response residuals; defined as y-mu
    resid_pearson : array
                    Pearson residuals; defined as (y-mu)/sqrt(VAR(mu))
                    where VAR is the distribution specific variance
                    function; see family.py and varfuncs.py for more
                    information.
    resid_working : array
                    Working residuals; defined as
                    resid_response/link'(mu); see links.py for the
                    derivatives of the link functions.
    resid_anscombe : array
                    Anscombe residuals; see family.py for
                    distribution-specific Anscombe residuals.
    resid_deviance : array
                    deviance residuals; see family.py for
                    distribution-specific deviance residuals.
    pearson_chi2  : float
                    chi-Squared statistic, defined as the sum of the
                    squares of the Pearson residuals
    normalized_cov_params : array
                    inverse of ``w.T * w``
    """
    def __init__(self, model, params, mu, w):
        self.model = model
        # mirror the model definition on the results object
        self.n = model.n
        self.y = model.y.T.flatten()
        self.X = model.X
        self.k = model.k
        self.family = model.family
        self.fit_params = model.fit_params
        # estimation output
        self.params = params
        self.w = w
        self.mu = mu.flatten()
        self._cache = {}

    @cache_readonly
    def df_model(self):
        return self.model.df_model

    @cache_readonly
    def df_resid(self):
        return self.model.df_resid

    @cache_readonly
    def normalized_cov_params(self):
        cross_product = spdot(self.w.T, self.w)
        return la.inv(cross_product)

    @cache_readonly
    def resid_response(self):
        return (self.y - self.mu)

    @cache_readonly
    def resid_pearson(self):
        std_dev = np.sqrt(self.family.variance(self.mu))
        return (self.y - self.mu) / std_dev

    @cache_readonly
    def resid_working(self):
        link_slope = self.family.link.deriv(self.mu)
        return self.resid_response / link_slope

    @cache_readonly
    def resid_anscombe(self):
        return self.family.resid_anscombe(self.y, self.mu)

    @cache_readonly
    def resid_deviance(self):
        return self.family.resid_dev(self.y, self.mu)

    @cache_readonly
    def pearson_chi2(self):
        squared_terms = (self.y - self.mu)**2 / self.family.variance(self.mu)
        return np.sum(squared_terms)

    @cache_readonly
    def null(self):
        # fit an intercept-only model on the same response and family
        y_col = np.reshape(self.y, (-1, 1))
        intercept_only = np.ones((len(y_col), 1))
        null_model = GLM(y_col, intercept_only, family=self.family,
                         constant=False)
        return null_model.fit().mu

    @cache_readonly
    def scale(self):
        if isinstance(self.family, (family.Binomial, family.Poisson)):
            return 1.
        weighted_sq_resid = (np.power(self.resid_response, 2) /
                             self.family.variance(self.mu))
        return weighted_sq_resid.sum() / (self.df_resid)

    @cache_readonly
    def deviance(self):
        return self.family.deviance(self.y, self.mu)

    @cache_readonly
    def null_deviance(self):
        return self.family.deviance(self.y, self.null)

    @cache_readonly
    def llnull(self):
        return self.family.loglike(self.y, self.null, scale=self.scale)

    @cache_readonly
    def llf(self):
        return self.family.loglike(self.y, self.mu, scale=self.scale)

    @cache_readonly
    def aic(self):
        if isinstance(self.family, family.QuasiPoisson):
            return np.nan
        return -2 * self.llf + 2*(self.df_model+1)

    @cache_readonly
    def bic(self):
        n_obs_penalty = ((self.model.n - self.df_model - 1) *
                         np.log(self.model.n))
        return self.deviance - n_obs_penalty

    @cache_readonly
    def D2(self):
        return 1 - (self.deviance / self.null_deviance)

    @cache_readonly
    def adj_D2(self):
        n_obs = float(self.n)
        n_vars = float(self.k)
        ratio = (n_obs - 1.0)/(n_obs - n_vars)
        return 1.0 - ratio * (1.0-self.D2)

    @cache_readonly
    def pseudoR2(self):
        return 1 - (self.llf/self.llnull)

    @cache_readonly
    def adj_pseudoR2(self):
        penalised_llf = self.llf - self.k
        return 1 - (penalised_llf/self.llnull)
|
||||
|
@ -0,0 +1,84 @@
|
||||
import numpy as np
|
||||
import numpy.linalg as la
|
||||
from scipy import sparse as sp
|
||||
from scipy.sparse import linalg as spla
|
||||
from pysal.spreg.utils import spdot, spmultiply
|
||||
from family import Binomial, Poisson
|
||||
|
||||
def _compute_betas(y, x):
    """
    Solve the least-squares normal equations for the coefficients.

    Computes ``inv(x.T x) (x.T y)``; the caller `iwls` passes the
    already weighted response and design matrix. The inverse is wrapped
    in a CSR matrix before the final product.

    Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially
    varying relationships.
    """
    x_t = x.T
    gram_inv = sp.csr_matrix(la.inv(spdot(x_t, x)))
    moment = spdot(x_t, y, array_out=False)
    return spdot(gram_inv, moment)
|
||||
|
||||
def _compute_betas_gwr(y, x, wi):
|
||||
"""
|
||||
compute MLE coefficients using iwls routine
|
||||
|
||||
Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
|
||||
Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
|
||||
Geographically weighted regression: the analysis of spatially varying relationships.
|
||||
"""
|
||||
xT = (x * wi).T
|
||||
xtx = np.dot(xT, x)
|
||||
xtx_inv = la.inv(xtx)
|
||||
xtx_inv_xt = np.dot(xtx_inv, xT)
|
||||
betas = np.dot(xtx_inv_xt, y)
|
||||
return betas, xtx_inv_xt
|
||||
|
||||
def iwls(y, x, family, offset=1.0, ini_betas=None, tol=1.0e-8, max_iter=200, wi=None):
    """
    Iteratively re-weighted least squares estimation routine

    Parameters
    ----------
    y : array
        Response values (presumably shaped (n, 1) -- TODO confirm).
    x : array or sparse matrix
        Design matrix; ``x.shape[1]`` sizes the default starting
        coefficients. Anything that is not exactly a ``numpy.ndarray``
        is treated as sparse.
    family : family object
        Must provide ``starting_mu``, ``predict``, ``weights``, ``fitted``
        and ``link.deriv``. ``Binomial`` and ``Poisson`` instances get the
        extra handling described below.
    offset : scalar or array, optional
        Used only for ``Poisson`` families: ``y`` is divided by it before
        the initial linear predictor is formed and ``mu`` is multiplied by
        it after every pass.
    ini_betas : array, optional
        Starting coefficients; defaults to a (k, 1) array of zeros.
    tol : float, optional
        Iteration stops once the coefficient change measure is <= ``tol``.
    max_iter : int, optional
        Upper bound on the number of passes.
    wi : array, optional
        When given, each pass is solved with ``_compute_betas_gwr`` using
        these weights instead of ``_compute_betas``.

    Returns
    -------
    tuple
        ``(betas, mu, wx, n_iter)`` when ``wi`` is None, otherwise
        ``(betas, mu, v, w, z, xtx_inv_xt, n_iter)``. ``w`` is the square
        root of the family weights from the last pass.
    """
    n_iter = 0
    diff = 1.0e6
    if ini_betas is None:
        # Builtin float: the np.float alias was removed from NumPy and
        # names the same dtype on every version.
        betas = np.zeros((x.shape[1], 1), float)
    else:
        betas = ini_betas
    if isinstance(family, Binomial):
        y = family.link._clean(y)
    if isinstance(family, Poisson):
        # Initial linear predictor comes from the offset-scaled response,
        # while the initial mean still comes from the raw response.
        y_off = y/offset
        y_off = family.starting_mu(y_off)
        v = family.predict(y_off)
        mu = family.starting_mu(y)
    else:
        mu = family.starting_mu(y)
        v = family.predict(mu)

    # NOTE(review): if the loop body never runs (e.g. max_iter <= 0), wx and
    # xtx_inv_xt are never bound and the return below raises.
    while diff > tol and n_iter < max_iter:
        n_iter += 1
        w = family.weights(mu)
        # Working (adjusted) response for this pass.
        z = v + (family.link.deriv(mu)*(y-mu))
        w = np.sqrt(w)
        if type(x) != np.ndarray:
            w = sp.csr_matrix(w)
            z = sp.csr_matrix(z)
        wx = spmultiply(x, w, array_out=False)
        wz = spmultiply(z, w, array_out=False)
        if wi is None:
            n_betas = _compute_betas(wz, wx)
        else:
            n_betas, xtx_inv_xt = _compute_betas_gwr(wz, wx, wi)
        v = spdot(x, n_betas)
        mu = family.fitted(v)
        if isinstance(family, Poisson):
            mu = mu * offset
        # NOTE(review): convergence is judged on the SMALLEST coefficient
        # change, so a single stable coefficient ends the loop; confirm
        # whether max() was intended before changing (results would shift).
        diff = min(abs(n_betas-betas))
        betas = n_betas

    if wi is None:
        return betas, mu, wx, n_iter
    else:
        return betas, mu, v, w, z, xtx_inv_xt, n_iter
|
@ -0,0 +1,953 @@
|
||||
'''
|
||||
Defines the link functions to be used with GLM and GEE families.
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
import scipy.stats
|
||||
FLOAT_EPS = np.finfo(float).eps
|
||||
|
||||
|
||||
class Link(object):
    """
    A generic link function for one-parameter exponential family.

    `Link` does nothing, but lays out the methods expected of any subclass.
    The placeholder methods raise ``NotImplementedError`` so that a subclass
    which forgets to override one fails loudly instead of silently handing
    the exception class back as if it were a numeric result.
    """

    def __call__(self, p):
        """
        Return the value of the link function.  This is just a placeholder.

        Parameters
        ----------
        p : array-like
            Probabilities

        Returns
        -------
        g(p) : array-like
            The value of the link function g(p) = z

        Raises
        ------
        NotImplementedError
            Always; subclasses must override.
        """
        raise NotImplementedError

    def inverse(self, z):
        """
        Inverse of the link function.  Just a placeholder.

        Parameters
        ----------
        z : array-like
            `z` is usually the linear predictor of the transformed variable
            in the IRLS algorithm for GLM.

        Returns
        -------
        g^(-1)(z) : array
            The value of the inverse of the link function g^(-1)(z) = p

        Raises
        ------
        NotImplementedError
            Always; subclasses must override.
        """
        raise NotImplementedError

    def deriv(self, p):
        """
        Derivative of the link function g'(p).  Just a placeholder.

        Parameters
        ----------
        p : array-like

        Returns
        -------
        g'(p) : array
            The value of the derivative of the link function g'(p)

        Raises
        ------
        NotImplementedError
            Always; subclasses must override.
        """
        raise NotImplementedError

    def deriv2(self, p):
        """Second derivative of the link function g''(p)

        implemented through numerical differentiation of ``self.deriv``
        (requires statsmodels, imported on first use).
        """
        from statsmodels.tools.numdiff import approx_fprime_cs
        # TODO: workaround problem with numdiff for 1d
        return np.diag(approx_fprime_cs(p, self.deriv))

    def inverse_deriv(self, z):
        """
        Derivative of the inverse link function g^(-1)(z).

        Notes
        -----
        This reference implementation gives the correct result but is
        inefficient, so it can be overridden in subclasses.

        Parameters
        ----------
        z : array-like
            `z` is usually the linear predictor for a GLM or GEE model.

        Returns
        -------
        g'^(-1)(z) : array
            The value of the derivative of the inverse of the link function,
            computed as ``1 / deriv(inverse(z))``.
        """
        return 1 / self.deriv(self.inverse(z))
|
||||
|
||||
|
||||
class Logit(Link):
    """
    The logit transform

    Notes
    -----
    call and derivative use a private method _clean to trim p by
    machine epsilon so that p is in (0,1)

    Alias of Logit:
    logit = Logit()
    """

    def _clean(self, p):
        """
        Clip logistic values to range (eps, 1-eps)

        Parameters
        ----------
        p : array-like
            Probabilities

        Returns
        -------
        pclip : array
            Clipped probabilities
        """
        return np.clip(p, FLOAT_EPS, 1. - FLOAT_EPS)

    def __call__(self, p):
        """
        The logit transform

        Parameters
        ----------
        p : array-like
            Probabilities

        Returns
        -------
        z : array
            Logit transform of `p`

        Notes
        -----
        g(p) = log(p / (1 - p))
        """
        p = self._clean(p)
        return np.log(p / (1. - p))

    def inverse(self, z):
        """
        Inverse of the logit transform

        Parameters
        ----------
        z : array-like
            The value of the logit transform at `p`

        Returns
        -------
        p : array
            Probabilities

        Notes
        -----
        g^(-1)(z) = exp(z)/(1+exp(z)), evaluated as 1/(1+exp(-z))
        """
        z = np.asarray(z)
        t = np.exp(-z)
        return 1. / (1. + t)

    def deriv(self, p):
        """
        Derivative of the logit transform

        Parameters
        ----------
        p : array-like
            Probabilities

        Returns
        -------
        g'(p) : array
            Value of the derivative of logit transform at `p`

        Notes
        -----
        g'(p) = 1 / (p * (1 - p)); `p` is clipped with `_clean` first.
        """
        p = self._clean(p)
        return 1. / (p * (1 - p))

    def inverse_deriv(self, z):
        """
        Derivative of the inverse of the logit transform

        Parameters
        ----------
        z : array-like
            `z` is usually the linear predictor for a GLM or GEE model.

        Returns
        -------
        g'^(-1)(z) : array
            The value of the derivative of the inverse of the logit function

        Notes
        -----
        exp(z)/(1+exp(z))**2 is an even function of z, so it is evaluated
        at -|z|.  That keeps exp() from overflowing for large positive z,
        where the direct form yields inf/inf = nan instead of 0.
        """
        t = np.exp(-np.abs(z))
        return t / (1 + t)**2

    def deriv2(self, p):
        """
        Second derivative of the logit function.

        Parameters
        ----------
        p : array-like
            probabilities

        Returns
        -------
        g''(z) : array
            The value of the second derivative of the logit function

        Notes
        -----
        Unlike `deriv`, `p` is not clipped here, so p equal to exactly
        0 or 1 divides by zero.
        """
        v = p * (1 - p)
        return (2*p - 1) / v**2
|
||||
|
||||
class logit(Logit):
    """Lower-case alias of `Logit`; adds no behaviour of its own."""
|
||||
|
||||
|
||||
class Power(Link):
    """
    The power transform

    Parameters
    ----------
    power : float
        The exponent of the power transform

    Notes
    -----
    Aliases of Power:
    inverse = Power(power=-1)
    sqrt = Power(power=.5)
    inverse_squared = Power(power=-2.)
    identity = Power(power=1.)
    """

    def __init__(self, power=1.):
        self.power = power

    def __call__(self, p):
        """
        Power transform link function

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        z : array-like
            `p` raised to `power`

        Notes
        -----
        g(p) = p**power
        """
        return np.power(p, self.power)

    def inverse(self, z):
        """
        Inverse of the power transform link function

        Parameters
        ----------
        z : array-like
            Value of the transformed mean parameters at `p`

        Returns
        -------
        p : array
            Mean parameters

        Notes
        -----
        g^(-1)(z) = z**(1/power)
        """
        return np.power(z, 1. / self.power)

    def deriv(self, p):
        """
        Derivative of the power transform

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        g'(p) : array
            Derivative of power transform of `p`

        Notes
        -----
        g'(p) = power * p**(power - 1)
        """
        exponent = self.power
        return exponent * np.power(p, exponent - 1)

    def deriv2(self, p):
        """
        Second derivative of the power transform

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        g''(p) : array
            Second derivative of the power transform of `p`

        Notes
        -----
        g''(p) = power * (power - 1) * p**(power - 2)
        """
        exponent = self.power
        return exponent * (exponent - 1) * np.power(p, exponent - 2)

    def inverse_deriv(self, z):
        """
        Derivative of the inverse of the power transform

        Parameters
        ----------
        z : array-like
            `z` is usually the linear predictor for a GLM or GEE model.

        Returns
        -------
        g^(-1)'(z) : array
            z**((1 - power)/power) / power
        """
        exponent = self.power
        return np.power(z, (1 - exponent)/exponent) / exponent
|
||||
|
||||
|
||||
class inverse_power(Power):
    """
    The inverse transform

    Notes
    -----
    g(p) = 1/p

    A `Power` link whose exponent is fixed at -1.
    """
    def __init__(self):
        Power.__init__(self, power=-1.)
|
||||
|
||||
|
||||
class sqrt(Power):
    """
    The square-root transform

    Notes
    -----
    g(`p`) = sqrt(`p`)

    A `Power` link whose exponent is fixed at .5
    """
    def __init__(self):
        Power.__init__(self, power=.5)
|
||||
|
||||
|
||||
class inverse_squared(Power):
    """
    The inverse squared transform

    Notes
    -----
    g(`p`) = 1/(`p`**2)

    A `Power` link whose exponent is fixed at -2.
    """
    def __init__(self):
        Power.__init__(self, power=-2.)
|
||||
|
||||
|
||||
class identity(Power):
    """
    The identity transform

    Notes
    -----
    g(`p`) = `p`

    A `Power` link whose exponent is fixed at 1.
    """
    def __init__(self):
        Power.__init__(self, power=1.)
|
||||
|
||||
|
||||
class Log(Link):
    """
    The log transform

    Notes
    -----
    The call and both derivatives pass their argument through the private
    method _clean, which clips it to [machine epsilon, inf) so the
    logarithm and the divisions stay finite.  log is an alias of Log.
    """

    def _clean(self, x):
        """Clip `x` from below at machine epsilon (no upper bound)."""
        return np.clip(x, FLOAT_EPS, np.inf)

    def __call__(self, p, **extra):
        """
        Log transform link function

        Parameters
        ----------
        p : array-like
            Mean parameters
        **extra
            Accepted and ignored.

        Returns
        -------
        z : array
            log(p)

        Notes
        -----
        g(p) = log(p)
        """
        return np.log(self._clean(p))

    def inverse(self, z):
        """
        Inverse of log transform link function

        Parameters
        ----------
        z : array
            The value of the link function at `p`

        Returns
        -------
        p : array
            The mean parameters recovered from `z`

        Notes
        -----
        g^{-1}(z) = exp(z)
        """
        return np.exp(z)

    def deriv(self, p):
        """
        Derivative of log transform link function

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        g'(p) : array
            derivative of log transform of p

        Notes
        -----
        g'(p) = 1/p
        """
        return 1. / self._clean(p)

    def deriv2(self, p):
        """
        Second derivative of the log transform link function

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        g''(p) : array
            Second derivative of log transform of p

        Notes
        -----
        g''(p) = -1/p^2
        """
        clipped = self._clean(p)
        return -1. / clipped**2

    def inverse_deriv(self, z):
        """
        Derivative of the inverse of the log transform link function

        Parameters
        ----------
        z : array
            The value of the link function at `p`

        Returns
        -------
        g^(-1)'(z) : array
            The value of the derivative of the inverse of the log function,
            the exponential function
        """
        return np.exp(z)
|
||||
|
||||
|
||||
class log(Log):
    """
    The log transform

    Notes
    -----
    log is an alias of Log and adds no behaviour of its own.
    """
|
||||
|
||||
|
||||
# TODO: the CDFLink is untested
|
||||
class CDFLink(Logit):
    """
    Link built from the CDF of a scipy.stats distribution

    CDFLink is a subclass of Logit in order to use its _clean method
    for the link and its derivative.

    Parameters
    ----------
    dbn : scipy.stats distribution
        Default is dbn=scipy.stats.norm

    Notes
    -----
    The CDF link is untested.
    """

    def __init__(self, dbn=scipy.stats.norm):
        self.dbn = dbn

    def __call__(self, p):
        """
        CDF link function

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        z : array
            (ppf) inverse of CDF transform of p

        Notes
        -----
        g(`p`) = `dbn`.ppf(`p`), with `p` clipped by `_clean` first
        """
        return self.dbn.ppf(self._clean(p))

    def inverse(self, z):
        """
        The inverse of the CDF link

        Parameters
        ----------
        z : array-like
            The value of the link function at `p`

        Returns
        -------
        p : array
            Mean probabilities.  The value of the inverse of CDF link of `z`

        Notes
        -----
        g^(-1)(`z`) = `dbn`.cdf(`z`)
        """
        return self.dbn.cdf(z)

    def deriv(self, p):
        """
        Derivative of CDF link

        Parameters
        ----------
        p : array-like
            mean parameters

        Returns
        -------
        g'(p) : array
            The derivative of CDF transform at `p`

        Notes
        -----
        g'(`p`) = 1./ `dbn`.pdf(`dbn`.ppf(`p`))
        """
        quantile = self.dbn.ppf(self._clean(p))
        return 1. / self.dbn.pdf(quantile)

    def deriv2(self, p):
        """
        Second derivative of the link function g''(p)

        implemented through numerical differentiation of ``self.deriv``
        (requires statsmodels, imported on first use).
        """
        from statsmodels.tools.numdiff import approx_fprime
        points = np.atleast_1d(p)
        # Note: special function for norm.ppf does not support complex
        jacobian = approx_fprime(points, self.deriv, centered=True)
        return np.diag(jacobian)

    def inverse_deriv(self, z):
        """
        Derivative of the inverse of the CDF transformation link function

        Parameters
        ----------
        z : array
            The value of the link function at `p`

        Returns
        -------
        g^(-1)'(z) : array
            The value of the derivative of the inverse of the CDF link,
            computed as ``1 / deriv(inverse(z))``
        """
        prob = self.inverse(z)
        return 1/self.deriv(prob)
|
||||
|
||||
|
||||
class probit(CDFLink):
    """
    The probit (standard normal CDF) transform

    Notes
    -----
    g(p) = scipy.stats.norm.ppf(p)

    probit is an alias of CDFLink and relies on its default distribution.
    """
|
||||
|
||||
|
||||
class cauchy(CDFLink):
    """
    The Cauchy (standard Cauchy CDF) transform

    Notes
    -----
    g(p) = scipy.stats.cauchy.ppf(p)

    cauchy is an alias of CDFLink with dbn=scipy.stats.cauchy
    """

    def __init__(self):
        CDFLink.__init__(self, dbn=scipy.stats.cauchy)

    def deriv2(self, p):
        """
        Second derivative of the Cauchy link function.

        Parameters
        ----------
        p: array-like
            Probabilities

        Returns
        -------
        g''(p) : array
            Value of the second derivative of Cauchy link function at `p`,
            2*pi**2 * sin(a) / cos(a)**3 with a = pi*(p - 0.5)
        """
        angle = np.pi * (p - 0.5)
        return 2 * np.pi**2 * np.sin(angle) / np.cos(angle)**3
|
||||
|
||||
class CLogLog(Logit):
    """
    The complementary log-log transform

    CLogLog inherits from Logit in order to have access to its _clean method
    for the link and its derivative.

    Notes
    -----
    CLogLog is untested.
    """
    def __call__(self, p):
        """
        C-Log-Log transform link function

        Parameters
        ----------
        p : array
            Mean parameters

        Returns
        -------
        z : array
            The CLogLog transform of `p`

        Notes
        -----
        g(p) = log(-log(1-p))
        """
        clipped = self._clean(p)
        return np.log(-np.log(1 - clipped))

    def inverse(self, z):
        """
        Inverse of C-Log-Log transform link function

        Parameters
        ----------
        z : array-like
            The value of the CLogLog link function at `p`

        Returns
        -------
        p : array
            Mean parameters

        Notes
        -----
        g^(-1)(`z`) = 1-exp(-exp(`z`))
        """
        return 1 - np.exp(-np.exp(z))

    def deriv(self, p):
        """
        Derivative of C-Log-Log transform link function

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        g'(p) : array
            The derivative of the CLogLog transform link function

        Notes
        -----
        g'(p) = - 1 / ((p-1)*log(1-p))
        """
        clipped = self._clean(p)
        return 1. / ((clipped - 1) * (np.log(1 - clipped)))

    def deriv2(self, p):
        """
        Second derivative of the C-Log-Log link function

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        g''(p) : array
            The second derivative of the CLogLog link function
        """
        clipped = self._clean(p)
        log_complement = np.log(1 - clipped)
        leading = -1 / ((1 - clipped)**2 * log_complement)
        return leading * (1 + 1 / log_complement)

    def inverse_deriv(self, z):
        """
        Derivative of the inverse of the C-Log-Log transform link function

        Parameters
        ----------
        z : array-like
            The value of the CLogLog link function at `p`

        Returns
        -------
        g^(-1)'(z) : array
            The derivative of the inverse of the CLogLog link function,
            exp(z - exp(z))
        """
        return np.exp(z - np.exp(z))
|
||||
|
||||
|
||||
class cloglog(CLogLog):
    """
    The CLogLog transform link function.

    Notes
    -----
    g(`p`) = log(-log(1-`p`))

    cloglog is an alias for CLogLog and adds no behaviour of its own.
    """
|
||||
|
||||
|
||||
class NegativeBinomial(object):
    '''
    The negative binomial link function

    Parameters
    ----------
    alpha : float, optional
        Alpha is the ancillary parameter of the Negative Binomial link
        function.  It is assumed to be nonstochastic.  The default value is 1.
        Permissible values are usually assumed to be in (.01, 2).
    '''

    def __init__(self, alpha=1.):
        self.alpha = alpha

    def _clean(self, x):
        '''Clip `x` from below at machine epsilon (no upper bound).'''
        return np.clip(x, FLOAT_EPS, np.inf)

    def __call__(self, p):
        '''
        Negative Binomial transform link function

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        z : array
            The negative binomial transform of `p`

        Notes
        -----
        g(p) = log(p/(p + 1/alpha)), with `p` clipped by `_clean` first
        '''
        clipped = self._clean(p)
        return np.log(clipped/(clipped + 1/self.alpha))

    def inverse(self, z):
        '''
        Inverse of the negative binomial transform

        Parameters
        ----------
        z : array-like
            The value of the negative binomial link at `p`.

        Returns
        -------
        p : array
            Mean parameters

        Notes
        -----
        g^(-1)(z) = exp(z)/(alpha*(1-exp(z))),
        evaluated as -1/(alpha*(1-exp(-z)))
        '''
        scaled = self.alpha * (1 - np.exp(-z))
        return -1/scaled

    def deriv(self, p):
        '''
        Derivative of the negative binomial transform

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        g'(p) : array
            The derivative of the negative binomial transform link function

        Notes
        -----
        g'(x) = 1/(x+alpha*x^2)
        '''
        return 1/(p + self.alpha * p**2)

    def deriv2(self,p):
        '''
        Second derivative of the negative binomial link function.

        Parameters
        ----------
        p : array-like
            Mean parameters

        Returns
        -------
        g''(p) : array
            The second derivative of the negative binomial transform link
            function

        Notes
        -----
        g''(x) = -(1+2*alpha*x)/(x+alpha*x^2)^2
        '''
        top = -(1 + 2 * self.alpha * p)
        bottom = (p + self.alpha * p**2)**2
        return top / bottom

    def inverse_deriv(self, z):
        '''
        Derivative of the inverse of the negative binomial transform

        Parameters
        ----------
        z : array-like
            Usually the linear predictor for a GLM or GEE model

        Returns
        -------
        g^(-1)'(z) : array
            The value of the derivative of the inverse of the negative
            binomial link, exp(z)/(alpha*(1-exp(z))^2)
        '''
        growth = np.exp(z)
        return growth / (self.alpha * (1-growth)**2)
|
||||
|
||||
|
||||
class nbinom(NegativeBinomial):
    """
    The negative binomial link function.

    Notes
    -----
    g(p) = log(p/(p + 1/alpha))

    nbinom is an alias of NegativeBinomial and adds no behaviour of its own.
    nbinom = NegativeBinomial(alpha=1.)
    """
|
@ -0,0 +1,993 @@
|
||||
"""
|
||||
Tests for generalized linear models. Majority of code either directly borrowed
|
||||
or closely adapted from statsmodels package. Model results verified using glm
|
||||
function in R and GLM function in statsmodels.
|
||||
"""
|
||||
|
||||
__author__ = 'Taylor Oshan tayoshan@gmail.com'
|
||||
|
||||
from pysal.contrib.glm.glm import GLM
|
||||
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial, QuasiPoisson
|
||||
import numpy as np
|
||||
import pysal
|
||||
import unittest
|
||||
import math
|
||||
|
||||
|
||||
class TestGaussian(unittest.TestCase):
    """
    Tests for Gaussian GLM
    """

    def setUp(self):
        # Columbus example data: HOVAL is the response, INC and CRIME are
        # the two explanatory columns.
        db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')
        y = np.array(db.by_col("HOVAL"))
        self.y = np.reshape(y, (49,1))
        X = []
        X.append(db.by_col("INC"))
        X.append(db.by_col("CRIME"))
        self.X = np.array(X).T

    def testIWLS(self):
        """Fit a Gaussian GLM and compare every reported statistic."""
        model = GLM(self.y, self.X, family=Gaussian())
        results = model.fit()
        self.assertEqual(results.n, 49)
        self.assertEqual(results.df_model, 2)
        self.assertEqual(results.df_resid, 46)
        # Computed floats are compared with a tolerance rather than exact
        # equality, matching how the Poisson test checks the same fields.
        self.assertAlmostEqual(results.aic, 408.73548964604873)
        self.assertAlmostEqual(results.bic, 10467.991340493107)
        self.assertAlmostEqual(results.deviance, 10647.015074206196)
        self.assertAlmostEqual(results.llf, -201.36774482302437)
        self.assertAlmostEqual(results.null_deviance, 16367.794631703124)
        np.testing.assert_allclose(results.scale, 231.45684943926514)
        np.testing.assert_allclose(results.params, [ 46.42818268, 0.62898397,
            -0.48488854])
        np.testing.assert_allclose(results.bse, [ 13.19175703, 0.53591045,
            0.18267291])
        np.testing.assert_allclose(results.cov_params(),
            [[ 1.74022453e+02, -6.52060364e+00, -2.15109867e+00],
             [ -6.52060364e+00, 2.87200008e-01, 6.80956787e-02],
             [ -2.15109867e+00, 6.80956787e-02, 3.33693910e-02]])
        np.testing.assert_allclose(results.tvalues, [ 3.51948437, 1.17367365,
            -2.65440864])
        np.testing.assert_allclose(results.pvalues, [ 0.00043239, 0.24052577,
            0.00794475], atol=1.0e-8)
        np.testing.assert_allclose(results.conf_int(),
            [[ 20.57281401, 72.28355135],
             [ -0.42138121, 1.67934915],
             [ -0.84292086, -0.12685622]])
        np.testing.assert_allclose(results.normalized_cov_params,
            [[ 7.51857004e-01, -2.81720055e-02, -9.29373521e-03],
             [ -2.81720055e-02, 1.24083607e-03, 2.94204638e-04],
             [ -9.29373521e-03, 2.94204638e-04, 1.44171110e-04]])
        np.testing.assert_allclose(results.mu,
            [ 51.08752105, 50.66601521, 41.61367567, 33.53969014,
              28.90638232, 43.87074227, 51.64910882, 34.92671563,
              42.69267622, 38.49449134, 20.92815471, 25.25228436,
              29.78223486, 25.02403635, 29.07959539, 24.63352275,
              34.71372149, 33.40443052, 27.29864225, 65.86219802,
              33.69854751, 37.44976435, 50.01304928, 36.81219959,
              22.02674837, 31.64775955, 27.63563294, 23.7697291 ,
              22.43119725, 21.76987089, 48.51169321, 49.05891819,
              32.31656426, 44.20550354, 35.49244888, 51.27811308,
              36.55047181, 27.37048914, 48.78812922, 57.31744163,
              51.22914162, 54.70515578, 37.06622277, 44.5075759 ,
              41.24328983, 49.93821824, 44.85644299, 40.93838609, 47.32045464])
        np.testing.assert_allclose(results.pearson_chi2, 10647.015074206196)
        # All five residual types share one expected vector for this fit,
        # so it is written once and checked against each attribute in the
        # original order.
        expected_resid = [
            29.37948195, -6.09901421, -15.26367567, -0.33968914,
            -5.68138232, -15.12074227, 23.35089118, 2.19828437,
            9.90732178, 57.90551066, -1.22815371, -5.35228436,
            11.91776614, 17.87596565, -11.07959539, -5.83352375,
            7.03627851, 26.59556948, 3.30135775, 15.40479998,
            -13.72354751, -6.99976335, -2.28004728, 16.38780141,
            -4.12674837, -11.34776055, 6.46436506, -0.9197291 ,
            10.06880275, 0.73012911, -16.71169421, -8.75891919,
            -8.71656426, -15.75550254, -8.49244888, -14.97811408,
            6.74952719, -4.67048814, -9.18813122, 4.63255937,
            -9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
            -13.51028983, 26.16177976, -2.35644299, -14.13838709, -11.52045564]
        for resid_name in ('resid_response', 'resid_working', 'resid_pearson',
                           'resid_anscombe', 'resid_deviance'):
            np.testing.assert_allclose(getattr(results, resid_name),
                                       expected_resid, err_msg=resid_name)
        # The null model predicts the same value for all 49 observations.
        np.testing.assert_allclose(results.null, [38.43622447] * 49)
        self.assertAlmostEqual(results.D2, .349514377851)
        self.assertAlmostEqual(results.adj_D2, 0.32123239427957673)
|
||||
|
||||
class TestPoisson(unittest.TestCase):
    """Regression tests for GLM fits with the Poisson and QuasiPoisson families.

    The response is the rounded ``HOVAL`` column of the PySAL ``columbus.dbf``
    example and the design matrix holds the ``INC`` and ``CRIME`` columns.
    Every expected value below is a stored reference number; the tests only
    check that the fitted results still reproduce them.
    """

    def setUp(self):
        """Read the Columbus example table and build ``self.y`` / ``self.X``."""
        db = pysal.open(pysal.examples.get_path('columbus.dbf'), 'r')
        try:
            hoval = np.array(db.by_col("HOVAL"))
            covariates = [db.by_col("INC"), db.by_col("CRIME")]
        finally:
            # Release the file handle instead of leaving it to the garbage
            # collector; everything needed has been copied out already.
            db.close()
        y = np.reshape(hoval, (49, 1))
        # The count families are fitted against integer responses.
        self.y = np.round(y).astype(int)
        # One row per observation, one column per covariate.
        self.X = np.array(covariates).T

    def _assert_shared_fit_values(self, results):
        """Check the result attributes both family tests expect to be equal.

        ``testIWLS`` and ``testQuasi`` assert exactly the same reference
        values for the attributes checked here, so they are kept in one place.
        """
        self.assertEqual(results.n, 49)
        self.assertEqual(results.df_model, 2)
        self.assertEqual(results.df_resid, 46)
        self.assertAlmostEqual(results.bic, 51.436404535087661)
        self.assertAlmostEqual(results.deviance, 230.46013824817649)
        self.assertAlmostEqual(results.null_deviance, 376.97293610347361)
        np.testing.assert_allclose(
            results.params,
            [3.92159085, 0.01183491, -0.01371397],
            atol=1.0e-8)
        np.testing.assert_allclose(
            results.normalized_cov_params,
            [[1.70280610e-02, -6.18628383e-04, -2.21386966e-04],
             [-6.18628383e-04, 2.61733917e-05, 6.77496445e-06],
             [-2.21386966e-04, 6.77496445e-06, 3.75463502e-06]])
        np.testing.assert_allclose(
            results.mu,
            [51.26831574, 50.15022766, 40.06142973, 34.13799739,
             28.76119226, 42.6836241, 55.64593703, 34.08277997,
             40.90389582, 37.19727958, 23.47459217, 26.12384057,
             29.78303507, 25.96888223, 29.14073823, 26.04369592,
             34.18996367, 32.28924005, 27.42284396, 72.69207879,
             33.05316347, 36.52276972, 49.2551479, 35.33439632,
             24.07252457, 31.67153709, 27.81699478, 25.38021219,
             24.31759259, 23.13586161, 48.40724678, 48.57969818,
             31.92596006, 43.3679231, 34.32925819, 51.78908089,
             34.49778584, 27.56236198, 48.34273194, 57.50829097,
             50.66038226, 54.68701352, 35.77103116, 43.21886784,
             40.07615759, 49.98658004, 43.13352883, 40.28520774,
             46.28910294])
        self.assertAlmostEqual(results.pearson_chi2, 264.62262932090221)
        np.testing.assert_allclose(
            results.resid_response,
            [28.73168426, -5.15022766, -14.06142973, -1.13799739,
             -5.76119226, -13.6836241, 19.35406297, 2.91722003,
             12.09610418, 58.80272042, -3.47459217, -6.12384057,
             12.21696493, 17.03111777, -11.14073823, -7.04369592,
             7.81003633, 27.71075995, 3.57715604, 8.30792121,
             -13.05316347, -6.52276972, -1.2551479, 17.66560368,
             -6.07252457, -11.67153709, 6.18300522, -2.38021219,
             7.68240741, -1.13586161, -16.40724678, -8.57969818,
             -7.92596006, -15.3679231, -7.32925819, -15.78908089,
             8.50221416, -4.56236198, -8.34273194, 4.49170903,
             -8.66038226, -10.68701352, -9.77103116, -9.21886784,
             -12.07615759, 26.01341996, -1.13352883, -13.28520774,
             -10.28910294])
        np.testing.assert_allclose(
            results.resid_working,
            [1473.02506034, -258.28508941, -563.32097891, -38.84895192,
             -165.69875817, -584.06666725, 1076.97496919, 99.42696848,
             494.77778514, 2187.30123163, -81.56463405, -159.97823479,
             363.858295, 442.27909165, -324.64933645, -183.44387481,
             267.02485844, 894.75938, 98.09579187, 603.9200634,
             -431.44834594, -238.2296165, -61.82249568, 624.20344168,
             -146.18099686, -369.65551968, 171.99262399, -60.41029031,
             186.81765356, -26.27913713, -794.22964417, -416.79914795,
             -253.04388425, -666.47490701, -251.6079969, -817.70198717,
             293.30756327, -125.74947222, -403.31045369, 258.31051005,
             -438.73827602, -584.440853, -349.51985996, -398.42903071,
             -483.96599444, 1300.32189904, -48.89309853, -535.19735391,
             -476.27334527])
        np.testing.assert_allclose(
            results.resid_pearson,
            [4.01269878, -0.72726045, -2.221602, -0.19477008, -1.07425881,
             -2.09445239, 2.59451042, 0.49969118, 1.89131202, 9.64143836,
             -0.71714142, -1.19813392, 2.23861212, 3.34207756, -2.0637814,
             -1.3802231, 1.33568403, 4.87662684, 0.68309584, 0.97442591,
             -2.27043598, -1.07931992, -0.17884182, 2.97186889, -1.23768025,
             -2.07392709, 1.1723155, -0.47246327, 1.55789092, -0.23614708,
             -2.35819937, -1.23096188, -1.40274877, -2.33362391, -1.25091503,
             -2.19400568, 1.44755952, -0.8690235, -1.19989348, 0.59230634,
             -1.21675413, -1.44515442, -1.63370888, -1.40229988, -1.90759306,
             3.67934693, -0.17259375, -2.09312684, -1.51230062])
        np.testing.assert_allclose(
            results.resid_anscombe,
            [3.70889134, -0.74031295, -2.37729865, -0.19586855, -1.11374751,
             -2.22611959, 2.46352013, 0.49282126, 1.80857757, 8.06444452,
             -0.73610811, -1.25061371, 2.10820431, 3.05467547, -2.22437611,
             -1.45136173, 1.28939698, 4.35942058, 0.66904552, 0.95674923,
             -2.45438937, -1.11429881, -0.17961012, 2.76715848, -1.29658591,
             -2.22816691, 1.13269136, -0.48017382, 1.48562248, -0.23812278,
             -2.51664399, -1.2703721, -1.4683091, -2.49907536, -1.30026484,
             -2.32398309, 1.39380683, -0.89495368, -1.23735395, 0.58485202,
             -1.25435224, -1.4968484, -1.71888038, -1.45756652, -2.01906267,
             3.41729922, -0.17335867, -2.22921828, -1.57470549])
        np.testing.assert_allclose(
            results.resid_deviance,
            [3.70529668, -0.74027329, -2.37536322, -0.19586751, -1.11349765,
             -2.22466106, 2.46246446, 0.4928057, 1.80799655, 8.02696525,
             -0.73602255, -1.25021555, 2.10699958, 3.05084608, -2.22214376,
             -1.45072221, 1.28913747, 4.35106213, 0.6689982, 0.95669662,
             -2.45171913, -1.11410444, -0.17960956, 2.76494217, -1.29609865,
             -2.22612429, 1.13247453, -0.48015254, 1.48508549, -0.23812,
             -2.51476072, -1.27015583, -1.46777697, -2.49699318, -1.29992892,
             -2.32263069, 1.39348459, -0.89482132, -1.23715363, 0.58483655,
             -1.25415329, -1.49653039, -1.7181055, -1.45719072, -2.01791949,
             3.41437156, -0.1733581, -2.22765605, -1.57426046])
        # The expected null vector is the same value for all 49 observations.
        np.testing.assert_allclose(results.null, [38.42857143] * 49)
        self.assertAlmostEqual(results.D2, .388656011675)
        self.assertAlmostEqual(results.adj_D2, 0.36207583826952761)

    def testIWLS(self):
        """Fit with ``Poisson()`` and compare against the reference values."""
        model = GLM(self.y, self.X, family=Poisson())
        results = model.fit()
        self.assertAlmostEqual(results.aic, 500.85184179938756)
        self.assertAlmostEqual(results.llf, -247.42592089969378)
        self.assertEqual(results.scale, 1.0)
        np.testing.assert_allclose(
            results.bse,
            [0.13049161, 0.00511599, 0.00193769],
            atol=1.0e-8)
        np.testing.assert_allclose(
            results.cov_params(),
            [[1.70280610e-02, -6.18628383e-04, -2.21386966e-04],
             [-6.18628383e-04, 2.61733917e-05, 6.77496445e-06],
             [-2.21386966e-04, 6.77496445e-06, 3.75463502e-06]])
        np.testing.assert_allclose(
            results.tvalues,
            [30.0524361, 2.31331634, -7.07748998])
        np.testing.assert_allclose(
            results.pvalues,
            [2.02901657e-198, 2.07052532e-002, 1.46788805e-012])
        np.testing.assert_allclose(
            results.conf_int(),
            [[3.66583199e+00, 4.17734972e+00],
             [1.80774841e-03, 2.18620753e-02],
             [-1.75117666e-02, -9.91616901e-03]])
        self._assert_shared_fit_values(results)

    def testQuasi(self):
        """Fit with ``QuasiPoisson()`` and compare against the reference values."""
        model = GLM(self.y, self.X, family=QuasiPoisson())
        results = model.fit()
        # aic and llf are expected to be NaN for this family.
        self.assertTrue(math.isnan(results.aic))
        self.assertTrue(math.isnan(results.llf))
        self.assertAlmostEqual(results.scale, 5.7526658548022223)
        np.testing.assert_allclose(
            results.bse,
            [0.31298042, 0.01227057, 0.00464749],
            atol=1.0e-8)
        np.testing.assert_allclose(
            results.cov_params(),
            [[9.79567451e-02, -3.55876238e-03, -1.27356524e-03],
             [-3.55876238e-03, 1.50566777e-04, 3.89741067e-05],
             [-1.27356524e-03, 3.89741067e-05, 2.15991606e-05]])
        np.testing.assert_allclose(
            results.tvalues,
            [12.52982796, 0.96449604, -2.95083339])
        np.testing.assert_allclose(
            results.pvalues,
            [5.12737770e-36, 3.34797291e-01, 3.16917819e-03])
        np.testing.assert_allclose(
            results.conf_int(),
            [[3.3081605, 4.53502121],
             [-0.01221495, 0.03588478],
             [-0.02282288, -0.00460506]],
            atol=1.0e-8)
        self._assert_shared_fit_values(results)
|
||||
|
||||
class TestBinomial(unittest.TestCase):
|
||||
|
||||
def setUp(self):
    """Build the 316-observation fixture used by the binomial tests.

    Stores a 0/1 response as ``self.y`` and a single covariate as ``self.X``,
    each as a ``(316, 1)`` column array.
    """
    # London house price data
    # y: 'BATH2'
    bath2 = np.array([
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    # X: 'FLOORSZ'
    floorsz = np.array([
        77, 75, 64, 95, 107, 100, 81, 151, 98, 260, 171, 161, 91,
        80, 50, 85, 52, 69, 60, 84, 155, 97, 69, 126, 90, 43,
        51, 41, 140, 80, 52, 86, 66, 60, 40, 155, 138, 97, 115,
        148, 206, 60, 53, 96, 88, 160, 31, 43, 154, 60, 131, 60,
        46, 61, 125, 150, 76, 92, 96, 100, 105, 72, 48, 41, 72,
        65, 60, 65, 98, 33, 144, 111, 91, 108, 38, 48, 95, 63,
        98, 129, 108, 51, 131, 66, 48, 127, 76, 68, 52, 64, 57,
        121, 67, 76, 112, 96, 90, 53, 93, 64, 97, 58, 44, 157,
        53, 70, 71, 167, 47, 70, 96, 77, 75, 71, 67, 47, 71,
        90, 69, 64, 65, 95, 60, 60, 65, 54, 121, 105, 50, 85,
        69, 69, 62, 65, 93, 93, 70, 62, 155, 68, 117, 80, 80,
        75, 98, 114, 86, 70, 50, 51, 163, 124, 59, 95, 51, 63,
        85, 53, 46, 102, 114, 83, 47, 40, 63, 123, 100, 63, 110,
        79, 98, 99, 120, 52, 48, 37, 81, 30, 88, 50, 35, 116,
        67, 45, 80, 86, 109, 59, 75, 60, 71, 141, 121, 50, 168,
        90, 51, 133, 75, 133, 127, 37, 68, 105, 61, 123, 151, 110,
        77, 220, 94, 77, 70, 100, 98, 126, 55, 105, 60, 176, 104,
        68, 62, 70, 48, 102, 80, 97, 66, 80, 102, 160, 55, 60,
        71, 125, 85, 85, 190, 137, 48, 41, 42, 51, 57, 60, 114,
        88, 84, 108, 66, 85, 42, 98, 90, 127, 100, 55, 76, 82,
        63, 80, 71, 76, 121, 109, 92, 160, 109, 185, 100, 90, 90,
        86, 88, 95, 116, 135, 61, 74, 60, 235, 76, 66, 100, 49,
        50, 37, 100, 88, 90, 52, 95, 81, 79, 96, 75, 91, 86,
        83, 180, 108, 80, 96, 49, 117, 117, 86, 46, 66, 95, 57,
        120, 137, 68, 240])
    # Both series become explicit single-column matrices.
    self.y = np.reshape(bath2, (316, 1))
    self.X = np.reshape(floorsz, (316, 1))
|
||||
|
||||
def testIWLS(self):
|
||||
model = GLM(self.y, self.X, family=Binomial())
|
||||
results = model.fit()
|
||||
self.assertEqual(results.n, 316)
|
||||
self.assertEqual(results.df_model, 1)
|
||||
self.assertEqual(results.df_resid, 314)
|
||||
self.assertEqual(results.aic, 155.19347530342466)
|
||||
self.assertEqual(results.bic, -1656.1095797628657)
|
||||
self.assertEqual(results.deviance, 151.19347530342466)
|
||||
self.assertEqual(results.llf, -75.596737651712331)
|
||||
self.assertEqual(results.null_deviance, 189.16038985881212)
|
||||
self.assertEqual(results.scale, 1.0)
|
||||
np.testing.assert_allclose(results.params, [-5.33638276, 0.0287754 ])
|
||||
np.testing.assert_allclose(results.bse, [ 0.64499904, 0.00518312],
|
||||
atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.cov_params(),
|
||||
[[ 4.16023762e-01, -3.14338457e-03],
|
||||
[ -3.14338457e-03, 2.68646833e-05]])
|
||||
np.testing.assert_allclose(results.tvalues, [-8.27347396, 5.55175826])
|
||||
np.testing.assert_allclose(results.pvalues, [ 1.30111233e-16,
|
||||
2.82810512e-08])
|
||||
np.testing.assert_allclose(results.conf_int(),
|
||||
[[-6.60055765, -4.07220787],
|
||||
[ 0.01861668, 0.03893412]], atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.normalized_cov_params,
|
||||
[[ 4.16023762e-01, -3.14338457e-03],
|
||||
[ -3.14338457e-03, 2.68646833e-05]])
|
||||
np.testing.assert_allclose(results.mu,
|
||||
[ 0.04226237, 0.03999333, 0.02946178, 0.0689636 , 0.09471181,
|
||||
0.07879431, 0.04717464, 0.27065598, 0.07471691, 0.89522144,
|
||||
0.39752487, 0.33102718, 0.06192993, 0.04589793, 0.01988679,
|
||||
0.0526265 , 0.02104007, 0.03386636, 0.02634295, 0.05121018,
|
||||
0.29396682, 0.07275173, 0.03386636, 0.15307528, 0.06027915,
|
||||
0.01631789, 0.02045547, 0.01541937, 0.2128508 , 0.04589793,
|
||||
0.02104007, 0.05407977, 0.0311527 , 0.02634295, 0.01498855,
|
||||
0.29396682, 0.20336776, 0.07275173, 0.11637537, 0.25395607,
|
||||
0.64367488, 0.02634295, 0.02164101, 0.07083428, 0.05710047,
|
||||
0.32468619, 0.01160845, 0.01631789, 0.28803008, 0.02634295,
|
||||
0.17267234, 0.02634295, 0.01776301, 0.02709115, 0.14938186,
|
||||
0.26501331, 0.04111287, 0.06362285, 0.07083428, 0.07879431,
|
||||
0.08989109, 0.03680743, 0.0187955 , 0.01541937, 0.03680743,
|
||||
0.03029581, 0.02634295, 0.03029581, 0.07471691, 0.01228768,
|
||||
0.23277197, 0.10505173, 0.06192993, 0.09720799, 0.01416217,
|
||||
0.0187955 , 0.0689636 , 0.02865003, 0.07471691, 0.16460503,
|
||||
0.09720799, 0.02045547, 0.17267234, 0.0311527 , 0.0187955 ,
|
||||
0.15684317, 0.04111287, 0.03293737, 0.02104007, 0.02946178,
|
||||
0.02421701, 0.1353385 , 0.03203302, 0.04111287, 0.10778798,
|
||||
0.07083428, 0.06027915, 0.02164101, 0.06535882, 0.02946178,
|
||||
0.07275173, 0.02490638, 0.01678627, 0.30605146, 0.02164101,
|
||||
0.03482061, 0.03580075, 0.37030921, 0.0182721 , 0.03482061,
|
||||
0.07083428, 0.04226237, 0.03999333, 0.03580075, 0.03203302,
|
||||
0.0182721 , 0.03580075, 0.06027915, 0.03386636, 0.02946178,
|
||||
0.03029581, 0.0689636 , 0.02634295, 0.02634295, 0.03029581,
|
||||
0.02225873, 0.1353385 , 0.08989109, 0.01988679, 0.0526265 ,
|
||||
0.03386636, 0.03386636, 0.02786 , 0.03029581, 0.06535882,
|
||||
0.06535882, 0.03482061, 0.02786 , 0.29396682, 0.03293737,
|
||||
0.12242534, 0.04589793, 0.04589793, 0.03999333, 0.07471691,
|
||||
0.11344884, 0.05407977, 0.03482061, 0.01988679, 0.02045547,
|
||||
0.34389327, 0.14576223, 0.02561486, 0.0689636 , 0.02045547,
|
||||
0.02865003, 0.0526265 , 0.02164101, 0.01776301, 0.08307425,
|
||||
0.11344884, 0.04982997, 0.0182721 , 0.01498855, 0.02865003,
|
||||
0.14221564, 0.07879431, 0.02865003, 0.10237696, 0.04465416,
|
||||
0.07471691, 0.07673078, 0.13200634, 0.02104007, 0.0187955 ,
|
||||
0.01376599, 0.04717464, 0.01128289, 0.05710047, 0.01988679,
|
||||
0.01300612, 0.11936722, 0.03203302, 0.01726786, 0.04589793,
|
||||
0.05407977, 0.09976271, 0.02561486, 0.03999333, 0.02634295,
|
||||
0.03580075, 0.21771181, 0.1353385 , 0.01988679, 0.37704374,
|
||||
0.06027915, 0.02045547, 0.18104935, 0.03999333, 0.18104935,
|
||||
0.15684317, 0.01376599, 0.03293737, 0.08989109, 0.02709115,
|
||||
0.14221564, 0.27065598, 0.10237696, 0.04226237, 0.72991785,
|
||||
0.06713876, 0.04226237, 0.03482061, 0.07879431, 0.07471691,
|
||||
0.15307528, 0.02289366, 0.08989109, 0.02634295, 0.43243779,
|
||||
0.08756457, 0.03293737, 0.02786 , 0.03482061, 0.0187955 ,
|
||||
0.08307425, 0.04589793, 0.07275173, 0.0311527 , 0.04589793,
|
||||
0.08307425, 0.32468619, 0.02289366, 0.02634295, 0.03580075,
|
||||
0.14938186, 0.0526265 , 0.0526265 , 0.53268924, 0.19874565,
|
||||
0.0187955 , 0.01541937, 0.01586237, 0.02045547, 0.02421701,
|
||||
0.02634295, 0.11344884, 0.05710047, 0.05121018, 0.09720799,
|
||||
0.0311527 , 0.0526265 , 0.01586237, 0.07471691, 0.06027915,
|
||||
0.15684317, 0.07879431, 0.02289366, 0.04111287, 0.04848506,
|
||||
0.02865003, 0.04589793, 0.03580075, 0.04111287, 0.1353385 ,
|
||||
0.09976271, 0.06362285, 0.32468619, 0.09976271, 0.49676673,
|
||||
0.07879431, 0.06027915, 0.06027915, 0.05407977, 0.05710047,
|
||||
0.0689636 , 0.11936722, 0.18973955, 0.02709115, 0.03890304,
|
||||
0.02634295, 0.80625182, 0.04111287, 0.0311527 , 0.07879431,
|
||||
0.0193336 , 0.01988679, 0.01376599, 0.07879431, 0.05710047,
|
||||
0.06027915, 0.02104007, 0.0689636 , 0.04717464, 0.04465416,
|
||||
0.07083428, 0.03999333, 0.06192993, 0.05407977, 0.04982997,
|
||||
0.46087756, 0.09720799, 0.04589793, 0.07083428, 0.0193336 ,
|
||||
0.12242534, 0.12242534, 0.05407977, 0.01776301, 0.0311527 ,
|
||||
0.0689636 , 0.02421701, 0.13200634, 0.19874565, 0.03293737,
|
||||
0.82774282], atol=1.0e-8)
|
||||
self.assertAlmostEqual(results.pearson_chi2, 271.21110541713801)
|
||||
np.testing.assert_allclose(results.resid_response,
|
||||
[-0.04226237, -0.03999333, -0.02946178, -0.0689636 , -0.09471181,
|
||||
-0.07879431, -0.04717464, -0.27065598, -0.07471691, 0.10477856,
|
||||
-0.39752487, 0.66897282, -0.06192993, -0.04589793, -0.01988679,
|
||||
-0.0526265 , -0.02104007, -0.03386636, -0.02634295, -0.05121018,
|
||||
-0.29396682, 0.92724827, -0.03386636, -0.15307528, -0.06027915,
|
||||
-0.01631789, -0.02045547, -0.01541937, -0.2128508 , -0.04589793,
|
||||
-0.02104007, -0.05407977, -0.0311527 , -0.02634295, -0.01498855,
|
||||
-0.29396682, 0.79663224, -0.07275173, -0.11637537, 0.74604393,
|
||||
-0.64367488, -0.02634295, -0.02164101, -0.07083428, -0.05710047,
|
||||
-0.32468619, -0.01160845, -0.01631789, -0.28803008, -0.02634295,
|
||||
-0.17267234, -0.02634295, -0.01776301, -0.02709115, 0.85061814,
|
||||
0.73498669, -0.04111287, -0.06362285, -0.07083428, -0.07879431,
|
||||
0.91010891, -0.03680743, -0.0187955 , -0.01541937, -0.03680743,
|
||||
-0.03029581, -0.02634295, -0.03029581, -0.07471691, -0.01228768,
|
||||
0.76722803, -0.10505173, -0.06192993, -0.09720799, -0.01416217,
|
||||
-0.0187955 , -0.0689636 , -0.02865003, -0.07471691, -0.16460503,
|
||||
-0.09720799, -0.02045547, 0.82732766, -0.0311527 , -0.0187955 ,
|
||||
-0.15684317, -0.04111287, -0.03293737, -0.02104007, -0.02946178,
|
||||
-0.02421701, -0.1353385 , -0.03203302, -0.04111287, -0.10778798,
|
||||
-0.07083428, -0.06027915, -0.02164101, -0.06535882, -0.02946178,
|
||||
-0.07275173, -0.02490638, -0.01678627, -0.30605146, -0.02164101,
|
||||
-0.03482061, -0.03580075, 0.62969079, -0.0182721 , -0.03482061,
|
||||
-0.07083428, -0.04226237, -0.03999333, -0.03580075, -0.03203302,
|
||||
-0.0182721 , -0.03580075, -0.06027915, -0.03386636, -0.02946178,
|
||||
-0.03029581, -0.0689636 , -0.02634295, -0.02634295, -0.03029581,
|
||||
-0.02225873, -0.1353385 , -0.08989109, -0.01988679, -0.0526265 ,
|
||||
-0.03386636, -0.03386636, -0.02786 , -0.03029581, -0.06535882,
|
||||
-0.06535882, -0.03482061, -0.02786 , -0.29396682, -0.03293737,
|
||||
-0.12242534, -0.04589793, -0.04589793, -0.03999333, -0.07471691,
|
||||
-0.11344884, -0.05407977, -0.03482061, -0.01988679, -0.02045547,
|
||||
0.65610673, 0.85423777, -0.02561486, -0.0689636 , -0.02045547,
|
||||
-0.02865003, -0.0526265 , -0.02164101, -0.01776301, -0.08307425,
|
||||
-0.11344884, -0.04982997, -0.0182721 , -0.01498855, -0.02865003,
|
||||
-0.14221564, -0.07879431, -0.02865003, -0.10237696, -0.04465416,
|
||||
-0.07471691, -0.07673078, -0.13200634, -0.02104007, -0.0187955 ,
|
||||
-0.01376599, -0.04717464, -0.01128289, 0.94289953, -0.01988679,
|
||||
-0.01300612, -0.11936722, -0.03203302, -0.01726786, -0.04589793,
|
||||
-0.05407977, -0.09976271, -0.02561486, -0.03999333, -0.02634295,
|
||||
-0.03580075, -0.21771181, 0.8646615 , -0.01988679, 0.62295626,
|
||||
-0.06027915, -0.02045547, -0.18104935, 0.96000667, -0.18104935,
|
||||
-0.15684317, -0.01376599, -0.03293737, -0.08989109, -0.02709115,
|
||||
-0.14221564, 0.72934402, -0.10237696, -0.04226237, -0.72991785,
|
||||
-0.06713876, -0.04226237, -0.03482061, -0.07879431, -0.07471691,
|
||||
-0.15307528, 0.97710634, 0.91010891, -0.02634295, -0.43243779,
|
||||
-0.08756457, -0.03293737, -0.02786 , -0.03482061, -0.0187955 ,
|
||||
0.91692575, -0.04589793, -0.07275173, -0.0311527 , -0.04589793,
|
||||
-0.08307425, 0.67531381, -0.02289366, -0.02634295, -0.03580075,
|
||||
-0.14938186, -0.0526265 , -0.0526265 , 0.46731076, -0.19874565,
|
||||
-0.0187955 , -0.01541937, -0.01586237, -0.02045547, -0.02421701,
|
||||
-0.02634295, -0.11344884, -0.05710047, -0.05121018, -0.09720799,
|
||||
0.9688473 , -0.0526265 , -0.01586237, -0.07471691, -0.06027915,
|
||||
-0.15684317, -0.07879431, -0.02289366, -0.04111287, -0.04848506,
|
||||
-0.02865003, -0.04589793, -0.03580075, -0.04111287, -0.1353385 ,
|
||||
-0.09976271, -0.06362285, 0.67531381, -0.09976271, -0.49676673,
|
||||
-0.07879431, -0.06027915, -0.06027915, -0.05407977, -0.05710047,
|
||||
-0.0689636 , -0.11936722, -0.18973955, -0.02709115, -0.03890304,
|
||||
-0.02634295, 0.19374818, -0.04111287, -0.0311527 , -0.07879431,
|
||||
-0.0193336 , -0.01988679, -0.01376599, -0.07879431, 0.94289953,
|
||||
-0.06027915, -0.02104007, -0.0689636 , -0.04717464, -0.04465416,
|
||||
0.92916572, -0.03999333, -0.06192993, -0.05407977, -0.04982997,
|
||||
-0.46087756, -0.09720799, -0.04589793, -0.07083428, -0.0193336 ,
|
||||
-0.12242534, -0.12242534, -0.05407977, -0.01776301, -0.0311527 ,
|
||||
-0.0689636 , -0.02421701, -0.13200634, -0.19874565, -0.03293737,
|
||||
-0.82774282], atol=1.0e-8)
|
||||
np.testing.assert_allclose(results.resid_working,
|
||||
[ -1.71062283e-03, -1.53549840e-03, -8.42423701e-04,
|
||||
-4.42798906e-03, -8.12073047e-03, -5.71934606e-03,
|
||||
-2.12046213e-03, -5.34278480e-02, -5.16550074e-03,
|
||||
9.82823035e-03, -9.52067472e-02, 1.48142818e-01,
|
||||
-3.59779501e-03, -2.00993083e-03, -3.87619325e-04,
|
||||
-2.62379729e-03, -4.33370579e-04, -1.10808799e-03,
|
||||
-6.75670103e-04, -2.48818484e-03, -6.10129090e-02,
|
||||
6.25511612e-02, -1.10808799e-03, -1.98451739e-02,
|
||||
-3.41454749e-03, -2.61928659e-04, -4.09867263e-04,
|
||||
-2.34090923e-04, -3.56621577e-02, -2.00993083e-03,
|
||||
-4.33370579e-04, -2.76645832e-03, -9.40257152e-04,
|
||||
-6.75670103e-04, -2.21289369e-04, -6.10129090e-02,
|
||||
1.29061842e-01, -4.90775251e-03, -1.19671283e-02,
|
||||
1.41347263e-01, -1.47631680e-01, -6.75670103e-04,
|
||||
-4.58198217e-04, -4.66208406e-03, -3.07429001e-03,
|
||||
-7.11923401e-02, -1.33191898e-04, -2.61928659e-04,
|
||||
-5.90659690e-02, -6.75670103e-04, -2.46673839e-02,
|
||||
-6.75670103e-04, -3.09919962e-04, -7.14047519e-04,
|
||||
1.08085429e-01, 1.43161630e-01, -1.62077632e-03,
|
||||
-3.79032977e-03, -4.66208406e-03, -5.71934606e-03,
|
||||
7.44566288e-02, -1.30492035e-03, -3.46630910e-04,
|
||||
-2.34090923e-04, -1.30492035e-03, -8.90029618e-04,
|
||||
-6.75670103e-04, -8.90029618e-04, -5.16550074e-03,
|
||||
-1.49131762e-04, 1.37018624e-01, -9.87652847e-03,
|
||||
-3.59779501e-03, -8.53083698e-03, -1.97726627e-04,
|
||||
-3.46630910e-04, -4.42798906e-03, -7.97307494e-04,
|
||||
-5.16550074e-03, -2.26348718e-02, -8.53083698e-03,
|
||||
-4.09867263e-04, 1.18189219e-01, -9.40257152e-04,
|
||||
-3.46630910e-04, -2.07414715e-02, -1.62077632e-03,
|
||||
-1.04913757e-03, -4.33370579e-04, -8.42423701e-04,
|
||||
-5.72261321e-04, -1.58375811e-02, -9.93244730e-04,
|
||||
-1.62077632e-03, -1.03659408e-02, -4.66208406e-03,
|
||||
-3.41454749e-03, -4.58198217e-04, -3.99257703e-03,
|
||||
-8.42423701e-04, -4.90775251e-03, -6.04877746e-04,
|
||||
-2.77048947e-04, -6.50004229e-02, -4.58198217e-04,
|
||||
-1.17025566e-03, -1.23580799e-03, 1.46831486e-01,
|
||||
-3.27769165e-04, -1.17025566e-03, -4.66208406e-03,
|
||||
-1.71062283e-03, -1.53549840e-03, -1.23580799e-03,
|
||||
-9.93244730e-04, -3.27769165e-04, -1.23580799e-03,
|
||||
-3.41454749e-03, -1.10808799e-03, -8.42423701e-04,
|
||||
-8.90029618e-04, -4.42798906e-03, -6.75670103e-04,
|
||||
-6.75670103e-04, -8.90029618e-04, -4.84422741e-04,
|
||||
-1.58375811e-02, -7.35405096e-03, -3.87619325e-04,
|
||||
-2.62379729e-03, -1.10808799e-03, -1.10808799e-03,
|
||||
-7.54555329e-04, -8.90029618e-04, -3.99257703e-03,
|
||||
-3.99257703e-03, -1.17025566e-03, -7.54555329e-04,
|
||||
-6.10129090e-02, -1.04913757e-03, -1.31530576e-02,
|
||||
-2.00993083e-03, -2.00993083e-03, -1.53549840e-03,
|
||||
-5.16550074e-03, -1.14104800e-02, -2.76645832e-03,
|
||||
-1.17025566e-03, -3.87619325e-04, -4.09867263e-04,
|
||||
1.48037813e-01, 1.06365931e-01, -6.39314594e-04,
|
||||
-4.42798906e-03, -4.09867263e-04, -7.97307494e-04,
|
||||
-2.62379729e-03, -4.58198217e-04, -3.09919962e-04,
|
||||
-6.32800839e-03, -1.14104800e-02, -2.35929680e-03,
|
||||
-3.27769165e-04, -2.21289369e-04, -7.97307494e-04,
|
||||
-1.73489362e-02, -5.71934606e-03, -7.97307494e-04,
|
||||
-9.40802551e-03, -1.90495384e-03, -5.16550074e-03,
|
||||
-5.43585191e-03, -1.51253748e-02, -4.33370579e-04,
|
||||
-3.46630910e-04, -1.86893696e-04, -2.12046213e-03,
|
||||
-1.25867293e-04, 5.07657192e-02, -3.87619325e-04,
|
||||
-1.66959104e-04, -1.25477263e-02, -9.93244730e-04,
|
||||
-2.93030065e-04, -2.00993083e-03, -2.76645832e-03,
|
||||
-8.95970087e-03, -6.39314594e-04, -1.53549840e-03,
|
||||
-6.75670103e-04, -1.23580799e-03, -3.70792339e-02,
|
||||
1.01184411e-01, -3.87619325e-04, 1.46321062e-01,
|
||||
-3.41454749e-03, -4.09867263e-04, -2.68442736e-02,
|
||||
3.68583645e-02, -2.68442736e-02, -2.07414715e-02,
|
||||
-1.86893696e-04, -1.04913757e-03, -7.35405096e-03,
|
||||
-7.14047519e-04, -1.73489362e-02, 1.43973473e-01,
|
||||
-9.40802551e-03, -1.71062283e-03, -1.43894386e-01,
|
||||
-4.20497779e-03, -1.71062283e-03, -1.17025566e-03,
|
||||
-5.71934606e-03, -5.16550074e-03, -1.98451739e-02,
|
||||
2.18574168e-02, 7.44566288e-02, -6.75670103e-04,
|
||||
-1.06135519e-01, -6.99614755e-03, -1.04913757e-03,
|
||||
-7.54555329e-04, -1.17025566e-03, -3.46630910e-04,
|
||||
6.98449121e-02, -2.00993083e-03, -4.90775251e-03,
|
||||
-9.40257152e-04, -2.00993083e-03, -6.32800839e-03,
|
||||
1.48072729e-01, -5.12120512e-04, -6.75670103e-04,
|
||||
-1.23580799e-03, -1.89814939e-02, -2.62379729e-03,
|
||||
-2.62379729e-03, 1.16328328e-01, -3.16494123e-02,
|
||||
-3.46630910e-04, -2.34090923e-04, -2.47623705e-04,
|
||||
-4.09867263e-04, -5.72261321e-04, -6.75670103e-04,
|
||||
-1.14104800e-02, -3.07429001e-03, -2.48818484e-03,
|
||||
-8.53083698e-03, 2.92419496e-02, -2.62379729e-03,
|
||||
-2.47623705e-04, -5.16550074e-03, -3.41454749e-03,
|
||||
-2.07414715e-02, -5.71934606e-03, -5.12120512e-04,
|
||||
-1.62077632e-03, -2.23682205e-03, -7.97307494e-04,
|
||||
-2.00993083e-03, -1.23580799e-03, -1.62077632e-03,
|
||||
-1.58375811e-02, -8.95970087e-03, -3.79032977e-03,
|
||||
1.48072729e-01, -8.95970087e-03, -1.24186489e-01,
|
||||
-5.71934606e-03, -3.41454749e-03, -3.41454749e-03,
|
||||
-2.76645832e-03, -3.07429001e-03, -4.42798906e-03,
|
||||
-1.25477263e-02, -2.91702648e-02, -7.14047519e-04,
|
||||
-1.45456868e-03, -6.75670103e-04, 3.02653681e-02,
|
||||
-1.62077632e-03, -9.40257152e-04, -5.71934606e-03,
|
||||
-3.66561274e-04, -3.87619325e-04, -1.86893696e-04,
|
||||
-5.71934606e-03, 5.07657192e-02, -3.41454749e-03,
|
||||
-4.33370579e-04, -4.42798906e-03, -2.12046213e-03,
|
||||
-1.90495384e-03, 6.11546973e-02, -1.53549840e-03,
|
||||
-3.59779501e-03, -2.76645832e-03, -2.35929680e-03,
|
||||
-1.14513988e-01, -8.53083698e-03, -2.00993083e-03,
|
||||
-4.66208406e-03, -3.66561274e-04, -1.31530576e-02,
|
||||
-1.31530576e-02, -2.76645832e-03, -3.09919962e-04,
|
||||
-9.40257152e-04, -4.42798906e-03, -5.72261321e-04,
|
||||
-1.51253748e-02, -3.16494123e-02, -1.04913757e-03,
|
||||
-1.18023417e-01])
|
||||
np.testing.assert_allclose(results.resid_pearson,
|
||||
[-0.21006498, -0.20410641, -0.17423009, -0.27216147, -0.3234511 ,
|
||||
-0.29246179, -0.22250903, -0.60917574, -0.28416602, 0.3421141 ,
|
||||
-0.81229277, 1.42158361, -0.25694055, -0.21933056, -0.142444 ,
|
||||
-0.23569027, -0.14660243, -0.18722578, -0.16448609, -0.2323235 ,
|
||||
-0.64526275, 3.57006696, -0.18722578, -0.42513819, -0.25327023,
|
||||
-0.12879668, -0.14450826, -0.12514332, -0.5200069 , -0.21933056,
|
||||
-0.14660243, -0.23910582, -0.17931646, -0.16448609, -0.12335569,
|
||||
-0.64526275, 1.97919183, -0.28010679, -0.36290807, 1.71396874,
|
||||
-1.3440334 , -0.16448609, -0.14872695, -0.27610555, -0.24608613,
|
||||
-0.69339243, -0.1083734 , -0.12879668, -0.63604537, -0.16448609,
|
||||
-0.45684893, -0.16448609, -0.13447767, -0.16686977, 2.3862634 ,
|
||||
1.66535145, -0.20706426, -0.26066405, -0.27610555, -0.29246179,
|
||||
3.18191348, -0.19548397, -0.13840353, -0.12514332, -0.19548397,
|
||||
-0.17675498, -0.16448609, -0.17675498, -0.28416602, -0.11153719,
|
||||
1.81550268, -0.34261205, -0.25694055, -0.32813846, -0.11985666,
|
||||
-0.13840353, -0.27216147, -0.17174127, -0.28416602, -0.44389026,
|
||||
-0.32813846, -0.14450826, 2.18890738, -0.17931646, -0.13840353,
|
||||
-0.43129917, -0.20706426, -0.18455132, -0.14660243, -0.17423009,
|
||||
-0.1575374 , -0.39562855, -0.18191506, -0.20706426, -0.34757708,
|
||||
-0.27610555, -0.25327023, -0.14872695, -0.26444152, -0.17423009,
|
||||
-0.28010679, -0.15982038, -0.13066317, -0.66410018, -0.14872695,
|
||||
-0.189939 , -0.19269154, 1.30401147, -0.13642648, -0.189939 ,
|
||||
-0.27610555, -0.21006498, -0.20410641, -0.19269154, -0.18191506,
|
||||
-0.13642648, -0.19269154, -0.25327023, -0.18722578, -0.17423009,
|
||||
-0.17675498, -0.27216147, -0.16448609, -0.16448609, -0.17675498,
|
||||
-0.15088226, -0.39562855, -0.3142763 , -0.142444 , -0.23569027,
|
||||
-0.18722578, -0.18722578, -0.169288 , -0.17675498, -0.26444152,
|
||||
-0.26444152, -0.189939 , -0.169288 , -0.64526275, -0.18455132,
|
||||
-0.3735026 , -0.21933056, -0.21933056, -0.20410641, -0.28416602,
|
||||
-0.35772404, -0.23910582, -0.189939 , -0.142444 , -0.14450826,
|
||||
1.38125991, 2.42084442, -0.16213645, -0.27216147, -0.14450826,
|
||||
-0.17174127, -0.23569027, -0.14872695, -0.13447767, -0.30099975,
|
||||
-0.35772404, -0.22900483, -0.13642648, -0.12335569, -0.17174127,
|
||||
-0.4071783 , -0.29246179, -0.17174127, -0.33771794, -0.21619749,
|
||||
-0.28416602, -0.28828407, -0.38997712, -0.14660243, -0.13840353,
|
||||
-0.11814455, -0.22250903, -0.10682532, 4.06361781, -0.142444 ,
|
||||
-0.11479334, -0.36816723, -0.18191506, -0.1325567 , -0.21933056,
|
||||
-0.23910582, -0.33289374, -0.16213645, -0.20410641, -0.16448609,
|
||||
-0.19269154, -0.52754269, 2.52762346, -0.142444 , 1.28538406,
|
||||
-0.25327023, -0.14450826, -0.47018591, 4.89940505, -0.47018591,
|
||||
-0.43129917, -0.11814455, -0.18455132, -0.3142763 , -0.16686977,
|
||||
-0.4071783 , 1.64156241, -0.33771794, -0.21006498, -1.6439517 ,
|
||||
-0.26827373, -0.21006498, -0.189939 , -0.29246179, -0.28416602,
|
||||
-0.42513819, 6.53301013, 3.18191348, -0.16448609, -0.87288109,
|
||||
-0.30978696, -0.18455132, -0.169288 , -0.189939 , -0.13840353,
|
||||
3.32226189, -0.21933056, -0.28010679, -0.17931646, -0.21933056,
|
||||
-0.30099975, 1.44218477, -0.1530688 , -0.16448609, -0.19269154,
|
||||
-0.41906522, -0.23569027, -0.23569027, 0.93662539, -0.4980393 ,
|
||||
-0.13840353, -0.12514332, -0.12695686, -0.14450826, -0.1575374 ,
|
||||
-0.16448609, -0.35772404, -0.24608613, -0.2323235 , -0.32813846,
|
||||
5.57673284, -0.23569027, -0.12695686, -0.28416602, -0.25327023,
|
||||
-0.43129917, -0.29246179, -0.1530688 , -0.20706426, -0.22573357,
|
||||
-0.17174127, -0.21933056, -0.19269154, -0.20706426, -0.39562855,
|
||||
-0.33289374, -0.26066405, 1.44218477, -0.33289374, -0.99355423,
|
||||
-0.29246179, -0.25327023, -0.25327023, -0.23910582, -0.24608613,
|
||||
-0.27216147, -0.36816723, -0.48391225, -0.16686977, -0.20119082,
|
||||
-0.16448609, 0.49021146, -0.20706426, -0.17931646, -0.29246179,
|
||||
-0.14040923, -0.142444 , -0.11814455, -0.29246179, 4.06361781,
|
||||
-0.25327023, -0.14660243, -0.27216147, -0.22250903, -0.21619749,
|
||||
3.6218033 , -0.20410641, -0.25694055, -0.23910582, -0.22900483,
|
||||
-0.92458976, -0.32813846, -0.21933056, -0.27610555, -0.14040923,
|
||||
-0.3735026 , -0.3735026 , -0.23910582, -0.13447767, -0.17931646,
|
||||
-0.27216147, -0.1575374 , -0.38997712, -0.4980393 , -0.18455132,
|
||||
-2.19209332])
|
||||
np.testing.assert_allclose(results.resid_anscombe,
|
||||
[-0.31237627, -0.3036605 , -0.25978208, -0.40240831, -0.47552289,
|
||||
-0.43149255, -0.33053793, -0.85617194, -0.41962951, 0.50181328,
|
||||
-1.0954382 , 1.66940149, -0.38048321, -0.3259044 , -0.21280762,
|
||||
-0.34971301, -0.21896842, -0.27890356, -0.2454118 , -0.34482158,
|
||||
-0.90063409, 2.80452413, -0.27890356, -0.61652596, -0.37518169,
|
||||
-0.19255932, -0.2158664 , -0.18713159, -0.74270558, -0.3259044 ,
|
||||
-0.21896842, -0.35467084, -0.2672722 , -0.2454118 , -0.18447466,
|
||||
-0.90063409, 2.05763941, -0.41381347, -0.53089521, 1.88552083,
|
||||
-1.60654218, -0.2454118 , -0.22211425, -0.40807333, -0.3647888 ,
|
||||
-0.95861559, -0.16218047, -0.19255932, -0.88935802, -0.2454118 ,
|
||||
-0.65930821, -0.2454118 , -0.20099345, -0.24892975, 2.28774016,
|
||||
1.85167195, -0.30798858, -0.38585584, -0.40807333, -0.43149255,
|
||||
2.65398426, -0.2910267 , -0.20681747, -0.18713159, -0.2910267 ,
|
||||
-0.26350118, -0.2454118 , -0.26350118, -0.41962951, -0.16689207,
|
||||
1.95381191, -0.50251231, -0.38048321, -0.48214234, -0.17927213,
|
||||
-0.20681747, -0.40240831, -0.25611424, -0.41962951, -0.64189694,
|
||||
-0.48214234, -0.2158664 , 2.18071204, -0.2672722 , -0.20681747,
|
||||
-0.62488429, -0.30798858, -0.27497271, -0.21896842, -0.25978208,
|
||||
-0.23514749, -0.57618899, -0.27109582, -0.30798858, -0.50947546,
|
||||
-0.40807333, -0.37518169, -0.22211425, -0.39130036, -0.25978208,
|
||||
-0.41381347, -0.2385213 , -0.19533116, -0.92350689, -0.22211425,
|
||||
-0.28288904, -0.28692985, 1.5730846 , -0.20388497, -0.28288904,
|
||||
-0.40807333, -0.31237627, -0.3036605 , -0.28692985, -0.27109582,
|
||||
-0.20388497, -0.28692985, -0.37518169, -0.27890356, -0.25978208,
|
||||
-0.26350118, -0.40240831, -0.2454118 , -0.2454118 , -0.26350118,
|
||||
-0.22530448, -0.57618899, -0.46253505, -0.21280762, -0.34971301,
|
||||
-0.27890356, -0.27890356, -0.25249702, -0.26350118, -0.39130036,
|
||||
-0.39130036, -0.28288904, -0.25249702, -0.90063409, -0.27497271,
|
||||
-0.5456246 , -0.3259044 , -0.3259044 , -0.3036605 , -0.41962951,
|
||||
-0.52366614, -0.35467084, -0.28288904, -0.21280762, -0.2158664 ,
|
||||
1.63703418, 2.30570989, -0.24194253, -0.40240831, -0.2158664 ,
|
||||
-0.25611424, -0.34971301, -0.22211425, -0.20099345, -0.44366892,
|
||||
-0.52366614, -0.33999576, -0.20388497, -0.18447466, -0.25611424,
|
||||
-0.59203547, -0.43149255, -0.25611424, -0.49563627, -0.32133344,
|
||||
-0.41962951, -0.42552227, -0.56840788, -0.21896842, -0.20681747,
|
||||
-0.17672552, -0.33053793, -0.15987433, 2.9768074 , -0.21280762,
|
||||
-0.17173916, -0.53821445, -0.27109582, -0.19814236, -0.3259044 ,
|
||||
-0.35467084, -0.48884654, -0.24194253, -0.3036605 , -0.2454118 ,
|
||||
-0.28692985, -0.75249089, 2.35983933, -0.21280762, 1.55726719,
|
||||
-0.37518169, -0.2158664 , -0.67712261, 3.23165236, -0.67712261,
|
||||
-0.62488429, -0.17672552, -0.27497271, -0.46253505, -0.24892975,
|
||||
-0.59203547, 1.83482464, -0.49563627, -0.31237627, -1.83652534,
|
||||
-0.39681759, -0.31237627, -0.28288904, -0.43149255, -0.41962951,
|
||||
-0.61652596, 3.63983609, 2.65398426, -0.2454118 , -1.16171662,
|
||||
-0.45616505, -0.27497271, -0.25249702, -0.28288904, -0.20681747,
|
||||
2.71015945, -0.3259044 , -0.41381347, -0.2672722 , -0.3259044 ,
|
||||
-0.44366892, 1.68567947, -0.22853969, -0.2454118 , -0.28692985,
|
||||
-0.60826548, -0.34971301, -0.34971301, 1.2290223 , -0.71397735,
|
||||
-0.20681747, -0.18713159, -0.1898263 , -0.2158664 , -0.23514749,
|
||||
-0.2454118 , -0.52366614, -0.3647888 , -0.34482158, -0.48214234,
|
||||
3.41271513, -0.34971301, -0.1898263 , -0.41962951, -0.37518169,
|
||||
-0.62488429, -0.43149255, -0.22853969, -0.30798858, -0.3352348 ,
|
||||
-0.25611424, -0.3259044 , -0.28692985, -0.30798858, -0.57618899,
|
||||
-0.48884654, -0.38585584, 1.68567947, -0.48884654, -1.28709718,
|
||||
-0.43149255, -0.37518169, -0.37518169, -0.35467084, -0.3647888 ,
|
||||
-0.40240831, -0.53821445, -0.69534436, -0.24892975, -0.29939131,
|
||||
-0.2454118 , 0.70366797, -0.30798858, -0.2672722 , -0.43149255,
|
||||
-0.2097915 , -0.21280762, -0.17672552, -0.43149255, 2.9768074 ,
|
||||
-0.37518169, -0.21896842, -0.40240831, -0.33053793, -0.32133344,
|
||||
2.82351017, -0.3036605 , -0.38048321, -0.35467084, -0.33999576,
|
||||
-1.21650102, -0.48214234, -0.3259044 , -0.40807333, -0.2097915 ,
|
||||
-0.5456246 , -0.5456246 , -0.35467084, -0.20099345, -0.2672722 ,
|
||||
-0.40240831, -0.23514749, -0.56840788, -0.71397735, -0.27497271,
|
||||
-2.18250381])
|
||||
np.testing.assert_allclose(results.resid_deviance,
|
||||
[-0.29387552, -0.2857098 , -0.24455876, -0.37803944, -0.44609851,
|
||||
-0.40514674, -0.31088148, -0.79449324, -0.39409528, 0.47049798,
|
||||
-1.00668653, 1.48698001, -0.35757692, -0.30654405, -0.20043547,
|
||||
-0.32882173, -0.20622595, -0.26249995, -0.23106769, -0.32424676,
|
||||
-0.83437766, 2.28941155, -0.26249995, -0.57644334, -0.35262564,
|
||||
-0.18139734, -0.20331052, -0.17629229, -0.69186337, -0.30654405,
|
||||
-0.20622595, -0.33345774, -0.251588 , -0.23106769, -0.17379306,
|
||||
-0.83437766, 1.78479093, -0.38867448, -0.4974393 , 1.65565332,
|
||||
-1.43660134, -0.23106769, -0.20918228, -0.38332275, -0.34291558,
|
||||
-0.88609006, -0.15281596, -0.18139734, -0.82428104, -0.23106769,
|
||||
-0.61571821, -0.23106769, -0.18932865, -0.234371 , 1.94999969,
|
||||
1.62970871, -0.2897651 , -0.36259328, -0.38332275, -0.40514674,
|
||||
2.19506559, -0.27386827, -0.19480442, -0.17629229, -0.27386827,
|
||||
-0.24804925, -0.23106769, -0.24804925, -0.39409528, -0.15725009,
|
||||
1.7074519 , -0.47114617, -0.35757692, -0.4522457 , -0.16889886,
|
||||
-0.19480442, -0.37803944, -0.24111595, -0.39409528, -0.59975102,
|
||||
-0.4522457 , -0.20331052, 1.87422489, -0.251588 , -0.19480442,
|
||||
-0.5841272 , -0.2897651 , -0.25881274, -0.20622595, -0.24455876,
|
||||
-0.22142749, -0.53929061, -0.25517563, -0.2897651 , -0.47760126,
|
||||
-0.38332275, -0.35262564, -0.20918228, -0.36767536, -0.24455876,
|
||||
-0.38867448, -0.2245965 , -0.18400413, -0.85481866, -0.20918228,
|
||||
-0.26623785, -0.27002708, 1.40955093, -0.19204738, -0.26623785,
|
||||
-0.38332275, -0.29387552, -0.2857098 , -0.27002708, -0.25517563,
|
||||
-0.19204738, -0.27002708, -0.35262564, -0.26249995, -0.24455876,
|
||||
-0.24804925, -0.37803944, -0.23106769, -0.23106769, -0.24804925,
|
||||
-0.21218006, -0.53929061, -0.43402996, -0.20043547, -0.32882173,
|
||||
-0.26249995, -0.26249995, -0.23772023, -0.24804925, -0.36767536,
|
||||
-0.36767536, -0.26623785, -0.23772023, -0.83437766, -0.25881274,
|
||||
-0.51106408, -0.30654405, -0.30654405, -0.2857098 , -0.39409528,
|
||||
-0.49074728, -0.33345774, -0.26623785, -0.20043547, -0.20331052,
|
||||
1.46111186, 1.96253843, -0.22780971, -0.37803944, -0.20331052,
|
||||
-0.24111595, -0.32882173, -0.20918228, -0.18932865, -0.41648237,
|
||||
-0.49074728, -0.31973217, -0.19204738, -0.17379306, -0.24111595,
|
||||
-0.55389988, -0.40514674, -0.24111595, -0.46476893, -0.30226435,
|
||||
-0.39409528, -0.39958581, -0.53211065, -0.20622595, -0.19480442,
|
||||
-0.16650295, -0.31088148, -0.15064545, 2.39288231, -0.20043547,
|
||||
-0.16181126, -0.5042114 , -0.25517563, -0.18664773, -0.30654405,
|
||||
-0.33345774, -0.45846897, -0.22780971, -0.2857098 , -0.23106769,
|
||||
-0.27002708, -0.7007597 , 1.99998811, -0.20043547, 1.39670618,
|
||||
-0.35262564, -0.20331052, -0.63203077, 2.53733821, -0.63203077,
|
||||
-0.5841272 , -0.16650295, -0.25881274, -0.43402996, -0.234371 ,
|
||||
-0.55389988, 1.61672923, -0.46476893, -0.29387552, -1.61804148,
|
||||
-0.37282386, -0.29387552, -0.26623785, -0.40514674, -0.39409528,
|
||||
-0.57644334, 2.74841605, 2.19506559, -0.23106769, -1.06433539,
|
||||
-0.42810736, -0.25881274, -0.23772023, -0.26623785, -0.19480442,
|
||||
2.23070414, -0.30654405, -0.38867448, -0.251588 , -0.30654405,
|
||||
-0.41648237, 1.49993075, -0.21521982, -0.23106769, -0.27002708,
|
||||
-0.5688444 , -0.32882173, -0.32882173, 1.12233423, -0.66569789,
|
||||
-0.19480442, -0.17629229, -0.17882689, -0.20331052, -0.22142749,
|
||||
-0.23106769, -0.49074728, -0.34291558, -0.32424676, -0.4522457 ,
|
||||
2.63395309, -0.32882173, -0.17882689, -0.39409528, -0.35262564,
|
||||
-0.5841272 , -0.40514674, -0.21521982, -0.2897651 , -0.3152773 ,
|
||||
-0.24111595, -0.30654405, -0.27002708, -0.2897651 , -0.53929061,
|
||||
-0.45846897, -0.36259328, 1.49993075, -0.45846897, -1.17192274,
|
||||
-0.40514674, -0.35262564, -0.35262564, -0.33345774, -0.34291558,
|
||||
-0.37803944, -0.5042114 , -0.64869028, -0.234371 , -0.28170899,
|
||||
-0.23106769, 0.65629132, -0.2897651 , -0.251588 , -0.40514674,
|
||||
-0.19760028, -0.20043547, -0.16650295, -0.40514674, 2.39288231,
|
||||
-0.35262564, -0.20622595, -0.37803944, -0.31088148, -0.30226435,
|
||||
2.30104857, -0.2857098 , -0.35757692, -0.33345774, -0.31973217,
|
||||
-1.11158678, -0.4522457 , -0.30654405, -0.38332275, -0.19760028,
|
||||
-0.51106408, -0.51106408, -0.33345774, -0.18932865, -0.251588 ,
|
||||
-0.37803944, -0.22142749, -0.53211065, -0.66569789, -0.25881274,
|
||||
-1.87550882])
|
||||
np.testing.assert_allclose(results.null,
|
||||
[ 0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759, 0.08860759, 0.08860759, 0.08860759, 0.08860759,
|
||||
0.08860759])
|
||||
self.assertAlmostEqual(results.D2, .200712816165)
|
||||
self.assertAlmostEqual(results.adj_D2, 0.19816731557930456)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,350 @@
|
||||
|
||||
from __future__ import absolute_import, print_function
|
||||
import numpy as np
|
||||
import warnings
|
||||
|
||||
|
||||
def _bit_length_26(x):
|
||||
if x == 0:
|
||||
return 0
|
||||
elif x == 1:
|
||||
return 1
|
||||
else:
|
||||
return len(bin(x)) - 2
|
||||
|
||||
|
||||
try:
    from scipy.lib._version import NumpyVersion
except ImportError:
    import re

    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3 has no ``basestring``

    class NumpyVersion(object):
        """Parse and compare numpy version strings.

        Numpy has the following versioning scheme (numbers given are
        examples; they can be > 9 in principle):

        - Released version: '1.8.0', '1.8.1', etc.
        - Alpha: '1.8.0a1', '1.8.0a2', etc.
        - Beta: '1.8.0b1', '1.8.0b2', etc.
        - Release candidates: '1.8.0rc1', '1.8.0rc2', etc.
        - Development versions: '1.8.0.dev-f1234afa' (git commit hash
          appended)
        - Development versions after a1: '1.8.0a1.dev-f1234afa',
                                         '1.8.0b2.dev-f1234afa',
                                         '1.8.1rc1.dev-f1234afa', etc.
        - Development versions (no git hash available): '1.8.0.dev-Unknown'

        Comparing needs to be done against a valid version string or other
        `NumpyVersion` instance.

        Parameters
        ----------
        vstring : str
            Numpy version string (``np.__version__``).

        Raises
        ------
        ValueError
            If `vstring` does not start with ``major.minor.bugfix``, or if
            an instance is compared with something that is neither a string
            nor a `NumpyVersion`.

        Notes
        -----
        All dev versions of the same (pre-)release compare equal. Any
        version string containing ``.dev`` is treated as a development
        version, which also covers the ``'1.8.0.dev0+f1234af'`` spelling.

        Examples
        --------
        >>> from scipy.lib._version import NumpyVersion
        >>> if NumpyVersion(np.__version__) < '1.7.0':
        ...     print('skip')
        skip
        >>> NumpyVersion('1.7')  # raises ValueError, add ".0"
        """

        def __init__(self, vstring):
            self.vstring = vstring
            # Every component may have several digits (e.g. '10.2.13').
            ver_main = re.match(r'\d+[.]\d+[.]\d+', vstring)
            if not ver_main:
                raise ValueError("Not a valid numpy version string")

            self.version = ver_main.group()
            self.major, self.minor, self.bugfix = [int(x) for x in
                                                   self.version.split('.')]
            if len(vstring) == ver_main.end():
                self.pre_release = 'final'
            else:
                # Whatever follows x.y.z decides the pre-release tag.
                alpha = re.match(r'a\d', vstring[ver_main.end():])
                beta = re.match(r'b\d', vstring[ver_main.end():])
                rc = re.match(r'rc\d', vstring[ver_main.end():])
                pre_rel = [m for m in [alpha, beta, rc] if m is not None]
                if pre_rel:
                    self.pre_release = pre_rel[0].group()
                else:
                    # Suffix present but not a/b/rc (e.g. a bare dev build).
                    self.pre_release = ''

            # Literal '.dev' marker; matches both '.dev-<hash>' and '.devN'.
            self.is_devversion = bool(re.search(r'[.]dev', vstring))

        def _compare_version(self, other):
            """Compare major.minor.bugfix"""
            if self.major == other.major:
                if self.minor == other.minor:
                    if self.bugfix == other.bugfix:
                        vercmp = 0
                    elif self.bugfix > other.bugfix:
                        vercmp = 1
                    else:
                        vercmp = -1
                elif self.minor > other.minor:
                    vercmp = 1
                else:
                    vercmp = -1
            elif self.major > other.major:
                vercmp = 1
            else:
                vercmp = -1

            return vercmp

        def _compare_pre_release(self, other):
            """Compare alpha/beta/rc/final."""
            if self.pre_release == other.pre_release:
                vercmp = 0
            elif self.pre_release == 'final':
                vercmp = 1
            elif other.pre_release == 'final':
                vercmp = -1
            elif self.pre_release > other.pre_release:
                # Tags order lexically: '' < 'a1' < 'b1' < 'rc1'.
                vercmp = 1
            else:
                vercmp = -1

            return vercmp

        def _compare(self, other):
            """Return -1, 0 or 1 as self is older, equal or newer than other."""
            if not isinstance(other, (string_types, NumpyVersion)):
                raise ValueError("Invalid object to compare with NumpyVersion.")

            if isinstance(other, string_types):
                other = NumpyVersion(other)

            vercmp = self._compare_version(other)
            if vercmp == 0:
                # Same x.y.z version, check for alpha/beta/rc
                vercmp = self._compare_pre_release(other)
                if vercmp == 0:
                    # Same version and same pre-release, check if dev version
                    if self.is_devversion is other.is_devversion:
                        vercmp = 0
                    elif self.is_devversion:
                        vercmp = -1
                    else:
                        vercmp = 1

            return vercmp

        def __lt__(self, other):
            return self._compare(other) < 0

        def __le__(self, other):
            return self._compare(other) <= 0

        def __eq__(self, other):
            return self._compare(other) == 0

        def __ne__(self, other):
            return self._compare(other) != 0

        def __gt__(self, other):
            return self._compare(other) > 0

        def __ge__(self, other):
            return self._compare(other) >= 0

        def __repr__(self):
            return "NumpyVersion(%s)" % self.vstring
|
||||
|
||||
|
||||
def _next_regular(target):
|
||||
"""
|
||||
Find the next regular number greater than or equal to target.
|
||||
Regular numbers are composites of the prime factors 2, 3, and 5.
|
||||
Also known as 5-smooth numbers or Hamming numbers, these are the optimal
|
||||
size for inputs to FFTPACK.
|
||||
Target must be a positive integer.
|
||||
"""
|
||||
if target <= 6:
|
||||
return target
|
||||
|
||||
# Quickly check if it's already a power of 2
|
||||
if not (target & (target - 1)):
|
||||
return target
|
||||
|
||||
match = float('inf') # Anything found will be smaller
|
||||
p5 = 1
|
||||
while p5 < target:
|
||||
p35 = p5
|
||||
while p35 < target:
|
||||
# Ceiling integer division, avoiding conversion to float
|
||||
# (quotient = ceil(target / p35))
|
||||
quotient = -(-target // p35)
|
||||
# Quickly find next power of 2 >= quotient
|
||||
try:
|
||||
p2 = 2 ** ((quotient - 1).bit_length())
|
||||
except AttributeError:
|
||||
# Fallback for Python <2.7
|
||||
p2 = 2 ** _bit_length_26(quotient - 1)
|
||||
|
||||
N = p2 * p35
|
||||
if N == target:
|
||||
return N
|
||||
elif N < match:
|
||||
match = N
|
||||
p35 *= 3
|
||||
if p35 == target:
|
||||
return p35
|
||||
if p35 < match:
|
||||
match = p35
|
||||
p5 *= 5
|
||||
if p5 == target:
|
||||
return p5
|
||||
if p5 < match:
|
||||
match = p5
|
||||
return match
|
||||
# numpy >= 1.7.1 ships a usable matrix_rank; older releases get the
# fallback implementation below.
if NumpyVersion(np.__version__) >= '1.7.1':
    np_matrix_rank = np.linalg.matrix_rank
else:
    def np_matrix_rank(M, tol=None):
        """
        Return matrix rank of array using SVD method

        Rank of the array is the number of SVD singular values of the array
        that are greater than `tol`.

        Parameters
        ----------
        M : {(M,), (M, N)} array_like
            array of <=2 dimensions
        tol : {None, float}, optional
            threshold below which SVD values are considered zero. If `tol`
            is None, and ``S`` is an array with singular values for `M`, and
            ``eps`` is the epsilon value for datatype of ``S``, then `tol`
            is set to ``S.max() * max(M.shape) * eps``.

        Returns
        -------
        rank : int
            ``0`` or ``1`` for input with fewer than two dimensions,
            otherwise the count of singular values above `tol`.

        Raises
        ------
        TypeError
            If `M` has more than two dimensions.

        Notes
        -----
        The default threshold to detect rank deficiency is a test on the
        magnitude of the singular values of `M`. By default, we identify
        singular values less than ``S.max() * max(M.shape) * eps`` as
        indicating rank deficiency (with the symbols defined above). This is
        the algorithm MATLAB uses [1]. It also appears in *Numerical
        recipes* in the discussion of SVD solutions for linear least
        squares [2].

        This default threshold is designed to detect rank deficiency
        accounting for the numerical errors of the SVD computation. Imagine
        that there is a column in `M` that is an exact (in floating point)
        linear combination of other columns in `M`. Computing the SVD on `M`
        will not produce a singular value exactly equal to 0 in general: any
        difference of the smallest SVD value from 0 will be caused by
        numerical imprecision in the calculation of the SVD. Our threshold
        for small SVD values takes this numerical imprecision into account,
        and the default threshold will detect such numerical rank
        deficiency. The threshold may declare a matrix `M` rank deficient
        even if the linear combination of some columns of `M` is not exactly
        equal to another column of `M` but only numerically very close to
        another column of `M`.

        We chose our default threshold because it is in wide use. Other
        thresholds are possible. For example, elsewhere in the 2007 edition
        of *Numerical recipes* there is an alternative threshold of
        ``S.max() * np.finfo(M.dtype).eps / 2. * np.sqrt(m + n + 1.)``. The
        authors describe this threshold as being based on "expected roundoff
        error" (p 71).

        The thresholds above deal with floating point roundoff error in the
        calculation of the SVD. However, you may have more information about
        the sources of error in `M` that would make you consider other
        tolerance values to detect *effective* rank deficiency. The most
        useful measure of the tolerance depends on the operations you intend
        to use on your matrix. For example, if your data come from uncertain
        measurements with uncertainties greater than floating point epsilon,
        choosing a tolerance near that uncertainty may be preferable. The
        tolerance may be absolute if the uncertainties are absolute rather
        than relative.

        References
        ----------
        .. [1] MATLAB reference documentation, "Rank"
               http://www.mathworks.com/help/techdoc/ref/rank.html
        .. [2] W. H. Press, S. A. Teukolsky, W. T. Vetterling and
               B. P. Flannery, "Numerical Recipes (3rd edition)", Cambridge
               University Press, 2007, page 795.

        Examples
        --------
        >>> from numpy.linalg import matrix_rank
        >>> matrix_rank(np.eye(4))  # Full rank matrix
        4
        >>> I=np.eye(4); I[-1,-1] = 0.  # rank deficient matrix
        >>> matrix_rank(I)
        3
        >>> matrix_rank(np.ones((4,)))  # 1 dimension - rank 1 unless all 0
        1
        >>> matrix_rank(np.zeros((4,)))
        0
        """
        M = np.asarray(M)
        if M.ndim > 2:
            raise TypeError('array should have 2 or fewer dimensions')
        if M.ndim < 2:
            # np.all rather than the builtin all(): the builtin iterates its
            # argument and therefore fails on 0-d (scalar) input.
            return int(not np.all(M == 0))
        S = np.linalg.svd(M, compute_uv=False)
        if tol is None:
            tol = S.max() * max(M.shape) * np.finfo(S.dtype).eps
        return np.sum(S > tol)
|
||||
|
||||
|
||||
|
||||
class CacheWriteWarning(UserWarning):
    """Warning category used when a cached attribute is assigned to."""
|
||||
|
||||
class CachedAttribute(object):
    """
    Descriptor that computes an attribute on first access and caches it.

    The computed value is stored in a dict-like cache kept on the instance
    (attribute name `cachename`, ``'_cache'`` by default) under the wrapped
    function's name. Later reads return the stored value; assignments are
    refused with a `CacheWriteWarning`.

    Parameters
    ----------
    func : callable
        Function of one argument (the instance) that computes the value.
    cachename : str, optional
        Name of the instance attribute holding the cache.
    resetlist : sequence, optional
        Recorded in the cache's ``_resetdict`` under the attribute name when
        the cache object provides such a mapping.

    Notes
    -----
    A stored value of ``None`` is indistinguishable from "not cached", so a
    function returning ``None`` is called again on every access.
    """

    def __init__(self, func, cachename=None, resetlist=None):
        self.fget = func
        self.name = func.__name__
        self.cachename = cachename or '_cache'
        self.resetlist = resetlist or ()

    def __get__(self, obj, type=None):
        # Accessed on the class itself: hand back the raw function.
        if obj is None:
            return self.fget
        # Get the cache or set a default one if needed
        _cachename = self.cachename
        _cache = getattr(obj, _cachename, None)
        if _cache is None:
            # No cache on the instance yet: start with a plain dict. It has
            # no ``_resetdict``, which the reset-list update below tolerates.
            setattr(obj, _cachename, {})
            _cache = getattr(obj, _cachename)
        # Get the name of the attribute to set and cache
        name = self.name
        _cachedval = _cache.get(name, None)
        if _cachedval is None:
            # Call the "fget" function
            _cachedval = self.fget(obj)
            # Set the attribute in obj
            try:
                _cache[name] = _cachedval
            except KeyError:
                setattr(_cache, name, _cachedval)
            # Update the reset list if needed (and possible)
            resetlist = self.resetlist
            if resetlist:
                try:
                    _cache._resetdict[name] = self.resetlist
                except AttributeError:
                    # Cache type does not track reset dependencies.
                    pass
        return _cachedval

    def __set__(self, obj, value):
        # Cached attributes are read-only: warn and leave the cache untouched.
        errmsg = "The attribute '%s' cannot be overwritten" % self.name
        warnings.warn(errmsg, CacheWriteWarning)
|
||||
|
||||
|
||||
class _cache_readonly(object):
|
||||
"""
|
||||
Decorator for CachedAttribute
|
||||
"""
|
||||
|
||||
def __init__(self, cachename=None, resetlist=None):
|
||||
self.func = None
|
||||
self.cachename = cachename
|
||||
self.resetlist = resetlist or None
|
||||
|
||||
def __call__(self, func):
|
||||
return CachedAttribute(func,
|
||||
cachename=self.cachename,
|
||||
resetlist=self.resetlist)
|
||||
cache_readonly = _cache_readonly()
|
||||
|
||||
|
@ -0,0 +1,284 @@
|
||||
"""
|
||||
Variance functions for use with the link functions in statsmodels.family.links
|
||||
"""
|
||||
|
||||
__docformat__ = 'restructuredtext'
|
||||
|
||||
import numpy as np
|
||||
FLOAT_EPS = np.finfo(float).eps
|
||||
|
||||
class VarianceFunction(object):
    """
    Relates the variance of a random variable to its mean. Defaults to 1.

    Methods
    -------
    call
        Returns an array of ones that is the same shape as `mu`
    deriv
        Returns a numerical derivative of the variance function

    Notes
    -----
    After a variance function is initialized, its call method can be used.

    Alias for VarianceFunction:
    constant = VarianceFunction()

    See also
    --------
    statsmodels.family.family
    """

    def __call__(self, mu):
        """
        Default (constant) variance function

        Parameters
        ----------
        mu : array-like
            mean parameters

        Returns
        -------
        v : array
            float64 array of ones shaped like `mu`
        """
        shape = np.asarray(mu).shape
        return np.ones(shape, np.float64)

    def deriv(self, mu):
        """
        Derivative of the variance function v'(mu)

        Computed numerically with the complex-step routine from
        statsmodels, which is imported on first use.
        """
        from statsmodels.tools.numdiff import approx_fprime_cs
        # TODO: diag is a workaround for a problem with numdiff for 1d
        jacobian = approx_fprime_cs(mu, self)
        return np.diag(jacobian)


constant = VarianceFunction()
constant.__doc__ = """
The call method of constant returns a constant variance, i.e., a vector of ones.

constant is an alias of VarianceFunction()
"""
|
||||
|
||||
class Power(object):
    """
    Power variance function

    Parameters
    ----------
    power : float
        exponent used in power variance function

    Methods
    -------
    call
        Returns the power variance
    deriv
        Returns the derivative of the power variance

    Formulas
    --------
    V(mu) = numpy.fabs(mu)**power

    Notes
    -----
    Aliases for Power:
    mu = Power()
    mu_squared = Power(power=2)
    mu_cubed = Power(power=3)
    """

    def __init__(self, power=1.):
        self.power = power

    def __call__(self, mu):
        """
        Power variance function

        Parameters
        ----------
        mu : array-like
            mean parameters

        Returns
        -------
        variance : array
            numpy.fabs(mu)**self.power
        """
        return np.power(np.fabs(mu), self.power)

    def deriv(self, mu):
        """
        Derivative of the variance function v'(mu)

        Evaluated in closed form, power * |mu|**(power - 1) with the sign
        flipped where mu < 0, instead of by finite differences.

        Parameters
        ----------
        mu : array-like
            mean parameters

        Returns
        -------
        deriv : array
            derivative evaluated element-wise, same shape as `mu`
        """
        mu = np.asarray(mu, dtype=np.float64)
        der = self.power * np.fabs(mu) ** (self.power - 1)
        # |mu| is decreasing for negative means, so the slope changes sign.
        return np.where(mu < 0, -der, der)


mu = Power()
mu.__doc__ = """
Returns np.fabs(mu)

Notes
-----
This is an alias of Power()
"""
mu_squared = Power(power=2)
mu_squared.__doc__ = """
Returns np.fabs(mu)**2

Notes
-----
This is an alias of statsmodels.family.links.Power(power=2)
"""
mu_cubed = Power(power=3)
mu_cubed.__doc__ = """
Returns np.fabs(mu)**3

Notes
-----
This is an alias of statsmodels.family.links.Power(power=3)
"""
|
||||
|
||||
class Binomial(object):
    """
    Binomial variance function

    Parameters
    ----------
    n : int, optional
        The number of trials for a binomial variable.  The default is 1 for
        p in (0,1)

    Methods
    -------
    call
        Returns the binomial variance

    Formulas
    --------
    V(mu) = p * (1 - p) * n

    where p = mu / n

    Notes
    -----
    Alias for Binomial:
    binary = Binomial()

    A private method _clean trims the data by machine epsilon so that p is
    in (0,1)
    """

    def __init__(self, n=1):
        self.n = n

    def _clean(self, p):
        # Keep probabilities strictly inside the open unit interval.
        lower, upper = FLOAT_EPS, 1 - FLOAT_EPS
        return np.clip(p, lower, upper)

    def __call__(self, mu):
        """
        Binomial variance function

        Parameters
        ----------
        mu : array-like
            mean parameters

        Returns
        -------
        variance : array
            variance = mu/n * (1 - mu/n) * self.n
        """
        trials = self.n
        p = self._clean(mu / trials)
        return p * (1 - p) * trials

    # TODO: inherit from super
    def deriv(self, mu):
        """
        Derivative of the variance function v'(mu)

        Computed numerically with the complex-step routine from
        statsmodels, which is imported on first use.
        """
        from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime
        # TODO: diag is a workaround for a problem with numdiff for 1d
        jacobian = approx_fprime_cs(mu, self)
        return np.diag(jacobian)


binary = Binomial()
binary.__doc__ = """
The binomial variance function for n = 1

Notes
-----
This is an alias of Binomial(n=1)
"""
|
||||
|
||||
class NegativeBinomial(object):
    '''
    Negative binomial variance function

    Parameters
    ----------
    alpha : float
        The ancillary parameter for the negative binomial variance function.
        `alpha` is assumed to be nonstochastic.  The default is 1.

    Methods
    -------
    call
        Returns the negative binomial variance
    deriv
        Returns the derivative of the negative binomial variance

    Formulas
    --------
    V(mu) = mu + alpha*mu**2

    Notes
    -----
    Alias for NegativeBinomial:
    nbinom = NegativeBinomial()

    A private method _clean trims the data by machine epsilon so that p is
    in (0,inf)
    '''

    def __init__(self, alpha=1.):
        self.alpha = alpha

    def _clean(self, p):
        # Only a lower bound is enforced; the upper bound is infinity.
        lower, upper = FLOAT_EPS, np.inf
        return np.clip(p, lower, upper)

    def __call__(self, mu):
        """
        Negative binomial variance function

        Parameters
        ----------
        mu : array-like
            mean parameters

        Returns
        -------
        variance : array
            variance = mu + alpha*mu**2
        """
        cleaned = self._clean(mu)
        quadratic = self.alpha*cleaned**2
        return cleaned + quadratic

    def deriv(self, mu):
        """
        Derivative of the negative binomial variance function.

        Evaluated in closed form as 1 + 2*alpha*mu on the cleaned means.
        """
        cleaned = self._clean(mu)
        slope = 2 * self.alpha * cleaned
        return 1 + slope


nbinom = NegativeBinomial()
nbinom.__doc__ = """
Negative Binomial variance function.

Notes
-----
This is an alias of NegativeBinomial(alpha=1.)
"""
|
@ -0,0 +1 @@
|
||||
from base import *
|
@ -0,0 +1,4 @@
|
||||
import gwr
|
||||
import sel_bw
|
||||
import diagnostics
|
||||
import kernels
|
@ -0,0 +1,81 @@
|
||||
"""
|
||||
Diagnostics for estimated gwr models
|
||||
"""
|
||||
__author__ = "Taylor Oshan tayoshan@gmail.com"
|
||||
|
||||
import numpy as np
|
||||
from crankshaft.regression.glm.family import Gaussian, Poisson, Binomial
|
||||
|
||||
def get_AICc(gwr):
    """
    Get AICc value

    Gaussian: p61, (2.33), Fotheringham, Brunsdon and Charlton (2002)

    GWGLM: AICc=AIC+2k(k+1)/(n-k-1), Nakaya et al. (2005): p2704, (36)

    Parameters
    ----------
    gwr : fitted model results
        object exposing `n`, `tr_S` and `family`; `llf` is read for the
        Gaussian case and the model is passed on to `get_AIC` otherwise.

    Returns
    -------
    aicc : float
        corrected AIC

    Raises
    ------
    TypeError
        if `gwr.family` is not a Gaussian, Poisson or Binomial family
    """
    n = gwr.n
    k = gwr.tr_S
    if isinstance(gwr.family, Gaussian):
        aicc = -2.0*gwr.llf + 2.0*n*(k + 1.0)/(n-k-2.0)
    elif isinstance(gwr.family, (Poisson, Binomial)):
        aicc = get_AIC(gwr) + 2.0 * k * (k+1.0) / (n - k - 1.0)
    else:
        # Previously this fell through to an UnboundLocalError on `aicc`.
        raise TypeError('Unsupported family for AICc: %r' % (gwr.family,))
    return aicc
|
||||
|
||||
def get_AIC(gwr):
    """
    Get AIC value

    Gaussian: p96, (4.22), Fotheringham, Brunsdon and Charlton (2002)

    GWGLM: AIC(G)=D(G) + 2K(G), where D and K denote the deviance and the
    effective number of parameters in the model with bandwidth G,
    respectively.

    Parameters
    ----------
    gwr : fitted model results
        object exposing `tr_S`, `y`, `mu` and `family`; `llf` is read for
        the Gaussian case and `family.resid_dev` is called otherwise.

    Returns
    -------
    aic : float
        Akaike information criterion

    Raises
    ------
    TypeError
        if `gwr.family` is not a Gaussian, Poisson or Binomial family
    """
    k = gwr.tr_S
    y = gwr.y
    mu = gwr.mu
    if isinstance(gwr.family, Gaussian):
        aic = -2.0 * gwr.llf + 2.0 * (k+1)
    elif isinstance(gwr.family, (Poisson, Binomial)):
        # The deviance is the sum of squared deviance residuals.
        aic = np.sum(gwr.family.resid_dev(y, mu)**2) + 2.0 * k
    else:
        # Previously this fell through to an UnboundLocalError on `aic`.
        raise TypeError('Unsupported family for AIC: %r' % (gwr.family,))
    return aic
|
||||
|
||||
def get_BIC(gwr):
    """
    Get BIC value

    Gaussian: p61 (2.34), Fotheringham, Brunsdon and Charlton (2002)
    BIC = -2log(L)+klog(n)

    GWGLM: BIC = dev + tr_S * log(n)

    Parameters
    ----------
    gwr : fitted model results
        object exposing `n`, `tr_S`, `y`, `mu` and `family`; `llf` is read
        for the Gaussian case and `family.resid_dev` is called otherwise.

    Returns
    -------
    bic : float
        Bayesian information criterion

    Raises
    ------
    TypeError
        if `gwr.family` is not a Gaussian, Poisson or Binomial family
    """
    n = gwr.n  # (scalar) number of observations
    k = gwr.tr_S
    y = gwr.y
    mu = gwr.mu
    if isinstance(gwr.family, Gaussian):
        bic = -2.0 * gwr.llf + (k+1) * np.log(n)
    elif isinstance(gwr.family, (Poisson, Binomial)):
        # The deviance is the sum of squared deviance residuals.
        bic = np.sum(gwr.family.resid_dev(y, mu)**2) + k * np.log(n)
    else:
        # Previously this fell through to an UnboundLocalError on `bic`.
        raise TypeError('Unsupported family for BIC: %r' % (gwr.family,))
    return bic
|
||||
|
||||
def get_CV(gwr):
    """
    Get CV value

    Gaussian only

    Methods: p60, (2.31) or p212 (9.4)
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying
    relationships.
    Modification: sum of residual squared is divided by n according to GWR4
    results

    Parameters
    ----------
    gwr : fitted model results
        object exposing `resid_response`, `influ` and `n`

    Returns
    -------
    cv : float
        mean of the squared residuals after each is divided by
        (1 - influence)
    """
    residuals = gwr.resid_response.reshape((-1, 1))
    scaled = residuals / (1.0 - gwr.influ)
    return np.sum(scaled**2) / gwr.n
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,120 @@
|
||||
# GWR kernel function specifications
|
||||
|
||||
__author__ = "Taylor Oshan tayoshan@gmail.com"
|
||||
|
||||
#from pysal.weights.Distance import Kernel
|
||||
import scipy
|
||||
from scipy.spatial.kdtree import KDTree
|
||||
import numpy as np
|
||||
|
||||
#adaptive specifications should be parameterized with nn-1 to match original gwr
|
||||
#implementation. That is, pysal counts self neighbors with knn automatically.
|
||||
|
||||
def fix_gauss(coords, bw, points=None):
    """
    Untruncated 'gwr_gaussian' kernel weights for a fixed bandwidth `bw`.

    `points`, when given, are the locations at which the kernel is
    evaluated against `coords`.
    """
    return _Kernel(coords, function='gwr_gaussian', bandwidth=bw,
                   truncate=False, points=points).kernel
|
||||
|
||||
def adapt_gauss(coords, nn, points=None):
    """
    Untruncated 'gwr_gaussian' kernel weights for an adaptive bandwidth.

    `nn - 1` is handed to the kernel as `k`, which in turn adds one back.
    """
    return _Kernel(coords, fixed=False, k=nn-1, function='gwr_gaussian',
                   truncate=False, points=points).kernel
|
||||
|
||||
def fix_bisquare(coords, bw, points=None):
    """
    Bisquare kernel weights for a fixed bandwidth `bw`.

    The kernel's default truncation applies, so weights at or beyond the
    bandwidth are zero.
    """
    return _Kernel(coords, function='bisquare', bandwidth=bw,
                   points=points).kernel
|
||||
|
||||
def adapt_bisquare(coords, nn, points=None):
    """
    Bisquare kernel weights for an adaptive bandwidth.

    `nn - 1` is handed to the kernel as `k`, which in turn adds one back;
    the kernel's default truncation applies.
    """
    return _Kernel(coords, fixed=False, k=nn-1, function='bisquare',
                   points=points).kernel
|
||||
|
||||
def fix_exp(coords, bw, points=None):
    """
    Untruncated exponential kernel weights for a fixed bandwidth `bw`.

    `points`, when given, are the locations at which the kernel is
    evaluated against `coords`.
    """
    return _Kernel(coords, function='exponential', bandwidth=bw,
                   truncate=False, points=points).kernel
|
||||
|
||||
def adapt_exp(coords, nn, points=None):
    """
    Untruncated exponential kernel weights for an adaptive bandwidth.

    `nn - 1` is handed to the kernel as `k`, which in turn adds one back.
    """
    return _Kernel(coords, fixed=False, k=nn-1, function='exponential',
                   truncate=False, points=points).kernel
|
||||
|
||||
from scipy.spatial.distance import cdist
|
||||
|
||||
class _Kernel(object):
|
||||
"""
|
||||
|
||||
"""
|
||||
def __init__(self, data, bandwidth=None, fixed=True, k=None,
|
||||
function='triangular', eps=1.0000001, ids=None, truncate=True,
|
||||
points=None): #Added truncate flag
|
||||
if issubclass(type(data), scipy.spatial.KDTree):
|
||||
self.data = data.data
|
||||
data = self.data
|
||||
else:
|
||||
self.data = data
|
||||
if k is not None:
|
||||
self.k = int(k) + 1
|
||||
else:
|
||||
self.k = k
|
||||
if points is None:
|
||||
self.dmat = cdist(self.data, self.data)
|
||||
else:
|
||||
self.points = points
|
||||
self.dmat = cdist(self.points, self.data)
|
||||
self.function = function.lower()
|
||||
self.fixed = fixed
|
||||
self.eps = eps
|
||||
self.trunc = truncate
|
||||
if bandwidth:
|
||||
try:
|
||||
bandwidth = np.array(bandwidth)
|
||||
bandwidth.shape = (len(bandwidth), 1)
|
||||
except:
|
||||
bandwidth = np.ones((len(data), 1), 'float') * bandwidth
|
||||
self.bandwidth = bandwidth
|
||||
else:
|
||||
self._set_bw()
|
||||
self.kernel = self._kernel_funcs(self.dmat/self.bandwidth)
|
||||
|
||||
if self.trunc:
|
||||
mask = np.repeat(self.bandwidth, len(self.data), axis=1)
|
||||
self.kernel[(self.dmat >= mask)] = 0
|
||||
|
||||
def _set_bw(self):
|
||||
if self.k is not None:
|
||||
dmat = np.sort(self.dmat)[:,:self.k]
|
||||
else:
|
||||
dmat = self.dmat
|
||||
if self.fixed:
|
||||
# use max knn distance as bandwidth
|
||||
bandwidth = dmat.max() * self.eps
|
||||
n = len(self.data)
|
||||
self.bandwidth = np.ones((n, 1), 'float') * bandwidth
|
||||
else:
|
||||
# use local max knn distance
|
||||
self.bandwidth = dmat.max(axis=1) * self.eps
|
||||
self.bandwidth.shape = (self.bandwidth.size, 1)
|
||||
|
||||
|
||||
def _kernel_funcs(self, zs):
|
||||
# functions follow Anselin and Rey (2010) table 5.4
|
||||
if self.function == 'triangular':
|
||||
return 1 - zs
|
||||
elif self.function == 'uniform':
|
||||
return np.ones(zi.shape) * 0.5
|
||||
elif self.function == 'quadratic':
|
||||
return (3. / 4) * (1 - zs ** 2)
|
||||
elif self.function == 'quartic':
|
||||
return (15. / 16) * (1 - zs ** 2) ** 2
|
||||
elif self.function == 'gaussian':
|
||||
c = np.pi * 2
|
||||
c = c ** (-0.5)
|
||||
return c * np.exp(-(zs ** 2) / 2.)
|
||||
elif self.function == 'gwr_gaussian':
|
||||
return np.exp(-0.5*(zs)**2)
|
||||
elif self.function == 'bisquare':
|
||||
return (1-(zs)**2)**2
|
||||
elif self.function =='exponential':
|
||||
return np.exp(-zs)
|
||||
else:
|
||||
print('Unsupported kernel function', self.function)
|
@ -0,0 +1,208 @@
|
||||
#Bandwidth optimization methods
|
||||
|
||||
__author__ = "Taylor Oshan"
|
||||
|
||||
import numpy as np
|
||||
|
||||
def golden_section(a, c, delta, function, tol, max_iter, int_score=False):
    """
    Golden section search routine
    Method: p212, 9.6.4
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying
    relationships.

    Parameters
    ----------
    a : float
        one end of the initial search section; the first interior point
        is placed at a + delta*|c - a|
    c : float
        other end of the initial search section; the second interior
        point is placed at c - delta*|c - a|
    delta : float
        constant used to determine width of search sections
    function : function
        objective function to be evaluated at different section values
    tol : float
        the search stops once the absolute difference between the scores
        of the two interior points is no larger than this
    max_iter : integer
        maximum iterations if no convergence to tolerance
    int_score : boolean
        True to round the interior points to whole numbers before they
        are evaluated, False to evaluate them as floats

    Returns
    -------
    opt_val : float
        optimal value, rounded to two decimals
    opt_score : float
        objective value at the optimal value
    output : list of tuples
        searching history, one (value, score) pair per iteration
    """
    b = a + delta * np.abs(c-a)
    d = c - delta * np.abs(c-a)
    diff = 1.0e9
    iters = 0
    output = []
    while np.abs(diff) > tol and iters < max_iter:
        iters += 1
        if int_score:
            b = np.round(b)
            d = np.round(d)

        # Only the interior points decide the next section, so the end
        # points are not evaluated.
        score_b = function(b)
        score_d = function(d)

        if score_b <= score_d:
            opt_val = b
            opt_score = score_b
            c = d
            d = b
            b = a + delta * np.abs(c-a)
        else:
            opt_val = d
            opt_score = score_d
            a = b
            b = d
            d = c - delta * np.abs(c-a)

        output.append((opt_val, opt_score))
        diff = score_b - score_d
    return np.round(opt_val, 2), opt_score, output
|
||||
|
||||
def equal_interval(l_bound, u_bound, interval, function, int_score=False):
    """
    Interval search, using interval as stepsize

    Parameters
    ----------
    l_bound : float
        initial min search section value
    u_bound : float
        initial max search section value
    interval : float
        step between successive candidate values; must be positive
        whenever there is room for a candidate between the bounds
    function : function
        objective function to be evaluated at different section values
    int_score : boolean
        True to round the bounds and the first candidate to whole
        numbers, False to use them as given

    Returns
    -------
    opt_val : float
        optimal value
    opt_score : float
        objective value at the optimal value
    output : list of tuples
        searching history: the two bounds first, then every candidate

    Raises
    ------
    ValueError
        if `interval` is not positive although candidates remain between
        the bounds (the search would never terminate)
    """
    a = l_bound
    c = u_bound
    b = a + interval
    if int_score:
        a = np.round(a, 0)
        c = np.round(c, 0)
        b = np.round(b, 0)

    if b < c and interval <= 0:
        raise ValueError('interval must be positive, got %r' % (interval,))

    output = []

    score_a = function(a)
    score_c = function(c)

    output.append((a, score_a))
    output.append((c, score_c))

    # Ties between the bounds are resolved in favour of the upper bound.
    if score_a < score_c:
        opt_val = a
        opt_score = score_a
    else:
        opt_val = c
        opt_score = score_c

    while b < c:
        score_b = function(b)

        output.append((b, score_b))

        if score_b < opt_score:
            opt_val = b
            opt_score = score_b
        b = b + interval

    return opt_val, opt_score, output
|
||||
|
||||
|
||||
def flexible_bw(init, y, X, n, k, family, tol, max_iter, rss_score,
                gwr_func, bw_func, sel_func):
    """
    Backfitting search for covariate-specific (flexible) bandwidths

    Parameters
    ----------
    init : boolean
        True to start from a single-bandwidth model obtained through
        `bw_func`, `sel_func` and `gwr_func`; False to start from a
        global GLM fit
    y : array
        n*1, dependent variable
    X : array
        n*k, design matrix
    n : integer
        number of observations
    k : integer
        number of covariates (columns of X)
    family : family object
        passed to the global GLM when `init` is False
    tol : float
        the backfitting stops once the iteration score is no larger than
        this
    max_iter : integer
        maximum number of backfitting iterations
    rss_score : boolean
        True to score iterations by the relative change in the residual
        sum of squares, False to score by the change in the fitted terms
    gwr_func : function
        (y, X, bw) -> fitted model exposing `resid_response` and `params`
    bw_func : function
        (y, X) -> bandwidth selector exposing `bw` after a search
    sel_func : function
        selector -> selected bandwidth

    Returns
    -------
    opt_bws : list
        bandwidths of the last iteration, one per covariate
    BWs : array
        bandwidths of every iteration
    VALs : array
        second element of each selector's `bw` for every iteration
    scores : array
        score of every iteration
    f_XB : array
        covariate-specific fitted terms at the start of the last iteration
    f_err : array
        residuals at the start of the last iteration
    """
    if init:
        bw = sel_func(bw_func(y, X))
        print(bw)
        optim_model = gwr_func(y, X, bw)
        err = optim_model.resid_response.reshape((-1, 1))
        est = optim_model.params
    else:
        # NOTE(review): `GLM` is not among the imports visible for this
        # module; confirm it is in scope before relying on init=False.
        # `family` is the function argument; there is no `self` here.
        model = GLM(y, X, family=family, constant=False).fit()
        err = model.resid_response.reshape((-1, 1))
        est = np.repeat(model.params.T, n, axis=0)

    XB = np.multiply(est, X)
    if rss_score:
        rss = np.sum((err)**2)
    iters = 0
    scores = []
    delta = 1e6
    BWs = []
    VALs = []

    while delta > tol and iters < max_iter:
        iters += 1
        new_XB = np.zeros_like(X)
        bws = []
        vals = []
        ests = np.zeros_like(X)
        f_XB = XB.copy()
        f_err = err.copy()
        for i in range(k):
            # Partial response for covariate i: its current fitted term
            # plus the current residuals.
            temp_y = XB[:, i].reshape((-1, 1))
            temp_y = temp_y + err
            temp_X = X[:, i].reshape((-1, 1))
            bw_class = bw_func(temp_y, temp_X)
            bw = sel_func(bw_class)
            optim_model = gwr_func(temp_y, temp_X, bw)
            err = optim_model.resid_response.reshape((-1, 1))
            est = optim_model.params.reshape((-1,))

            new_XB[:, i] = np.multiply(est, temp_X.reshape((-1,)))
            bws.append(bw)
            ests[:, i] = est
            vals.append(bw_class.bw[1])

        predy = np.sum(np.multiply(ests, X), axis=1).reshape((-1, 1))
        num = np.sum((new_XB - XB)**2)/n
        den = np.sum(np.sum(new_XB, axis=1)**2)
        score = (num/den)**0.5
        XB = new_XB

        if rss_score:
            new_rss = np.sum((y - predy)**2)
            score = np.abs((new_rss - rss)/new_rss)
            rss = new_rss
        print(score)
        scores.append(score)
        delta = score
        BWs.append(bws)
        VALs.append(vals)

    opt_bws = BWs[-1]
    return (opt_bws, np.array(BWs), np.array(VALs), np.array(scores),
            f_XB, f_err)
|
@ -0,0 +1,286 @@
|
||||
# GWR Bandwidth selection class
|
||||
|
||||
#Thinking about removing the search method and just having optimization begin in
|
||||
#class __init__
|
||||
|
||||
#x_glob and offset parameters don't yet do anything; the former is for semiparametric
|
||||
#GWR and the latter is for the offset variable of the Poisson model
|
||||
|
||||
__author__ = "Taylor Oshan Tayoshan@gmail.com"
|
||||
|
||||
from kernels import *
|
||||
from search import golden_section, equal_interval, flexible_bw
|
||||
from gwr import GWR
|
||||
from crankshaft.regression.glm.family import Gaussian, Poisson, Binomial
|
||||
import pysal.spreg.user_output as USER
|
||||
from diagnostics import get_AICc, get_AIC, get_BIC, get_CV
|
||||
from scipy.spatial.distance import pdist, squareform
|
||||
from pysal.common import KDTree
|
||||
import numpy as np
|
||||
|
||||
# Kernel constructors keyed by integer code: odd codes are the fixed
# bandwidth variants, even codes the adaptive ones.
kernels = {
    1: fix_gauss,
    2: adapt_gauss,
    3: fix_bisquare,
    4: adapt_bisquare,
    5: fix_exp,
    6: adapt_exp,
}
# Diagnostic functions keyed by the name of the selection criterion.
getDiag = {
    'AICc': get_AICc,
    'AIC': get_AIC,
    'BIC': get_BIC,
    'CV': get_CV,
}
|
||||
|
||||
class Sel_BW(object):
    """
    Select bandwidth for kernel

    Methods: p211 - p213, bandwidth selection
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying
    relationships.

    Parameters
    ----------
    coords : list of tuples
        (x,y) of points used in bandwidth selection
    y : array
        n*1, dependent variable.
    x_loc : array
        n*k2, local independent variable, including constant.
    x_glob : array
        n*k1, fixed independent variable.
    family : family object
        GWR model type: Gaussian, Poisson or Binomial family instance
    offset : array
        n*1, offset variable for Poisson model
    kernel : string
        kernel function: 'gaussian', 'bisquare', 'exponential'
    fixed : boolean
        True for fixed bandwidth and False for adaptive (NN)
    fb : boolean
        True for flexible (multiple covariate-specific) bandwidths,
        False for a traditional (same for all covariates) bandwidth;
        default is False.
    constant : boolean
        True to include intercept (default) in model and False to exclude
        intercept.

    Attributes
    ----------
    y : array
        n*1, dependent variable.
    x_glob : array
        n*k1, fixed independent variable.
    x_loc : array
        n*k2, local independent variable, including constant.
    coords : list of tuples
        (x,y) of points used in bandwidth selection
    family : family object
        GWR model type
    kernel : string
        type of kernel used
    fixed : boolean
        whether the bandwidth is fixed or adaptive
    criterion : string
        bw selection criterion: 'AICc', 'AIC', 'BIC', 'CV'
    search : string
        bw search method: 'golden_section', 'interval'
    bw_min : float
        min value used in bandwidth search
    bw_max : float
        max value used in bandwidth search
    interval : float
        interval increment used in interval search
    tol : float
        tolerance used to determine convergence
    max_iter : integer
        max iterations if no convergence to tol
    int_score : boolean
        True when the bandwidth is searched over whole numbers (adaptive
        kernels), False otherwise
    bw : tuple
        raw result of the last search
    fb : boolean
        True for flexible bandwidths, False for a traditional bandwidth
    constant : boolean
        True to include intercept (default) in model and False to exclude
        intercept.
    """
    def __init__(self, coords, y, x_loc, x_glob=None, family=Gaussian(),
                 offset=None, kernel='bisquare', fixed=False, fb=False,
                 constant=True):
        self.coords = coords
        self.y = y
        self.x_loc = x_loc
        if x_glob is not None:
            self.x_glob = x_glob
        else:
            self.x_glob = []
        self.family = family
        self.fixed = fixed
        self.kernel = kernel
        if offset is None:
            self.offset = np.ones((len(y), 1))
        else:
            self.offset = offset * 1.0
        self.fb = fb
        self.constant = constant

    def search(self, search='golden_section', criterion='AICc', bw_min=0.0,
               bw_max=0.0, interval=0.0, tol=1.0e-6, max_iter=200,
               init_fb=True, tol_fb=1.0e-5, rss_score=False,
               max_iter_fb=200):
        """
        Run the bandwidth search

        Parameters
        ----------
        search : string
            bw search method: 'golden_section', 'interval'
        criterion : string
            bw selection criterion: 'AICc', 'AIC', 'BIC', 'CV'
        bw_min : float
            min value used in bandwidth search
        bw_max : float
            max value used in bandwidth search
        interval : float
            interval increment used in interval search
        tol : float
            tolerance used to determine convergence
        max_iter : integer
            max iterations if no convergence to tol
        init_fb : boolean
            True to initialize the flexible bandwidth search with
            estimates from a traditional GWR and False to initialize it
            with global regression estimates
        tol_fb : float
            convergence tolerance for the flexible bandwidth backfitting
            algorithm; a larger tolerance may stop the algorithm faster
            though it may result in a less optimal model
        rss_score : boolean
            True to use the residual sum of squares to evaluate each
            iteration of the flexible bandwidth backfitting routine and
            False to use a smooth function; default is False
        max_iter_fb : integer
            max iterations if no convergence to tol for the flexible
            bandwidth backfitting algorithm

        Returns
        -------
        bw : scalar or array
            optimal bandwidth value or values; returns scalar for
            fb=False and array for fb=True; ordering of bandwidths
            matches the ordering of the covariates (columns) of the
            design matrix, X

        Raises
        ------
        TypeError
            if the kernel name or the search method is not supported
        """
        # NOTE(review): this instance attribute shadows the `search`
        # method, so the method cannot be called twice on one instance.
        self.search = search
        self.criterion = criterion
        self.bw_min = bw_min
        self.bw_max = bw_max
        self.interval = interval
        self.tol = tol
        self.max_iter = max_iter
        self.init_fb = init_fb
        self.tol_fb = tol_fb
        self.rss_score = rss_score
        self.max_iter_fb = max_iter_fb

        if self.kernel not in ('gaussian', 'bisquare', 'exponential'):
            raise TypeError('Unsupported kernel function ', self.kernel)

        # Adaptive kernels are searched over whole nearest-neighbour
        # counts, fixed kernels over continuous distances.
        self.int_score = not self.fixed

        if self.fb:
            self._fbw()
            print(self.bw[1])
            self.XB = self.bw[4]
            self.err = self.bw[5]
        else:
            self._bw()

        return self.bw[0]

    def _bw(self):
        """Search one bandwidth shared by all covariates."""
        gwr_func = lambda bw: getDiag[self.criterion](
                GWR(self.coords, self.y, self.x_loc, bw, family=self.family,
                    kernel=self.kernel, fixed=self.fixed,
                    constant=self.constant).fit())
        if self.search == 'golden_section':
            a, c = self._init_section(self.x_glob, self.x_loc, self.coords,
                                      self.constant)
            delta = 0.38197  # 1 - (np.sqrt(5.0)-1.0)/2.0
            self.bw = golden_section(a, c, delta, gwr_func, self.tol,
                                     self.max_iter, self.int_score)
        elif self.search == 'interval':
            self.bw = equal_interval(self.bw_min, self.bw_max, self.interval,
                                     gwr_func, self.int_score)
        else:
            raise TypeError('Unsupported computational search method ',
                            self.search)

    def _fbw(self):
        """Search one bandwidth per covariate by backfitting."""
        y = self.y
        if self.constant:
            X = USER.check_constant(self.x_loc)
        else:
            X = self.x_loc
        n, k = X.shape
        family = self.family
        offset = self.offset
        kernel = self.kernel
        fixed = self.fixed
        coords = self.coords
        search = self.search
        criterion = self.criterion
        bw_min = self.bw_min
        bw_max = self.bw_max
        interval = self.interval
        tol = self.tol
        max_iter = self.max_iter
        # The helpers below never add an intercept themselves: X already
        # holds the constant column when one was requested.
        gwr_func = lambda y, X, bw: GWR(
                coords, y, X, bw, family=family, kernel=kernel, fixed=fixed,
                offset=offset, constant=False).fit()
        bw_func = lambda y, X: Sel_BW(
                coords, y, X, x_glob=[], family=family, kernel=kernel,
                fixed=fixed, offset=offset, constant=False)
        sel_func = lambda bw_func: bw_func.search(
                search=search, criterion=criterion, bw_min=bw_min,
                bw_max=bw_max, interval=interval, tol=tol, max_iter=max_iter)
        self.bw = flexible_bw(self.init_fb, y, X, n, k, family, self.tol_fb,
                              self.max_iter_fb, self.rss_score, gwr_func,
                              bw_func, sel_func)

    def _init_section(self, x_glob, x_loc, coords, constant):
        """Return the initial (a, c) section for the golden section search."""
        if len(x_glob) > 0:
            n_glob = x_glob.shape[1]
        else:
            n_glob = 0
        if len(x_loc) > 0:
            n_loc = x_loc.shape[1]
        else:
            n_loc = 0
        if constant:
            n_vars = n_glob + n_loc + 1
        else:
            n_vars = n_glob + n_loc
        n = np.array(coords).shape[0]

        if self.int_score:
            # Whole-number search: between a minimum neighbour count and
            # all observations.
            a = 40 + 2 * n_vars
            c = n
        else:
            # Distance search: half the smallest nn-th neighbour distance
            # up to half the largest pairwise distance.
            nn = 40 + 2 * n_vars
            sq_dists = squareform(pdist(coords))
            sort_dists = np.sort(sq_dists, axis=1)
            min_dists = sort_dists[:, nn-1]
            max_dists = sort_dists[:, -1]
            a = np.min(min_dists)/2.0
            c = np.max(max_dists)/2.0

        if a < self.bw_min:
            a = self.bw_min
        # bw_max == 0 means "no upper limit requested".
        if c > self.bw_max and self.bw_max > 0:
            c = self.bw_max
        return a, c
|
@ -0,0 +1,853 @@
|
||||
"""
|
||||
GWR is tested against results from GWR4
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import pickle as pk
|
||||
from crankshaft.regression.gwr.gwr import GWR, FBGWR
|
||||
from crankshaft.regression.gwr.sel_bw import Sel_BW
|
||||
from crankshaft.regression.gwr.diagnostics import get_AICc, get_AIC, get_BIC, get_CV
|
||||
from crankshaft.regression.glm.family import Gaussian, Poisson, Binomial
|
||||
import numpy as np
|
||||
import pysal
|
||||
|
||||
class TestGWRGaussian(unittest.TestCase):
    """Compare Gaussian GWR fits on the Georgia sample data with GWR4 output.

    The four ``test_<kernel>_<bandwidth>`` cases share one set of tolerances,
    so the column-by-column comparison lives in ``_assert_matches_gwr4``.
    """

    # Covariate suffix of the reference columns, in the column order of
    # ``rslt.params`` / ``rslt.bse`` / ``rslt.tvalues``, paired with the
    # relative tolerance used for that covariate's est_/se_/t_ columns.
    _COEFFICIENT_RTOL = (
        ('Intercept', 1e-04),
        ('PctRural', 1e-04),
        ('PctPov', 1e-04),
        ('PctBlack', 1e-02),
    )

    @staticmethod
    def _load_pickle(name):
        """Unpickle the pysal example file ``name`` and close its handle."""
        # Binary mode is what pickle expects; the context manager guarantees
        # the file is closed even if unpickling raises.
        with open(pysal.examples.get_path(name), 'rb') as handle:
            return pk.load(handle)

    def setUp(self):
        """Load the Georgia data set, the GWR4 tables and the FBGWR pickles."""
        data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
        # Materialise the pairs: on Python 3 zip() is a one-shot iterator and
        # the coordinates are consumed more than once (model + np.array()).
        self.coords = list(zip(data.by_col('X'), data.by_col('Y')))
        self.y = np.array(data.by_col('PctBach')).reshape((-1, 1))
        rural = np.array(data.by_col('PctRural')).reshape((-1, 1))
        pov = np.array(data.by_col('PctPov')).reshape((-1, 1))
        black = np.array(data.by_col('PctBlack')).reshape((-1, 1))
        self.X = np.hstack([rural, pov, black])
        self.BS_F = pysal.open(
            pysal.examples.get_path('georgia_BS_F_listwise.csv'))
        self.BS_NN = pysal.open(
            pysal.examples.get_path('georgia_BS_NN_listwise.csv'))
        self.GS_F = pysal.open(
            pysal.examples.get_path('georgia_GS_F_listwise.csv'))
        self.GS_NN = pysal.open(
            pysal.examples.get_path('georgia_GS_NN_listwise.csv'))
        self.FB = self._load_pickle('FB.p')
        self.XB = self._load_pickle('XB.p')
        self.err = self._load_pickle('err.p')

    def _assert_matches_gwr4(self, table, rslt, aicc, aic, bic, cv):
        """Assert that ``rslt`` reproduces one GWR4 listwise ``table``.

        table -- opened reference table exposing ``by_col(name)``
        rslt -- object returned by ``GWR(...).fit()``
        aicc, aic, bic -- expected floor() of the respective criterion
        cv -- expected CV score rounded to two decimals
        """
        self.assertAlmostEqual(np.floor(get_AICc(rslt)), aicc)
        self.assertAlmostEqual(np.floor(get_AIC(rslt)), aic)
        self.assertAlmostEqual(np.floor(get_BIC(rslt)), bic)
        self.assertAlmostEqual(np.around(get_CV(rslt), 2), cv)

        # Reference values stay the first argument, exactly as before, so the
        # relative tolerance is still scaled by the fitted values.
        for col, (name, rtol) in enumerate(self._COEFFICIENT_RTOL):
            np.testing.assert_allclose(table.by_col(' est_' + name),
                                       rslt.params[:, col], rtol=rtol)
            np.testing.assert_allclose(table.by_col(' se_' + name),
                                       rslt.bse[:, col], rtol=rtol)
            np.testing.assert_allclose(table.by_col(' t_' + name),
                                       rslt.tvalues[:, col], rtol=rtol)

        np.testing.assert_allclose(table.by_col(' yhat'), rslt.mu,
                                   rtol=1e-05)
        np.testing.assert_allclose(np.array(table.by_col(' residual')),
                                   rslt.resid_response, rtol=1e-04)

        # NOTE(review): rtol=1e-00 on CooksD accepts errors as large as the
        # fitted value itself; kept as-is, but it is barely a check.
        column_checks = (
            (' std_residual', rslt.std_res, 1e-04),
            (' localR2', rslt.localR2, 1e-05),
            (' influence', rslt.influ, 1e-04),
            (' CooksD', rslt.cooksD, 1e-00),
        )
        for column, fitted, rtol in column_checks:
            expected = np.array(table.by_col(column)).reshape((-1, 1))
            np.testing.assert_allclose(expected, fitted, rtol=rtol)

    def test_BS_F(self):
        """Default kernel, fixed=True, against georgia_BS_F_listwise.csv."""
        model = GWR(self.coords, self.y, self.X, bw=209267.689, fixed=True)
        self._assert_matches_gwr4(self.BS_F, model.fit(),
                                  894.0, 890.0, 944.0, 18.25)

    def test_BS_NN(self):
        """Default kernel, fixed=False, against georgia_BS_NN_listwise.csv."""
        model = GWR(self.coords, self.y, self.X, bw=90.000, fixed=False)
        self._assert_matches_gwr4(self.BS_NN, model.fit(),
                                  896.0, 892.0, 941.0, 19.19)

    def test_GS_F(self):
        """Gaussian kernel, fixed=True, against georgia_GS_F_listwise.csv."""
        model = GWR(self.coords, self.y, self.X, bw=87308.298,
                    kernel='gaussian', fixed=True)
        self._assert_matches_gwr4(self.GS_F, model.fit(),
                                  895.0, 890.0, 943.0, 18.21)

    def test_GS_NN(self):
        """Gaussian kernel, fixed=False, against georgia_GS_NN_listwise.csv."""
        model = GWR(self.coords, self.y, self.X, bw=49.000,
                    kernel='gaussian', fixed=False)
        self._assert_matches_gwr4(self.GS_NN, model.fit(),
                                  896, 894.0, 922.0, 17.91)

    def test_FBGWR(self):
        """FBGWR fit must reproduce the pickled reference results in FB.p."""
        model = FBGWR(self.coords, self.y, self.X, [157.0, 65.0, 52.0],
                      XB=self.XB, err=self.err, constant=False)
        rslt = model.fit()

        np.testing.assert_allclose(rslt.predy, self.FB['predy'], atol=1e-07)
        np.testing.assert_allclose(rslt.params, self.FB['params'],
                                   atol=1e-07)
        np.testing.assert_allclose(rslt.resid_response, self.FB['u'],
                                   atol=1e-05)
        np.testing.assert_almost_equal(rslt.resid_ss, 6339.3497144025841)

    def test_Prediction(self):
        """Predict at the last ten observations and compare stored values."""
        coords = np.array(self.coords)
        test = np.arange(len(self.y))[-10:]
        X_test = self.X[test]
        coords_test = list(coords[test])

        model = GWR(self.coords, self.y, self.X, 93, family=Gaussian(),
                    fixed=False, kernel='bisquare')
        results = model.predict(coords_test, X_test)

        params = np.array([22.77198, -0.10254, -0.215093, -0.01405,
                           19.10531, -0.094177, -0.232529, 0.071913,
                           19.743421, -0.080447, -0.30893, 0.083206,
                           17.505759, -0.078919, -0.187955, 0.051719,
                           27.747402, -0.165335, -0.208553, 0.004067,
                           26.210627, -0.138398, -0.360514, 0.072199,
                           18.034833, -0.077047, -0.260556, 0.084319,
                           28.452802, -0.163408, -0.14097, -0.063076,
                           22.353095, -0.103046, -0.226654, 0.002992,
                           18.220508, -0.074034, -0.309812,
                           0.108636]).reshape((10, 4))
        np.testing.assert_allclose(params, results.params, rtol=1e-03)

        bse = np.array([2.080166, 0.021462, 0.102954, 0.049627,
                        2.536355, 0.022111, 0.123857, 0.051917,
                        1.967813, 0.019716, 0.102562, 0.054918,
                        2.463219, 0.021745, 0.110297, 0.044189,
                        1.556056, 0.019513, 0.12764, 0.040315,
                        1.664108, 0.020114, 0.131208, 0.041613,
                        2.5835, 0.021481, 0.113158, 0.047243,
                        1.709483, 0.019752, 0.116944, 0.043636,
                        1.958233, 0.020947, 0.09974, 0.049821,
                        2.276849, 0.020122, 0.107867,
                        0.047842]).reshape((10, 4))
        np.testing.assert_allclose(bse, results.bse, rtol=1e-03)

        tvalues = np.array([10.947193, -4.777659, -2.089223, -0.283103,
                            7.532584, -4.259179, -1.877395, 1.385161,
                            10.033179, -4.080362, -3.012133, 1.515096,
                            7.106862, -3.629311, -1.704079, 1.17042,
                            17.831878, -8.473156, -1.633924, 0.100891,
                            15.750552, -6.880725, -2.74765, 1.734978,
                            6.980774, -3.586757, -2.302575, 1.784818,
                            16.644095, -8.273001, -1.205451, -1.445501,
                            11.414933, -4.919384, -2.272458, 0.060064,
                            8.00251, -3.679274, -2.872176,
                            2.270738]).reshape((10, 4))
        np.testing.assert_allclose(tvalues, results.tvalues, rtol=1e-03)

        localR2 = np.array([[0.53068693],
                            [0.59582647],
                            [0.59700925],
                            [0.45769954],
                            [0.54634509],
                            [0.5494828],
                            [0.55159604],
                            [0.55634237],
                            [0.53903842],
                            [0.55884954]])
        np.testing.assert_allclose(localR2, results.localR2, rtol=1e-05)
|
||||
|
||||
class TestGWRPoisson(unittest.TestCase):
    """Compare Poisson GWR fits on the Tokyo mortality data with GWR4 output.

    Every test fits one model and checks it against one reference table; the
    per-column tolerances differ between tests and are passed explicitly to
    ``_assert_matches_gwr4``.
    """

    # Covariate suffix of the reference columns, in the column order of
    # ``rslt.params`` / ``rslt.bse`` / ``rslt.tvalues``.
    _COVARIATES = ('Intercept', 'OCC_TEC', 'OWNH', 'POP65', 'UNEMP')

    def setUp(self):
        """Load the Tokyo mortality data set and the GWR4 reference tables."""
        data = pysal.open(pysal.examples.get_path('Tokyomortality.csv'),
                          mode='Ur')
        # Materialise the pairs: on Python 3 zip() is a one-shot iterator,
        # which would leave nothing for any later pass over the coordinates.
        self.coords = list(zip(data.by_col('X_CENTROID'),
                               data.by_col('Y_CENTROID')))
        self.y = np.array(data.by_col('db2564')).reshape((-1, 1))
        self.off = np.array(data.by_col('eb2564')).reshape((-1, 1))
        OCC = np.array(data.by_col('OCC_TEC')).reshape((-1, 1))
        OWN = np.array(data.by_col('OWNH')).reshape((-1, 1))
        POP = np.array(data.by_col('POP65')).reshape((-1, 1))
        UNEMP = np.array(data.by_col('UNEMP')).reshape((-1, 1))
        self.X = np.hstack([OCC, OWN, POP, UNEMP])
        self.BS_F = pysal.open(
            pysal.examples.get_path('tokyo_BS_F_listwise.csv'))
        self.BS_NN = pysal.open(
            pysal.examples.get_path('tokyo_BS_NN_listwise.csv'))
        self.GS_F = pysal.open(
            pysal.examples.get_path('tokyo_GS_F_listwise.csv'))
        self.GS_NN = pysal.open(
            pysal.examples.get_path('tokyo_GS_NN_listwise.csv'))
        self.BS_NN_OFF = pysal.open(
            pysal.examples.get_path('tokyo_BS_NN_OFF_listwise.csv'))

    def _assert_matches_gwr4(self, table, rslt, criteria, coef_rtol,
                             yhat_rtol, pdev_rtol=1e-05, atol=0):
        """Assert that ``rslt`` reproduces one GWR4 listwise ``table``.

        table -- opened reference table exposing ``by_col(name)``
        rslt -- object returned by ``GWR(...).fit()``
        criteria -- expected floor() of (AICc, AIC, BIC)
        coef_rtol -- one (est, se, t) relative-tolerance triple per entry of
            ``_COVARIATES``, in the same order
        yhat_rtol, pdev_rtol -- relative tolerances for yhat and localpdev
        atol -- absolute tolerance applied to every array comparison
            (0 is numpy's own default)
        """
        aicc, aic, bic = criteria
        self.assertAlmostEqual(np.floor(get_AICc(rslt)), aicc)
        self.assertAlmostEqual(np.floor(get_AIC(rslt)), aic)
        self.assertAlmostEqual(np.floor(get_BIC(rslt)), bic)

        # zip() would silently drop covariates if a triple were missing.
        self.assertEqual(len(coef_rtol), len(self._COVARIATES))
        for col, (name, rtols) in enumerate(zip(self._COVARIATES,
                                                coef_rtol)):
            est_rtol, se_rtol, t_rtol = rtols
            # Reference values stay the first argument, exactly as before,
            # so the relative tolerance is scaled by the fitted values.
            np.testing.assert_allclose(table.by_col(' est_' + name),
                                       rslt.params[:, col],
                                       rtol=est_rtol, atol=atol)
            np.testing.assert_allclose(table.by_col(' se_' + name),
                                       rslt.bse[:, col],
                                       rtol=se_rtol, atol=atol)
            np.testing.assert_allclose(table.by_col(' t_' + name),
                                       rslt.tvalues[:, col],
                                       rtol=t_rtol, atol=atol)

        np.testing.assert_allclose(table.by_col(' yhat'), rslt.mu,
                                   rtol=yhat_rtol, atol=atol)
        pdev = np.array(table.by_col(' localpdev')).reshape((-1, 1))
        np.testing.assert_allclose(pdev, rslt.pDev, rtol=pdev_rtol,
                                   atol=atol)

    def test_BS_F(self):
        """Bisquare kernel, fixed=True, against tokyo_BS_F_listwise.csv."""
        model = GWR(self.coords, self.y, self.X, bw=26029.625,
                    family=Poisson(), kernel='bisquare', fixed=True)
        self._assert_matches_gwr4(
            self.BS_F, model.fit(), (13294.0, 13247.0, 13485.0),
            coef_rtol=((1e-05, 1e-03, 1e-03),
                       (1e-04, 1e-02, 1e-02),
                       (1e-04, 1e-03, 1e-03),
                       (1e-04, 1e-02, 1e-02),
                       (1e-04, 1e-02, 1e-02)),
            yhat_rtol=1e-05)

    def test_BS_NN(self):
        """Bisquare kernel, fixed=False, against tokyo_BS_NN_listwise.csv."""
        model = GWR(self.coords, self.y, self.X, bw=50, family=Poisson(),
                    kernel='bisquare', fixed=False)
        self._assert_matches_gwr4(
            self.BS_NN, model.fit(), (13285, 13259.0, 13442.0),
            coef_rtol=((1e-04, 1e-02, 1e-02),
                       (1e-03, 1e-02, 1e-02),
                       (1e-04, 1e-02, 1e-02),
                       (1e-03, 1e-02, 1e-02),
                       (1e-04, 1e-02, 1e-02)),
            yhat_rtol=1e-04)

    def test_BS_NN_Offset(self):
        """As test_BS_NN but with an offset, looser rtol and atol=1e-02."""
        model = GWR(self.coords, self.y, self.X, bw=100, offset=self.off,
                    family=Poisson(), kernel='bisquare', fixed=False)
        self._assert_matches_gwr4(
            self.BS_NN_OFF, model.fit(), (367.0, 361.0, 451.0),
            coef_rtol=((1e-02, 1e-02, 1e-01),
                       (1e-03, 1e-02, 1e-01),
                       (1e-04, 1e-02, 1e-01),
                       (1e-03, 1e-02, 1e-01),
                       (1e-04, 1e-02, 1e-01)),
            yhat_rtol=1e-03, pdev_rtol=1e-04, atol=1e-02)

    def test_GS_F(self):
        """Gaussian kernel, fixed=True, against tokyo_GS_F_listwise.csv."""
        model = GWR(self.coords, self.y, self.X, bw=8764.474,
                    family=Poisson(), kernel='gaussian', fixed=True)
        self._assert_matches_gwr4(
            self.GS_F, model.fit(), (11283.0, 11211.0, 11497.0),
            coef_rtol=((1e-03, 1e-02, 1e-02),
                       (1e-03, 1e-02, 1e-02),
                       (1e-03, 1e-02, 1e-02),
                       (1e-02, 1e-02, 1e-02),
                       (1e-02, 1e-02, 1e-02)),
            yhat_rtol=1e-04)

    def test_GS_NN(self):
        """Gaussian kernel, fixed=False, against tokyo_GS_NN_listwise.csv."""
        model = GWR(self.coords, self.y, self.X, bw=50, family=Poisson(),
                    kernel='gaussian', fixed=False)
        self._assert_matches_gwr4(
            self.GS_NN, model.fit(), (21070.0, 21069.0, 21111.0),
            coef_rtol=((1e-04, 1e-02, 1e-02),
                       (1e-03, 1e-02, 1e-02),
                       (1e-04, 1e-02, 1e-02),
                       (1e-02, 1e-02, 1e-02),
                       (1e-02, 1e-02, 1e-02)),
            yhat_rtol=1e-04)
|
||||
|
||||
class TestGWRBinomial(unittest.TestCase):
|
||||
def setUp(self):
    """Load the landslides data set and the clearwater GWR4 tables."""
    data = pysal.open(pysal.examples.get_path('landslides.csv'))
    # Materialise the pairs: on Python 3 zip() is a one-shot iterator, which
    # would leave nothing for any later pass over the coordinates.
    self.coords = list(zip(data.by_col('X'), data.by_col('Y')))
    self.y = np.array(data.by_col('Landslid')).reshape((-1, 1))
    # Covariate columns, stacked below in the order the tests index them.
    ELEV = np.array(data.by_col('Elev')).reshape((-1, 1))
    SLOPE = np.array(data.by_col('Slope')).reshape((-1, 1))
    SIN = np.array(data.by_col('SinAspct')).reshape((-1, 1))
    COS = np.array(data.by_col('CosAspct')).reshape((-1, 1))
    SOUTH = np.array(data.by_col('AbsSouth')).reshape((-1, 1))
    DIST = np.array(data.by_col('DistStrm')).reshape((-1, 1))
    self.X = np.hstack([ELEV, SLOPE, SIN, COS, SOUTH, DIST])
    self.BS_F = pysal.open(
        pysal.examples.get_path('clearwater_BS_F_listwise.csv'))
    self.BS_NN = pysal.open(
        pysal.examples.get_path('clearwater_BS_NN_listwise.csv'))
    self.GS_F = pysal.open(
        pysal.examples.get_path('clearwater_GS_F_listwise.csv'))
    self.GS_NN = pysal.open(
        pysal.examples.get_path('clearwater_GS_NN_listwise.csv'))
|
||||
|
||||
def test_BS_F(self):
    """Bisquare kernel, fixed=True, against clearwater_BS_F_listwise.csv."""
    model = GWR(self.coords, self.y, self.X, bw=19642.170,
                family=Binomial(), kernel='bisquare', fixed=True)
    rslt = model.fit()

    self.assertAlmostEqual(np.floor(get_AICc(rslt)), 275.0)
    self.assertAlmostEqual(np.floor(get_AIC(rslt)), 271.0)
    self.assertAlmostEqual(np.floor(get_BIC(rslt)), 349.0)

    # (column suffix, est rtol, se rtol, t rtol) in the column order of
    # rslt.params / rslt.bse / rslt.tvalues.
    # NOTE(review): relative tolerances of 1, 10 and 100 make these
    # comparisons close to vacuous; kept unchanged pending tighter
    # reference data.
    tolerances = (
        ('Intercept', 1e-00, 1e-00, 1e-00),
        ('Elev', 1e-00, 1e-00, 1e-00),
        ('Slope', 1e-00, 1e-00, 1e-00),
        ('SinAspct', 1e01, 1e01, 1e01),
        ('CosAspct', 1e01, 1e01, 1e01),
        ('AbsSouth', 1e01, 1e01, 1e01),
        ('DistStrm', 1e02, 1e01, 1e02),
    )
    for col, (name, est_rtol, se_rtol, t_rtol) in enumerate(tolerances):
        # Reference values stay the first argument, exactly as before, so
        # the relative tolerance is scaled by the fitted values.
        np.testing.assert_allclose(self.BS_F.by_col(' est_' + name),
                                   rslt.params[:, col], rtol=est_rtol)
        np.testing.assert_allclose(self.BS_F.by_col(' se_' + name),
                                   rslt.bse[:, col], rtol=se_rtol)
        np.testing.assert_allclose(self.BS_F.by_col(' t_' + name),
                                   rslt.tvalues[:, col], rtol=t_rtol)

    np.testing.assert_allclose(self.BS_F.by_col(' yhat'), rslt.mu,
                               rtol=1e-01)
    # The ' localpdev' column is deliberately not compared with rslt.pDev:
    # at rtol=1e-05 that check fails, likely due to compound rounding
    # errors. The original author reported that statsmodels.family
    # calculations and Jing's Python version both yield the same values.
|
||||
|
||||
def test_BS_NN(self):
    """Check a binomial GWR fit with a bisquare kernel and fixed=False (bw=158).

    The fit is compared with the reference table ``self.BS_NN`` (loaded
    outside this method; presumably GWR4 output -- confirm against the
    fixture setup): floored AICc/AIC/BIC, then the estimate, standard
    error and t-value of every coefficient, then the fitted means.
    """
    model = GWR(self.coords, self.y, self.X, bw=158, family=Binomial(),
                kernel='bisquare', fixed=False)
    rslt = model.fit()

    AICc = get_AICc(rslt)
    AIC = get_AIC(rslt)
    BIC = get_BIC(rslt)

    self.assertAlmostEqual(np.floor(AICc), 277.0)
    self.assertAlmostEqual(np.floor(AIC), 271.0)
    self.assertAlmostEqual(np.floor(BIC), 358.0)

    # One entry per coefficient, in the column order of rslt.params:
    # (reference column suffix, rtol for estimates, rtol for standard
    # errors, rtol for t-values).
    checks = [
        ('Intercept', 1e-00, 1e-00, 1e-00),
        ('Elev', 1e-00, 1e-00, 1e-00),
        ('Slope', 1e-00, 1e-00, 1e-00),
        ('SinAspct', 1e01, 1e01, 1e01),
        ('CosAspct', 1e01, 1e01, 1e01),
        ('AbsSouth', 1e01, 1e01, 1e01),
        ('DistStrm', 1e03, 1e01, 1e03),
    ]
    for col, (name, rtol_est, rtol_se, rtol_t) in enumerate(checks):
        # Reference column names carry a leading space.
        np.testing.assert_allclose(self.BS_NN.by_col(' est_' + name),
                                   rslt.params[:, col], rtol=rtol_est)
        np.testing.assert_allclose(self.BS_NN.by_col(' se_' + name),
                                   rslt.bse[:, col], rtol=rtol_se)
        np.testing.assert_allclose(self.BS_NN.by_col(' t_' + name),
                                   rslt.tvalues[:, col], rtol=rtol_t)

    np.testing.assert_allclose(self.BS_NN.by_col(' yhat'), rslt.mu,
                               rtol=1e-01)

    # The local percent-deviance comparison is disabled:
    # This test fails - likely due to compound rounding errors
    # Has been tested using statsmodels.family calculations and
    # code from Jing's python version, which both yield the same
    # np.testing.assert_allclose(self.BS_NN.by_col(' localpdev'),
    #                            rslt.pDev, rtol=1e-05)
|
||||
|
||||
def test_GS_F(self):
    """Check a binomial GWR fit with a gaussian kernel and fixed=True (bw=8929.061).

    The fit is compared with the reference table ``self.GS_F`` (loaded
    outside this method; presumably GWR4 output -- confirm against the
    fixture setup): floored AICc/AIC/BIC, then the estimate, standard
    error and t-value of every coefficient, then the fitted means.
    """
    model = GWR(self.coords, self.y, self.X, bw=8929.061, family=Binomial(),
                kernel='gaussian', fixed=True)
    rslt = model.fit()

    AICc = get_AICc(rslt)
    AIC = get_AIC(rslt)
    BIC = get_BIC(rslt)

    self.assertAlmostEqual(np.floor(AICc), 276.0)
    self.assertAlmostEqual(np.floor(AIC), 272.0)
    self.assertAlmostEqual(np.floor(BIC), 341.0)

    # One entry per coefficient, in the column order of rslt.params:
    # (reference column suffix, rtol for estimates, rtol for standard
    # errors, rtol for t-values).
    checks = [
        ('Intercept', 1e-00, 1e-00, 1e-00),
        ('Elev', 1e-00, 1e-00, 1e-00),
        ('Slope', 1e-00, 1e-00, 1e-00),
        ('SinAspct', 1e01, 1e01, 1e01),
        ('CosAspct', 1e01, 1e01, 1e01),
        ('AbsSouth', 1e01, 1e01, 1e01),
        ('DistStrm', 1e02, 1e01, 1e02),
    ]
    for col, (name, rtol_est, rtol_se, rtol_t) in enumerate(checks):
        # Reference column names carry a leading space.
        np.testing.assert_allclose(self.GS_F.by_col(' est_' + name),
                                   rslt.params[:, col], rtol=rtol_est)
        np.testing.assert_allclose(self.GS_F.by_col(' se_' + name),
                                   rslt.bse[:, col], rtol=rtol_se)
        np.testing.assert_allclose(self.GS_F.by_col(' t_' + name),
                                   rslt.tvalues[:, col], rtol=rtol_t)

    np.testing.assert_allclose(self.GS_F.by_col(' yhat'), rslt.mu,
                               rtol=1e-01)

    # The local percent-deviance comparison is disabled:
    # This test fails - likely due to compound rounding errors
    # Has been tested using statsmodels.family calculations and
    # code from Jing's python version, which both yield the same
    # np.testing.assert_allclose(self.GS_F.by_col(' localpdev'),
    #                            rslt.pDev, rtol=1e-05)
|
||||
|
||||
def test_GS_NN(self):
    """Check a binomial GWR fit with a gaussian kernel and fixed=False (bw=64).

    The fit is compared with the reference table ``self.GS_NN`` (loaded
    outside this method; presumably GWR4 output -- confirm against the
    fixture setup): floored AICc/AIC/BIC, then the estimate, standard
    error and t-value of every coefficient, then the fitted means.
    """
    model = GWR(self.coords, self.y, self.X, bw=64, family=Binomial(),
                kernel='gaussian', fixed=False)
    rslt = model.fit()

    AICc = get_AICc(rslt)
    AIC = get_AIC(rslt)
    BIC = get_BIC(rslt)

    self.assertAlmostEqual(np.floor(AICc), 276.0)
    self.assertAlmostEqual(np.floor(AIC), 273.0)
    self.assertAlmostEqual(np.floor(BIC), 331.0)

    # One entry per coefficient, in the column order of rslt.params:
    # (reference column suffix, rtol for estimates, rtol for standard
    # errors, rtol for t-values).
    checks = [
        ('Intercept', 1e-00, 1e-00, 1e-00),
        ('Elev', 1e-00, 1e-00, 1e-00),
        ('Slope', 1e-00, 1e-00, 1e-00),
        ('SinAspct', 1e01, 1e01, 1e01),
        ('CosAspct', 1e01, 1e01, 1e01),
        ('AbsSouth', 1e01, 1e01, 1e01),
        ('DistStrm', 1e02, 1e01, 1e02),
    ]
    for col, (name, rtol_est, rtol_se, rtol_t) in enumerate(checks):
        # Reference column names carry a leading space.
        np.testing.assert_allclose(self.GS_NN.by_col(' est_' + name),
                                   rslt.params[:, col], rtol=rtol_est)
        np.testing.assert_allclose(self.GS_NN.by_col(' se_' + name),
                                   rslt.bse[:, col], rtol=rtol_se)
        np.testing.assert_allclose(self.GS_NN.by_col(' t_' + name),
                                   rslt.tvalues[:, col], rtol=rtol_t)

    np.testing.assert_allclose(self.GS_NN.by_col(' yhat'), rslt.mu,
                               rtol=1e-00)

    # The local percent-deviance comparison is disabled:
    # This test fails - likely due to compound rounding errors
    # Has been tested using statsmodels.family calculations and
    # code from Jing's python version, which both yield the same
    # np.testing.assert_allclose(self.GS_NN.by_col(' localpdev'),
    #                            rslt.pDev, rtol=1e-05)
|
||||
|
||||
# Run this module's unittest cases when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
@ -0,0 +1,84 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
import pysal
|
||||
from pysal.contrib.gwr.kernels import *
|
||||
|
||||
PEGP = pysal.examples.get_path
|
||||
|
||||
class TestKernels(unittest.TestCase):
    """Compare each GWR kernel function with a stored 5x5 weight matrix.

    ``setUp`` builds five seeded, shuffled coordinate pairs; every test
    calls one kernel function on them with a bandwidth argument of 3.
    """

    def setUp(self):
        """Create the seeded coordinates and the expected kernel matrices."""
        np.random.seed(1234)
        x = np.arange(1, 6)
        y = np.arange(5, 0, -1)
        np.random.shuffle(x)
        np.random.shuffle(y)
        # zip() must be materialised first: handing np.array a lazy zip
        # object produces a 0-d object array instead of a (5, 2) array.
        self.coords = np.array(list(zip(x, y)))
        self.fix_gauss_kern = np.array([
            [1., 0.38889556, 0.48567179, 0.48567179, 0.89483932],
            [0.38889556, 1., 0.89483932, 0.64118039, 0.48567179],
            [0.48567179, 0.89483932, 1., 0.89483932, 0.48567179],
            [0.48567179, 0.64118039, 0.89483932, 1., 0.38889556],
            [0.89483932, 0.48567179, 0.48567179, 0.38889556, 1.]])
        self.adapt_gauss_kern = np.array([
            [1., 0.52004183, 0.60653072, 0.60653072, 0.92596109],
            [0.34559083, 1., 0.88249692, 0.60653072, 0.44374738],
            [0.03877423, 0.60653072, 1., 0.60653072, 0.03877423],
            [0.44374738, 0.60653072, 0.88249692, 1., 0.34559083],
            [0.92596109, 0.60653072, 0.60653072, 0.52004183, 1.]])
        self.fix_bisquare_kern = np.array([
            [1., 0., 0., 0., 0.60493827],
            [0., 1., 0.60493827, 0.01234568, 0.],
            [0., 0.60493827, 1., 0.60493827, 0.],
            [0., 0.01234568, 0.60493827, 1., 0.],
            [0.60493827, 0., 0., 0., 1.]])
        self.adapt_bisquare_kern = np.array([
            [1.00000000e+00, 0.00000000e+00, 0.00000000e+00,
             3.99999881e-14, 7.15976383e-01],
            [0.00000000e+00, 1.00000000e+00, 5.62500075e-01,
             3.99999881e-14, 0.00000000e+00],
            [0.00000000e+00, 3.99999881e-14, 1.00000000e+00,
             3.99999881e-14, 0.00000000e+00],
            [0.00000000e+00, 3.99999881e-14, 5.62500075e-01,
             1.00000000e+00, 0.00000000e+00],
            [7.15976383e-01, 0.00000000e+00, 3.99999881e-14,
             0.00000000e+00, 1.00000000e+00]])
        self.fix_exp_kern = np.array([
            [1., 0.2529993, 0.30063739, 0.30063739, 0.62412506],
            [0.2529993, 1., 0.62412506, 0.38953209, 0.30063739],
            [0.30063739, 0.62412506, 1., 0.62412506, 0.30063739],
            [0.30063739, 0.38953209, 0.62412506, 1., 0.2529993],
            [0.62412506, 0.30063739, 0.30063739, 0.2529993, 1.]])
        self.adapt_exp_kern = np.array([
            [1., 0.31868771, 0.36787948, 0.36787948, 0.67554721],
            [0.23276223, 1., 0.60653069, 0.36787948, 0.27949951],
            [0.07811997, 0.36787948, 1., 0.36787948, 0.07811997],
            [0.27949951, 0.36787948, 0.60653069, 1., 0.23276223],
            [0.67554721, 0.36787948, 0.36787948, 0.31868771, 1.]])

    def test_fix_gauss(self):
        """fix_gauss(coords, 3) matches the stored matrix."""
        kern = fix_gauss(self.coords, 3)
        np.testing.assert_allclose(kern, self.fix_gauss_kern)

    def test_adapt_gauss(self):
        """adapt_gauss(coords, 3) matches the stored matrix."""
        kern = adapt_gauss(self.coords, 3)
        np.testing.assert_allclose(kern, self.adapt_gauss_kern)

    def test_fix_biqsquare(self):
        """fix_bisquare(coords, 3) matches the stored matrix within atol=0.1."""
        kern = fix_bisquare(self.coords, 3)
        np.testing.assert_allclose(kern, self.fix_bisquare_kern,
                                   atol=1e-01)

    def test_adapt_bisqaure(self):
        """adapt_bisquare(coords, 3) matches the stored matrix within atol=1e-12."""
        kern = adapt_bisquare(self.coords, 3)
        np.testing.assert_allclose(kern, self.adapt_bisquare_kern, atol=1e-012)

    def test_fix_exp(self):
        """fix_exp(coords, 3) matches the stored matrix."""
        kern = fix_exp(self.coords, 3)
        np.testing.assert_allclose(kern, self.fix_exp_kern)

    def test_adapt_exp(self):
        """adapt_exp(coords, 3) matches the stored matrix."""
        kern = adapt_exp(self.coords, 3)
        np.testing.assert_allclose(kern, self.adapt_exp_kern)
|
||||
|
||||
# Run this module's unittest cases when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
@ -0,0 +1,139 @@
|
||||
|
||||
"""
|
||||
GWR is tested against results from GWR4
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import pickle as pk
|
||||
from pysal.contrib.glm.family import Gaussian, Poisson, Binomial
|
||||
from pysal.contrib.gwr.sel_bw import Sel_BW
|
||||
import numpy as np
|
||||
import pysal
|
||||
|
||||
class TestSelBW(unittest.TestCase):
    """Bandwidth-selection tests for ``Sel_BW`` on the ``GData_utm.csv`` example.

    Each test asks ``Sel_BW.search`` for a bandwidth under one combination
    of kernel, fixed/adaptive mode, criterion and search settings, and
    compares it with a stored value.
    """

    def setUp(self):
        """Load coordinates, response and predictors plus the pickled fixtures."""
        data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
        # Materialise the pairs so the coordinates are a reusable list
        # rather than a one-shot zip iterator.
        self.coords = list(zip(data.by_col('X'), data.by_col('Y')))
        self.y = np.array(data.by_col('PctBach')).reshape((-1, 1))
        rural = np.array(data.by_col('PctRural')).reshape((-1, 1))
        pov = np.array(data.by_col('PctPov')).reshape((-1, 1))
        black = np.array(data.by_col('PctBlack')).reshape((-1, 1))
        self.X = np.hstack([rural, pov, black])
        self.XB = self._load_pickle('XB.p')
        self.err = self._load_pickle('err.p')

    @staticmethod
    def _load_pickle(name):
        """Unpickle the pysal example file ``name``.

        The file is opened in binary mode, as pickle requires, and the
        handle is closed as soon as loading finishes.
        """
        with open(pysal.examples.get_path(name), 'rb') as handle:
            return pk.load(handle)

    def _search(self, kernel, fixed, **search_args):
        """Return ``Sel_BW(...).search(**search_args)`` for the shared data."""
        selector = Sel_BW(self.coords, self.y, self.X, kernel=kernel,
                          fixed=fixed)
        return selector.search(**search_args)

    # --- searches that only specify the criterion ---

    def test_golden_fixed_AICc(self):
        bw1 = 211027.34
        bw2 = self._search('bisquare', True, criterion='AICc')
        self.assertAlmostEqual(bw1, bw2)

    def test_golden_adapt_AICc(self):
        bw1 = 93.0
        bw2 = self._search('bisquare', False, criterion='AICc')
        self.assertAlmostEqual(bw1, bw2)

    def test_golden_fixed_AIC(self):
        bw1 = 76169.15
        bw2 = self._search('gaussian', True, criterion='AIC')
        self.assertAlmostEqual(bw1, bw2)

    def test_golden_adapt_AIC(self):
        bw1 = 50.0
        bw2 = self._search('gaussian', False, criterion='AIC')
        self.assertAlmostEqual(bw1, bw2)

    def test_golden_fixed_BIC(self):
        bw1 = 279451.43
        bw2 = self._search('gaussian', True, criterion='BIC')
        self.assertAlmostEqual(bw1, bw2)

    def test_golden_adapt_BIC(self):
        bw1 = 62.0
        bw2 = self._search('gaussian', False, criterion='BIC')
        self.assertAlmostEqual(bw1, bw2)

    def test_golden_fixed_CV(self):
        bw1 = 130406.67
        bw2 = self._search('gaussian', True, criterion='CV')
        self.assertAlmostEqual(bw1, bw2)

    def test_golden_adapt_CV(self):
        bw1 = 68.0
        bw2 = self._search('gaussian', False, criterion='CV')
        self.assertAlmostEqual(bw1, bw2)

    # --- searches with search='interval' over an explicit range ---
    # The trailing comments on bw1 are carried over from the original file.

    def test_interval_fixed_AICc(self):
        bw1 = 211025.0  # 211027.00
        bw2 = self._search('bisquare', True, criterion='AICc',
                           search='interval', bw_min=211001.,
                           bw_max=211035.0, interval=2)
        self.assertAlmostEqual(bw1, bw2)

    def test_interval_adapt_AICc(self):
        bw1 = 93.0
        bw2 = self._search('bisquare', False, criterion='AICc',
                           search='interval', bw_min=90.0, bw_max=95.0,
                           interval=1)
        self.assertAlmostEqual(bw1, bw2)

    def test_interval_fixed_AIC(self):
        bw1 = 76175.0  # 76169.00
        bw2 = self._search('gaussian', True, criterion='AIC',
                           search='interval', bw_min=76161.0,
                           bw_max=76175.0, interval=1)
        self.assertAlmostEqual(bw1, bw2)

    def test_interval_adapt_AIC(self):
        bw1 = 40.0  # 50.0
        bw2 = self._search('gaussian', False, criterion='AIC',
                           search='interval', bw_min=40.0, bw_max=60.0,
                           interval=2)
        self.assertAlmostEqual(bw1, bw2)

    def test_interval_fixed_BIC(self):
        bw1 = 279461.0  # 279451.00
        bw2 = self._search('gaussian', True, criterion='BIC',
                           search='interval', bw_min=279441.0,
                           bw_max=279461.0, interval=2)
        self.assertAlmostEqual(bw1, bw2)

    def test_interval_adapt_BIC(self):
        bw1 = 62.0
        bw2 = self._search('gaussian', False, criterion='BIC',
                           search='interval', bw_min=52.0, bw_max=72.0,
                           interval=2)
        self.assertAlmostEqual(bw1, bw2)

    def test_interval_fixed_CV(self):
        bw1 = 130400.0  # 130406.00
        bw2 = self._search('gaussian', True, criterion='CV',
                           search='interval', bw_min=130400.0,
                           bw_max=130410.0, interval=1)
        self.assertAlmostEqual(bw1, bw2)

    def test_interval_adapt_CV(self):
        bw1 = 62.0  # 68.0
        bw2 = self._search('gaussian', False, criterion='CV',
                           search='interval', bw_min=60.0, bw_max=76.0,
                           interval=2)
        self.assertAlmostEqual(bw1, bw2)

    def test_FBGWR_AIC(self):
        """With fb=True the search returns one bandwidth per column of X."""
        bw1 = [157.0, 65.0, 52.0]
        sel = Sel_BW(self.coords, self.y, self.X, fb=True, kernel='bisquare',
                     constant=False)
        bw2 = sel.search(tol_fb=1e-03)
        np.testing.assert_allclose(bw1, bw2)
        # The selector's XB and err attributes are compared with the
        # pickled fixtures loaded in setUp.
        np.testing.assert_allclose(sel.XB, self.XB, atol=1e-05)
        np.testing.assert_allclose(sel.err, self.err, atol=1e-05)
|
||||
|
||||
# Run this module's unittest cases when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
202
release/python/0.8.1/crankshaft/crankshaft/regression/gwr_cs.py
Normal file
202
release/python/0.8.1/crankshaft/crankshaft/regression/gwr_cs.py
Normal file
@ -0,0 +1,202 @@
|
||||
"""
|
||||
Geographically weighted regression
|
||||
"""
|
||||
import numpy as np
|
||||
from gwr.base.gwr import GWR as PySAL_GWR
|
||||
from gwr.base.sel_bw import Sel_BW
|
||||
import json
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import plpy
|
||||
|
||||
|
||||
class GWR:
    """Geographically weighted regression on data fetched from the database.

    Input rows come from a data provider (an ``AnalysisDataProvider`` unless
    another one is injected) and are passed to the PySAL GWR implementation.
    """

    def __init__(self, data_provider=None):
        """Use ``data_provider`` if given, else a new ``AnalysisDataProvider``."""
        if data_provider:
            self.data_provider = data_provider
        else:
            self.data_provider = AnalysisDataProvider()

    @staticmethod
    def _design_matrix(row, n, k):
        """Build the (n, k) float matrix from ``row['attr1']`` .. ``row['attr<k>']``."""
        X = np.zeros((n, k))
        for attr in range(k):
            attr_name = 'attr' + str(attr + 1)
            X[:, attr] = np.array(row[attr_name], dtype=float).flatten()
        return X

    @staticmethod
    def _rows_to_json(values, var_names, n_rows):
        """Return one JSON object per row of ``values``, keyed by variable name.

        Column ``i`` of ``values`` is stored under ``var_names[i]``.
        """
        return [json.dumps({var: values[idx, col]
                            for col, var in enumerate(var_names)})
                for idx in range(n_rows)]

    def gwr(self, subquery, dep_var, ind_vars,
            bw=None, fixed=False, kernel='bisquare',
            geom_col='the_geom', id_col='cartodb_id'):
        """
        Fit a GWR model and return per-row diagnostics.

        subquery: 'select * from demographics'
        dep_var: 'pctbachelor'
        ind_vars: ['pctpov', 'pctrural', 'pctblack'] -- do not include
            'intercept'; it is added to the output names by this method.
            The list passed in is left unmodified.
        bw: value of bandwidth, if None then select optimal
        fixed: False (kNN) or True ('distance')
        kernel: 'bisquare' (default), or 'exponential', 'gaussian'
        geom_col, id_col: column names forwarded to the data provider

        Returns the zip of (coefficients JSON, standard errors JSON,
        t-values JSON, filtered t-values JSON, predicted value, residual,
        local R-squared, bandwidth, row id), one tuple per input row.
        An empty query result is reported through ``plpy.error``.
        """
        params = {'geom_col': geom_col,
                  'id_col': id_col,
                  'subquery': subquery,
                  'dep_var': dep_var,
                  'ind_vars': ind_vars}

        # get data from data provider
        query_result = self.data_provider.get_gwr(params)

        # exit if data to analyze is empty
        if len(query_result) == 0:
            plpy.error('No data passed to analysis or independent variables '
                       'are all null-valued')

        row = query_result[0]

        # unique ids
        rowid = np.array(row['rowid'], dtype=int)

        # x, y are centroids of input geometries
        x = np.array(row['x'], dtype=float)
        y = np.array(row['y'], dtype=float)
        # a list, so the coordinates can be consumed by both the bandwidth
        # search and the model fit
        coords = list(zip(x, y))

        # extract dependent variable
        Y = np.array(row['dep_var'], dtype=float).reshape((-1, 1))

        n = Y.shape[0]
        X = self._design_matrix(row, n, len(ind_vars))

        # Output names get a leading 'intercept'. Build a new list instead
        # of inserting into ind_vars, which belongs to the caller.
        var_names = ['intercept'] + list(ind_vars)

        # calculate bandwidth if none is supplied
        if bw is None:
            bw = Sel_BW(coords, Y, X,
                        fixed=fixed, kernel=kernel).search()
        model = PySAL_GWR(coords, Y, X, bw,
                          fixed=fixed, kernel=kernel).fit()

        # extracted model information
        c_alpha = model.adj_alpha
        filtered_t = model.filter_tvals(c_alpha[1])
        predicted = model.predy.flatten()
        residuals = model.resid_response
        r_squared = model.localR2.flatten()
        # the same bandwidth is reported for every row
        bw = np.repeat(float(bw), n)

        # lists of json objs for model outputs
        coeffs = self._rows_to_json(model.params, var_names, n)
        stand_errs = self._rows_to_json(model.bse, var_names, n)
        t_vals = self._rows_to_json(model.tvalues, var_names, n)
        filtered_t_vals = self._rows_to_json(filtered_t, var_names, n)

        return zip(coeffs, stand_errs, t_vals, filtered_t_vals,
                   predicted, residuals, r_squared, bw, rowid)

    def gwr_predict(self, subquery, dep_var, ind_vars,
                    bw=None, fixed=False, kernel='bisquare',
                    geom_col='the_geom', id_col='cartodb_id'):
        """
        Fit a GWR model on rows with a known dependent variable and predict
        the rows whose dependent variable is null.

        subquery: 'select * from demographics'
        dep_var: 'pctbachelor'
        ind_vars: ['pctpov', 'pctrural', 'pctblack'] -- do not include
            'intercept'; it is added to the output names by this method.
            The list passed in is left unmodified.
        bw: value of bandwidth, if None then select optimal
        fixed: False (kNN) or True ('distance')
        kernel: 'bisquare' (default), or 'exponential', 'gaussian'
        geom_col, id_col: column names forwarded to the data provider

        Returns the zip of (coefficients JSON, standard errors JSON,
        t-values JSON, local R-squared, predicted value, row id), one tuple
        per predicted row. An empty query result, or a result without any
        null dependent values, is reported through ``plpy.error``.
        """
        params = {'geom_col': geom_col,
                  'id_col': id_col,
                  'subquery': subquery,
                  'dep_var': dep_var,
                  'ind_vars': ind_vars}

        # get data from data provider
        query_result = self.data_provider.get_gwr_predict(params)

        # exit if data to analyze is empty
        if len(query_result) == 0:
            plpy.error('No data passed to analysis or independent variables '
                       'are all null-valued')

        row = query_result[0]

        # unique ids
        rowid = np.array(row['rowid'], dtype=int)

        x = np.array(row['x'], dtype=float)
        y = np.array(row['y'], dtype=float)
        coords = np.array(list(zip(x, y)), dtype=float)

        # extract dependent variable; no dtype is forced so that null
        # entries survive as None and can be told apart below
        Y = np.array(row['dep_var']).reshape((-1, 1))

        n = Y.shape[0]
        X = self._design_matrix(row, n, len(ind_vars))

        # Output names get a leading 'intercept'. Build a new list instead
        # of inserting into ind_vars, which belongs to the caller.
        var_names = ['intercept'] + list(ind_vars)

        # split data into "training" and "test" for predictions
        # create index to split based on null y values
        is_null = np.array([value is None for value in Y[:, 0]], dtype=bool)
        train = np.where(~is_null)[0]
        test = np.where(is_null)[0]

        # report error if there is no data to predict
        if len(test) < 1:
            plpy.error('No rows flagged for prediction: verify that rows '
                       'denoting prediction locations have a dependent '
                       'variable value of `null`')

        # split dependent variable (only need training which is non-Null's)
        Y_train = Y[train].reshape((-1, 1))
        Y_train = Y_train.astype(float)

        # split coords
        coords_train = coords[train]
        coords_test = coords[test]

        # split explanatory variables
        X_train = X[train]
        X_test = X[test]

        # calculate bandwidth if none is supplied
        if bw is None:
            bw = Sel_BW(coords_train, Y_train, X_train,
                        fixed=fixed, kernel=kernel).search()

        # estimate model and predict at new locations
        model = PySAL_GWR(coords_train, Y_train, X_train,
                          bw, fixed=fixed,
                          kernel=kernel).predict(coords_test, X_test)

        r_squared = model.localR2.flatten()
        predicted = model.predy.flatten()

        m = len(model.predy)
        coeffs = self._rows_to_json(model.params, var_names, m)
        stand_errs = self._rows_to_json(model.bse, var_names, m)
        t_vals = self._rows_to_json(model.tvalues, var_names, m)

        return zip(coeffs, stand_errs, t_vals,
                   r_squared, predicted, rowid[test])
|
@ -0,0 +1 @@
|
||||
from segmentation import *
|
@ -0,0 +1,176 @@
|
||||
"""
|
||||
Segmentation creation and prediction
|
||||
"""
|
||||
|
||||
import sklearn
|
||||
import numpy as np
|
||||
import plpy
|
||||
from sklearn.ensemble import GradientBoostingRegressor
|
||||
from sklearn import metrics
|
||||
from sklearn.cross_validation import train_test_split
|
||||
|
||||
# Lower level functions
|
||||
#----------------------
|
||||
|
||||
def replace_nan_with_mean(array):
    """
    Fill NaN entries in place with the mean of the non-NaN values.

    Input:
        @param array: a 1D or 2D array of floats which may have NaN entries
    Output:
        the same array object. In a 2D array every NaN is replaced by the
        mean of the non-NaN values of its column; in a 1D array by the mean
        of all non-NaN values. A column (or 1D array) holding only NaN stays
        NaN.
    """
    nan_mask = np.isnan(array)

    # nothing to fill: hand the input back untouched
    if not nan_mask.any():
        return array

    # 1D input (e.g. a target vector) has no column index to unpack
    if array.ndim == 1:
        array[nan_mask] = np.mean(array[~nan_mask])
        return array

    # visit only the columns that contain NaN; one mean per column
    for col in np.where(nan_mask.any(axis=0))[0]:
        col_mask = nan_mask[:, col]
        array[col_mask, col] = np.mean(array[~col_mask, col])

    return array
|
||||
|
||||
def get_data(variable, feature_columns, query):
    """
    Fetch data from the database, clean, and package into
    numpy arrays
    Input:
        @param variable: name of the target variable
        @param feature_columns: iterable of column names; it is iterated
            twice and must give the same order both times
        @param query: subquery that data is pulled from for the packaging
    Output:
        (target, features): target is a 1D float array, features a 2D
        float array with one column per feature column. Both are passed
        through replace_nan_with_mean before being returned.
    """

    # NOTE(review): identifiers and the subquery are interpolated into the
    # SQL text as-is; callers are presumably trusted -- confirm.
    columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col) for col in feature_columns])

    try:
        data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format(
            variable=variable,
            columns=columns,
            query=query))
    except Exception as e:
        plpy.error('Failed to access data to build segmentation model: %s' % e)

    # extract target data from plpy object; dtype=float turns null entries
    # into NaN (as is already done for the features) instead of producing
    # an object array that np.isnan cannot handle
    target = np.array(data[0]['target'], dtype=float)

    # put n feature data arrays into an n x m array of arrays
    features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns])

    return replace_nan_with_mean(target), replace_nan_with_mean(features)
|
||||
|
||||
# High level interface
|
||||
# --------------------
|
||||
|
||||
def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters):
    """
    Version of create_and_predict_segment that works on arrays that come
    straight from the SQL calling the function.

    Input:
        @param target: The 1D array of length NSamples containing the target variable we want the model to predict
        @param features: The 2D array of size NSamples * NFeatures that forms the input to the model
        @param target_features: The features of the rows to predict, passed to the trained model's predict()
        @param target_ids: A 1D array of target_ids that will be used to associate the results of the prediction with the rows which they come from
        @param model_parameters: A dictionary containing parameters for the model.
    Output:
        zip of (target id, predicted value, model accuracy); the accuracy
        returned by train_model is repeated for every prediction
    """

    # mean-fill NaNs in the training target, training features and the
    # features to predict on, in that order
    training_target, training_features, target_features = [
        replace_nan_with_mean(values)
        for values in (target, features, target_features)]

    # 0.2 is the test_split argument of train_model
    model, accuracy = train_model(training_target, training_features, model_parameters, 0.2)
    predicted = model.predict(target_features)

    # one copy of the accuracy per predicted row
    accuracy_column = np.full(predicted.shape, accuracy)
    return zip(target_ids, predicted, accuracy_column)
|
||||
|
||||
|
||||
|
||||
def create_and_predict_segment(query, variable, target_query, model_params):
    """
    generate a segment with machine learning
    Stuart Lynn

    Input:
        @param query: subquery providing the training rows
        @param variable: name of the target column in query
        @param target_query: query providing the rows to predict on
        @param model_params: dictionary of model parameters passed to train_model
    Output:
        zip of (cartodb_id, predicted value, model accuracy); the accuracy
        returned by train_model is repeated for every prediction
    """

    # fetch column names
    try:
        columns = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1 '.format(query=query))[0].keys()
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    # extract column names to be used in building the segmentation model:
    # everything except the target and the id / geometry columns
    feature_columns = set(columns) - set([variable, 'cartodb_id', 'the_geom', 'the_geom_webmercator'])

    # get data from database
    target, features = get_data(variable, feature_columns, query)

    # 0.2 is the test_split argument of train_model
    model, accuracy = train_model(target, features, model_params, 0.2)
    cartodb_ids, result = predict_segment(model, feature_columns, target_query)
    accuracy_array = [accuracy]*result.shape[0]
    return zip(cartodb_ids, result, accuracy_array)
|
||||
|
||||
|
||||
def train_model(target, features, model_params, test_split):
    """
    Train the Gradient Boosting model on the provided data and calculate the accuracy of the model
    Input:
        @param target: 1D Array of the variable that the model is to be trained to predict
        @param features: 2D Array NSamples * NFeatures to use in training the model
        @param model_params: A dictionary of model parameters, the full specification can be found on the
            scikit learn page for [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html)
        @param test_split: The fraction of the data to be withheld for testing the model / calculating the accuracy
    Output:
        (model, accuracy): the fitted model and the value returned by
        calculate_model_accuracy for the withheld rows
    """
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=test_split)
    model = GradientBoostingRegressor(**model_params)
    model.fit(features_train, target_train)
    # Score on the withheld rows only; scoring on the full data set would
    # include the rows the model was fitted on.
    accuracy = calculate_model_accuracy(model, features_test, target_test)
    return model, accuracy
|
||||
|
||||
def calculate_model_accuracy(model, features, target):
    """
    Score a fitted model by the mean squared error of its predictions.

    Input:
        @param model: model trained from input features
        @param features: features to make a prediction from
        @param target: target to compare prediction to
    Output:
        mean squared error of the model prediction compared to the target
    """
    return metrics.mean_squared_error(model.predict(features), target)
|
||||
|
||||
def predict_segment(model, features, target_query):
    """
    Use the provided model to predict the values for the new feature set

    Input:
        @param model: The pretrained model
        @param features: A list of features to use in the model prediction
            (list of column names)
        @param target_query: The query to run to obtain the data to predict
            on and the cartodb_ids associated with it.
    Output:
        (cartodb_ids, predictions): the ids of the target rows ordered by
        cartodb_id and an array with the prediction for each of those rows
    """

    batch_size = 1000
    joined_features = ','.join('"{0}"::numeric'.format(a) for a in features)

    try:
        # The rows must come back in cartodb_id order: the ids are collected
        # separately below with ORDER BY cartodb_id, and predictions are
        # paired with them purely by position.
        cursor = plpy.cursor(
            'SELECT Array[{joined_features}] As features FROM ({target_query}) As a'
            ' ORDER BY cartodb_id'.format(
                joined_features=joined_features,
                target_query=target_query))
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    results = []

    while True:
        rows = cursor.fetch(batch_size)
        if not rows:
            break
        batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows])

        # Need to fix this. Should be global mean. This will cause weird effects
        # (NaNs are currently replaced using each batch on its own)
        batch = replace_nan_with_mean(batch)
        prediction = model.predict(batch)
        results.append(prediction)

    try:
        cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids']
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    return cartodb_ids, np.concatenate(results)
|
@ -0,0 +1,2 @@
|
||||
"""Import all functions from the markov module."""
|
||||
from markov import *
|
@ -0,0 +1,194 @@
|
||||
"""
|
||||
Spatial dynamics measurements using Spatial Markov
|
||||
"""
|
||||
|
||||
# TODO: remove all plpy dependencies
|
||||
|
||||
import numpy as np
|
||||
import pysal as ps
|
||||
import plpy
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
|
||||
class Markov(object):
    """Spatial Markov trend analysis on data fetched through a data provider."""

    def __init__(self, data_provider=None):
        """
        @param data_provider (optional): object used to fetch the input
            data; an AnalysisDataProvider is created when none is passed
        """
        self.data_provider = (AnalysisDataProvider()
                              if data_provider is None
                              else data_provider)

    def spatial_trend(self, subquery, time_cols, num_classes=7,
                      w_type='knn', num_ngbrs=5, permutations=0,
                      geom_col='the_geom', id_col='cartodb_id'):
        """
        Predict the trends of a unit based on:
          1. history of its transitions to different classes (e.g., 1st
             quantile -> 2nd quantile)
          2. average class of its neighbors

        Inputs:
            @param subquery string: e.g., SELECT the_geom, cartodb_id,
                interesting_time_column FROM table_name
            @param time_cols list of strings: list of strings of column
                names
            @param num_classes (optional): number of classes to break
                distribution of values into. Currently uses quantile bins.
            @param w_type string (optional): weight type ('knn' or 'queen')
            @param num_ngbrs int (optional): number of neighbors (if knn
                type)
            @param permutations int (optional): number of permutations for
                test stats
            @param geom_col string (optional): name of column which
                contains the geometries
            @param id_col string (optional): name of column which has the
                ids of the table

        Outputs (zipped per unit, in this order, followed by the unit id
        taken from the weights' id_order):
            @param trend float: (trend_up - trend_down) / trend_static
            @param trend_up float: probability that a geom will move to a
                higher class
            @param trend_down float: probability that a geom will move to a
                lower class
            @param volatility float: a measure of the volatility based on
                probability stddev(prob array)
        """

        if len(time_cols) < 2:
            plpy.error('More than one time column needs to be passed')

        query_params = dict(id_col=id_col,
                            time_cols=time_cols,
                            geom_col=geom_col,
                            subquery=subquery,
                            num_ngbrs=num_ngbrs)
        markov_rows = self.data_provider.get_markov(w_type, query_params)

        # build the row-standardised spatial weights
        weights = pu.get_weight(markov_rows, w_type)
        weights.transform = 'r'

        # prep time data
        time_data = get_time_data(markov_rows, time_cols)

        spatial_markov = ps.Spatial_Markov(time_data,
                                           weights,
                                           k=num_classes,
                                           fixed=False,
                                           permutations=permutations)

        # class of each unit's spatial lag, taken from the last time column
        last_period_lag = ps.lag_spatial(weights, time_data[:, -1])
        lag_classes = ps.Quantiles(last_period_lag, k=num_classes).yb

        # class of each unit itself in the last time column
        unit_classes = spatial_markov.classes[:, -1]

        # look up probability distribution for each unit according to class
        # and lag class
        prob_dist = get_prob_dist(spatial_markov.P, lag_classes, unit_classes)

        # find the ups and down and overall distribution of each cell
        trend_up, trend_down, trend, volatility = get_prob_stats(
            prob_dist, spatial_markov.classes[:, -1])

        # output the results
        return zip(trend, trend_up, trend_down, volatility, weights.id_order)
|
||||
|
||||
|
||||
|
||||
def get_time_data(markov_data, time_cols):
    """
    Collect the time attributes of every row into a float matrix.

    The values are read from the keys 'attr1' ... 'attrN' of each row, where
    N is the number of entries in `time_cols`; the result has one row per
    entry of `markov_data` and one column per time attribute.
    """
    attr_keys = ['attr' + str(pos) for pos in range(1, len(time_cols) + 1)]
    # gathered attribute-by-attribute, then flipped to unit-by-attribute
    per_attribute = [[unit[key] for unit in markov_data] for key in attr_keys]
    return np.array(per_attribute, dtype=float).transpose()
|
||||
|
||||
|
||||
# not currently used
|
||||
def rebin_data(time_data, num_time_per_bin):
    """
    Convert an n x l matrix into an n x ceil(l / m) matrix where every run
    of m consecutive columns (time steps) is reduced (averaged) into a
    single column:
      1 2 3 4    1.5 3.5
      5 6 7 8 -> 5.5 7.5
      9 8 7 6    8.5 6.5
      5 4 3 2    4.5 2.5

    if m = 2, the 4 x 4 matrix is transformed to a 4 x 2 matrix.

    This process effectively resamples the data at a time span m times
    longer than that of the input data.
    For cases when there is a remainder (remainder(5/3) = 2), the remaining
    two columns are binned together as the last time period, while the
    first three are binned together for the first period.

    Input:
        @param time_data n x l ndarray: measurements of an attribute at
            different time intervals
        @param num_time_per_bin int: number of columns to average into a new
            column
    Output:
        n x ceil(l / m) ndarray of resampled time series
    """
    num_cols = time_data.shape[1]

    # Ceiling division: a trailing partial bin gets a column of its own.
    # `//` keeps the bin count an integer under true division as well.
    n_max = (num_cols + num_time_per_bin - 1) // num_time_per_bin

    return np.array(
        [time_data[:, num_time_per_bin * i:num_time_per_bin * (i + 1)].mean(axis=1)
         for i in range(n_max)]).T
|
||||
|
||||
|
||||
def get_prob_dist(transition_matrix, lag_indices, unit_indices):
    """
    Given an array of transition matrices, look up the probability
    distribution associated with each (lag class, unit class) pair passed

    Input:
        @param transition_matrix ndarray[k,k,k]: indexed first by lag class,
            then by unit class
        @param lag_indices ndarray: lag class of each unit
        @param unit_indices ndarray: class of each unit, in the same order
            as `lag_indices`

    Output:
        Array of probability distributions, one row per unit
    """
    distributions = []
    for pos, lag_class in enumerate(lag_indices):
        distributions.append(
            transition_matrix[(lag_class, unit_indices[pos])])
    return np.array(distributions)
|
||||
|
||||
|
||||
def get_prob_stats(prob_dist, unit_indices):
    """
    get the statistics of the probability distributions

    Inputs:
        @param prob_dist ndarray: one probability distribution per row
        @param unit_indices ndarray: for each row, the index of the class
            the unit is currently in

    Outputs:
        @param trend_up ndarray(float): sum of probabilities for upward
            movement (relative to the unit index of that prob)
        @param trend_down ndarray(float): sum of probabilities for downward
            movement (relative to the unit index of that prob)
        @param trend ndarray(float): difference of upward and downward
            movements divided by the probability of staying in the current
            class; NaN where that probability is not positive
        @param volatility ndarray(float): standard deviation of each row of
            `prob_dist`
    """
    count = len(unit_indices)
    trend_up = np.empty(count, dtype=float)
    trend_down = np.empty(count, dtype=float)
    trend = np.empty(count, dtype=float)

    for row in range(count):
        current = unit_indices[row]
        stay_prob = prob_dist[row, current]

        trend_up[row] = prob_dist[row, (current + 1):].sum()
        trend_down[row] = prob_dist[row, :current].sum()

        if stay_prob > 0.0:
            trend[row] = (trend_up[row] - trend_down[row]) / stay_prob
        else:
            # no probability of staying put: the ratio is undefined
            trend[row] = np.nan

    # calculate volatility of distribution
    volatility = prob_dist.std(axis=1)

    return trend_up, trend_down, trend, volatility
|
5
release/python/0.8.1/crankshaft/requirements.txt
Normal file
5
release/python/0.8.1/crankshaft/requirements.txt
Normal file
@ -0,0 +1,5 @@
|
||||
joblib==0.8.3
|
||||
numpy==1.6.1
|
||||
scipy==0.14.0
|
||||
pysal==1.14.3
|
||||
scikit-learn==0.14.1
|
49
release/python/0.8.1/crankshaft/setup.py
Normal file
49
release/python/0.8.1/crankshaft/setup.py
Normal file
@ -0,0 +1,49 @@
|
||||
|
||||
"""
|
||||
CartoDB Spatial Analysis Python Library
|
||||
See:
|
||||
https://github.com/CartoDB/crankshaft
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Trove classifiers describing the package.
CLASSIFIERS = [
    'Development Status :: 3 - Alpha',
    'Intended Audience :: Mapping comunity',
    'Topic :: Maps :: Mapping Tools',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 2.7',
]

# Optional dependency sets for development and for running the tests.
EXTRAS_REQUIRE = {
    'dev': ['unittest'],
    'test': ['unittest', 'nose', 'mock'],
}

# The choice of component versions is dictated by what's
# provisioned in the production servers.
# IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
INSTALL_REQUIRES = ['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.14.3', 'scikit-learn==0.14.1']

setup(
    name='crankshaft',
    version='0.0.0',
    description='CartoDB Spatial Analysis Python Library',
    url='https://github.com/CartoDB/crankshaft',
    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',
    license='MIT',
    classifiers=CLASSIFIERS,
    keywords='maps mapping tools spatial analysis geostatistics',
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
    extras_require=EXTRAS_REQUIRE,
    install_requires=INSTALL_REQUIRES,
    requires=['pysal', 'numpy', 'sklearn'],
    test_suite='test'
)
|
49
release/python/0.8.1/crankshaft/setup.py-r
Normal file
49
release/python/0.8.1/crankshaft/setup.py-r
Normal file
@ -0,0 +1,49 @@
|
||||
|
||||
"""
|
||||
CartoDB Spatial Analysis Python Library
|
||||
See:
|
||||
https://github.com/CartoDB/crankshaft
|
||||
"""
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Trove classifiers describing the package.
CLASSIFIERS = [
    'Development Status :: 3 - Alpha',
    'Intended Audience :: Mapping comunity',
    'Topic :: Maps :: Mapping Tools',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 2.7',
]

# Optional dependency sets for development and for running the tests.
EXTRAS_REQUIRE = {
    'dev': ['unittest'],
    'test': ['unittest', 'nose', 'mock'],
}

# The choice of component versions is dictated by what's
# provisioned in the production servers.
# IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
INSTALL_REQUIRES = ['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.14.3', 'scikit-learn==0.14.1']

setup(
    name='crankshaft',
    version='0.0.0',
    description='CartoDB Spatial Analysis Python Library',
    url='https://github.com/CartoDB/crankshaft',
    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',
    license='MIT',
    classifiers=CLASSIFIERS,
    keywords='maps mapping tools spatial analysis geostatistics',
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
    extras_require=EXTRAS_REQUIRE,
    install_requires=INSTALL_REQUIRES,
    requires=['pysal', 'numpy', 'sklearn'],
    test_suite='test'
)
|
1
release/python/0.8.1/crankshaft/test/fixtures/getis.json
vendored
Normal file
1
release/python/0.8.1/crankshaft/test/fixtures/getis.json
vendored
Normal file
@ -0,0 +1 @@
|
||||
[[0.004793783909323601, 0.17999999999999999, 0.49808756424021061], [-1.0701189472090842, 0.079000000000000001, 0.14228288580832316], [-0.67867750971877305, 0.42099999999999999, 0.24867110969448558], [-0.67407386707620487, 0.246, 0.25013217644612995], [-0.79495689068870035, 0.33200000000000002, 0.21331928959090596], [-0.49279481022182703, 0.058999999999999997, 0.31107878905057329], [-0.38075627530057132, 0.28399999999999997, 0.35169205342069643], [-0.86710921611314895, 0.23699999999999999, 0.19294108571294855], [-0.78618647240956485, 0.050000000000000003, 0.2158791250244505], [-0.76108527223116984, 0.064000000000000001, 0.22330306830813684], [-0.13340753531942209, 0.247, 0.44693554317763651], [-0.57584545722033043, 0.48999999999999999, 0.28235982246156488], [-0.78882694661192831, 0.433, 0.2151065788731219], [-0.38769767950046219, 0.375, 0.34911988661484239], [-0.56057819488052207, 0.41399999999999998, 0.28754255985169652], [-0.41354017495644935, 0.45500000000000002, 0.339605447117173], [-0.23993577722243081, 0.49099999999999999, 0.40519002230969337], [-0.1389080156677496, 0.40400000000000003, 0.44476141839645233], [-0.25485737510500855, 0.376, 0.39941662953554224], [-0.71218610582902353, 0.17399999999999999, 0.23817476979886087], [-0.54533105995872144, 0.13700000000000001, 0.2927629228714812], [-0.39547917847510977, 0.033000000000000002, 0.34624464252424236], [-0.43052658996257548, 0.35399999999999998, 0.33340631435564982], [-0.37296719193774736, 0.40300000000000002, 0.35458643102865428], [-0.66482612169465694, 0.31900000000000001, 0.25308085650392698], [-0.13772133540823422, 0.34699999999999998, 0.44523032843016275], [-0.6765304487868502, 0.20999999999999999, 0.24935196033890672], [-0.64518763494323472, 0.32200000000000001, 0.25940279912025543], [-0.5078622084312413, 0.41099999999999998, 0.30577498972600159], [-0.12652006733772059, 0.42899999999999999, 0.44966013262301163], [-0.32691133022814595, 0.498, 0.37186747562269029], [0.25533848511500978, 
0.42399999999999999, 0.39923083899077472], [2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577], [-0.1551614486076057, 0.44400000000000001, 0.43834701985429037], [1.9524487722567723, 0.012999999999999999, 0.025442473674991528], [-1.2055816465306763, 0.017000000000000001, 0.11398941970467646], [3.478472976017831, 0.002, 0.00025213964072468009], [-1.4621715757903719, 0.002, 0.071847099325659136], [-0.84010307600180256, 0.085000000000000006, 0.20042529779230778], [5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09], [1.5082367956567375, 0.065000000000000002, 0.065746966514827365], [-0.58337270103430816, 0.44, 0.27982121546450034], [-0.083271860457022437, 0.45100000000000001, 0.46681768733385554], [-0.46872337815000953, 0.34599999999999997, 0.31963368715684204], [0.18490279849545319, 0.23799999999999999, 0.42665263797981101], [3.470424529947997, 0.012, 0.00025981817437825683], [-0.99942612137154796, 0.032000000000000001, 0.15879415560388499], [-1.3650387953594485, 0.034000000000000002, 0.08612042845912049], [1.8617160516432014, 0.081000000000000003, 0.03132156240215267], [1.1321188945775384, 0.11600000000000001, 0.12879222611766061], [0.064116686050580601, 0.27300000000000002, 0.4744386578180424], [-0.42032194540259099, 0.29999999999999999, 0.33712514016213468], [-0.79581215423980922, 0.123, 0.21307061309098785], [-0.42792753720906046, 0.45600000000000002, 0.33435193892883741], [-1.0629378527428395, 0.051999999999999998, 0.14390506780140866], [-0.54164761752225477, 0.33700000000000002, 0.29403064095211839], [1.0934778886820793, 0.13700000000000001, 0.13709201601893539], [-0.094068785378413719, 0.38200000000000001, 0.46252725802998929], [0.13482026574801856, 0.36799999999999999, 0.44637699118865737], [-0.13976995315653129, 0.34699999999999998, 0.44442087706276601], [-0.051047663924746682, 0.32000000000000001, 0.47964376985626245], [-0.21468297736730158, 0.41699999999999998, 0.41500724761906527], [-0.20873154637330626, 0.38800000000000001, 
0.41732890604390893], [-0.32427876152583485, 0.49199999999999999, 0.37286349875557478], [-0.65254842943280977, 0.374, 0.25702372075306734], [-0.48611858196118796, 0.23300000000000001, 0.31344154643990074], [-0.14482354344529477, 0.32600000000000001, 0.44242509660469886], [-0.51052030974200002, 0.439, 0.30484349480873729], [0.56814382285283538, 0.14999999999999999, 0.28496865660103166], [0.58680919931668207, 0.161, 0.27866592887231878], [0.013390357044409013, 0.25800000000000001, 0.49465818005865647], [-0.19050728887961568, 0.41399999999999998, 0.4244558160399462], [-0.60531777422216049, 0.35199999999999998, 0.2724839368239631], [1.0899331115425805, 0.127, 0.13787130480311838], [0.17015055382651084, 0.36899999999999999, 0.43244586845546418], [-0.21738337124409801, 0.40600000000000003, 0.41395479459421991], [1.0329303331079593, 0.079000000000000001, 0.15081825117169467], [1.0218317101096221, 0.104, 0.15343027913308094]]
|
1
release/python/0.8.1/crankshaft/test/fixtures/gwr_packed_data.json
vendored
Normal file
1
release/python/0.8.1/crankshaft/test/fixtures/gwr_packed_data.json
vendored
Normal file
File diff suppressed because one or more lines are too long
1
release/python/0.8.1/crankshaft/test/fixtures/gwr_packed_knowns.json
vendored
Normal file
1
release/python/0.8.1/crankshaft/test/fixtures/gwr_packed_knowns.json
vendored
Normal file
File diff suppressed because one or more lines are too long
1
release/python/0.8.1/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
1
release/python/0.8.1/crankshaft/test/fixtures/kmeans.json
vendored
Normal file
@ -0,0 +1 @@
|
||||
[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
|
1
release/python/0.8.1/crankshaft/test/fixtures/markov.json
vendored
Normal file
1
release/python/0.8.1/crankshaft/test/fixtures/markov.json
vendored
Normal file
@ -0,0 +1 @@
|
||||
[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 
0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]]
|
52
release/python/0.8.1/crankshaft/test/fixtures/moran.json
vendored
Normal file
52
release/python/0.8.1/crankshaft/test/fixtures/moran.json
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
[[0.9319096128346788, "HH"],
|
||||
[-1.135787401862846, "HL"],
|
||||
[0.11732030672508517, "LL"],
|
||||
[0.6152779669180425, "LL"],
|
||||
[-0.14657336660125297, "LH"],
|
||||
[0.6967858120189607, "LL"],
|
||||
[0.07949310115714454, "HH"],
|
||||
[0.4703198759258987, "HH"],
|
||||
[0.4421125200498064, "HH"],
|
||||
[0.5724288737143592, "LL"],
|
||||
[0.8970743435692062, "LL"],
|
||||
[0.18327334401918674, "LL"],
|
||||
[-0.01466729201304962, "HL"],
|
||||
[0.3481559372544409, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329988, "HH"],
|
||||
[0.4373841193538136, "HH"],
|
||||
[0.15971286468915544, "LL"],
|
||||
[1.0543588860308968, "HH"],
|
||||
[1.7372866900020818, "HH"],
|
||||
[1.091998586053999, "LL"],
|
||||
[0.1171572584252222, "HH"],
|
||||
[0.08438455015300014, "LL"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.15482141569329985, "HH"],
|
||||
[1.1627044812890683, "HH"],
|
||||
[0.06547094736902978, "LL"],
|
||||
[0.795275137550483, "HH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.3010757406693439, "LL"],
|
||||
[2.8205795942839376, "HH"],
|
||||
[0.11259190602909264, "LL"],
|
||||
[-0.07116352791516614, "HL"],
|
||||
[-0.09945240794119009, "LH"],
|
||||
[0.18562939195219, "LL"],
|
||||
[0.1832733440191868, "LL"],
|
||||
[-0.39054253768447705, "HL"],
|
||||
[-0.1672071289487642, "HL"],
|
||||
[0.3337669247916343, "HH"],
|
||||
[0.2584386102554792, "HH"],
|
||||
[-0.19733845476322634, "HL"],
|
||||
[-0.9379282899805409, "LH"],
|
||||
[-0.028770969951095866, "LH"],
|
||||
[0.051367269430983485, "LL"],
|
||||
[-0.2172548045913472, "LH"],
|
||||
[0.05136726943098351, "LL"],
|
||||
[0.04191046803899837, "LL"],
|
||||
[0.7482357030403517, "HH"],
|
||||
[-0.014585767863118111, "LH"],
|
||||
[0.5410013139159929, "HH"],
|
||||
[1.0223932668429925, "LL"],
|
||||
[1.4179402898927476, "LL"]]
|
54
release/python/0.8.1/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
54
release/python/0.8.1/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
[
|
||||
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
|
||||
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
|
||||
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
|
||||
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
|
||||
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
|
||||
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
|
||||
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
|
||||
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
|
||||
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
|
||||
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
|
||||
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
|
||||
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
|
||||
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
|
||||
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
|
||||
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
|
||||
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
|
||||
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
|
||||
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
|
||||
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
|
||||
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
|
||||
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
|
||||
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
|
||||
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
|
||||
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
|
||||
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
|
||||
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
|
||||
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
|
||||
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
|
||||
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
|
||||
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
|
||||
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
|
||||
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
|
||||
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
|
||||
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
|
||||
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
|
||||
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
|
||||
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
|
||||
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
|
||||
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
|
||||
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
|
||||
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
|
||||
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
|
||||
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
|
||||
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
|
||||
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
|
||||
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
|
||||
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
|
||||
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
|
||||
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
|
||||
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
|
||||
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
|
||||
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
|
||||
]
|
1
release/python/0.8.1/crankshaft/test/fixtures/neighbors_getis.json
vendored
Normal file
1
release/python/0.8.1/crankshaft/test/fixtures/neighbors_getis.json
vendored
Normal file
File diff suppressed because one or more lines are too long
1
release/python/0.8.1/crankshaft/test/fixtures/neighbors_markov.json
vendored
Normal file
1
release/python/0.8.1/crankshaft/test/fixtures/neighbors_markov.json
vendored
Normal file
File diff suppressed because one or more lines are too long
13
release/python/0.8.1/crankshaft/test/helper.py
Normal file
13
release/python/0.8.1/crankshaft/test/helper.py
Normal file
@ -0,0 +1,13 @@
|
||||
import unittest
|
||||
|
||||
from mock_plpy import MockPlPy
|
||||
plpy = MockPlPy()
|
||||
|
||||
import sys
|
||||
sys.modules['plpy'] = plpy
|
||||
|
||||
import os
|
||||
|
||||
def fixture_file(name):
    """Return the path of fixture `name` inside the 'fixtures' directory
    that sits next to this module."""
    module_path = os.path.realpath(__file__)
    return os.path.join(os.path.dirname(module_path), 'fixtures', name)
|
57
release/python/0.8.1/crankshaft/test/mock_plpy.py
Normal file
57
release/python/0.8.1/crankshaft/test/mock_plpy.py
Normal file
@ -0,0 +1,57 @@
|
||||
import re
|
||||
|
||||
|
||||
class MockCursor:
    """Cursor stand-in that hands out a sliceable data set in batches."""

    def __init__(self, data):
        # data: the full result set; cursor_pos: index of the next row
        self.data = data
        self.cursor_pos = 0

    def fetch(self, batch_size):
        """Return the next `batch_size` rows (fewer, or none, once the data
        is exhausted) and advance the cursor by `batch_size`."""
        start = self.cursor_pos
        stop = start + batch_size
        rows = self.data[start:stop]
        self.cursor_pos = stop
        return rows
|
||||
|
||||
|
||||
class MockPlPy:
    """
    In-memory stand-in for the `plpy` module, used by the test suite.

    Messages are recorded in per-level lists instead of being sent to the
    database log, and queries are answered from canned results registered
    with `_define_result`.
    """

    def __init__(self):
        self._reset()

    def _reset(self):
        """Forget every recorded message, query and canned result."""
        self.infos = []
        self.notices = []
        self.debugs = []
        self.logs = []
        self.warnings = []
        self.errors = []
        self.fatals = []
        self.executes = []
        self.prepares = []
        self.results = []

    def _define_result(self, query, result):
        """Register `result` as the answer to queries matching the regular
        expression `query` (case-insensitive, matched from the start of the
        query text)."""
        pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
        self.results.append([pattern, result])

    def notice(self, msg):
        self.notices.append(msg)

    def debug(self, msg):
        self.debugs.append(msg)
        # Historically debug messages were only recorded in `notices`; keep
        # doing so for tests that still look there.
        self.notices.append(msg)

    def info(self, msg):
        self.infos.append(msg)

    def error(self, msg):
        # NOTE: this mock only records the message, it does not raise.
        self.errors.append(msg)
        # Historically errors were only recorded in `notices`; keep doing so
        # for tests that still look there.
        self.notices.append(msg)

    def cursor(self, query):
        """Return a MockCursor over the canned result for `query`."""
        data = self.execute(query)
        return MockCursor(data)

    # TODO: additional arguments
    def execute(self, query):
        """Return the first canned result whose pattern matches `query`, or
        an empty list when nothing matches."""
        self.executes.append(query)
        for pattern, result in self.results:
            if pattern.match(query):
                return result
        return []
|
@ -0,0 +1,78 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from helper import fixture_file
|
||||
|
||||
from crankshaft.clustering import Getis
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
# Fixture files produced as follows
|
||||
#
|
||||
# import pysal as ps
|
||||
# import numpy as np
|
||||
# import random
|
||||
#
|
||||
# # setup variables
|
||||
# f = ps.open(ps.examples.get_path("stl_hom.dbf"))
|
||||
# y = np.array(f.by_col['HR8893'])
|
||||
# w_queen = ps.queen_from_shapefile(ps.examples.get_path("stl_hom.shp"))
|
||||
#
|
||||
# out_queen = [{"id": index + 1,
|
||||
# "neighbors": [x+1 for x in w_queen.neighbors[index]],
|
||||
# "value": val} for index, val in enumerate(y)]
|
||||
#
|
||||
# with open('neighbors_queen_getis.json', 'w') as f:
|
||||
# f.write(str(out_queen))
|
||||
#
|
||||
# random.seed(1234)
|
||||
# np.random.seed(1234)
|
||||
# lgstar_queen = ps.esda.getisord.G_Local(y, w_queen, star=True,
|
||||
# permutations=999)
|
||||
#
|
||||
# with open('getis_queen.json', 'w') as f:
|
||||
# f.write(str(zip(lgstar_queen.z_sim,
|
||||
# lgstar_queen.p_sim, lgstar_queen.p_z_sim)))
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider that hands back canned rows instead of querying."""

    def __init__(self, mock_data):
        """Store `mock_data` as the result to return from `get_getis`."""
        self.mock_result = mock_data

    def get_getis(self, w_type, param):
        """Return the canned result; `w_type` and `param` are ignored."""
        return self.mock_result
|
||||
|
||||
|
||||
class GetisTest(unittest.TestCase):
    """Testing class for Getis-Ord's G* function

    This test replicates the work done in PySAL documentation:
    https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/autocorrelation.html#local-g-and-g
    """

    def setUp(self):
        """Load the neighbor fixture and the pre-computed G* values."""
        # load raw data for analysis
        with open(fixture_file('neighbors_getis.json')) as fixture:
            self.neighbors_data = json.load(fixture)

        # load pre-computed/known values
        with open(fixture_file('getis.json')) as fixture:
            self.getis_data = json.load(fixture)

    def test_getis_ord(self):
        """Test Getis-Ord's G*"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]

        random_seeds.set_random_seeds(1234)
        getis = Getis(FakeDataProvider(data))

        result = getis.getis_ord('subquery', 'value',
                                 'queen', None, 999, 'the_geom',
                                 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        expected = np.array(self.getis_data)[:, 0:2]

        # Only the first column (z-score) is asserted on; the second column
        # is unpacked but not compared.
        for (res_z, _res_p), (exp_z, _exp_p) in zip(result, expected):
            self.assertAlmostEqual(res_z, exp_z, delta=1e-2)
|
@ -0,0 +1,87 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
|
||||
from helper import fixture_file
|
||||
from crankshaft.clustering import Kmeans
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import crankshaft.clustering as cc
|
||||
from crankshaft import random_seeds
|
||||
|
||||
import json
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider that hands back canned rows for both k-means variants."""

    def __init__(self, mocked_result):
        """Store `mocked_result` as the rows every getter returns."""
        self.mocked_result = mocked_result

    def get_spatial_kmeans(self, query):
        """Return the canned result; `query` is ignored."""
        return self.mocked_result

    def get_nonspatial_kmeans(self, query):
        """Return the canned result; `query` is ignored."""
        return self.mocked_result
|
||||
|
||||
|
||||
class KMeansTest(unittest.TestCase):
    """Testing class for k-means spatial"""

    def setUp(self):
        """Load the clustering fixture and define sample parameters."""
        with open(fixture_file('kmeans.json')) as fixture:
            self.cluster_data = json.load(fixture)
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"}

    def test_kmeans(self):
        """Test that spatial k-means yields two labels with 20 rows each"""
        data = [{'xs': d['xs'],
                 'ys': d['ys'],
                 'ids': d['ids']} for d in self.cluster_data]

        random_seeds.set_random_seeds(1234)
        kmeans = Kmeans(FakeDataProvider(data))
        clusters = kmeans.spatial('subquery', 2)
        labels = [a[1] for a in clusters]
        c1 = [a for a in clusters if a[1] == 0]
        c2 = [a for a in clusters if a[1] == 1]

        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(c1), 20)
        self.assertEqual(len(c2), 20)
|
||||
|
||||
|
||||
class KMeansNonspatialTest(unittest.TestCase):
    """Testing class for k-means non-spatial"""

    def setUp(self):
        """Define sample query parameters."""
        self.params = {"subquery": "SELECT * FROM TABLE",
                       "n_clusters": 5}

    def test_kmeans_nonspatial(self):
        """
        test for k-means non-spatial
        """
        # data from:
        # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn-cluster-kmeans
        packed_columns = OrderedDict([("arr_col1", [1, 1, 1, 4, 4, 4]),
                                      ("arr_col2", [2, 4, 0, 2, 4, 0]),
                                      ("rowid", [1, 2, 3, 4, 5, 6])])
        data_raw = [packed_columns]

        random_seeds.set_random_seeds(1234)
        kmeans = Kmeans(FakeDataProvider(data_raw))
        clusters = kmeans.nonspatial('subquery', ['col1', 'col2'], 2)

        cl1 = clusters[0][0]
        cl2 = clusters[3][0]

        # the first three rows must share one label, the last three another
        for position, row in enumerate(clusters):
            expected_label = cl1 if position < 3 else cl2
            self.assertEqual(row[0], expected_label)

        # raises exception for no data
        with self.assertRaises(Exception):
            kmeans = Kmeans(FakeDataProvider([]))
            kmeans.nonspatial('subquery', ['col1', 'col2'], 2)
|
112
release/python/0.8.1/crankshaft/test/test_clustering_moran.py
Normal file
112
release/python/0.8.1/crankshaft/test/test_clustering_moran.py
Normal file
@ -0,0 +1,112 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
from helper import fixture_file
|
||||
from crankshaft.clustering import Moran
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
import json
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider that hands back canned rows instead of querying."""

    def __init__(self, mock_data):
        """Store `mock_data` as the result to return from `get_moran`."""
        self.mock_result = mock_data

    def get_moran(self, w_type, params):
        """Return the canned result; `w_type` and `params` are ignored."""
        return self.mock_result
|
||||
|
||||
|
||||
class MoranTest(unittest.TestCase):
    """Testing class for Moran's I functions"""

    def setUp(self):
        """Define sample parameters and load neighbor/known-value fixtures."""
        self.params = {"id_col": "cartodb_id",
                       "attr1": "andy",
                       "attr2": "jay_z",
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        self.params_markov = {"id_col": "cartodb_id",
                              "time_cols": ["_2013_dec", "_2014_jan",
                                            "_2014_feb"],
                              "subquery": "SELECT * FROM a_list",
                              "geom_col": "the_geom",
                              "num_ngbrs": 321}
        with open(fixture_file('neighbors.json')) as fixture:
            self.neighbors_data = json.load(fixture)
        with open(fixture_file('moran.json')) as fixture:
            self.moran_data = json.load(fixture)

    def test_map_quads(self):
        """Test map_quads"""
        from crankshaft.clustering import map_quads
        self.assertEqual(map_quads(1), 'HH')
        self.assertEqual(map_quads(2), 'LH')
        self.assertEqual(map_quads(3), 'LL')
        self.assertEqual(map_quads(4), 'HL')
        self.assertEqual(map_quads(33), None)
        self.assertEqual(map_quads('andy'), None)

    def test_quad_position(self):
        """Test quad_position"""
        from crankshaft.clustering import quad_position

        # builtin `int` instead of the deprecated `np.int` alias
        quads = np.array([1, 2, 3, 4], int)

        ans = np.array(['HH', 'LH', 'LL', 'HL'])
        test_ans = quad_position(quads)

        self.assertTrue((test_ans == ans).all())

    def test_local_stat(self):
        """Test Moran's I local"""
        data = [OrderedDict([('id', d['id']),
                             ('attr1', d['value']),
                             ('neighbors', d['neighbors'])])
                for d in self.neighbors_data]

        moran = Moran(FakeDataProvider(data))
        random_seeds.set_random_seeds(1234)
        result = moran.local_stat('subquery', 'value',
                                  'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[6]) for row in result]
        zipped_values = zip(result, self.moran_data)

        for ([res_quad, res_val], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
            self.assertEqual(res_quad, exp_quad)

    def test_moran_local_rate(self):
        """Test Moran's I rate"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'attr2': 1,
                 'neighbors': d['neighbors']} for d in self.neighbors_data]

        random_seeds.set_random_seeds(1234)
        moran = Moran(FakeDataProvider(data))
        result = moran.local_rate_stat('subquery', 'numerator', 'denominator',
                                       'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[6]) for row in result]

        zipped_values = zip(result, self.moran_data)

        # only the value column is compared here; the quad column is
        # unpacked but not asserted on
        for ([_res_quad, res_val], [exp_val, _exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)

    def test_moran(self):
        """Test Moran's I global"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1235)
        moran = Moran(FakeDataProvider(data))
        result = moran.global_stat('table', 'value',
                                   'knn', 5, 99, 'the_geom',
                                   'cartodb_id')

        result_moran = result[0][0]
        expected_moran = np.array([row[0] for row in self.moran_data]).mean()
        self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
|
83
release/python/0.8.1/crankshaft/test/test_pysal_utils.py
Normal file
83
release/python/0.8.1/crankshaft/test/test_pysal_utils.py
Normal file
@ -0,0 +1,83 @@
|
||||
import unittest
|
||||
|
||||
import crankshaft.pysal_utils as pu
|
||||
from crankshaft import random_seeds
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class PysalUtilsTest(unittest.TestCase):
    """Testing class for utility functions related to PySAL integrations"""

    def setUp(self):
        """Define the parameter sets shared by the query-building tests."""
        self.params1 = OrderedDict([("id_col", "cartodb_id"),
                                    ("attr1", "andy"),
                                    ("attr2", "jay_z"),
                                    ("subquery", "SELECT * FROM a_list"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])

        self.params2 = OrderedDict([("id_col", "cartodb_id"),
                                    ("numerator", "price"),
                                    ("denominator", "sq_meters"),
                                    ("subquery", "SELECT * FROM pecan"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])

        self.params3 = OrderedDict([("id_col", "cartodb_id"),
                                    ("numerator", "sq_meters"),
                                    ("denominator", "price"),
                                    ("subquery", "SELECT * FROM pecan"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])

        self.params_array = {"id_col": "cartodb_id",
                             "time_cols": ["_2013_dec", "_2014_jan",
                                           "_2014_feb"],
                             "subquery": "SELECT * FROM a_list",
                             "geom_col": "the_geom",
                             "num_ngbrs": 321}

    def test_query_attr_select(self):
        """Test query_attr_select"""

        ans1 = ('i."andy"::numeric As attr1, '
                'i."jay_z"::numeric As attr2, ')

        ans2 = ('i."price"::numeric As attr1, '
                'i."sq_meters"::numeric As attr2, ')

        ans3 = ('i."sq_meters"::numeric As attr1, '
                'i."price"::numeric As attr2, ')

        ans_array = ('i."_2013_dec"::numeric As attr1, '
                     'i."_2014_jan"::numeric As attr2, '
                     'i."_2014_feb"::numeric As attr3, ')

        cases = [(self.params1, ans1),
                 (self.params2, ans2),
                 (self.params3, ans3),
                 (self.params_array, ans_array)]
        for params, answer in cases:
            self.assertEqual(pu.query_attr_select(params), answer)

    def test_query_attr_where(self):
        """Test pu.query_attr_where"""

        ans1 = ('idx_replace."andy" IS NOT NULL AND '
                'idx_replace."jay_z" IS NOT NULL')

        ans_array = ('idx_replace."_2013_dec" IS NOT NULL AND '
                     'idx_replace."_2014_jan" IS NOT NULL AND '
                     'idx_replace."_2014_feb" IS NOT NULL')

        cases = [(self.params1, ans1),
                 (self.params_array, ans_array)]
        for params, answer in cases:
            self.assertEqual(pu.query_attr_where(params), answer)

    def test_get_attributes(self):
        """Test get_attributes"""

        # need to add tests

        self.assertEqual(True, True)

    def test_get_weight(self):
        """Test get_weight"""

        self.assertEqual(True, True)
|
130
release/python/0.8.1/crankshaft/test/test_regression_gwr.py
Normal file
130
release/python/0.8.1/crankshaft/test/test_regression_gwr.py
Normal file
@ -0,0 +1,130 @@
|
||||
import unittest
|
||||
import json
|
||||
import numpy as np
|
||||
|
||||
|
||||
from crankshaft import random_seeds
|
||||
from helper import fixture_file
|
||||
from crankshaft.regression import GWR
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider that hands back canned rows for GWR and GWR predict."""

    def __init__(self, mocked_result):
        """Store `mocked_result` as the rows every getter returns."""
        self.mocked_result = mocked_result

    def get_gwr(self, params):
        """Return the canned result; `params` is ignored."""
        return self.mocked_result

    def get_gwr_predict(self, params):
        """Return the canned result; `params` is ignored."""
        return self.mocked_result
|
||||
|
||||
|
||||
class GWRTest(unittest.TestCase):
    """Testing class for geographically weighted regression (gwr)"""

    def setUp(self):
        """
        fixture packed from canonical GWR georgia dataset using the
        following query:
            SELECT array_agg(x) As x,
                   array_agg(y) As y,
                   array_agg(pctbach) As dep_var,
                   array_agg(pctrural) As attr1,
                   array_agg(pctpov) As attr2,
                   array_agg(pctblack) As attr3,
                   array_agg(areakey) As rowid
            FROM g_utm
            WHERE pctbach is not NULL AND
                  pctrural IS NOT NULL AND
                  pctpov IS NOT NULL AND
                  pctblack IS NOT NULL
        """
        import copy
        # data packed from https://github.com/TaylorOshan/pysal/blob/1d6af33bda46b1d623f70912c56155064463383f/pysal/examples/georgia/GData_utm.csv
        with open(fixture_file('gwr_packed_data.json')) as fixture:
            self.data = json.load(fixture)

        # data packed from https://github.com/TaylorOshan/pysal/blob/a44c5541e2e0d10a99ff05edc1b7f81b70f5a82f/pysal/examples/georgia/georgia_BS_NN_listwise.csv
        with open(fixture_file('gwr_packed_knowns.json')) as fixture:
            self.knowns = json.load(fixture)

        # data for GWR prediction
        self.data_predict = copy.deepcopy(self.data)
        self.ids_of_unknowns = [13083, 13009, 13281, 13115, 13247, 13169]
        self.idx_ids_of_unknowns = [self.data_predict[0]['rowid'].index(idx)
                                    for idx in self.ids_of_unknowns]

        # blank out the dependent variable for the rows to be predicted
        for idx in self.idx_ids_of_unknowns:
            self.data_predict[0]['dep_var'][idx] = None

        self.predicted_knowns = {13009: 10.879,
                                 13083: 4.5259,
                                 13115: 9.4022,
                                 13169: 6.0793,
                                 13247: 8.1608,
                                 13281: 13.886}

        # params, with ind_vars in same ordering as query above
        self.params = {'subquery': 'select * from table',
                       'dep_var': 'pctbach',
                       'ind_vars': ['pctrural', 'pctpov', 'pctblack'],
                       'bw': 90.000,
                       'fixed': False,
                       'geom_col': 'the_geom',
                       'id_col': 'areakey'}

    def test_gwr(self):
        """Test GWR pctpov coefficients and pctrural t-values against knowns"""
        gwr = GWR(FakeDataProvider(self.data))
        gwr_resp = gwr.gwr(self.params['subquery'],
                           self.params['dep_var'],
                           self.params['ind_vars'],
                           bw=self.params['bw'],
                           fixed=self.params['fixed'])

        # unpack response
        coeffs, stand_errs, t_vals, t_vals_filtered, predicteds, \
            residuals, r_squareds, bws, rowids = zip(*gwr_resp)

        # prepare for comparison
        coeff_known_pctpov = self.knowns['est_pctpov']
        tval_known_pctrural = self.knowns['t_pctrural']
        ids = self.knowns['area_key']

        # test pctpov coefficient estimates
        for idx, val in enumerate(coeff_known_pctpov):
            resp_idx = rowids.index(ids[idx])
            self.assertAlmostEqual(val,
                                   json.loads(coeffs[resp_idx])['pctpov'],
                                   places=4)
        # test pctrural tvals
        for idx, val in enumerate(tval_known_pctrural):
            resp_idx = rowids.index(ids[idx])
            self.assertAlmostEqual(val,
                                   json.loads(t_vals[resp_idx])['pctrural'],
                                   places=4)

    def test_gwr_predict(self):
        """Testing for GWR_Predict"""
        gwr = GWR(FakeDataProvider(self.data_predict))
        gwr_resp = gwr.gwr_predict(self.params['subquery'],
                                   self.params['dep_var'],
                                   self.params['ind_vars'],
                                   bw=self.params['bw'],
                                   fixed=self.params['fixed'])

        # unpack response
        coeffs, stand_errs, t_vals, \
            r_squareds, predicteds, rowid = zip(*gwr_resp)
        threshold = 0.01

        # one response row is checked per unknown, by position in the response
        for i, _ in enumerate(self.idx_ids_of_unknowns):
            known_val = self.predicted_knowns[rowid[i]]
            predicted_val = predicteds[i]
            # relative error against the known prediction
            test_val = abs(known_val - predicted_val) / known_val
            self.assertLess(test_val, threshold)
|
64
release/python/0.8.1/crankshaft/test/test_segmentation.py
Normal file
64
release/python/0.8.1/crankshaft/test/test_segmentation.py
Normal file
@ -0,0 +1,64 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
from helper import plpy, fixture_file
|
||||
import crankshaft.segmentation as segmentation
|
||||
import json
|
||||
|
||||
class SegmentationTest(unittest.TestCase):
    """Testing class for segmentation functions"""

    def setUp(self):
        """Clear messages and query results recorded on the mock plpy."""
        plpy._reset()

    def generate_random_data(self, n_samples, random_state, row_type=False):
        """Generate three random features and a target derived from them.

        With `row_type` true, returns a list of per-row `{'features': ...}`
        dicts together with the target array. Otherwise returns a one-element
        list holding a dict of whole columns keyed by
        'x1', 'x2', 'x3', 'target' and 'cartodb_id'.
        """
        x1 = random_state.uniform(size=n_samples)
        x2 = random_state.uniform(size=n_samples)
        x3 = random_state.randint(0, 4, size=n_samples)

        y = x1 + x2 * x2 + x3
        cartodb_id = range(len(x1))

        if row_type:
            return [{'features': vals} for vals in zip(x1, x2, x3)], y
        else:
            return [dict(zip(['x1', 'x2', 'x3', 'target', 'cartodb_id'],
                             [x1, x2, x3, y, cartodb_id]))]

    def test_replace_nan_with_mean(self):
        # TODO: this test only builds its input and asserts nothing yet
        test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])

    def test_create_and_predict_segment(self):
        """Train on random data and check prediction count and error bounds"""
        n_samples = 1000

        random_state_train = np.random.RandomState(13)
        random_state_test = np.random.RandomState(134)
        training_data = self.generate_random_data(n_samples,
                                                  random_state_train)
        test_data, test_y = self.generate_random_data(n_samples,
                                                      random_state_test,
                                                      row_type=True)

        ids = [{'cartodb_ids': range(len(test_data))}]
        rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]

        # raw strings keep the regex backslashes without relying on Python
        # passing unknown escape sequences through unchanged
        plpy._define_result(
            r'select \* from \(select \* from training\) a limit 1', rows)
        plpy._define_result(
            r'.*from \(select \* from training\) as a', training_data)
        plpy._define_result(
            r'select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',
            ids)
        plpy._define_result(r'.*select \* from test.*', test_data)

        model_parameters = {'n_estimators': 1200,
                            'max_depth': 3,
                            'subsample': 0.5,
                            'learning_rate': 0.01,
                            'min_samples_leaf': 1}

        result = segmentation.create_and_predict_segment(
            'select * from training',
            'target',
            'select * from test',
            model_parameters)

        prediction = [r[1] for r in result]

        # root-mean-square error of the predictions against the known targets
        rmse = np.sqrt(np.mean(np.square(np.array(prediction) -
                                         np.array(test_y))))

        self.assertEqual(len(result), len(test_data))
        self.assertTrue(result[0][2] < 0.01)
        self.assertTrue(rmse < 0.5 * np.mean(test_y))
|
349
release/python/0.8.1/crankshaft/test/test_space_time_dynamics.py
Normal file
349
release/python/0.8.1/crankshaft/test/test_space_time_dynamics.py
Normal file
@ -0,0 +1,349 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
|
||||
import unittest
|
||||
|
||||
|
||||
from helper import fixture_file
|
||||
|
||||
from crankshaft.space_time_dynamics import Markov
|
||||
import crankshaft.space_time_dynamics as std
|
||||
from crankshaft import random_seeds
|
||||
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||
import json
|
||||
|
||||
|
||||
class FakeDataProvider(AnalysisDataProvider):
    """Data provider that hands back canned rows instead of querying."""

    def __init__(self, data):
        """Store `data` as the result to return from `get_markov`."""
        self.mock_result = data

    def get_markov(self, w_type, params):
        """Return the canned result; `w_type` and `params` are ignored."""
        return self.mock_result
|
||||
|
||||
|
||||
class SpaceTimeTests(unittest.TestCase):
|
||||
"""Testing class for Markov Functions."""
|
||||
|
||||
def setUp(self):
    """Define sample parameters, load fixtures and build reference arrays."""
    self.params = {"id_col": "cartodb_id",
                   "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'],
                   "subquery": "SELECT * FROM a_list",
                   "geom_col": "the_geom",
                   "num_ngbrs": 321}
    with open(fixture_file('neighbors_markov.json')) as fixture:
        self.neighbors_data = json.load(fixture)
    with open(fixture_file('markov.json')) as fixture:
        self.markov_data = json.load(fixture)

    # 10 x 10 array whose column i is filled with the value i
    self.time_data = np.array([i * np.ones(10, dtype=float)
                               for i in range(10)]).T

    self.transition_matrix = np.array([
        [[0.96341463, 0.0304878, 0.00609756, 0., 0.],
         [0.06040268, 0.83221477, 0.10738255, 0., 0.],
         [0., 0.14, 0.74, 0.12, 0.],
         [0., 0.03571429, 0.32142857, 0.57142857, 0.07142857],
         [0., 0., 0., 0.16666667, 0.83333333]],
        [[0.79831933, 0.16806723, 0.03361345, 0., 0.],
         [0.0754717, 0.88207547, 0.04245283, 0., 0.],
         [0.00537634, 0.06989247, 0.8655914, 0.05913978, 0.],
         [0., 0., 0.06372549, 0.90196078, 0.03431373],
         [0., 0., 0., 0.19444444, 0.80555556]],
        [[0.84693878, 0.15306122, 0., 0., 0.],
         [0.08133971, 0.78947368, 0.1291866, 0., 0.],
         [0.00518135, 0.0984456, 0.79274611, 0.0984456, 0.00518135],
         [0., 0., 0.09411765, 0.87058824, 0.03529412],
         [0., 0., 0., 0.10204082, 0.89795918]],
        [[0.8852459, 0.09836066, 0., 0.01639344, 0.],
         [0.03875969, 0.81395349, 0.13953488, 0., 0.00775194],
         [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
         [0., 0.02339181, 0.12865497, 0.75438596, 0.09356725],
         [0., 0., 0., 0.09661836, 0.90338164]],
        [[0.33333333, 0.66666667, 0., 0., 0.],
         [0.0483871, 0.77419355, 0.16129032, 0.01612903, 0.],
         [0.01149425, 0.16091954, 0.74712644, 0.08045977, 0.],
         [0., 0.01036269, 0.06217617, 0.89637306, 0.03108808],
         [0., 0., 0., 0.02352941, 0.97647059]]]
    )
|
||||
|
||||
def test_spatial_markov(self):
    """Test Spatial Markov."""
    # yearly columns y1995 .. y2009 map onto attr1 .. attr15
    years = ['y%d' % year for year in range(1995, 2010)]
    data = []
    for d in self.neighbors_data:
        row = {'id': d['id'], 'neighbors': d['neighbors']}
        for position, year in enumerate(years, 1):
            row['attr%d' % position] = d[year]
        data.append(row)

    markov = Markov(FakeDataProvider(data))
    random_seeds.set_random_seeds(1234)

    result = markov.spatial_trend('subquery', years,
                                  5, 'knn', 5, 0, 'the_geom',
                                  'cartodb_id')

    self.assertTrue(result is not None)
    result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
    expected = self.markov_data
    # only the first column (trend) is asserted on; the remaining columns
    # are unpacked but not compared
    for ([res_trend, res_up, res_down, res_vol, res_id],
         [exp_trend, exp_up, exp_down, exp_vol, exp_id]
         ) in zip(result, expected):
        self.assertAlmostEqual(res_trend, exp_trend)
|
||||
|
||||
def test_get_time_data(self):
    """Test get_time_data"""
    # yearly columns y1995 .. y2009 map onto attr1 .. attr15
    years = ['y%d' % year for year in range(1995, 2010)]
    data = [dict(('attr%d' % position, d[year])
                 for position, year in enumerate(years, 1))
            for d in self.neighbors_data]

    result = std.get_time_data(data, years)

    # expected was prepared from PySAL example:
    # f = ps.open(ps.examples.get_path("usjoin.csv"))
    # pci = np.array([f.by_col[str(y)]
    #                for y in range(1995, 2010)]).transpose()
    # rpci = pci / (pci.mean(axis = 0))

    expected = np.array(
        [[0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154,
          0.83271652, 0.83786314, 0.85012593, 0.85509656, 0.86416612,
          0.87119375, 0.86302631, 0.86148267, 0.86252252, 0.86746356],
         [0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388,
          0.90746978, 0.89830489, 0.89431991, 0.88924794, 0.89815176,
          0.91832091, 0.91706054, 0.90139505, 0.87897455, 0.86216858],
         [0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522,
          0.78964559, 0.80584442, 0.8084998, 0.82258551, 0.82668196,
          0.82373724, 0.81814804, 0.83675961, 0.83574199, 0.84647177],
         [1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841,
          1.14506948, 1.12151133, 1.11160697, 1.10888621, 1.11399806,
          1.12168029, 1.13164797, 1.12958508, 1.11371818, 1.09936775],
         [1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025,
          1.16898201, 1.17212488, 1.14752303, 1.11843284, 1.11024964,
          1.11943471, 1.11736468, 1.10863242, 1.09642516, 1.07762337],
         [1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684,
          1.44184737, 1.44782832, 1.41978227, 1.39092208, 1.4059372,
          1.40788646, 1.44052766, 1.45241216, 1.43306098, 1.4174431],
         [1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149,
          1.10888138, 1.11856629, 1.13062931, 1.11944984, 1.12446239,
          1.11671008, 1.10880034, 1.08401709, 1.06959206, 1.07875225],
         [1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545,
          0.99854316, 0.9880258, 0.99669587, 0.99327676, 1.01400905,
          1.03176742, 1.040511, 1.01749645, 0.9936394, 0.98279746],
         [0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845,
          0.99127006, 0.97925917, 0.9683482, 0.95335147, 0.93694787,
          0.94308213, 0.92232874, 0.91284091, 0.89689833, 0.88928858],
         [0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044,
          0.8578708, 0.86036185, 0.86107306, 0.8500772, 0.86981998,
          0.86837929, 0.87204141, 0.86633032, 0.84946077, 0.83287146],
         [1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624,
          1.14450183, 1.12349752, 1.12596664, 1.12213996, 1.1119989,
          1.10257792, 1.10491258, 1.11059842, 1.10509795, 1.10020097],
         [0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687,
          0.95831051, 0.94480909, 0.94804195, 0.95430286, 0.94103989,
          0.92122519, 0.91010201, 0.89280392, 0.89298243, 0.89165385],
         [0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647,
          0.9480927, 0.93539182, 0.95388718, 0.94597005, 0.96918424,
          0.94781281, 0.93466815, 0.94281559, 0.96520315, 0.96715441],
         [0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897,
          0.98687073, 0.99237486, 0.98209969, 0.9877653, 0.97399471,
          0.96910087, 0.98416665, 0.98423613, 0.99823861, 0.99545704],
         [0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012,
          0.86191535, 0.84981451, 0.85472102, 0.84564835, 0.83998883,
          0.83478547, 0.82803648, 0.8198736, 0.82265395, 0.8399404],
         [0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136,
          0.82785597, 0.86008789, 0.86776298, 0.86720209, 0.8676334,
          0.89179317, 0.94202108, 0.9422231, 0.93902708, 0.94479184],
         [0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238,
          0.90906632, 0.92693339, 0.93695966, 0.94242697, 0.94338265,
          0.91981796, 0.91108804, 0.90543476, 0.91737138, 0.94793657],
         [1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723,
          1.20172869, 1.21328691, 1.22624778, 1.22397075, 1.23857042,
          1.24419893, 1.23929384, 1.23418676, 1.23626739, 1.26754398],
         [1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667,
          1.34790023, 1.34399863, 1.32575181, 1.30795492, 1.30544841,
          1.30303302, 1.32107766, 1.32936244, 1.33001241, 1.33288462],
         [1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093,
          1.05059016, 1.03405057, 1.02747623, 1.03162734, 0.9961416,
          0.97356208, 0.94241549, 0.92754547, 0.92549227, 0.92138102],
         [1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264,
          1.13889622, 1.12442212, 1.13367018, 1.13982256, 1.14029944,
          1.11979401, 1.10905389, 1.10577769, 1.11166825, 1.09985155],
         [0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284,
          0.74480073, 0.76098396, 0.76156903, 0.76651952, 0.76533288,
          0.78205934, 0.76842416, 0.77487118, 0.77768683, 0.78801192],
         [0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803,
          0.97370819, 0.96419154, 0.97209861, 0.97441313, 0.96356162,
          0.94745352, 0.93965462, 0.93069645, 0.94020973, 0.94358232],
         [0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801,
          0.80071489, 0.83358256, 0.83451613, 0.85175032, 0.85954307,
          0.86790024, 0.87170334, 0.87863799, 0.87497981, 0.87888675],
         [0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619,
          0.98733195, 0.99644997, 0.99669587, 1.02559097, 1.01116651,
          0.99988024, 0.97906749, 0.99323123, 1.00204939, 0.99602148],
         [1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683,
          1.08312397, 1.05192626, 1.04230892, 1.05577278, 1.08569751,
          1.12443486, 1.08891079, 1.08603695, 1.05997314, 1.02160943],
         [1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272,
          1.18257029, 1.16226243, 1.16009196, 1.14467789, 1.14820235,
          1.12386598, 1.12680236, 1.12357937, 1.1159258, 1.12570828],
         [1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667,
          1.31210239, 1.29989156, 1.29203193, 1.27183516, 1.26830786,
          1.2617743, 1.28656675, 1.29734097, 1.29390205, 1.29345446],
         [0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864,
          0.78772975, 0.82848011, 0.8259679, 0.82435705, 0.83108634,
          0.84373784, 0.83891093, 0.84349247, 0.85637272, 0.86539395],
         [1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626,
          1.2256767, 1.21126648, 1.19377804, 1.18355337, 1.19674434,
          1.21536573, 1.23653297, 1.27962009, 1.27968392, 1.25907738],
         [0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282,
          0.96480308, 0.94686376, 0.93679073, 0.92540049, 0.92988835,
          0.93442917, 0.92100464, 0.91475304, 0.90249622, 0.9021363],
         [0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368,
          0.88937573, 0.894401, 0.90448993, 0.95495898, 0.92698333,
          0.94745352, 0.92562488, 0.96635366, 1.02520312, 1.0394296],
         [1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073,
          1.00759019, 0.99192968, 0.99747298, 0.99550759, 0.97583768,
          0.9610168, 0.94779638, 0.93759089, 0.93353431, 0.94121705],
         [0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613,
          0.83434854, 0.85813595, 0.84667961, 0.84374558, 0.85951183,
          0.87194227, 0.89455097, 0.88283929, 0.90349491, 0.90600675],
         [1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086,
          1.00581626, 0.98850522, 0.99291168, 0.98983209, 0.97511924,
          0.96134615, 0.96382634, 0.95011401, 0.9434686, 0.94637765],
         [1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857,
          1.04800023, 1.03024941, 1.04200483, 1.0402554, 1.03296979,
          1.02191682, 1.02476275, 1.02347523, 1.02517684, 1.04359571],
         [1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043,
          1.0531801, 1.07452771, 1.09383478, 1.1052447, 1.10322136,
          1.09167939, 1.08772756, 1.08859544, 1.09177338, 1.1096083],
         [0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809,
          0.86287327, 0.85169796, 0.85411285, 0.84886336, 0.84517414,
          0.84843858, 0.84488343, 0.83374329, 0.82812044, 0.82878599],
         [0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286,
          0.92652175, 0.94278865, 0.93682452, 0.98655146, 0.992237,
          0.9798497, 0.93869677, 0.96947771, 1.00362626, 0.98102351],
         [0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967,
          0.93092109, 0.92662519, 0.93412152, 0.93501274, 0.92879506,
          0.92110542, 0.91035556, 0.90430364, 0.89994694, 0.90073864],
         [0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824,
          0.98882205, 0.97662234, 0.95601578, 0.94905385, 0.94934888,
          0.97152609, 0.97163004, 0.9700702, 0.97158948, 0.95884908],
         [0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751,
          0.84818516, 0.85265681, 0.84502402, 0.82645665, 0.81743586,
          0.83550406, 0.83338919, 0.83511679, 0.82136617, 0.80921874],
         [0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441,
          0.95440787, 0.96364363, 0.96804412, 0.97136214, 0.97583768,
          0.95571724, 0.96895368, 0.97001634, 0.97082733, 0.98782366],
         [1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249,
          1.10558188, 1.1214086, 1.12292577, 1.13021031, 1.13342735,
          1.14686068, 1.14502975, 1.14474747, 1.14084037, 1.16142926],
         [1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863,
          1.11856702, 1.09764283, 1.08815849, 1.08044313, 1.09278827,
          1.07003204, 1.08398066, 1.09831768, 1.09298232, 1.09176125],
         [0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744,
          0.77751194, 0.79902974, 0.81437881, 0.80788828, 0.79603865,
          0.78966436, 0.79949807, 0.80172182, 0.82168155, 0.85587911],
         [1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561,
          1.00162979, 0.99860739, 1.00814981, 1.00574316, 0.99030032,
          0.97682565, 0.97292596, 0.96519561, 0.96173403, 0.95890284],
         [0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289,
          0.96608031, 0.99727185, 1.00781194, 1.03484236, 1.05333619,
          1.0983263, 1.1704974, 1.17025154, 1.18730553, 1.14242645]])

    # check type and shape first so a mismatch is reported clearly instead
    # of surfacing as a broadcasting problem inside np.allclose
    self.assertIsInstance(result, np.ndarray)
    self.assertEqual(result.shape, expected.shape)
    self.assertTrue(np.allclose(result, expected))
|
||||
|
||||
def test_rebin_data(self):
    """Test rebin_data

    Compares ``std.rebin_data`` with hand-computed bin averages for a
    bin width that divides the series length evenly (2) and for one that
    leaves a short trailing bin (3).
    """
    # NOTE(review): the expected values presume self.time_data has 10
    # rows, each holding the sequence 0..9 -- confirm against setUp.
    n_rows = 10

    # Bin width 2 (even case, 10 % 2 == 0, every bin is full):
    #   (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2
    #   = 0.5, 2.5, 4.5, 6.5, 8.5
    even_means = np.arange(0, 10, 2) + 0.5
    ans_even = np.tile(even_means, (n_rows, 1))
    rebinned_even = std.rebin_data(self.time_data, 2)
    self.assertTrue(np.array_equal(rebinned_even, ans_even))

    # Bin width 3 (uneven case, 10 % 3 == 1, last bin has one sample):
    #   (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1
    #   = 1, 4, 7, 9
    odd_means = np.array([1.0, 4.0, 7.0, 9.0])
    ans_odd = np.tile(odd_means, (n_rows, 1))
    rebinned_odd = std.rebin_data(self.time_data, 3)
    self.assertTrue(np.array_equal(rebinned_odd, ans_odd))
|
||||
|
||||
def test_get_prob_dist(self):
    """Test get_prob_dist

    Feeds ``std.get_prob_dist`` the fixture transition matrix together
    with paired lag/unit class indices and requires an exact match with
    the stored expected rows.
    """
    # One expected row per (lag index, unit index) pair below.
    expected_rows = [
        [0.0754717, 0.88207547, 0.04245283, 0., 0.],
        [0., 0., 0.09411765, 0.87058824, 0.03529412],
        [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
        [0., 0., 0., 0.02352941, 0.97647059],
    ]
    answer = np.array(expected_rows)

    lag_indices = np.array([1, 2, 3, 4])
    unit_indices = np.array([1, 3, 2, 4])

    result = std.get_prob_dist(self.transition_matrix,
                               lag_indices,
                               unit_indices)

    matches = np.array_equal(result, answer)
    self.assertTrue(matches)
|
||||
|
||||
def test_get_prob_stats(self):
    """Test get_prob_stats

    Calls ``std.get_prob_stats`` on a fixed probability table and checks
    the first four items of its result (up, down, trend, volatility)
    against precomputed values, within ``np.allclose`` tolerance.
    """
    probs = np.array([
        [0.0754717, 0.88207547, 0.04245283, 0., 0.],
        [0., 0., 0.09411765, 0.87058824, 0.03529412],
        [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
        [0., 0., 0., 0.02352941, 0.97647059]
    ])
    unit_indices = np.array([1, 3, 2, 4])

    # Expected outputs, listed in the order they appear in the result.
    answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.])
    answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941])
    # Trend is kept as explicit numerator / denominator pairs.
    trend_numerators = np.array([-0.03301887, -0.05882353,
                                 0.02475248, -0.02352941])
    trend_denominators = np.array([0.88207547, 0.87058824,
                                   0.77722772, 0.97647059])
    answer_trend = trend_numerators / trend_denominators
    answer_volatility = np.array([0.34221495, 0.33705421,
                                  0.29226542, 0.38834223])
    answers = (answer_up, answer_down, answer_trend, answer_volatility)

    result = std.get_prob_stats(probs, unit_indices)

    # Pull out all four components before asserting anything, so a short
    # result fails on indexing rather than part-way through the checks.
    observed = [result[position] for position in range(len(answers))]

    for got, wanted in zip(observed, answers):
        self.assertTrue(np.allclose(got, wanted))
|
@ -1,5 +1,5 @@
|
||||
comment = 'CartoDB Spatial Analysis extension'
|
||||
default_version = '0.8.0'
|
||||
default_version = '0.8.1'
|
||||
requires = 'plpythonu, postgis'
|
||||
superuser = true
|
||||
schema = cdb_crankshaft
|
||||
|
Loading…
Reference in New Issue
Block a user