Expand outputs for Python Moran; adjust existing functions to return previous values
This commit is contained in:
Andy Eschbacher 2018-03-01 09:25:56 -05:00
parent 0d050a2206
commit 7b1ce57abc
2 changed files with 103 additions and 35 deletions

View File

@ -39,7 +39,7 @@ AS $$
result = moran.local_stat(subquery, column_name, w_type, result = moran.local_stat(subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col) num_ngbrs, permutations, geom_col, id_col)
# remove spatial lag # remove spatial lag
return [r[:-1] for r in result] return [(r[6], r[0], r[1], r[7], r[5]) for r in result]
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE; $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
-- Moran's I Local (internal function) -- Moran's I Local (internal function)
@ -53,17 +53,21 @@ CREATE OR REPLACE FUNCTION
geom_col TEXT, geom_col TEXT,
id_col TEXT) id_col TEXT)
RETURNS TABLE ( RETURNS TABLE (
moran NUMERIC,
quads TEXT, quads TEXT,
significance NUMERIC, significance NUMERIC,
rowid INT, spatial_lag NUMERIC,
vals NUMERIC, spatial_lag_std NUMERIC,
spatial_lag NUMERIC) orig_val NUMERIC,
orig_val_std NUMERIC,
moran_stat NUMERIC,
rowid INT)
AS $$ AS $$
from crankshaft.clustering import Moran
moran = Moran() from crankshaft.clustering import Moran
return moran.local_stat(subquery, column_name, w_type, moran = Moran()
num_ngbrs, permutations, geom_col, id_col) return moran.local_stat(subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE; $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
@ -79,15 +83,19 @@ CREATE OR REPLACE FUNCTION
geom_col TEXT DEFAULT 'the_geom', geom_col TEXT DEFAULT 'the_geom',
id_col TEXT DEFAULT 'cartodb_id') id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE ( RETURNS TABLE (
moran NUMERIC,
quads TEXT, quads TEXT,
significance NUMERIC, significance NUMERIC,
rowid INT, spatial_lag NUMERIC,
vals NUMERIC, spatial_lag_std NUMERIC,
spatial_lag NUMERIC) orig_val NUMERIC,
orig_val_std NUMERIC,
moran_stat NUMERIC,
rowid INT)
AS $$ AS $$
SELECT moran, quads, significance, rowid, vals, spatial_lag SELECT
quads, significance, spatial_lag, spatial_lag_std,
orig_val, orig_val_std, moran_stat, rowid
FROM cdb_crankshaft._CDB_MoransILocal( FROM cdb_crankshaft._CDB_MoransILocal(
subquery, column_name, w_type, subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col); num_ngbrs, permutations, geom_col, id_col);
@ -214,7 +222,7 @@ AS $$
# TODO: use named parameters or a dictionary # TODO: use named parameters or a dictionary
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
# remove spatial lag # remove spatial lag
return [r[:-1] for r in result] return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE; $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
-- Moran's I Local Rate (public-facing function) - DEPRECATED -- Moran's I Local Rate (public-facing function) - DEPRECATED
@ -250,17 +258,27 @@ CREATE OR REPLACE FUNCTION
id_col TEXT) id_col TEXT)
RETURNS RETURNS
TABLE( TABLE(
moran NUMERIC,
quads TEXT, quads TEXT,
significance NUMERIC, significance NUMERIC,
rowid INT, spatial_lag NUMERIC,
vals NUMERIC, spatial_lag_std NUMERIC,
spatial_lag NUMERIC) orig_val NUMERIC,
orig_val_std NUMERIC,
moran_stat NUMERIC,
rowid INT)
AS $$ AS $$
from crankshaft.clustering import Moran from crankshaft.clustering import Moran
moran = Moran() moran = Moran()
# TODO: use named parameters or a dictionary return moran.local_rate_stat(
return moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) subquery,
numerator,
denominator,
w_type,
num_ngbrs,
permutations,
geom_col,
id_col
)
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE; $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
-- Moran's I Rate -- Moran's I Rate
@ -277,18 +295,22 @@ CREATE OR REPLACE FUNCTION
id_col TEXT DEFAULT 'cartodb_id') id_col TEXT DEFAULT 'cartodb_id')
RETURNS RETURNS
TABLE( TABLE(
moran NUMERIC,
quads TEXT, quads TEXT,
significance NUMERIC, significance NUMERIC,
rowid INT, spatial_lag NUMERIC,
vals NUMERIC, spatial_lag_std NUMERIC,
spatial_lag NUMERIC) orig_val NUMERIC,
orig_val_std NUMERIC,
moran_stat NUMERIC,
rowid INT)
AS $$ AS $$
SELECT moran, quads, significance, rowid, vals, spatial_lag SELECT
FROM cdb_crankshaft._CDB_MoransILocalRate( quads, significance, spatial_lag, spatial_lag_std,
subquery, numerator, denominator, w_type, orig_val, orig_val_std, moran_stat, rowid
num_ngbrs, permutations, geom_col, id_col); FROM cdb_crankshaft._CDB_MoransILocalRate(
subquery, numerator, denominator, w_type,
num_ngbrs, permutations, geom_col, id_col);
$$ LANGUAGE SQL VOLATILE PARALLEL UNSAFE; $$ LANGUAGE SQL VOLATILE PARALLEL UNSAFE;

View File

@ -1,6 +1,9 @@
""" """
Moran's I geostatistics (global clustering & outliers presence) Moran's I geostatistics (global clustering & outliers presence)
Functionality relies PySAL: http://pysal.readthedocs.io/en/latest/ Functionality relies on a combination of `PySAL
<http://pysal.readthedocs.io/en/latest/>`__ and the data provided in
the class instantiation (which defaults to PostgreSQL's plpy module's `database
access functions <https://www.postgresql.org/docs/10/static/plpython.html>`__).
""" """
from collections import OrderedDict from collections import OrderedDict
@ -97,6 +100,18 @@ class Moran(object):
geom_col (str): Name of the geometry column in the dataset for geom_col (str): Name of the geometry column in the dataset for
finding the spatial neighborhoods. finding the spatial neighborhoods.
id_col (str): Row index for each value. Usually the database index. id_col (str): Row index for each value. Usually the database index.
Returns:
list of tuples: Where each tuple consists of the following values:
- quadrants classification (one of `HH`, `HL`, `LL`, or `LH`)
- p-value
- spatial lag
- standardized spatial lag (centered on the mean, normalized by the
standard deviation)
- original value
- standardized value
- Moran's I statistic
- original row index
""" """
# geometries with attributes that are null are ignored # geometries with attributes that are null are ignored
@ -122,9 +137,18 @@ class Moran(object):
# calculate spatial lag # calculate spatial lag
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y) lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, return zip(
lisa.y, lag) quads,
lisa.p_sim,
lag,
lag_std,
lisa.y,
lisa.z,
lisa.Is,
weight.id_order
)
def global_rate_stat(self, subquery, numerator, denominator, def global_rate_stat(self, subquery, numerator, denominator,
w_type, num_ngbrs, permutations, geom_col, id_col): w_type, num_ngbrs, permutations, geom_col, id_col):
@ -196,6 +220,18 @@ class Moran(object):
geom_col (str): Name of the geometry column in the dataset for geom_col (str): Name of the geometry column in the dataset for
finding the spatial neighborhoods. finding the spatial neighborhoods.
id_col (str): Row index for each value. Usually the database index. id_col (str): Row index for each value. Usually the database index.
Returns:
list of tuples: Where each tuple consists of the following values:
- quadrants classification (one of `HH`, `HL`, `LL`, or `LH`)
- p-value
- spatial lag
- standardized spatial lag (centered on the mean, normalized by the
standard deviation)
- original value (roughly numerator divided by denominator)
- standardized value
- Moran's I statistic
- original row index
""" """
# geometries with values that are null are ignored # geometries with values that are null are ignored
# resulting in a collection of not as near neighbors # resulting in a collection of not as near neighbors
@ -224,8 +260,18 @@ class Moran(object):
# spatial lag # spatial lag
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y) lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y, lag) return zip(
quads,
lisa.p_sim,
lag,
lag_std,
lisa.y,
lisa.z,
lisa.Is,
weight.id_order
)
def local_bivariate_stat(self, subquery, attr1, attr2, def local_bivariate_stat(self, subquery, attr1, attr2,
permutations, geom_col, id_col, permutations, geom_col, id_col,