expand outputs for python moran, adjust existing functions to return

previous values
This commit is contained in:
Andy Eschbacher 2018-03-01 09:25:56 -05:00
parent 0d050a2206
commit 7b1ce57abc
2 changed files with 103 additions and 35 deletions

View File

@ -39,7 +39,7 @@ AS $$
result = moran.local_stat(subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col)
# remove spatial lag
return [r[:-1] for r in result]
return [(r[6], r[0], r[1], r[7], r[5]) for r in result]
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
-- Moran's I Local (internal function)
@ -53,17 +53,21 @@ CREATE OR REPLACE FUNCTION
geom_col TEXT,
id_col TEXT)
RETURNS TABLE (
moran NUMERIC,
quads TEXT,
significance NUMERIC,
rowid INT,
vals NUMERIC,
spatial_lag NUMERIC)
spatial_lag NUMERIC,
spatial_lag_std NUMERIC,
orig_val NUMERIC,
orig_val_std NUMERIC,
moran_stat NUMERIC,
rowid INT)
AS $$
from crankshaft.clustering import Moran
moran = Moran()
return moran.local_stat(subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col)
from crankshaft.clustering import Moran
moran = Moran()
return moran.local_stat(subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
@ -79,15 +83,19 @@ CREATE OR REPLACE FUNCTION
geom_col TEXT DEFAULT 'the_geom',
id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (
moran NUMERIC,
quads TEXT,
significance NUMERIC,
rowid INT,
vals NUMERIC,
spatial_lag NUMERIC)
spatial_lag NUMERIC,
spatial_lag_std NUMERIC,
orig_val NUMERIC,
orig_val_std NUMERIC,
moran_stat NUMERIC,
rowid INT)
AS $$
SELECT moran, quads, significance, rowid, vals, spatial_lag
SELECT
quads, significance, spatial_lag, spatial_lag_std,
orig_val, orig_val_std, moran_stat, rowid
FROM cdb_crankshaft._CDB_MoransILocal(
subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col);
@ -214,7 +222,7 @@ AS $$
# TODO: use named parameters or a dictionary
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
# remove spatial lag
return [r[:-1] for r in result]
return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
-- Moran's I Local Rate (public-facing function) - DEPRECATED
@ -250,17 +258,27 @@ CREATE OR REPLACE FUNCTION
id_col TEXT)
RETURNS
TABLE(
moran NUMERIC,
quads TEXT,
significance NUMERIC,
rowid INT,
vals NUMERIC,
spatial_lag NUMERIC)
spatial_lag NUMERIC,
spatial_lag_std NUMERIC,
orig_val NUMERIC,
orig_val_std NUMERIC,
moran_stat NUMERIC,
rowid INT)
AS $$
from crankshaft.clustering import Moran
moran = Moran()
# TODO: use named parameters or a dictionary
return moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
from crankshaft.clustering import Moran
moran = Moran()
return moran.local_rate_stat(
subquery,
numerator,
denominator,
w_type,
num_ngbrs,
permutations,
geom_col,
id_col
)
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
-- Moran's I Rate
@ -277,18 +295,22 @@ CREATE OR REPLACE FUNCTION
id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(
moran NUMERIC,
quads TEXT,
significance NUMERIC,
rowid INT,
vals NUMERIC,
spatial_lag NUMERIC)
spatial_lag NUMERIC,
spatial_lag_std NUMERIC,
orig_val NUMERIC,
orig_val_std NUMERIC,
moran_stat NUMERIC,
rowid INT)
AS $$
SELECT moran, quads, significance, rowid, vals, spatial_lag
FROM cdb_crankshaft._CDB_MoransILocalRate(
subquery, numerator, denominator, w_type,
num_ngbrs, permutations, geom_col, id_col);
SELECT
quads, significance, spatial_lag, spatial_lag_std,
orig_val, orig_val_std, moran_stat, rowid
FROM cdb_crankshaft._CDB_MoransILocalRate(
subquery, numerator, denominator, w_type,
num_ngbrs, permutations, geom_col, id_col);
$$ LANGUAGE SQL VOLATILE PARALLEL UNSAFE;

View File

@ -1,6 +1,9 @@
"""
Moran's I geostatistics (global clustering & outliers presence)
Functionality relies on PySAL: http://pysal.readthedocs.io/en/latest/
Functionality relies on a combination of `PySAL
<http://pysal.readthedocs.io/en/latest/>`__ and the data provided in
the class instantiation (which defaults to PostgreSQL's plpy module's `database
access functions <https://www.postgresql.org/docs/10/static/plpython.html>`__).
"""
from collections import OrderedDict
@ -97,6 +100,18 @@ class Moran(object):
geom_col (str): Name of the geometry column in the dataset for
finding the spatial neighborhoods.
id_col (str): Row index for each value. Usually the database index.
Returns:
list of tuples: Where each tuple consists of the following values:
- quadrants classification (one of `HH`, `HL`, `LL`, or `LH`)
- p-value
- spatial lag
- standardized spatial lag (centered on the mean, normalized by the
standard deviation)
- original value
- standardized value
- Moran's I statistic
- original row index
"""
# geometries with attributes that are null are ignored
@ -122,9 +137,18 @@ class Moran(object):
# calculate spatial lag
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order,
lisa.y, lag)
return zip(
quads,
lisa.p_sim,
lag,
lag_std,
lisa.y,
lisa.z,
lisa.Is,
weight.id_order
)
def global_rate_stat(self, subquery, numerator, denominator,
w_type, num_ngbrs, permutations, geom_col, id_col):
@ -196,6 +220,18 @@ class Moran(object):
geom_col (str): Name of the geometry column in the dataset for
finding the spatial neighborhoods.
id_col (str): Row index for each value. Usually the database index.
Returns:
list of tuples: Where each tuple consists of the following values:
- quadrants classification (one of `HH`, `HL`, `LL`, or `LH`)
- p-value
- spatial lag
- standardized spatial lag (centered on the mean, normalized by the
standard deviation)
- original value (roughly numerator divided by denominator)
- standardized value
- Moran's I statistic
- original row index
"""
# geometries with values that are null are ignored
# resulting in a collection of not as near neighbors
@ -224,8 +260,18 @@ class Moran(object):
# spatial lag
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y, lag)
return zip(
quads,
lisa.p_sim,
lag,
lag_std,
lisa.y,
lisa.z,
lisa.Is,
weight.id_order
)
def local_bivariate_stat(self, subquery, attr1, attr2,
permutations, geom_col, id_col,