diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index 45e1e1e..bc75aff 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -39,7 +39,7 @@ AS $$ result = moran.local_stat(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) # remove spatial lag - return [r[:-1] for r in result] + return [(r[6], r[0], r[1], r[7], r[5]) for r in result] $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE; -- Moran's I Local (internal function) @@ -53,17 +53,21 @@ CREATE OR REPLACE FUNCTION geom_col TEXT, id_col TEXT) RETURNS TABLE ( - moran NUMERIC, quads TEXT, significance NUMERIC, - rowid INT, - vals NUMERIC, - spatial_lag NUMERIC) + spatial_lag NUMERIC, + spatial_lag_std NUMERIC, + orig_val NUMERIC, + orig_val_std NUMERIC, + moran_stat NUMERIC, + rowid INT) AS $$ - from crankshaft.clustering import Moran - moran = Moran() - return moran.local_stat(subquery, column_name, w_type, - num_ngbrs, permutations, geom_col, id_col) + +from crankshaft.clustering import Moran +moran = Moran() +return moran.local_stat(subquery, column_name, w_type, + num_ngbrs, permutations, geom_col, id_col) + $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE; @@ -79,15 +83,19 @@ CREATE OR REPLACE FUNCTION geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE ( - moran NUMERIC, quads TEXT, significance NUMERIC, - rowid INT, - vals NUMERIC, - spatial_lag NUMERIC) + spatial_lag NUMERIC, + spatial_lag_std NUMERIC, + orig_val NUMERIC, + orig_val_std NUMERIC, + moran_stat NUMERIC, + rowid INT) AS $$ - SELECT moran, quads, significance, rowid, vals, spatial_lag + SELECT + quads, significance, spatial_lag, spatial_lag_std, + orig_val, orig_val_std, moran_stat, rowid FROM cdb_crankshaft._CDB_MoransILocal( subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); @@ -214,7 +222,7 @@ AS $$ # TODO: use named parameters or a dictionary result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, 
geom_col, id_col) # remove spatial lag - return [r[:-1] for r in result] + return [(r[6], r[0], r[1], r[7], r[4]) for r in result] $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE; -- Moran's I Local Rate (public-facing function) - DEPRECATED @@ -250,17 +258,27 @@ CREATE OR REPLACE FUNCTION id_col TEXT) RETURNS TABLE( - moran NUMERIC, quads TEXT, significance NUMERIC, - rowid INT, - vals NUMERIC, - spatial_lag NUMERIC) + spatial_lag NUMERIC, + spatial_lag_std NUMERIC, + orig_val NUMERIC, + orig_val_std NUMERIC, + moran_stat NUMERIC, + rowid INT) AS $$ - from crankshaft.clustering import Moran - moran = Moran() - # TODO: use named parameters or a dictionary - return moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +from crankshaft.clustering import Moran +moran = Moran() +return moran.local_rate_stat( + subquery, + numerator, + denominator, + w_type, + num_ngbrs, + permutations, + geom_col, + id_col +) $$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE; -- Moran's I Rate @@ -277,18 +295,22 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE( - moran NUMERIC, quads TEXT, significance NUMERIC, - rowid INT, - vals NUMERIC, - spatial_lag NUMERIC) + spatial_lag NUMERIC, + spatial_lag_std NUMERIC, + orig_val NUMERIC, + orig_val_std NUMERIC, + moran_stat NUMERIC, + rowid INT) AS $$ - SELECT moran, quads, significance, rowid, vals, spatial_lag - FROM cdb_crankshaft._CDB_MoransILocalRate( - subquery, numerator, denominator, w_type, - num_ngbrs, permutations, geom_col, id_col); +SELECT + quads, significance, spatial_lag, spatial_lag_std, + orig_val, orig_val_std, moran_stat, rowid +FROM cdb_crankshaft._CDB_MoransILocalRate( + subquery, numerator, denominator, w_type, + num_ngbrs, permutations, geom_col, id_col); $$ LANGUAGE SQL VOLATILE PARALLEL UNSAFE; diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index ff4501b..cce5670 100644 --- 
a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -1,6 +1,9 @@ """ Moran's I geostatistics (global clustering & outliers presence) -Functionality relies PySAL: http://pysal.readthedocs.io/en/latest/ +Functionality relies on a combination of `PySAL +`__ and the data provider provided in +the class instantiation (which defaults to PostgreSQL's plpy module's `database +access functions `__). """ from collections import OrderedDict @@ -97,6 +100,18 @@ class Moran(object): geom_col (str): Name of the geometry column in the dataset for finding the spatial neighborhoods. id_col (str): Row index for each value. Usually the database index. + + Returns: + list of tuples: Where each tuple consists of the following values: + - quadrants classification (one of `HH`, `HL`, `LL`, or `LH`) + - p-value + - spatial lag + - standardized spatial lag (centered on the mean, normalized by the + standard deviation) + - original value + - standardized value + - Moran's I statistic + - original row index """ # geometries with attributes that are null are ignored @@ -122,9 +137,18 @@ class Moran(object): # calculate spatial lag lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y) + lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z) - return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, - lisa.y, lag) + return zip( + quads, + lisa.p_sim, + lag, + lag_std, + lisa.y, + lisa.z, + lisa.Is, + weight.id_order + ) def global_rate_stat(self, subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col): @@ -196,6 +220,18 @@ class Moran(object): geom_col (str): Name of the geometry column in the dataset for finding the spatial neighborhoods. id_col (str): Row index for each value. Usually the database index. 
+ + Returns: + list of tuples: Where each tuple consists of the following values: + - quadrants classification (one of `HH`, `HL`, `LL`, or `LH`) + - p-value + - spatial lag + - standardized spatial lag (centered on the mean, normalized by the + standard deviation) + - original value (roughly numerator divided by denominator) + - standardized value + - Moran's I statistic + - original row index """ # geometries with values that are null are ignored # resulting in a collection of not as near neighbors @@ -224,8 +260,18 @@ class Moran(object): # spatial lag lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y) + lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z) - return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y, lag) + return zip( + quads, + lisa.p_sim, + lag, + lag_std, + lisa.y, + lisa.z, + lisa.Is, + weight.id_order + ) def local_bivariate_stat(self, subquery, attr1, attr2, permutations, geom_col, id_col,