diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql
index 45e1e1e..bc75aff 100644
--- a/src/pg/sql/10_moran.sql
+++ b/src/pg/sql/10_moran.sql
@@ -39,7 +39,7 @@ AS $$
result = moran.local_stat(subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col)
# remove spatial lag
- return [r[:-1] for r in result]
+ return [(r[6], r[0], r[1], r[7], r[4]) for r in result]  # (moran, quads, significance, rowid, vals): r[4] is the original value, r[5] the standardized one
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
-- Moran's I Local (internal function)
@@ -53,17 +53,21 @@ CREATE OR REPLACE FUNCTION
geom_col TEXT,
id_col TEXT)
RETURNS TABLE (
- moran NUMERIC,
quads TEXT,
significance NUMERIC,
- rowid INT,
- vals NUMERIC,
- spatial_lag NUMERIC)
+ spatial_lag NUMERIC,
+ spatial_lag_std NUMERIC,
+ orig_val NUMERIC,
+ orig_val_std NUMERIC,
+ moran_stat NUMERIC,
+ rowid INT)
AS $$
- from crankshaft.clustering import Moran
- moran = Moran()
- return moran.local_stat(subquery, column_name, w_type,
- num_ngbrs, permutations, geom_col, id_col)
+
+from crankshaft.clustering import Moran
+moran = Moran()
+return moran.local_stat(subquery, column_name, w_type,
+ num_ngbrs, permutations, geom_col, id_col)
+
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
@@ -79,15 +83,19 @@ CREATE OR REPLACE FUNCTION
geom_col TEXT DEFAULT 'the_geom',
id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (
- moran NUMERIC,
quads TEXT,
significance NUMERIC,
- rowid INT,
- vals NUMERIC,
- spatial_lag NUMERIC)
+ spatial_lag NUMERIC,
+ spatial_lag_std NUMERIC,
+ orig_val NUMERIC,
+ orig_val_std NUMERIC,
+ moran_stat NUMERIC,
+ rowid INT)
AS $$
- SELECT moran, quads, significance, rowid, vals, spatial_lag
+ SELECT
+ quads, significance, spatial_lag, spatial_lag_std,
+ orig_val, orig_val_std, moran_stat, rowid
FROM cdb_crankshaft._CDB_MoransILocal(
subquery, column_name, w_type,
num_ngbrs, permutations, geom_col, id_col);
@@ -214,7 +222,7 @@ AS $$
# TODO: use named parameters or a dictionary
result = moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
# remove spatial lag
- return [r[:-1] for r in result]
+ return [(r[6], r[0], r[1], r[7], r[4]) for r in result]
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
-- Moran's I Local Rate (public-facing function) - DEPRECATED
@@ -250,17 +258,27 @@ CREATE OR REPLACE FUNCTION
id_col TEXT)
RETURNS
TABLE(
- moran NUMERIC,
quads TEXT,
significance NUMERIC,
- rowid INT,
- vals NUMERIC,
- spatial_lag NUMERIC)
+ spatial_lag NUMERIC,
+ spatial_lag_std NUMERIC,
+ orig_val NUMERIC,
+ orig_val_std NUMERIC,
+ moran_stat NUMERIC,
+ rowid INT)
AS $$
- from crankshaft.clustering import Moran
- moran = Moran()
- # TODO: use named parameters or a dictionary
- return moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col)
+from crankshaft.clustering import Moran
+moran = Moran()
+return moran.local_rate_stat(
+ subquery,
+ numerator,
+ denominator,
+ w_type,
+ num_ngbrs,
+ permutations,
+ geom_col,
+ id_col
+)
$$ LANGUAGE plpythonu VOLATILE PARALLEL UNSAFE;
-- Moran's I Rate
@@ -277,18 +295,22 @@ CREATE OR REPLACE FUNCTION
id_col TEXT DEFAULT 'cartodb_id')
RETURNS
TABLE(
- moran NUMERIC,
quads TEXT,
significance NUMERIC,
- rowid INT,
- vals NUMERIC,
- spatial_lag NUMERIC)
+ spatial_lag NUMERIC,
+ spatial_lag_std NUMERIC,
+ orig_val NUMERIC,
+ orig_val_std NUMERIC,
+ moran_stat NUMERIC,
+ rowid INT)
AS $$
- SELECT moran, quads, significance, rowid, vals, spatial_lag
- FROM cdb_crankshaft._CDB_MoransILocalRate(
- subquery, numerator, denominator, w_type,
- num_ngbrs, permutations, geom_col, id_col);
+SELECT
+ quads, significance, spatial_lag, spatial_lag_std,
+ orig_val, orig_val_std, moran_stat, rowid
+FROM cdb_crankshaft._CDB_MoransILocalRate(
+ subquery, numerator, denominator, w_type,
+ num_ngbrs, permutations, geom_col, id_col);
$$ LANGUAGE SQL VOLATILE PARALLEL UNSAFE;
diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py
index ff4501b..cce5670 100644
--- a/src/py/crankshaft/crankshaft/clustering/moran.py
+++ b/src/py/crankshaft/crankshaft/clustering/moran.py
@@ -1,6 +1,9 @@
"""
Moran's I geostatistics (global clustering & outliers presence)
-Functionality relies PySAL: http://pysal.readthedocs.io/en/latest/
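+Functionality relies on a combination of `PySAL
+<http://pysal.readthedocs.io/en/latest/>`__ and the data provider supplied at
+the class instantiation (which defaults to PostgreSQL's plpy module's `database
+access functions <https://www.postgresql.org/docs/current/plpython-database.html>`__).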
"""
from collections import OrderedDict
@@ -97,6 +100,18 @@ class Moran(object):
geom_col (str): Name of the geometry column in the dataset for
finding the spatial neighborhoods.
id_col (str): Row index for each value. Usually the database index.
+
+ Returns:
+ list of tuples: Where each tuple consists of the following values:
+ - quadrants classification (one of `HH`, `HL`, `LL`, or `LH`)
+ - p-value
+ - spatial lag
+ - standardized spatial lag (centered on the mean, normalized by the
+ standard deviation)
+ - original value
+ - standardized value
+ - Moran's I statistic
+ - original row index
"""
# geometries with attributes that are null are ignored
@@ -122,9 +137,18 @@ class Moran(object):
# calculate spatial lag
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
+ lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
- return zip(lisa.Is, quads, lisa.p_sim, weight.id_order,
- lisa.y, lag)
+ return zip(
+ quads,
+ lisa.p_sim,
+ lag,
+ lag_std,
+ lisa.y,
+ lisa.z,
+ lisa.Is,
+ weight.id_order
+ )
def global_rate_stat(self, subquery, numerator, denominator,
w_type, num_ngbrs, permutations, geom_col, id_col):
@@ -196,6 +220,18 @@ class Moran(object):
geom_col (str): Name of the geometry column in the dataset for
finding the spatial neighborhoods.
id_col (str): Row index for each value. Usually the database index.
+
+ Returns:
+ list of tuples: Where each tuple consists of the following values:
+ - quadrants classification (one of `HH`, `HL`, `LL`, or `LH`)
+ - p-value
+ - spatial lag
+ - standardized spatial lag (centered on the mean, normalized by the
+ standard deviation)
+ - original value (roughly numerator divided by denominator)
+ - standardized value
+ - Moran's I statistic
+ - original row index
"""
# geometries with values that are null are ignored
# resulting in a collection of not as near neighbors
@@ -224,8 +260,18 @@ class Moran(object):
# spatial lag
lag = ps.weights.spatial_lag.lag_spatial(weight, lisa.y)
+ lag_std = ps.weights.spatial_lag.lag_spatial(weight, lisa.z)
- return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y, lag)
+ return zip(
+ quads,
+ lisa.p_sim,
+ lag,
+ lag_std,
+ lisa.y,
+ lisa.z,
+ lisa.Is,
+ weight.id_order
+ )
def local_bivariate_stat(self, subquery, attr1, attr2,
permutations, geom_col, id_col,