diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index 070392d..bd3f96d 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -10,9 +10,11 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran NUMERIC, significance NUMERIC) AS $$ - from crankshaft.clustering import moran + from crankshaft.clustering import Moran # TODO: use named parameters or a dictionary - return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + moran = Moran() + return moran.global_stat(subquery, column_name, w_type, + num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local (internal function) @@ -27,9 +29,11 @@ CREATE OR REPLACE FUNCTION id_col TEXT) RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - from crankshaft.clustering import moran_local + from crankshaft.clustering import Moran + moran = Moran() # TODO: use named parameters or a dictionary - return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + return moran.local_stat(subquery, column_name, w_type, + num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local (public-facing function) @@ -120,9 +124,11 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran FLOAT, significance FLOAT) AS $$ - from crankshaft.clustering import moran_local + from crankshaft.clustering import Moran + moran = Moran() # TODO: use named parameters or a dictionary - return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + return moran.global_rate_stat(subquery, numerator, denominator, w_type, + num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; @@ -140,9 +146,10 @@ CREATE OR REPLACE FUNCTION RETURNS TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - from crankshaft.clustering import moran_local_rate + from crankshaft.clustering import Moran + moran = Moran() # TODO: use named parameters or a dictionary - return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + return moran.local_rate_stat(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local Rate (public-facing function) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 4e7086e..ee82932 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -15,204 +15,181 @@ import crankshaft.pysal_utils as pu # High level interface --------------------------------------- -def moran(subquery, attr_name, - w_type, num_ngbrs, permutations, geom_col, id_col): - """ - Moran's I (global) - Implementation building neighbors with a PostGIS database and Moran's I - core clusters with PySAL. - Andy Eschbacher - """ - qvals = OrderedDict([("id_col", id_col), - ("attr1", attr_name), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) - - query = pu.construct_neighbor_query(w_type, qvals) - - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(2) - except plpy.SPIError, e: - plpy.error('Analysis failed: %s' % e) - return pu.empty_zipped_array(2) - - # collect attributes - attr_vals = pu.get_attributes(result) - - # calculate weights - weight = pu.get_weight(result, w_type, num_ngbrs) - - # calculate moran global - moran_global = ps.esda.moran.Moran(attr_vals, weight, - permutations=permutations) - - return zip([moran_global.I], [moran_global.EI]) +class QueryRunner: + def get_result(self, query): + try: + data = plpy.execute(query) + except plpy.SPIError, err: + plpy.error("k-means (spatial) cluster analysis failed: %s" % err) + return data -def moran_local(subquery, attr, - w_type, num_ngbrs, permutations, geom_col, id_col): - """ - Moran's I implementation for PL/Python - Andy Eschbacher - """ +class Moran: + def __init__(self, query_runner=None): + if query_runner is None: + self.query_runner = QueryRunner() + else: + self.query_runner = query_runner - # geometries with attributes that are null are ignored - # resulting in a collection of not as near neighbors + def global_stat(self, subquery, attr_name, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I (global) + Implementation building neighbors with a PostGIS database and Moran's I + core clusters with PySAL. + Andy Eschbacher + """ + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr_name), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - qvals = OrderedDict([("id_col", id_col), - ("attr1", attr), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + query = pu.construct_neighbor_query(w_type, qvals) - query = pu.construct_neighbor_query(w_type, qvals) + result = self.query_runner.get_result(query) - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(5) - except plpy.SPIError, e: - plpy.error('Analysis failed: %s' % e) - return pu.empty_zipped_array(5) + # collect attributes + attr_vals = pu.get_attributes(result) - attr_vals = pu.get_attributes(result) - weight = pu.get_weight(result, w_type, num_ngbrs) + # calculate weights + weight = pu.get_weight(result, w_type, num_ngbrs) - # calculate LISA values - lisa = ps.esda.moran.Moran_Local(attr_vals, weight, - permutations=permutations) + # calculate moran global + moran_global = ps.esda.moran.Moran(attr_vals, weight, + permutations=permutations) - # find quadrants for each geometry - quads = quad_position(lisa.q) + return zip([moran_global.I], [moran_global.EI]) - return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + def local_stat(self, subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I implementation for PL/Python + Andy Eschbacher + """ + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors -def moran_rate(subquery, numerator, denominator, - w_type, num_ngbrs, permutations, geom_col, id_col): - """ - Moran's I Rate (global) - Andy Eschbacher - """ - qvals = OrderedDict([("id_col", id_col), - ("attr1", numerator), - ("attr2", denominator) - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - query = pu.construct_neighbor_query(w_type, qvals) + query = pu.construct_neighbor_query(w_type, qvals) - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(2) - except plpy.SPIError, e: - plpy.error('Analysis failed: %s' % e) - return pu.empty_zipped_array(2) + result = self.query_runner.get_result(query) - # collect attributes - numer = pu.get_attributes(result, 1) - denom = pu.get_attributes(result, 2) + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) - weight = pu.get_weight(result, w_type, num_ngbrs) - - # calculate moran global rate - lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + # calculate LISA values + lisa = ps.esda.moran.Moran_Local(attr_vals, weight, permutations=permutations) - return zip([lisa_rate.I], [lisa_rate.EI]) + # find quadrants for each geometry + quads = quad_position(lisa.q) + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) -def moran_local_rate(subquery, numerator, denominator, - w_type, num_ngbrs, permutations, geom_col, id_col): - """ - Moran's I Local Rate + def global_rate_stat(self, subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Rate (global) Andy Eschbacher - """ - # geometries with values that are null are ignored - # resulting in a collection of not as near neighbors + """ + qvals = OrderedDict([("id_col", id_col), + ("attr1", numerator), + ("attr2", denominator) + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - qvals = OrderedDict([("id_col", id_col), - ("numerator", numerator), - ("denominator", denominator), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + query = pu.construct_neighbor_query(w_type, qvals) - query = pu.construct_neighbor_query(w_type, qvals) + result = self.query_runner.get_result(query) - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(5) - except plpy.SPIError, e: - plpy.error('Analysis failed: %s' % e) - return pu.empty_zipped_array(5) + # collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) - # collect attributes - numer = pu.get_attributes(result, 1) - denom = pu.get_attributes(result, 2) + weight = pu.get_weight(result, w_type, num_ngbrs) - weight = pu.get_weight(result, w_type, num_ngbrs) + # calculate moran global rate + lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + permutations=permutations) - # calculate LISA values - lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, - permutations=permutations) + return zip([lisa_rate.I], [lisa_rate.EI]) - # find quadrants for each geometry - quads = quad_position(lisa.q) + def local_rate_stat(self, subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Local Rate + Andy Eschbacher + """ + # geometries with values that are null are ignored + # resulting in a collection of not as near neighbors - return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + qvals = OrderedDict([("id_col", id_col), + ("numerator", numerator), + ("denominator", denominator), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + query = pu.construct_neighbor_query(w_type, qvals) -def moran_local_bv(subquery, attr1, attr2, - permutations, geom_col, id_col, w_type, num_ngbrs): - """ - Moran's I (local) Bivariate (untested) - """ + result = self.query_runner.get_result(query) - qvals = OrderedDict([("id_col", id_col), - ("attr1", attr1), - ("attr2", attr2), - ("geom_col", geom_col), - ("subquery", subquery), - ("num_ngbrs", num_ngbrs)]) + # collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) - query = pu.construct_neighbor_query(w_type, qvals) + weight = pu.get_weight(result, w_type, num_ngbrs) - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(4) - except plpy.SPIError: - plpy.error("Error: areas of interest query failed, " - "check input parameters") - return pu.empty_zipped_array(4) + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, + permutations=permutations) - # collect attributes - attr1_vals = pu.get_attributes(result, 1) - attr2_vals = pu.get_attributes(result, 2) + # find quadrants for each geometry + quads = quad_position(lisa.q) - # create weights - weight = pu.get_weight(result, w_type, num_ngbrs) + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) - # calculate LISA values - lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, - permutations=permutations) + def local_bivariate_stat(self, subquery, attr1, attr2, + permutations, geom_col, id_col, + w_type, num_ngbrs): + """ + Moran's I (local) Bivariate (untested) + """ - # find clustering of significance - lisa_sig = quad_position(lisa.q) + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr1), + ("attr2", attr2), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) - return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) + query = pu.construct_neighbor_query(w_type, qvals) + + result = self.query_runner.get_result(query) + + # collect attributes + attr1_vals = pu.get_attributes(result, 1) + attr2_vals = pu.get_attributes(result, 2) + + # create weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, + permutations=permutations) + + # find clustering of significance + lisa_sig = quad_position(lisa.q) + + return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) # Low level functions ----------------------------------------