From 3eda8ecd166d21b79aa2ad8ad537c2e3ffdef2a3 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 22 Mar 2016 10:34:22 -0400 Subject: [PATCH 01/12] new signatures for moran (w/o significance) --- src/pg/sql/10_moran.sql | 104 ++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 26 deletions(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index 49c70c2..dba7069 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -1,37 +1,89 @@ --- Moran's I +-- Moran's I (global) CREATE OR REPLACE FUNCTION - cdb_moran_local ( - t TEXT, - attr TEXT, - significance float DEFAULT 0.05, - num_ngbrs INT DEFAULT 5, - permutations INT DEFAULT 99, - geom_column TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn') + cdb_moran ( + subquery TEXT, + attr_name TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local - # TODO: use named parameters or a dictionary - return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type) + # TODO: use named parameters or a dictionary + return moran(subquery, attr, num_ngbrs, permutations, geom_col, id_col, w_type) $$ LANGUAGE plpythonu; +-- Moran's I Local +CREATE OR REPLACE FUNCTION + cdb_moran_local ( + subquery TEXT, + attr TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, attr, permutations, geom_col, 
id_col, w_type, num_ngbrs) +$$ LANGUAGE plpythonu; + +-- Moran's I Rate (global) +CREATE OR REPLACE FUNCTION + cdb_moran_rate ( + subquery TEXT, + numerator TEXT, + denominator TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) +RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) +$$ LANGUAGE plpythonu; + + -- Moran's I Local Rate CREATE OR REPLACE FUNCTION - cdb_moran_local_rate(t TEXT, - numerator TEXT, - denominator TEXT, - significance FLOAT DEFAULT 0.05, - num_ngbrs INT DEFAULT 5, - permutations INT DEFAULT 99, - geom_column TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn') -RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric) + cdb_moran_local_rate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local_rate - # TODO: use named parameters or a dictionary - return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type) + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) $$ LANGUAGE plpythonu; + +-- -- Moran's I Local Bivariate +-- CREATE OR REPLACE FUNCTION +-- cdb_moran_local_bv( +-- subquery TEXT, +-- attr1 TEXT, +-- attr2 TEXT, +-- 
permutations INT DEFAULT 99, +-- geom_col TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs INT DEFAULT 5) +-- RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric) +-- AS $$ +-- from crankshaft.clustering import moran_local_bv +-- # TODO: use named parameters or a dictionary +-- return moran_local_bv(t, attr1, attr2, permutations, geom_col, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; \ No newline at end of file From 1578b17eb87c60c91e67980f829ed58a9a54b2ce Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 22 Mar 2016 10:42:06 -0400 Subject: [PATCH 02/12] updated function flow without significance --- .../crankshaft/crankshaft/clustering/moran.py | 163 +++++++++++++----- 1 file changed, 117 insertions(+), 46 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 9dd976e..2d65db5 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -11,7 +11,51 @@ import plpy # High level interface --------------------------------------- -def moran_local(subquery, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type): +def moran(subquery, attr_name, permutations, geom_col, id_col, w_type, num_ngbrs): + """ + Moran's I (global) + Andy Eschbacher + """ + qvals = {"id_col": id_col, + "attr1": attr_name, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + q = get_query(w_type, qvals) + + plpy.notice('** Query: %s' % q) + + try: + r = plpy.execute(q) + if (len(r) == 0) & (w_type != 'knn'): + plpy.notice('** Query returned with 0 rows, trying kNN weights') + q = get_query('knn', qvals) + r = plpy.execute(q) + plpy.notice('** Query returned with %d rows' % len(r)) + except plpy.SPIError: + plpy.error('** Moran rate failed executing query to build weight object') + plpy.notice('** Query failed: "%s"' % q) 
+ plpy.notice('** Error: %s' % plpy.SPIError) + plpy.notice('** Exiting function') + return zip([None], [None]) + + ## if there are no neighbors, exit + if len(r) == 0: + return zip([None], [None]) + + ## collect attributes + attr_vals = get_attributes(r, 1) + + ## calculate weights + weight = get_weight(r, w_type, num_ngbrs) + + ## calculate moran global + moran_global = ps.esda.moran.Moran(attr_vals, weight, permutations=permutations) + + return zip([moran_global.I],[moran_global.EI]) + +def moran_local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs): """ Moran's I implementation for PL/Python Andy Eschbacher @@ -25,8 +69,8 @@ def moran_local(subquery, attr, significance, num_ngbrs, permutations, geom_colu # resulting in a collection of not as near neighbors qvals = {"id_col": id_col, - "attr1": attr, - "geom_col": geom_column, + "attr1": attr, + "geom_col": geom_col, "subquery": subquery, "num_ngbrs": num_ngbrs} @@ -38,23 +82,68 @@ def moran_local(subquery, attr, significance, num_ngbrs, permutations, geom_colu except plpy.SPIError: plpy.notice('** Query failed: "%s"' % q) plpy.notice('** Exiting function') - return zip([None], [None], [None], [None]) + return zip([None], [None], [None], [None], [None]) y = get_attributes(r, 1) w = get_weight(r, w_type) # calculate LISA values - lisa = ps.Moran_Local(y, w) + lisa = ps.esda.moran.Moran_Local(y, w) - # find units of significance - lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance) + # find quadrants for each geometry + quads = quad_position(lisa.q) + + plpy.notice('** Finished calculations') + return zip(lisa.Is, quads, lisa.p_sim, w.id_order, lisa.y) + +def moran_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs): + """ + Moran's I Rate (global) + Andy Eschbacher + """ + qvals = {"id_col": id_col, + "attr1": numerator, + "attr2": denominator, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + q = get_query(w_type, qvals) 
+ + plpy.notice('** Query: %s' % q) + + try: + r = plpy.execute(q) + if len(r) == 0: + plpy.notice('** Query returned with 0 rows, trying kNN weights') + q = get_query('knn', qvals) + r = plpy.execute(q) + plpy.notice('** Query returned with %d rows' % len(r)) + except plpy.SPIError: + plpy.error('Moran rate failed executing query to build weight object') + plpy.notice('** Query failed: "%s"' % q) + plpy.notice('** Error: %s' % plpy.SPIError) + plpy.notice('** Exiting function') + return zip([None], [None]) + + ## if there are no values returned, exit + if len(r) == 0: + return zip([None], [None]) + + ## collect attributes + numer = get_attributes(r, 1) + denom = get_attributes(r, 2) + + w = get_weight(r, w_type, num_ngbrs) + + ## calculate moran global rate + mr = ps.esda.moran.Moran_Rate(numer, denom, w, permutations=permutations) plpy.notice('** Finished calculations') - return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order) + return zip([mr.I],[mr.EI]) - -def moran_local_rate(subquery, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type): +def moran_local_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs): """ Moran's I Local Rate Andy Eschbacher @@ -68,7 +157,7 @@ def moran_local_rate(subquery, numerator, denominator, significance, num_ngbrs, qvals = {"id_col": id_col, "numerator": numerator, "denominator": denominator, - "geom_col": geom_column, + "geom_col": geom_col, "subquery": subquery, "num_ngbrs": num_ngbrs} @@ -81,7 +170,7 @@ def moran_local_rate(subquery, numerator, denominator, significance, num_ngbrs, plpy.notice('** Query failed: "%s"' % q) plpy.notice('** Error: %s' % plpy.SPIError) plpy.notice('** Exiting function') - return zip([None], [None], [None], [None]) + return zip([None], [None], [None], [None], [None]) plpy.notice('r.nrows() = %d' % r.nrows()) @@ -95,21 +184,20 @@ def moran_local_rate(subquery, numerator, denominator, significance, num_ngbrs, lisa = 
ps.esda.moran.Moran_Local_Rate(numer, denom, w, permutations=permutations) # find units of significance - lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance) + quads = quad_position(lisa.q) plpy.notice('** Finished calculations') - ## TODO: Decide on which return values here - return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order, lisa.y) + return zip(lisa.Is, quads, lisa.p_sim, w.id_order, lisa.y) -def moran_local_bv(t, attr1, attr2, significance, num_ngbrs, permutations, geom_column, id_col, w_type): +def moran_local_bv(subquery, attr1, attr2, permutations, geom_col, id_col, w_type, num_ngbrs): plpy.notice('** Constructing query') qvals = {"num_ngbrs": num_ngbrs, "attr1": attr1, "attr2": attr2, - "table": t, - "geom_col": geom_column, + "subquery": subquery, + "geom_col": geom_col, "id_col": id_col} q = get_query(w_type, qvals) @@ -136,7 +224,7 @@ def moran_local_bv(t, attr1, attr2, significance, num_ngbrs, permutations, geom_ plpy.notice("len of Is: %d" % len(lisa.Is)) # find clustering of significance - lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance) + lisa_sig = quad_position(lisa.q) plpy.notice('** Finished calculations') @@ -171,7 +259,7 @@ def query_attr_select(params): """ attrs = [k for k in params - if k not in ('id_col', 'geom_col', 'table', 'num_ngbrs', 'subquery')] + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')] template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, " @@ -187,7 +275,7 @@ def query_attr_where(params): Create portion of WHERE clauses for weeding out NULL-valued geometries """ attrs = sorted([k for k in params - if k not in ('id_col', 'geom_col', 'table', 'num_ngbrs', 'subquery')]) + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]) attr_string = [] @@ -217,12 +305,12 @@ def knn(params): "i.\"{id_col}\" As id, " \ "%(attr_select)s" \ "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ - "FROM \"({subquery})\" As j " \ + "FROM ({subquery}) As j " \ "WHERE %(attr_where_j)s " \ "ORDER BY j.\"{geom_col}\" 
<-> i.\"{geom_col}\" ASC " \ "LIMIT {num_ngbrs} OFFSET 1 ) " \ ") As neighbors " \ - "FROM \"({subquery})\" As i " \ + "FROM ({subquery}) As i " \ "WHERE " \ "%(attr_where_i)s " \ "ORDER BY i.\"{id_col}\" ASC;" % replacements @@ -245,11 +333,11 @@ def queen(params): "i.\"{id_col}\" As id, " \ "%(attr_select)s" \ "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ - "FROM \"({subquery})\" As j " \ + "FROM ({subquery}) As j " \ "WHERE ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ "%(attr_where_j)s)" \ ") As neighbors " \ - "FROM \"({subquery})\" As i " \ + "FROM ({subquery}) As i " \ "WHERE " \ "%(attr_where_i)s " \ "ORDER BY i.\"{id_col}\" ASC;" % replacements @@ -285,10 +373,10 @@ def get_weight(query_res, w_type='queen', num_ngbrs=5): if w_type == 'knn': row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs weights = {x['id']: row_normed_weights for x in query_res} - elif w_type == 'queen': + else: weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) - if len(x['neighbors']) > 0 - else [] for x in query_res} + if len(x['neighbors']) > 0 + else [] for x in query_res} neighbors = {x['id']: x['neighbors'] for x in query_res} @@ -301,21 +389,4 @@ def quad_position(quads): lisa_sig = np.array([map_quads(q) for q in quads]) - return lisa_sig - -def lisa_sig_vals(pvals, quads, threshold): - """ - Produce Moran's I classification based of n - """ - - sig = (pvals <= threshold) - - lisa_sig = np.empty(len(sig), np.chararray) - - for idx, val in enumerate(sig): - if val: - lisa_sig[idx] = map_quads(quads[idx]) - else: - lisa_sig[idx] = 'Not significant' - - return lisa_sig + return lisa_sig \ No newline at end of file From eecbe39547ff2e8aa3e36050b5cf7d39452dd032 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 22 Mar 2016 10:42:44 -0400 Subject: [PATCH 03/12] updating tests --- src/pg/test/expected/02_moran_test.out | 122 ++++++++++++++++-- src/py/crankshaft/test/fixtures/moran.json | 70 +++++----- .../crankshaft/test/test_clustering_moran.py | 
19 +-- 3 files changed, 157 insertions(+), 54 deletions(-) diff --git a/src/pg/test/expected/02_moran_test.out b/src/pg/test/expected/02_moran_test.out index 66ccaaa..92cb218 100644 --- a/src/pg/test/expected/02_moran_test.out +++ b/src/pg/test/expected/02_moran_test.out @@ -126,13 +126,65 @@ SELECT ppoints.code, m.quads ORDER BY ppoints.code; NOTICE: ** Constructing query CONTEXT: PL/Python function "cdb_moran_local" -NOTICE: ** Query failed: "SELECT i."cartodb_id" As id, i."value"::numeric As attr1, (SELECT ARRAY(SELECT j."cartodb_id" FROM "(SELECT * FROM ppoints)" As j WHERE j."value" IS NOT NULL ORDER BY j."the_geom" <-> i."the_geom" ASC LIMIT 5 OFFSET 1 ) ) As neighbors FROM "(SELECT * FROM ppoints)" As i WHERE i."value" IS NOT NULL ORDER BY i."cartodb_id" ASC;" +NOTICE: ** Query returned with 52 rows CONTEXT: PL/Python function "cdb_moran_local" -NOTICE: ** Exiting function +NOTICE: ** Finished calculations CONTEXT: PL/Python function "cdb_moran_local" code | quads ------+------- -(0 rows) + 01 | HH + 02 | HL + 03 | LL + 04 | LL + 05 | LH + 06 | LL + 07 | HH + 08 | HH + 09 | HH + 10 | LL + 11 | LL + 12 | LL + 13 | HL + 14 | LL + 15 | LL + 16 | HH + 17 | HH + 18 | LL + 19 | HH + 20 | HH + 21 | LL + 22 | HH + 23 | LL + 24 | LL + 25 | HH + 26 | HH + 27 | LL + 28 | HH + 29 | LL + 30 | LL + 31 | HH + 32 | LL + 33 | HL + 34 | LH + 35 | LL + 36 | LL + 37 | HL + 38 | HL + 39 | HH + 40 | HH + 41 | HL + 42 | LH + 43 | LH + 44 | LL + 45 | LH + 46 | LL + 47 | LL + 48 | HH + 49 | LH + 50 | HH + 51 | LL + 52 | LL +(52 rows) SELECT cdb_crankshaft._cdb_random_seeds(1234); _cdb_random_seeds @@ -147,12 +199,62 @@ SELECT ppoints2.code, m.quads ORDER BY ppoints2.code; NOTICE: ** Constructing query CONTEXT: PL/Python function "cdb_moran_local_rate" -NOTICE: ** Query failed: "SELECT i."cartodb_id" As id, i."denominator"::numeric As attr1, i."numerator"::numeric As attr2, (SELECT ARRAY(SELECT j."cartodb_id" FROM "(SELECT * FROM ppoints2)" As j WHERE j."denominator" IS NOT NULL AND 
j."numerator" IS NOT NULL AND j."numerator" <> 0 ORDER BY j."the_geom" <-> i."the_geom" ASC LIMIT 5 OFFSET 1 ) ) As neighbors FROM "(SELECT * FROM ppoints2)" As i WHERE i."denominator" IS NOT NULL AND i."numerator" IS NOT NULL AND i."numerator" <> 0 ORDER BY i."cartodb_id" ASC;" +NOTICE: ** Query returned with 51 rows CONTEXT: PL/Python function "cdb_moran_local_rate" -NOTICE: ** Error: +NOTICE: ** Finished calculations CONTEXT: PL/Python function "cdb_moran_local_rate" -NOTICE: ** Exiting function -CONTEXT: PL/Python function "cdb_moran_local_rate" -ERROR: length of returned sequence did not match number of columns in row -CONTEXT: while creating return value -PL/Python function "cdb_moran_local_rate" + code | quads +------+------- + 01 | LL + 02 | LH + 03 | HH + 04 | HH + 05 | LL + 06 | HH + 07 | LL + 08 | LL + 09 | LL + 10 | HH + 11 | HH + 12 | HL + 13 | LL + 14 | HH + 15 | LL + 16 | LL + 17 | LL + 18 | LH + 19 | LL + 20 | LL + 21 | HH + 22 | LL + 23 | HL + 24 | LL + 25 | LL + 26 | LL + 27 | LL + 28 | LL + 29 | LH + 30 | HH + 31 | LL + 32 | LL + 33 | LL + 34 | LL + 35 | LH + 36 | HL + 37 | LH + 38 | LH + 39 | LL + 40 | LL + 41 | LH + 42 | HL + 43 | LL + 44 | HL + 45 | LL + 46 | HL + 47 | LL + 48 | LL + 49 | HL + 50 | LL + 51 | HH +(51 rows) + diff --git a/src/py/crankshaft/test/fixtures/moran.json b/src/py/crankshaft/test/fixtures/moran.json index 0530c18..2f75cf1 100644 --- a/src/py/crankshaft/test/fixtures/moran.json +++ b/src/py/crankshaft/test/fixtures/moran.json @@ -1,52 +1,52 @@ [[0.9319096128346788, "HH"], [-1.135787401862846, "HL"], -[0.11732030672508517, "Not significant"], -[0.6152779669180425, "Not significant"], -[-0.14657336660125297, "Not significant"], -[0.6967858120189607, "Not significant"], -[0.07949310115714454, "Not significant"], -[0.4703198759258987, "Not significant"], -[0.4421125200498064, "Not significant"], -[0.5724288737143592, "Not significant"], +[0.11732030672508517, "LL"], +[0.6152779669180425, "LL"], +[-0.14657336660125297, "LH"], 
+[0.6967858120189607, "LL"], +[0.07949310115714454, "HH"], +[0.4703198759258987, "HH"], +[0.4421125200498064, "HH"], +[0.5724288737143592, "LL"], [0.8970743435692062, "LL"], -[0.18327334401918674, "Not significant"], -[-0.01466729201304962, "Not significant"], -[0.3481559372544409, "Not significant"], -[0.06547094736902978, "Not significant"], +[0.18327334401918674, "LL"], +[-0.01466729201304962, "HL"], +[0.3481559372544409, "LL"], +[0.06547094736902978, "LL"], [0.15482141569329988, "HH"], -[0.4373841193538136, "Not significant"], -[0.15971286468915544, "Not significant"], -[1.0543588860308968, "Not significant"], +[0.4373841193538136, "HH"], +[0.15971286468915544, "LL"], +[1.0543588860308968, "HH"], [1.7372866900020818, "HH"], [1.091998586053999, "LL"], -[0.1171572584252222, "Not significant"], -[0.08438455015300014, "Not significant"], -[0.06547094736902978, "Not significant"], +[0.1171572584252222, "HH"], +[0.08438455015300014, "LL"], +[0.06547094736902978, "LL"], [0.15482141569329985, "HH"], [1.1627044812890683, "HH"], -[0.06547094736902978, "Not significant"], -[0.795275137550483, "Not significant"], +[0.06547094736902978, "LL"], +[0.795275137550483, "HH"], [0.18562939195219, "LL"], -[0.3010757406693439, "Not significant"], +[0.3010757406693439, "LL"], [2.8205795942839376, "HH"], -[0.11259190602909264, "Not significant"], -[-0.07116352791516614, "Not significant"], -[-0.09945240794119009, "Not significant"], +[0.11259190602909264, "LL"], +[-0.07116352791516614, "HL"], +[-0.09945240794119009, "LH"], [0.18562939195219, "LL"], -[0.1832733440191868, "Not significant"], -[-0.39054253768447705, "Not significant"], +[0.1832733440191868, "LL"], +[-0.39054253768447705, "HL"], [-0.1672071289487642, "HL"], -[0.3337669247916343, "Not significant"], -[0.2584386102554792, "Not significant"], +[0.3337669247916343, "HH"], +[0.2584386102554792, "HH"], [-0.19733845476322634, "HL"], [-0.9379282899805409, "LH"], -[-0.028770969951095866, "Not significant"], -[0.051367269430983485, 
"Not significant"], +[-0.028770969951095866, "LH"], +[0.051367269430983485, "LL"], [-0.2172548045913472, "LH"], -[0.05136726943098351, "Not significant"], -[0.04191046803899837, "Not significant"], +[0.05136726943098351, "LL"], +[0.04191046803899837, "LL"], [0.7482357030403517, "HH"], -[-0.014585767863118111, "Not significant"], -[0.5410013139159929, "Not significant"], +[-0.014585767863118111, "LH"], +[0.5410013139159929, "HH"], [1.0223932668429925, "LL"], -[1.4179402898927476, "LL"]] +[1.4179402898927476, "LL"]] \ No newline at end of file diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index b48b8d6..c7cc71a 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -60,10 +60,10 @@ class MoranTest(unittest.TestCase): ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT j.\"cartodb_id\" " \ - "FROM \"(SELECT * FROM a_list)\" As j WHERE j.\"andy\" IS NOT NULL AND " \ + "FROM (SELECT * FROM a_list) As j WHERE j.\"andy\" IS NOT NULL AND " \ "j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 ORDER BY " \ "j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 OFFSET 1 ) ) " \ - "As neighbors FROM \"(SELECT * FROM a_list)\" As i WHERE i.\"andy\" IS NOT " \ + "As neighbors FROM (SELECT * FROM a_list) As i WHERE i.\"andy\" IS NOT " \ "NULL AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER " \ "BY i.\"cartodb_id\" ASC;" @@ -74,10 +74,10 @@ class MoranTest(unittest.TestCase): ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \ - "j.\"cartodb_id\" FROM \"(SELECT * FROM a_list)\" As j WHERE ST_Touches(" \ + "j.\"cartodb_id\" FROM (SELECT * FROM a_list) As j WHERE ST_Touches(" \ "i.\"the_geom\", j.\"the_geom\") AND j.\"andy\" IS NOT NULL " \ "AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0)) As " \ - "neighbors 
FROM \"(SELECT * FROM a_list)\" As i WHERE i.\"andy\" IS NOT NULL " \ + "neighbors FROM (SELECT * FROM a_list) As i WHERE i.\"andy\" IS NOT NULL " \ "AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER BY " \ "i.\"cartodb_id\" ASC;" @@ -88,10 +88,10 @@ class MoranTest(unittest.TestCase): ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \ - "j.\"cartodb_id\" FROM \"(SELECT * FROM a_list)\" As j WHERE j.\"andy\" IS " \ + "j.\"cartodb_id\" FROM (SELECT * FROM a_list) As j WHERE j.\"andy\" IS " \ "NOT NULL AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 " \ "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 " \ - "OFFSET 1 ) ) As neighbors FROM \"(SELECT * FROM a_list)\" As i WHERE " \ + "OFFSET 1 ) ) As neighbors FROM (SELECT * FROM a_list) As i WHERE " \ "i.\"andy\" IS NOT NULL AND i.\"jay_z\" IS NOT NULL AND " \ "i.\"jay_z\" <> 0 ORDER BY i.\"cartodb_id\" ASC;" @@ -125,7 +125,7 @@ class MoranTest(unittest.TestCase): data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.moran_local('table', 'value', 0.05, 5, 99, 'the_geom', 'cartodb_id', 'knn') + result = cc.moran_local('table', 'value', 99, 'the_geom', 'cartodb_id', 'knn', 5) result = [(row[0], row[1]) for row in result] expected = self.moran_data for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): @@ -137,8 +137,9 @@ class MoranTest(unittest.TestCase): data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data] plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.moran_local_rate('table', 'numerator', 'denominator', 0.05, 5, 99, 'the_geom', 'cartodb_id', 'knn') + result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 99, 'the_geom', 'cartodb_id', 'knn', 5) + print 
'result == None? ', result == None result = [(row[0], row[1]) for row in result] expected = self.moran_data for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): - self.assertAlmostEqual(res_val, exp_val) + self.assertAlmostEqual(res_val, exp_val) \ No newline at end of file From bc67ae8f69e710e0eddaf6f63a5304b68131558e Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 29 Mar 2016 12:18:52 -0700 Subject: [PATCH 04/12] changed name of functions for observatory --- src/pg/sql/10_moran.sql | 10 +++++----- src/pg/test/expected/02_moran_test.out | 18 +++++++++--------- src/pg/test/sql/02_moran_test.sql | 6 +++--- src/pg/test/sql/90_permissions.sql | 2 +- .../crankshaft/crankshaft/clustering/moran.py | 5 ++--- 5 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index dba7069..3089fc6 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -1,6 +1,6 @@ -- Moran's I (global) CREATE OR REPLACE FUNCTION - cdb_moran ( + CDB_AreasOfInterest_Global ( subquery TEXT, attr_name TEXT, permutations INT DEFAULT 99, @@ -17,7 +17,7 @@ $$ LANGUAGE plpythonu; -- Moran's I Local CREATE OR REPLACE FUNCTION - cdb_moran_local ( + CDB_AreasOfInterest_Local( subquery TEXT, attr TEXT, permutations INT DEFAULT 99, @@ -34,7 +34,7 @@ $$ LANGUAGE plpythonu; -- Moran's I Rate (global) CREATE OR REPLACE FUNCTION - cdb_moran_rate ( + CDB_AreasOfInterest_Global_Rate( subquery TEXT, numerator TEXT, denominator TEXT, @@ -53,7 +53,7 @@ $$ LANGUAGE plpythonu; -- Moran's I Local Rate CREATE OR REPLACE FUNCTION - cdb_moran_local_rate( + CDB_AreasOfInterest_Local_Rate( subquery TEXT, numerator TEXT, denominator TEXT, @@ -86,4 +86,4 @@ $$ LANGUAGE plpythonu; -- from crankshaft.clustering import moran_local_bv -- # TODO: use named parameters or a dictionary -- return moran_local_bv(t, attr1, attr2, permutations, geom_col, id_col, w_type, num_ngbrs) --- $$ LANGUAGE plpythonu; \ No newline at end of file +-- $$ 
LANGUAGE plpythonu; diff --git a/src/pg/test/expected/02_moran_test.out b/src/pg/test/expected/02_moran_test.out index 92cb218..20b92cd 100644 --- a/src/pg/test/expected/02_moran_test.out +++ b/src/pg/test/expected/02_moran_test.out @@ -110,7 +110,7 @@ INSERT INTO ppoints2 VALUES (24,'0101000020E61000009C5F91C5095C17C0C78784B15A4F4540'::geometry,'24','07',0.3, 1.0), (29,'0101000020E6100000C34D4A5B48E712C092E680892C684240'::geometry,'29','01',0.3, 1.0), (52,'0101000020E6100000406A545EB29A07C04E5F0BDA39A54140'::geometry,'52','19',0.0, 1.01) --- Moral functions perform some nondeterministic computations +-- Areas of Interest functions perform some nondeterministic computations -- (to estimate the significance); we will set the seeds for the RNGs -- that affect those results to have repeateble results SELECT cdb_crankshaft._cdb_random_seeds(1234); @@ -121,15 +121,15 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints.code, m.quads FROM ppoints - JOIN cdb_crankshaft.cdb_moran_local('SELECT * FROM ppoints', 'value') m + JOIN cdb_crankshaft.CDB_AreasOfInterest_Local('SELECT * FROM ppoints', 'value') m ON ppoints.cartodb_id = m.ids ORDER BY ppoints.code; NOTICE: ** Constructing query -CONTEXT: PL/Python function "cdb_moran_local" +CONTEXT: PL/Python function "cdb_areasofinterest_local" NOTICE: ** Query returned with 52 rows -CONTEXT: PL/Python function "cdb_moran_local" +CONTEXT: PL/Python function "cdb_areasofinterest_local" NOTICE: ** Finished calculations -CONTEXT: PL/Python function "cdb_moran_local" +CONTEXT: PL/Python function "cdb_areasofinterest_local" code | quads ------+------- 01 | HH @@ -194,15 +194,15 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints2.code, m.quads FROM ppoints2 - JOIN cdb_crankshaft.cdb_moran_local_rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m + JOIN cdb_crankshaft.CDB_AreasOfInterest_Local_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m ON ppoints2.cartodb_id = m.ids ORDER BY 
ppoints2.code; NOTICE: ** Constructing query -CONTEXT: PL/Python function "cdb_moran_local_rate" +CONTEXT: PL/Python function "cdb_areasofinterest_local_rate" NOTICE: ** Query returned with 51 rows -CONTEXT: PL/Python function "cdb_moran_local_rate" +CONTEXT: PL/Python function "cdb_areasofinterest_local_rate" NOTICE: ** Finished calculations -CONTEXT: PL/Python function "cdb_moran_local_rate" +CONTEXT: PL/Python function "cdb_areasofinterest_local_rate" code | quads ------+------- 01 | LL diff --git a/src/pg/test/sql/02_moran_test.sql b/src/pg/test/sql/02_moran_test.sql index a0bc401..de9c6cf 100644 --- a/src/pg/test/sql/02_moran_test.sql +++ b/src/pg/test/sql/02_moran_test.sql @@ -1,14 +1,14 @@ \i test/fixtures/ppoints.sql \i test/fixtures/ppoints2.sql --- Moral functions perform some nondeterministic computations +-- Areas of Interest functions perform some nondeterministic computations -- (to estimate the significance); we will set the seeds for the RNGs -- that affect those results to have repeateble results SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints.code, m.quads FROM ppoints - JOIN cdb_crankshaft.cdb_moran_local('SELECT * FROM ppoints', 'value') m + JOIN cdb_crankshaft.CDB_AreasOfInterest_Local('SELECT * FROM ppoints', 'value') m ON ppoints.cartodb_id = m.ids ORDER BY ppoints.code; @@ -16,6 +16,6 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints2.code, m.quads FROM ppoints2 - JOIN cdb_crankshaft.cdb_moran_local_rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m + JOIN cdb_crankshaft.CDB_AreasOfInterest_Local_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m ON ppoints2.cartodb_id = m.ids ORDER BY ppoints2.code; diff --git a/src/pg/test/sql/90_permissions.sql b/src/pg/test/sql/90_permissions.sql index 2e7a89c..187f795 100644 --- a/src/pg/test/sql/90_permissions.sql +++ b/src/pg/test/sql/90_permissions.sql @@ -9,7 +9,7 @@ SET search_path TO public,cartodb,cdb_crankshaft; -- Exercise public functions SELECT 
ppoints.code, m.quads FROM ppoints - JOIN cdb_moran_local('ppoints', 'value') m + JOIN CDB_AreasOfInterest_Local('ppoints', 'value') m ON ppoints.cartodb_id = m.ids ORDER BY ppoints.code; SELECT round(cdb_overlap_sum( diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 2d65db5..728e72f 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -14,6 +14,7 @@ import plpy def moran(subquery, attr_name, permutations, geom_col, id_col, w_type, num_ngbrs): """ Moran's I (global) + Implementation building neighors with a PostGIS database and Moran's I core clusters with PySAL. Andy Eschbacher """ qvals = {"id_col": id_col, @@ -172,8 +173,6 @@ def moran_local_rate(subquery, numerator, denominator, permutations, geom_col, i plpy.notice('** Exiting function') return zip([None], [None], [None], [None], [None]) - plpy.notice('r.nrows() = %d' % r.nrows()) - ## collect attributes numer = get_attributes(r, 1) denom = get_attributes(r, 2) @@ -389,4 +388,4 @@ def quad_position(quads): lisa_sig = np.array([map_quads(q) for q in quads]) - return lisa_sig \ No newline at end of file + return lisa_sig From 06f5cf9951719ad413e22a3a3e924360ff180046 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 29 Mar 2016 12:34:23 -0700 Subject: [PATCH 05/12] standarizing error reporting --- src/py/crankshaft/crankshaft/clustering/moran.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 728e72f..8db23e4 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -35,7 +35,7 @@ def moran(subquery, attr_name, permutations, geom_col, id_col, w_type, num_ngbrs r = plpy.execute(q) plpy.notice('** Query returned with %d rows' % len(r)) except plpy.SPIError: - plpy.error('** Moran rate 
failed executing query to build weight object') + plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % q) plpy.notice('** Error: %s' % plpy.SPIError) plpy.notice('** Exiting function') @@ -81,6 +81,7 @@ def moran_local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbr r = plpy.execute(q) plpy.notice('** Query returned with %d rows' % len(r)) except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % q) plpy.notice('** Exiting function') return zip([None], [None], [None], [None], [None]) @@ -121,7 +122,7 @@ def moran_rate(subquery, numerator, denominator, permutations, geom_col, id_col, r = plpy.execute(q) plpy.notice('** Query returned with %d rows' % len(r)) except plpy.SPIError: - plpy.error('Moran rate failed executing query to build weight object') + plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % q) plpy.notice('** Error: %s' % plpy.SPIError) plpy.notice('** Exiting function') @@ -168,6 +169,7 @@ def moran_local_rate(subquery, numerator, denominator, permutations, geom_col, i r = plpy.execute(q) plpy.notice('** Query returned with %d rows' % len(r)) except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % q) plpy.notice('** Error: %s' % plpy.SPIError) plpy.notice('** Exiting function') @@ -205,6 +207,7 @@ def moran_local_bv(subquery, attr1, attr2, permutations, geom_col, id_col, w_typ r = plpy.execute(q) plpy.notice('** Query returned with %d rows' % len(r)) except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % q) plpy.notice('** Error: %s' % plpy.SPIError) plpy.notice('** Exiting function') From 8dd8ab37a50bb5c0511f7b14b4bbcf75b6e8046b Mon Sep 17 00:00:00 2001 From: Andy 
Eschbacher Date: Tue, 29 Mar 2016 22:49:31 -0700 Subject: [PATCH 06/12] refactored from pylint --- .../crankshaft/crankshaft/clustering/moran.py | 209 +++++++++--------- .../crankshaft/test/test_clustering_moran.py | 19 +- 2 files changed, 112 insertions(+), 116 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 8db23e4..3b407c0 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -11,10 +11,12 @@ import plpy # High level interface --------------------------------------- -def moran(subquery, attr_name, permutations, geom_col, id_col, w_type, num_ngbrs): +def moran(subquery, attr_name, + permutations, geom_col, id_col, w_type, num_ngbrs): """ Moran's I (global) - Implementation building neighors with a PostGIS database and Moran's I core clusters with PySAL. + Implementation building neighbors with a PostGIS database and Moran's I + core clusters with PySAL. 
Andy Eschbacher """ qvals = {"id_col": id_col, @@ -23,48 +25,39 @@ def moran(subquery, attr_name, permutations, geom_col, id_col, w_type, num_ngbrs "subquery": subquery, "num_ngbrs": num_ngbrs} - q = get_query(w_type, qvals) + query = construct_neighbor_query(w_type, qvals) - plpy.notice('** Query: %s' % q) + plpy.notice('** Query: %s' % query) try: - r = plpy.execute(q) - if (len(r) == 0) & (w_type != 'knn'): - plpy.notice('** Query returned with 0 rows, trying kNN weights') - q = get_query('knn', qvals) - r = plpy.execute(q) - plpy.notice('** Query returned with %d rows' % len(r)) + result = plpy.execute(query) + ## if there are no neighbors, exit + if len(result) == 0: + return zip([None], [None]) + plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') - plpy.notice('** Query failed: "%s"' % q) + plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - plpy.notice('** Exiting function') - return zip([None], [None]) - - ## if there are no neighbors, exit - if len(r) == 0: return zip([None], [None]) ## collect attributes - attr_vals = get_attributes(r, 1) + attr_vals = get_attributes(result) ## calculate weights - weight = get_weight(r, w_type, num_ngbrs) + weight = get_weight(result, w_type, num_ngbrs) ## calculate moran global moran_global = ps.esda.moran.Moran(attr_vals, weight, permutations=permutations) - return zip([moran_global.I],[moran_global.EI]) + return zip([moran_global.I], [moran_global.EI]) -def moran_local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs): +def moran_local(subquery, attr, + permutations, geom_col, id_col, w_type, num_ngbrs): """ Moran's I implementation for PL/Python Andy Eschbacher """ - # TODO: ensure that the significance output can be smaller that 1e-3 (0.001) - # TODO: make a wishlist of output features (zscores, pvalues, raw local lisa, what else?) 
- - plpy.notice('** Constructing query') # geometries with attributes that are null are ignored # resulting in a collection of not as near neighbors @@ -75,30 +68,32 @@ def moran_local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbr "subquery": subquery, "num_ngbrs": num_ngbrs} - q = get_query(w_type, qvals) + query = construct_neighbor_query(w_type, qvals) try: - r = plpy.execute(q) - plpy.notice('** Query returned with %d rows' % len(r)) + result = plpy.execute(query) + if len(result) == 0: + return zip([None], [None], [None], [None], [None]) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') - plpy.notice('** Query failed: "%s"' % q) - plpy.notice('** Exiting function') + plpy.notice('** Query failed: "%s"' % query) return zip([None], [None], [None], [None], [None]) - y = get_attributes(r, 1) - w = get_weight(r, w_type) + attr_vals = get_attributes(result) + weight = get_weight(result, w_type) # calculate LISA values - lisa = ps.esda.moran.Moran_Local(y, w) + lisa = ps.esda.moran.Moran_Local(attr_vals, weight, + permutations=permutations) # find quadrants for each geometry quads = quad_position(lisa.q) plpy.notice('** Finished calculations') - return zip(lisa.Is, quads, lisa.p_sim, w.id_order, lisa.y) + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) -def moran_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs): +def moran_rate(subquery, numerator, denominator, + permutations, geom_col, id_col, w_type, num_ngbrs): """ Moran's I Rate (global) Andy Eschbacher @@ -110,88 +105,82 @@ def moran_rate(subquery, numerator, denominator, permutations, geom_col, id_col, "subquery": subquery, "num_ngbrs": num_ngbrs} - q = get_query(w_type, qvals) + query = construct_neighbor_query(w_type, qvals) - plpy.notice('** Query: %s' % q) + plpy.notice('** Query: %s' % query) try: - r = plpy.execute(q) - if len(r) == 0: - plpy.notice('** Query returned with 0 rows, 
trying kNN weights') - q = get_query('knn', qvals) - r = plpy.execute(q) - plpy.notice('** Query returned with %d rows' % len(r)) + result = plpy.execute(query) + if len(result) == 0: + ## if there are no values returned, exit + return zip([None], [None]) + plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') - plpy.notice('** Query failed: "%s"' % q) + plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - plpy.notice('** Exiting function') - return zip([None], [None]) - - ## if there are no values returned, exit - if len(r) == 0: return zip([None], [None]) ## collect attributes - numer = get_attributes(r, 1) - denom = get_attributes(r, 2) + numer = get_attributes(result, 1) + denom = get_attributes(result, 2) - w = get_weight(r, w_type, num_ngbrs) + weight = get_weight(result, w_type, num_ngbrs) ## calculate moran global rate - mr = ps.esda.moran.Moran_Rate(numer, denom, w, permutations=permutations) + lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + permutations=permutations) - plpy.notice('** Finished calculations') + return zip([lisa_rate.I], [lisa_rate.EI]) - return zip([mr.I],[mr.EI]) - -def moran_local_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs): +def moran_local_rate(subquery, numerator, denominator, + permutations, geom_col, id_col, w_type, num_ngbrs): """ - Moran's I Local Rate - Andy Eschbacher + Moran's I Local Rate + Andy Eschbacher """ - - plpy.notice('** Constructing query') - - # geometries with attributes that are null are ignored + # geometries with values that are null are ignored # resulting in a collection of not as near neighbors - qvals = {"id_col": id_col, - "numerator": numerator, - "denominator": denominator, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs} - - q = get_query(w_type, qvals) + query = 
construct_neighbor_query(w_type, + {"id_col": id_col, + "numerator": numerator, + "denominator": denominator, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs}) try: - r = plpy.execute(q) - plpy.notice('** Query returned with %d rows' % len(r)) + result = plpy.execute(query) + plpy.notice('** Query returned with %d rows' % len(result)) + if len(result) == 0: + return zip([None], [None], [None], [None], [None]) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') - plpy.notice('** Query failed: "%s"' % q) + plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - plpy.notice('** Exiting function') return zip([None], [None], [None], [None], [None]) ## collect attributes - numer = get_attributes(r, 1) - denom = get_attributes(r, 2) + numer = get_attributes(result, 1) + denom = get_attributes(result, 2) - w = get_weight(r, w_type, num_ngbrs) + weight = get_weight(result, w_type, num_ngbrs) # calculate LISA values - lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, w, permutations=permutations) + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, + permutations=permutations) # find units of significance - quads = quad_position(lisa.q) + quads = quad_position(lisa.q) - plpy.notice('** Finished calculations') + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) - return zip(lisa.Is, quads, lisa.p_sim, w.id_order, lisa.y) - -def moran_local_bv(subquery, attr1, attr2, permutations, geom_col, id_col, w_type, num_ngbrs): +def moran_local_bv(subquery, attr1, attr2, + permutations, geom_col, id_col, w_type, num_ngbrs): + """ + Moran's I (local) Bivariate (untested) + """ plpy.notice('** Constructing query') qvals = {"num_ngbrs": num_ngbrs, @@ -201,27 +190,28 @@ def moran_local_bv(subquery, attr1, attr2, permutations, geom_col, id_col, w_typ "geom_col": geom_col, "id_col": id_col} - q = get_query(w_type, qvals) + query = construct_neighbor_query(w_type, 
qvals) try: - r = plpy.execute(q) - plpy.notice('** Query returned with %d rows' % len(r)) + result = plpy.execute(query) + plpy.notice('** Query returned with %d rows' % len(result)) + if len(result) == 0: + return zip([None], [None], [None], [None]) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') - plpy.notice('** Query failed: "%s"' % q) - plpy.notice('** Error: %s' % plpy.SPIError) - plpy.notice('** Exiting function') + plpy.notice('** Query failed: "%s"' % query) return zip([None], [None], [None], [None]) ## collect attributes - attr1_vals = get_attributes(r, 1) - attr2_vals = get_attributes(r, 2) + attr1_vals = get_attributes(result, 1) + attr2_vals = get_attributes(result, 2) # create weights - w = get_weight(r, w_type, num_ngbrs) + weight = get_weight(result, w_type, num_ngbrs) # calculate LISA values - lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, w) + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, + permutations=permutations) plpy.notice("len of Is: %d" % len(lisa.Is)) @@ -230,7 +220,7 @@ def moran_local_bv(subquery, attr1, attr2, permutations, geom_col, id_col, w_typ plpy.notice('** Finished calculations') - return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order) + return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) # Low level functions ---------------------------------------- @@ -240,7 +230,7 @@ def map_quads(coord): Map a quadrant number to Moran's I designation HH=1, LH=2, LL=3, HL=4 Input: - :param coord (int): quadrant of a specific measurement + @param coord (int): quadrant of a specific measurement """ if coord == 1: return 'HH' @@ -256,7 +246,7 @@ def map_quads(coord): def query_attr_select(params): """ Create portion of SELECT statement for attributes inolved in query. - :param params: dict of information used in query (column names, + @param params: dict of information used in query (column names, table name, etc.) 
""" @@ -293,7 +283,7 @@ def query_attr_where(params): def knn(params): """SQL query for k-nearest neighbors. - :param vars: dict of values to fill template + @param vars: dict of values to fill template """ attr_select = query_attr_select(params) @@ -322,7 +312,7 @@ def knn(params): ## SQL query for finding queens neighbors (all contiguous polygons) def queen(params): """SQL query for queen neighbors. - :param params: dict of information to fill query + @param params dict: information to fill query """ attr_select = query_attr_select(params) attr_where = query_attr_where(params) @@ -348,10 +338,10 @@ def queen(params): ## to add more weight methods open a ticket or pull request -def get_query(w_type, query_vals): +def construct_neighbor_query(w_type, query_vals): """Return requested query. - :param w_type: type of neighbors to calculate (knn or queen) - :param query_vals: values used to construct the query + @param w_type text: type of neighbors to calculate ('knn' or 'queen') + @param query_vals dict: values used to construct the query """ if w_type == 'knn': @@ -359,10 +349,10 @@ def get_query(w_type, query_vals): else: return queen(query_vals) -def get_attributes(query_res, attr_num): +def get_attributes(query_res, attr_num=1): """ - :param query_res: query results with attributes and neighbors - :param attr_num: attribute number (1, 2, ...) + @param query_res: query results with attributes and neighbors + @param attr_num: attribute number (1, 2, ...) 
""" return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float) @@ -370,15 +360,15 @@ def get_attributes(query_res, attr_num): def get_weight(query_res, w_type='queen', num_ngbrs=5): """ Construct PySAL weight from return value of query - :param query_res: query results with attributes and neighbors + @param query_res: query results with attributes and neighbors """ if w_type == 'knn': row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs weights = {x['id']: row_normed_weights for x in query_res} else: weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) - if len(x['neighbors']) > 0 - else [] for x in query_res} + if len(x['neighbors']) > 0 + else [] for x in query_res} neighbors = {x['id']: x['neighbors'] for x in query_res} @@ -387,6 +377,11 @@ def get_weight(query_res, w_type='queen', num_ngbrs=5): def quad_position(quads): """ Produce Moran's I classification based of n + Input: + @param quads ndarray: an array of quads classified by + 1-4 (PySAL default) + Output: + @param ndarray: an array of quads classied by 'HH', 'LL', etc. 
""" lisa_sig = np.array([map_quads(q) for q in quads]) diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index c7cc71a..95d959c 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -56,7 +56,7 @@ class MoranTest(unittest.TestCase): self.assertEqual(cc.query_attr_where(self.params), ans) def test_knn(self): - """Test knn function.""" + """Test knn neighbors constructor""" ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT j.\"cartodb_id\" " \ @@ -70,7 +70,7 @@ class MoranTest(unittest.TestCase): self.assertEqual(cc.knn(self.params), ans) def test_queen(self): - """Test queen neighbors function.""" + """Test queen neighbors constructor""" ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \ @@ -83,19 +83,20 @@ class MoranTest(unittest.TestCase): self.assertEqual(cc.queen(self.params), ans) - def test_get_query(self): - """Test get_query.""" + def test_construct_neighbor_query(self): + """Test construct_neighbor_query""" ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \ "j.\"cartodb_id\" FROM (SELECT * FROM a_list) As j WHERE j.\"andy\" IS " \ "NOT NULL AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 " \ "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 " \ - "OFFSET 1 ) ) As neighbors FROM (SELECT * FROM a_list) As i WHERE " \ - "i.\"andy\" IS NOT NULL AND i.\"jay_z\" IS NOT NULL AND " \ - "i.\"jay_z\" <> 0 ORDER BY i.\"cartodb_id\" ASC;" + "OFFSET 1 ) ) As neighbors FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" - self.assertEqual(cc.get_query('knn', self.params), ans) + 
self.assertEqual(cc.construct_neighbor_query('knn', self.params), ans) def test_get_attributes(self): """Test get_attributes.""" @@ -142,4 +143,4 @@ class MoranTest(unittest.TestCase): result = [(row[0], row[1]) for row in result] expected = self.moran_data for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): - self.assertAlmostEqual(res_val, exp_val) \ No newline at end of file + self.assertAlmostEqual(res_val, exp_val) From e56519f599e2b4462be7a644824a8ca6da558c37 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 29 Mar 2016 23:39:29 -0700 Subject: [PATCH 07/12] removed unneded comments, make outputs more consistent --- .../crankshaft/crankshaft/clustering/moran.py | 51 +++++++---- .../crankshaft/test/test_clustering_moran.py | 87 +++++++++++-------- 2 files changed, 84 insertions(+), 54 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 3b407c0..7f402a8 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -31,15 +31,15 @@ def moran(subquery, attr_name, try: result = plpy.execute(query) - ## if there are no neighbors, exit + # if there are no neighbors, exit if len(result) == 0: - return zip([None], [None]) + return empty_zipped_array(2) plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - return zip([None], [None]) + return empty_zipped_array(2) ## collect attributes attr_vals = get_attributes(result) @@ -72,12 +72,13 @@ def moran_local(subquery, attr, try: result = plpy.execute(query) + # if there are no neighbors, exit if len(result) == 0: - return zip([None], [None], [None], [None], [None]) + return empty_zipped_array(5) except plpy.SPIError: plpy.error('Error: areas of interest query failed, 
check input parameters') plpy.notice('** Query failed: "%s"' % query) - return zip([None], [None], [None], [None], [None]) + return empty_zipped_array(5) attr_vals = get_attributes(result) weight = get_weight(result, w_type) @@ -111,15 +112,15 @@ def moran_rate(subquery, numerator, denominator, try: result = plpy.execute(query) + # if there are no neighbors, exit if len(result) == 0: - ## if there are no values returned, exit - return zip([None], [None]) + return empty_zipped_array(2) plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - return zip([None], [None]) + return empty_zipped_array(2) ## collect attributes numer = get_attributes(result, 1) @@ -152,14 +153,14 @@ def moran_local_rate(subquery, numerator, denominator, try: result = plpy.execute(query) - plpy.notice('** Query returned with %d rows' % len(result)) + # if there are no neighbors, exit if len(result) == 0: - return zip([None], [None], [None], [None], [None]) + return empty_zipped_array(5) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - return zip([None], [None], [None], [None], [None]) + return empty_zipped_array(5) ## collect attributes numer = get_attributes(result, 1) @@ -194,13 +195,14 @@ def moran_local_bv(subquery, attr1, attr2, try: result = plpy.execute(query) - plpy.notice('** Query returned with %d rows' % len(result)) + # if there are no neighbors, exit if len(result) == 0: - return zip([None], [None], [None], [None]) + return empty_zipped_array(4) except plpy.SPIError: - plpy.error('Error: areas of interest query failed, check input parameters') + plpy.error("Error: areas of interest query failed, " \ + "check input parameters") plpy.notice('** Query 
failed: "%s"' % query) - return zip([None], [None], [None], [None]) + return empty_zipped_array(4) ## collect attributes attr1_vals = get_attributes(result, 1) @@ -222,7 +224,6 @@ def moran_local_bv(subquery, attr1, attr2, return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) - # Low level functions ---------------------------------------- def map_quads(coord): @@ -231,6 +232,8 @@ def map_quads(coord): HH=1, LH=2, LL=3, HL=4 Input: @param coord (int): quadrant of a specific measurement + Output: + classification (one of 'HH', 'LH', 'LL', or 'HL') """ if coord == 1: return 'HH' @@ -298,9 +301,10 @@ def knn(params): "%(attr_select)s" \ "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ "FROM ({subquery}) As j " \ - "WHERE %(attr_where_j)s " \ + "WHERE %(attr_where_j)s AND " \ + "i.\"{id_col}\" <> j.\"{id_col}\" " \ "ORDER BY j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ - "LIMIT {num_ngbrs} OFFSET 1 ) " \ + "LIMIT {num_ngbrs}) " \ ") As neighbors " \ "FROM ({subquery}) As i " \ "WHERE " \ @@ -387,3 +391,14 @@ def quad_position(quads): lisa_sig = np.array([map_quads(q) for q in quads]) return lisa_sig + +def return_empty_zipped_array(num_nones): + """ + prepare return values for cases of empty weights objects (no neighbors) + Input: + @param num_nones int: number of columns (e.g., 4) + Output: + [(None, None, None, None)] + """ + + return [tuple([None] * num_nones)] diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index 95d959c..e2d2a50 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -16,7 +16,7 @@ from crankshaft import random_seeds import json class MoranTest(unittest.TestCase): - """Testing class for Moran's I functions.""" + """Testing class for Moran's I functions""" def setUp(self): plpy._reset() @@ -30,7 +30,7 @@ class MoranTest(unittest.TestCase): self.moran_data = json.loads(open(fixture_file('moran.json')).read()) def 
test_map_quads(self): - """Test map_quads.""" + """Test map_quads""" self.assertEqual(cc.map_quads(1), 'HH') self.assertEqual(cc.map_quads(2), 'LH') self.assertEqual(cc.map_quads(3), 'LL') @@ -39,7 +39,7 @@ class MoranTest(unittest.TestCase): self.assertEqual(cc.map_quads('andy'), None) def test_query_attr_select(self): - """Test query_attr_select.""" + """Test query_attr_select""" ans = "i.\"{attr1}\"::numeric As attr1, " \ "i.\"{attr2}\"::numeric As attr2, " @@ -47,10 +47,10 @@ class MoranTest(unittest.TestCase): self.assertEqual(cc.query_attr_select(self.params), ans) def test_query_attr_where(self): - """Test query_attr_where.""" + """Test query_attr_where""" - ans = "idx_replace.\"{attr1}\" IS NOT NULL AND "\ - "idx_replace.\"{attr2}\" IS NOT NULL AND "\ + ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" IS NOT NULL AND " \ "idx_replace.\"{attr2}\" <> 0" self.assertEqual(cc.query_attr_where(self.params), ans) @@ -58,61 +58,76 @@ class MoranTest(unittest.TestCase): def test_knn(self): """Test knn neighbors constructor""" - ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ - "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT j.\"cartodb_id\" " \ - "FROM (SELECT * FROM a_list) As j WHERE j.\"andy\" IS NOT NULL AND " \ - "j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 ORDER BY " \ - "j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 OFFSET 1 ) ) " \ - "As neighbors FROM (SELECT * FROM a_list) As i WHERE i.\"andy\" IS NOT " \ - "NULL AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER " \ - "BY i.\"cartodb_id\" ASC;" + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0 AND " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" " \ + "ORDER BY " \ + "j.\"the_geom\" <-> i.\"the_geom\" 
ASC " \ + "LIMIT 321) ) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" self.assertEqual(cc.knn(self.params), ans) def test_queen(self): """Test queen neighbors constructor""" - ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ - "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \ - "j.\"cartodb_id\" FROM (SELECT * FROM a_list) As j WHERE ST_Touches(" \ - "i.\"the_geom\", j.\"the_geom\") AND j.\"andy\" IS NOT NULL " \ - "AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0)) As " \ - "neighbors FROM (SELECT * FROM a_list) As i WHERE i.\"andy\" IS NOT NULL " \ - "AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER BY " \ - "i.\"cartodb_id\" ASC;" + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE ST_Touches(i.\"the_geom\", " \ + "j.\"the_geom\") AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0) + ") As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" self.assertEqual(cc.queen(self.params), ans) def test_construct_neighbor_query(self): """Test construct_neighbor_query""" - ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ - "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \ - "j.\"cartodb_id\" FROM (SELECT * FROM a_list) As j WHERE j.\"andy\" IS " \ - "NOT NULL AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 " \ - "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 " \ - "OFFSET 1 ) ) As neighbors FROM (SELECT * FROM a_list) As i " \ - "WHERE i.\"andy\" IS NOT NULL AND i.\"jay_z\" IS NOT NULL AND " \ - "i.\"jay_z\" <> 0 " \ 
- "ORDER BY i.\"cartodb_id\" ASC;" - - self.assertEqual(cc.construct_neighbor_query('knn', self.params), ans) + # Compare to raw knn query + self.assertEqual(cc.construct_neighbor_query('knn', self.params), + cc.knn(self.params)) def test_get_attributes(self): - """Test get_attributes.""" + """Test get_attributes""" ## need to add tests self.assertEqual(True, True) def test_get_weight(self): - """Test get_weight.""" + """Test get_weight""" self.assertEqual(True, True) + def test_empty_zipped_array(self): + """Test empty_zipped_array""" + ans2 = [(None, None)] + ans4 = [(None, None, None, None)] + self.assertEqual(cc.empty_zipped_array(2), ans2) + self.assertEqual(cc.empty_zipped_array(4), ans4) def test_quad_position(self): - """Test lisa_sig_vals.""" + """Test lisa_sig_vals""" quads = np.array([1, 2, 3, 4], np.int) From 5a46f65e5916fb88ebf185fce240c49a4fdcca4b Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 30 Mar 2016 08:09:48 -0400 Subject: [PATCH 08/12] update tests to remove plpy notices --- src/pg/test/expected/02_moran_test.out | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/pg/test/expected/02_moran_test.out b/src/pg/test/expected/02_moran_test.out index 20b92cd..eb2afa5 100644 --- a/src/pg/test/expected/02_moran_test.out +++ b/src/pg/test/expected/02_moran_test.out @@ -124,12 +124,6 @@ SELECT ppoints.code, m.quads JOIN cdb_crankshaft.CDB_AreasOfInterest_Local('SELECT * FROM ppoints', 'value') m ON ppoints.cartodb_id = m.ids ORDER BY ppoints.code; -NOTICE: ** Constructing query -CONTEXT: PL/Python function "cdb_areasofinterest_local" -NOTICE: ** Query returned with 52 rows -CONTEXT: PL/Python function "cdb_areasofinterest_local" -NOTICE: ** Finished calculations -CONTEXT: PL/Python function "cdb_areasofinterest_local" code | quads ------+------- 01 | HH @@ -197,12 +191,6 @@ SELECT ppoints2.code, m.quads JOIN cdb_crankshaft.CDB_AreasOfInterest_Local_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m ON 
ppoints2.cartodb_id = m.ids ORDER BY ppoints2.code; -NOTICE: ** Constructing query -CONTEXT: PL/Python function "cdb_areasofinterest_local_rate" -NOTICE: ** Query returned with 51 rows -CONTEXT: PL/Python function "cdb_areasofinterest_local_rate" -NOTICE: ** Finished calculations -CONTEXT: PL/Python function "cdb_areasofinterest_local_rate" code | quads ------+------- 01 | LL From 6bb4f36df5f18d94306cd508dd65639d894a06e1 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 30 Mar 2016 08:10:35 -0400 Subject: [PATCH 09/12] extracting util code to new submodule --- .../crankshaft/crankshaft/clustering/moran.py | 214 +++--------------- .../crankshaft/pysal_utils/__init__.py | 1 + .../crankshaft/pysal_utils/pysal_utils.py | 149 ++++++++++++ .../crankshaft/test/test_clustering_moran.py | 21 +- 4 files changed, 196 insertions(+), 189 deletions(-) create mode 100644 src/py/crankshaft/crankshaft/pysal_utils/__init__.py create mode 100644 src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 7f402a8..2a043c3 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -5,10 +5,12 @@ Moran's I geostatistics (global clustering & outliers presence) # TODO: Fill in local neighbors which have null/NoneType values with the # average of the their neighborhood -import numpy as np import pysal as ps import plpy +# crankshaft module +import crankshaft.pysal_utils as pu + # High level interface --------------------------------------- def moran(subquery, attr_name, @@ -25,7 +27,7 @@ def moran(subquery, attr_name, "subquery": subquery, "num_ngbrs": num_ngbrs} - query = construct_neighbor_query(w_type, qvals) + query = pu.construct_neighbor_query(w_type, qvals) plpy.notice('** Query: %s' % query) @@ -33,22 +35,23 @@ def moran(subquery, attr_name, result = plpy.execute(query) # if there are no neighbors, 
exit if len(result) == 0: - return empty_zipped_array(2) + return pu.empty_zipped_array(2) plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - return empty_zipped_array(2) + return pu.empty_zipped_array(2) ## collect attributes - attr_vals = get_attributes(result) + attr_vals = pu.get_attributes(result) ## calculate weights - weight = get_weight(result, w_type, num_ngbrs) + weight = pu.get_weight(result, w_type, num_ngbrs) ## calculate moran global - moran_global = ps.esda.moran.Moran(attr_vals, weight, permutations=permutations) + moran_global = ps.esda.moran.Moran(attr_vals, weight, + permutations=permutations) return zip([moran_global.I], [moran_global.EI]) @@ -68,20 +71,20 @@ def moran_local(subquery, attr, "subquery": subquery, "num_ngbrs": num_ngbrs} - query = construct_neighbor_query(w_type, qvals) + query = pu.construct_neighbor_query(w_type, qvals) try: result = plpy.execute(query) # if there are no neighbors, exit if len(result) == 0: - return empty_zipped_array(5) + return pu.empty_zipped_array(5) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) - return empty_zipped_array(5) + return pu.empty_zipped_array(5) - attr_vals = get_attributes(result) - weight = get_weight(result, w_type) + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) # calculate LISA values lisa = ps.esda.moran.Moran_Local(attr_vals, weight, @@ -90,7 +93,6 @@ def moran_local(subquery, attr, # find quadrants for each geometry quads = quad_position(lisa.q) - plpy.notice('** Finished calculations') return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) def moran_rate(subquery, numerator, denominator, @@ -106,7 +108,7 @@ def moran_rate(subquery, numerator, 
denominator, "subquery": subquery, "num_ngbrs": num_ngbrs} - query = construct_neighbor_query(w_type, qvals) + query = pu.construct_neighbor_query(w_type, qvals) plpy.notice('** Query: %s' % query) @@ -114,19 +116,19 @@ def moran_rate(subquery, numerator, denominator, result = plpy.execute(query) # if there are no neighbors, exit if len(result) == 0: - return empty_zipped_array(2) + return pu.empty_zipped_array(2) plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - return empty_zipped_array(2) + return pu.empty_zipped_array(2) ## collect attributes - numer = get_attributes(result, 1) - denom = get_attributes(result, 2) + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) - weight = get_weight(result, w_type, num_ngbrs) + weight = pu.get_weight(result, w_type, num_ngbrs) ## calculate moran global rate lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, @@ -143,7 +145,7 @@ def moran_local_rate(subquery, numerator, denominator, # geometries with values that are null are ignored # resulting in a collection of not as near neighbors - query = construct_neighbor_query(w_type, + query = pu.construct_neighbor_query(w_type, {"id_col": id_col, "numerator": numerator, "denominator": denominator, @@ -155,18 +157,18 @@ def moran_local_rate(subquery, numerator, denominator, result = plpy.execute(query) # if there are no neighbors, exit if len(result) == 0: - return empty_zipped_array(5) + return pu.empty_zipped_array(5) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - return empty_zipped_array(5) + return pu.empty_zipped_array(5) ## collect attributes - numer = get_attributes(result, 1) - denom = 
get_attributes(result, 2) + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) - weight = get_weight(result, w_type, num_ngbrs) + weight = pu.get_weight(result, w_type, num_ngbrs) # calculate LISA values lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, @@ -191,25 +193,25 @@ def moran_local_bv(subquery, attr1, attr2, "geom_col": geom_col, "id_col": id_col} - query = construct_neighbor_query(w_type, qvals) + query = pu.construct_neighbor_query(w_type, qvals) try: result = plpy.execute(query) # if there are no neighbors, exit if len(result) == 0: - return empty_zipped_array(4) + return pu.empty_zipped_array(4) except plpy.SPIError: plpy.error("Error: areas of interest query failed, " \ "check input parameters") plpy.notice('** Query failed: "%s"' % query) - return empty_zipped_array(4) + return pu.empty_zipped_array(4) ## collect attributes - attr1_vals = get_attributes(result, 1) - attr2_vals = get_attributes(result, 2) + attr1_vals = pu.get_attributes(result, 1) + attr2_vals = pu.get_attributes(result, 2) # create weights - weight = get_weight(result, w_type, num_ngbrs) + weight = pu.get_weight(result, w_type, num_ngbrs) # calculate LISA values lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, @@ -246,138 +248,6 @@ def map_quads(coord): else: return None -def query_attr_select(params): - """ - Create portion of SELECT statement for attributes inolved in query. - @param params: dict of information used in query (column names, - table name, etc.) 
- """ - - attrs = [k for k in params - if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')] - - template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, " - - attr_string = "" - - for idx, val in enumerate(sorted(attrs)): - attr_string += template % {"col": val, "alias_num": idx + 1} - - return attr_string - -def query_attr_where(params): - """ - Create portion of WHERE clauses for weeding out NULL-valued geometries - """ - attrs = sorted([k for k in params - if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]) - - attr_string = [] - - for attr in attrs: - attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr) - - if len(attrs) == 2: - attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1]) - - out = " AND ".join(attr_string) - - return out - -def knn(params): - """SQL query for k-nearest neighbors. - @param vars: dict of values to fill template - """ - - attr_select = query_attr_select(params) - attr_where = query_attr_where(params) - - replacements = {"attr_select": attr_select, - "attr_where_i": attr_where.replace("idx_replace", "i"), - "attr_where_j": attr_where.replace("idx_replace", "j")} - - query = "SELECT " \ - "i.\"{id_col}\" As id, " \ - "%(attr_select)s" \ - "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ - "FROM ({subquery}) As j " \ - "WHERE %(attr_where_j)s AND " \ - "i.\"{id_col}\" <> j.\"{id_col}\" " \ - "ORDER BY j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ - "LIMIT {num_ngbrs}) " \ - ") As neighbors " \ - "FROM ({subquery}) As i " \ - "WHERE " \ - "%(attr_where_i)s " \ - "ORDER BY i.\"{id_col}\" ASC;" % replacements - - return query.format(**params) - -## SQL query for finding queens neighbors (all contiguous polygons) -def queen(params): - """SQL query for queen neighbors. 
- @param params dict: information to fill query - """ - attr_select = query_attr_select(params) - attr_where = query_attr_where(params) - - replacements = {"attr_select": attr_select, - "attr_where_i": attr_where.replace("idx_replace", "i"), - "attr_where_j": attr_where.replace("idx_replace", "j")} - - query = "SELECT " \ - "i.\"{id_col}\" As id, " \ - "%(attr_select)s" \ - "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ - "FROM ({subquery}) As j " \ - "WHERE ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ - "%(attr_where_j)s)" \ - ") As neighbors " \ - "FROM ({subquery}) As i " \ - "WHERE " \ - "%(attr_where_i)s " \ - "ORDER BY i.\"{id_col}\" ASC;" % replacements - - return query.format(**params) - -## to add more weight methods open a ticket or pull request - -def construct_neighbor_query(w_type, query_vals): - """Return requested query. - @param w_type text: type of neighbors to calculate ('knn' or 'queen') - @param query_vals dict: values used to construct the query - """ - - if w_type == 'knn': - return knn(query_vals) - else: - return queen(query_vals) - -def get_attributes(query_res, attr_num=1): - """ - @param query_res: query results with attributes and neighbors - @param attr_num: attribute number (1, 2, ...) 
- """ - return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float) - -## Build weight object -def get_weight(query_res, w_type='queen', num_ngbrs=5): - """ - Construct PySAL weight from return value of query - @param query_res: query results with attributes and neighbors - """ - if w_type == 'knn': - row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs - weights = {x['id']: row_normed_weights for x in query_res} - else: - weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) - if len(x['neighbors']) > 0 - else [] for x in query_res} - - neighbors = {x['id']: x['neighbors'] for x in query_res} - - return ps.W(neighbors, weights) - def quad_position(quads): """ Produce Moran's I classification based of n @@ -385,20 +255,6 @@ def quad_position(quads): @param quads ndarray: an array of quads classified by 1-4 (PySAL default) Output: - @param ndarray: an array of quads classied by 'HH', 'LL', etc. + @param list: an array of quads classied by 'HH', 'LL', etc. 
""" - - lisa_sig = np.array([map_quads(q) for q in quads]) - - return lisa_sig - -def return_empty_zipped_array(num_nones): - """ - prepare return values for cases of empty weights objects (no neighbors) - Input: - @param num_nones int: number of columns (e.g., 4) - Output: - [(None, None, None, None)] - """ - - return [tuple([None] * num_nones)] + return [map_quads(q) for q in quads] diff --git a/src/py/crankshaft/crankshaft/pysal_utils/__init__.py b/src/py/crankshaft/crankshaft/pysal_utils/__init__.py new file mode 100644 index 0000000..835880d --- /dev/null +++ b/src/py/crankshaft/crankshaft/pysal_utils/__init__.py @@ -0,0 +1 @@ +from pysal_utils import * diff --git a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py new file mode 100644 index 0000000..5482cc7 --- /dev/null +++ b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -0,0 +1,149 @@ +""" + Utilities module for generic PySAL functionality, mainly centered on translating queries into numpy arrays or PySAL weights objects +""" + +import numpy as np +import pysal as ps + +def construct_neighbor_query(w_type, query_vals): + """Return query (a string) used for finding neighbors + @param w_type text: type of neighbors to calculate ('knn' or 'queen') + @param query_vals dict: values used to construct the query + """ + + if w_type == 'knn': + return knn(query_vals) + else: + return queen(query_vals) + +## Build weight object +def get_weight(query_res, w_type='knn', num_ngbrs=5): + """ + Construct PySAL weight from return value of query + @param query_res: query results with attributes and neighbors + """ + if w_type == 'knn': + row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs + weights = {x['id']: row_normed_weights for x in query_res} + else: + weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) + if len(x['neighbors']) > 0 + else [] for x in query_res} + + neighbors = {x['id']: x['neighbors'] for x in 
query_res} + + return ps.W(neighbors, weights) + +def query_attr_select(params): + """ + Create portion of SELECT statement for attributes involved in query. + @param params: dict of information used in query (column names, + table name, etc.) + """ + + attrs = [k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')] + + template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, " + + attr_string = "" + + for idx, val in enumerate(sorted(attrs)): + attr_string += template % {"col": val, "alias_num": idx + 1} + + return attr_string + +def query_attr_where(params): + """ + Create portion of WHERE clauses for weeding out NULL-valued geometries + """ + attrs = sorted([k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]) + + attr_string = [] + + for attr in attrs: + attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr) + + if len(attrs) == 2: + attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1]) + + out = " AND ".join(attr_string) + + return out + +def knn(params): + """SQL query for k-nearest neighbors. + @param params: dict of values to fill template + """ + + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE %(attr_where_j)s AND " \ + "i.\"{id_col}\" <> j.\"{id_col}\" " \ + "ORDER BY j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ + "LIMIT {num_ngbrs}) " \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## SQL query for finding queens neighbors (all contiguous polygons) +def queen(params): + """SQL query for queen neighbors. 
+ @param params dict: information to fill query + """ + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ + "%(attr_where_j)s)" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## to add more weight methods open a ticket or pull request + +def get_attributes(query_res, attr_num=1): + """ + @param query_res: query results with attributes and neighbors + @param attr_num: attribute number (1, 2, ...) + """ + return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float) + +def empty_zipped_array(num_nones): + """ + prepare return values for cases of empty weights objects (no neighbors) + Input: + @param num_nones int: number of columns (e.g., 4) + Output: + [(None, None, None, None)] + """ + + return [tuple([None] * num_nones)] diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index e2d2a50..399e0cb 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -12,6 +12,7 @@ import unittest from helper import plpy, fixture_file import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu from crankshaft import random_seeds import json @@ -44,16 +45,16 @@ class MoranTest(unittest.TestCase): ans = "i.\"{attr1}\"::numeric As attr1, " \ "i.\"{attr2}\"::numeric As attr2, " - self.assertEqual(cc.query_attr_select(self.params), ans) + self.assertEqual(pu.query_attr_select(self.params), ans) def 
test_query_attr_where(self): - """Test query_attr_where""" + """Test pu.query_attr_where""" ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \ "idx_replace.\"{attr2}\" IS NOT NULL AND " \ "idx_replace.\"{attr2}\" <> 0" - self.assertEqual(cc.query_attr_where(self.params), ans) + self.assertEqual(pu.query_attr_where(self.params), ans) def test_knn(self): """Test knn neighbors constructor""" @@ -76,7 +77,7 @@ class MoranTest(unittest.TestCase): "i.\"jay_z\" <> 0 " \ "ORDER BY i.\"cartodb_id\" ASC;" - self.assertEqual(cc.knn(self.params), ans) + self.assertEqual(pu.knn(self.params), ans) def test_queen(self): """Test queen neighbors constructor""" @@ -90,7 +91,7 @@ class MoranTest(unittest.TestCase): "j.\"the_geom\") AND " \ "j.\"andy\" IS NOT NULL AND " \ "j.\"jay_z\" IS NOT NULL AND " \ - "j.\"jay_z\" <> 0) + "j.\"jay_z\" <> 0)" \ ") As neighbors " \ "FROM (SELECT * FROM a_list) As i " \ "WHERE i.\"andy\" IS NOT NULL AND " \ @@ -98,14 +99,14 @@ class MoranTest(unittest.TestCase): "i.\"jay_z\" <> 0 " \ "ORDER BY i.\"cartodb_id\" ASC;" - self.assertEqual(cc.queen(self.params), ans) + self.assertEqual(pu.queen(self.params), ans) def test_construct_neighbor_query(self): """Test construct_neighbor_query""" # Compare to raw knn query - self.assertEqual(cc.construct_neighbor_query('knn', self.params), - cc.knn(self.params)) + self.assertEqual(pu.construct_neighbor_query('knn', self.params), + pu.knn(self.params)) def test_get_attributes(self): """Test get_attributes""" @@ -123,8 +124,8 @@ class MoranTest(unittest.TestCase): """Test empty_zipped_array""" ans2 = [(None, None)] ans4 = [(None, None, None, None)] - self.assertEqual(cc.empty_zipped_array(2), ans2) - self.assertEqual(cc.empty_zipped_array(4), ans4) + self.assertEqual(pu.empty_zipped_array(2), ans2) + self.assertEqual(pu.empty_zipped_array(4), ans4) def test_quad_position(self): """Test lisa_sig_vals""" From b0150d4fec612a8e6f92220c70f6b2c7454c636f Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 30 Mar 
2016 08:27:14 -0400 Subject: [PATCH 10/12] adding tests for pysal_utils --- .../crankshaft/pysal_utils/pysal_utils.py | 2 +- .../crankshaft/test/test_clustering_moran.py | 90 --------------- src/py/crankshaft/test/test_pysal_utils.py | 104 ++++++++++++++++++ 3 files changed, 105 insertions(+), 91 deletions(-) create mode 100644 src/py/crankshaft/test/test_pysal_utils.py diff --git a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py index 5482cc7..9fdbfb3 100644 --- a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py +++ b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -92,7 +92,7 @@ def knn(params): "WHERE %(attr_where_j)s AND " \ "i.\"{id_col}\" <> j.\"{id_col}\" " \ "ORDER BY j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ - "LIMIT {num_ngbrs}) " \ + "LIMIT {num_ngbrs})" \ ") As neighbors " \ "FROM ({subquery}) As i " \ "WHERE " \ diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index 399e0cb..fbe11d6 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -1,8 +1,6 @@ import unittest import numpy as np -import unittest - # from mock_plpy import MockPlPy # plpy = MockPlPy() @@ -39,94 +37,6 @@ class MoranTest(unittest.TestCase): self.assertEqual(cc.map_quads(33), None) self.assertEqual(cc.map_quads('andy'), None) - def test_query_attr_select(self): - """Test query_attr_select""" - - ans = "i.\"{attr1}\"::numeric As attr1, " \ - "i.\"{attr2}\"::numeric As attr2, " - - self.assertEqual(pu.query_attr_select(self.params), ans) - - def test_query_attr_where(self): - """Test pu.query_attr_where""" - - ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \ - "idx_replace.\"{attr2}\" IS NOT NULL AND " \ - "idx_replace.\"{attr2}\" <> 0" - - self.assertEqual(pu.query_attr_where(self.params), ans) - - def test_knn(self): - """Test knn neighbors constructor""" - - ans = "SELECT 
i.\"cartodb_id\" As id, " \ - "i.\"andy\"::numeric As attr1, " \ - "i.\"jay_z\"::numeric As attr2, " \ - "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ - "FROM (SELECT * FROM a_list) As j " \ - "WHERE j.\"andy\" IS NOT NULL AND " \ - "j.\"jay_z\" IS NOT NULL AND " \ - "j.\"jay_z\" <> 0 AND " \ - "i.\"cartodb_id\" <> j.\"cartodb_id\" " \ - "ORDER BY " \ - "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ - "LIMIT 321) ) As neighbors " \ - "FROM (SELECT * FROM a_list) As i " \ - "WHERE i.\"andy\" IS NOT NULL AND " \ - "i.\"jay_z\" IS NOT NULL AND " \ - "i.\"jay_z\" <> 0 " \ - "ORDER BY i.\"cartodb_id\" ASC;" - - self.assertEqual(pu.knn(self.params), ans) - - def test_queen(self): - """Test queen neighbors constructor""" - - ans = "SELECT i.\"cartodb_id\" As id, " \ - "i.\"andy\"::numeric As attr1, " \ - "i.\"jay_z\"::numeric As attr2, " \ - "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ - "FROM (SELECT * FROM a_list) As j " \ - "WHERE ST_Touches(i.\"the_geom\", " \ - "j.\"the_geom\") AND " \ - "j.\"andy\" IS NOT NULL AND " \ - "j.\"jay_z\" IS NOT NULL AND " \ - "j.\"jay_z\" <> 0)" \ - ") As neighbors " \ - "FROM (SELECT * FROM a_list) As i " \ - "WHERE i.\"andy\" IS NOT NULL AND " \ - "i.\"jay_z\" IS NOT NULL AND " \ - "i.\"jay_z\" <> 0 " \ - "ORDER BY i.\"cartodb_id\" ASC;" - - self.assertEqual(pu.queen(self.params), ans) - - def test_construct_neighbor_query(self): - """Test construct_neighbor_query""" - - # Compare to raw knn query - self.assertEqual(pu.construct_neighbor_query('knn', self.params), - pu.knn(self.params)) - - def test_get_attributes(self): - """Test get_attributes""" - - ## need to add tests - - self.assertEqual(True, True) - - def test_get_weight(self): - """Test get_weight""" - - self.assertEqual(True, True) - - def test_empty_zipped_array(self): - """Test empty_zipped_array""" - ans2 = [(None, None)] - ans4 = [(None, None, None, None)] - self.assertEqual(pu.empty_zipped_array(2), ans2) - self.assertEqual(pu.empty_zipped_array(4), ans4) - def 
test_quad_position(self): """Test lisa_sig_vals""" diff --git a/src/py/crankshaft/test/test_pysal_utils.py b/src/py/crankshaft/test/test_pysal_utils.py new file mode 100644 index 0000000..8e9b7da --- /dev/null +++ b/src/py/crankshaft/test/test_pysal_utils.py @@ -0,0 +1,104 @@ +import unittest + +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds + + +class PysalUtilsTest(unittest.TestCase): + """Testing class for utility functions related to PySAL integrations""" + + def setUp(self): + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + def test_query_attr_select(self): + """Test query_attr_select""" + + ans = "i.\"{attr1}\"::numeric As attr1, " \ + "i.\"{attr2}\"::numeric As attr2, " + + self.assertEqual(pu.query_attr_select(self.params), ans) + + def test_query_attr_where(self): + """Test pu.query_attr_where""" + + ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" <> 0" + + self.assertEqual(pu.query_attr_where(self.params), ans) + + def test_knn(self): + """Test knn neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0 AND " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" " \ + "ORDER BY " \ + "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.knn(self.params), ans) + + def test_queen(self): + """Test queen neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ 
+ "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE ST_Touches(i.\"the_geom\", " \ + "j.\"the_geom\") AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0)" \ + ") As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.queen(self.params), ans) + + def test_construct_neighbor_query(self): + """Test construct_neighbor_query""" + + # Compare to raw knn query + self.assertEqual(pu.construct_neighbor_query('knn', self.params), + pu.knn(self.params)) + + def test_get_attributes(self): + """Test get_attributes""" + + ## need to add tests + + self.assertEqual(True, True) + + def test_get_weight(self): + """Test get_weight""" + + self.assertEqual(True, True) + + def test_empty_zipped_array(self): + """Test empty_zipped_array""" + ans2 = [(None, None)] + ans4 = [(None, None, None, None)] + self.assertEqual(pu.empty_zipped_array(2), ans2) + self.assertEqual(pu.empty_zipped_array(4), ans4) From 4c243bf1d35dcb802bee60b910dee1eae748eab9 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 30 Mar 2016 11:44:44 -0400 Subject: [PATCH 11/12] correct func signatures --- src/pg/sql/10_moran.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index 3089fc6..85d69f0 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -8,7 +8,7 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id', w_type TEXT DEFAULT 'knn', num_ngbrs INT DEFAULT 5) -RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +RETURNS TABLE (moran NUMERIC, significance NUMERIC) AS $$ from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary @@ -43,7 +43,7 @@ 
CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id', w_type TEXT DEFAULT 'knn', num_ngbrs INT DEFAULT 5) -RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +RETURNS TABLE (moran FLOAT, significance FLOAT) AS $$ from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary From 02b74813aca7c7ebac7caae04a4d503b9357bca1 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 30 Mar 2016 12:09:49 -0400 Subject: [PATCH 12/12] add test for global moran --- src/py/crankshaft/test/test_clustering_moran.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index fbe11d6..29c5bde 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -52,7 +52,7 @@ class MoranTest(unittest.TestCase): data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.moran_local('table', 'value', 99, 'the_geom', 'cartodb_id', 'knn', 5) + result = cc.moran_local('subquery', 'value', 99, 'the_geom', 'cartodb_id', 'knn', 5) result = [(row[0], row[1]) for row in result] expected = self.moran_data for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): @@ -70,3 +70,14 @@ class MoranTest(unittest.TestCase): expected = self.moran_data for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): self.assertAlmostEqual(res_val, exp_val) + + def test_moran(self): + """Test Moran's I global""" + data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1235) + result = cc.moran('table', 'value', 99, 'the_geom', 'cartodb_id', 'knn', 5) + print 'result == None?', result == None + result_moran = 
result[0][0] + expected_moran = np.array([row[0] for row in self.moran_data]).mean() + self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)