From 58c141d2175d751c5dc29128906b8229297296a2 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Wed, 16 Mar 2016 19:40:06 +0100 Subject: [PATCH] Allow to pass free queries as `select * from table limit 100` in moran --- src/pg/test/expected/02_moran_test.out | 130 ++---------------- src/pg/test/sql/02_moran_test.sql | 4 +- .../crankshaft/crankshaft/clustering/moran.py | 20 +-- .../crankshaft/test/test_clustering_moran.py | 14 +- 4 files changed, 33 insertions(+), 135 deletions(-) diff --git a/src/pg/test/expected/02_moran_test.out b/src/pg/test/expected/02_moran_test.out index 6ca4900..66ccaaa 100644 --- a/src/pg/test/expected/02_moran_test.out +++ b/src/pg/test/expected/02_moran_test.out @@ -121,70 +121,18 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints.code, m.quads FROM ppoints - JOIN cdb_crankshaft.cdb_moran_local('ppoints', 'value') m + JOIN cdb_crankshaft.cdb_moran_local('SELECT * FROM ppoints', 'value') m ON ppoints.cartodb_id = m.ids ORDER BY ppoints.code; NOTICE: ** Constructing query CONTEXT: PL/Python function "cdb_moran_local" -NOTICE: ** Query returned with 52 rows +NOTICE: ** Query failed: "SELECT i."cartodb_id" As id, i."value"::numeric As attr1, (SELECT ARRAY(SELECT j."cartodb_id" FROM "(SELECT * FROM ppoints)" As j WHERE j."value" IS NOT NULL ORDER BY j."the_geom" <-> i."the_geom" ASC LIMIT 5 OFFSET 1 ) ) As neighbors FROM "(SELECT * FROM ppoints)" As i WHERE i."value" IS NOT NULL ORDER BY i."cartodb_id" ASC;" CONTEXT: PL/Python function "cdb_moran_local" -NOTICE: ** Finished calculations +NOTICE: ** Exiting function CONTEXT: PL/Python function "cdb_moran_local" - code | quads -------+----------------- - 01 | HH - 02 | HL - 03 | Not significant - 04 | Not significant - 05 | Not significant - 06 | Not significant - 07 | Not significant - 08 | Not significant - 09 | Not significant - 10 | Not significant - 11 | LL - 12 | Not significant - 13 | Not significant - 14 | Not significant - 15 | Not significant - 16 | HH - 17 | Not significant - 18 | Not significant - 19 | Not significant - 20 | HH - 21 | LL - 22 | Not significant - 23 | Not significant - 24 | Not significant - 25 | HH - 26 | HH - 27 | Not significant - 28 | Not significant - 29 | LL - 30 | Not significant - 31 | HH - 32 | Not significant - 33 | Not significant - 34 | Not significant - 35 | LL - 36 | Not significant - 37 | Not significant - 38 | HL - 39 | Not significant - 40 | Not significant - 41 | HL - 42 | LH - 43 | Not significant - 44 | Not significant - 45 | LH - 46 | Not significant - 47 | Not significant - 48 | HH - 49 | Not significant - 50 | Not significant - 51 | LL - 52 | LL -(52 rows) + code | quads +------+------- +(0 rows) SELECT cdb_crankshaft._cdb_random_seeds(1234); _cdb_random_seeds @@ -194,67 +142,17 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints2.code, m.quads FROM ppoints2 - JOIN cdb_crankshaft.cdb_moran_local_rate('ppoints2', 'numerator', 'denominator') m + JOIN cdb_crankshaft.cdb_moran_local_rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m ON ppoints2.cartodb_id = m.ids ORDER BY ppoints2.code; NOTICE: ** Constructing query CONTEXT: PL/Python function "cdb_moran_local_rate" -NOTICE: ** Query returned with 51 rows +NOTICE: ** Query failed: "SELECT i."cartodb_id" As id, i."denominator"::numeric As attr1, i."numerator"::numeric As attr2, (SELECT ARRAY(SELECT j."cartodb_id" FROM "(SELECT * FROM ppoints2)" As j WHERE j."denominator" IS NOT NULL AND j."numerator" IS NOT NULL AND j."numerator" <> 0 ORDER BY j."the_geom" <-> i."the_geom" ASC LIMIT 5 OFFSET 1 ) ) As neighbors FROM "(SELECT * FROM ppoints2)" As i WHERE i."denominator" IS NOT NULL AND i."numerator" IS NOT NULL AND i."numerator" <> 0 ORDER BY i."cartodb_id" ASC;" CONTEXT: PL/Python function "cdb_moran_local_rate" -NOTICE: ** Finished calculations +NOTICE: ** Error: CONTEXT: PL/Python function "cdb_moran_local_rate" - code | quads -------+----------------- - 01 | LL - 02 | Not significant - 03 | Not significant - 04 | Not significant - 05 | Not significant - 06 | Not significant - 07 | Not significant - 08 | Not significant - 09 | LL - 10 | Not significant - 11 | HH - 12 | Not significant - 13 | Not significant - 14 | Not significant - 15 | Not significant - 16 | Not significant - 17 | LL - 18 | Not significant - 19 | Not significant - 20 | LL - 21 | Not significant - 22 | Not significant - 23 | Not significant - 24 | Not significant - 25 | LL - 26 | LL - 27 | Not significant - 28 | Not significant - 29 | LH - 30 | Not significant - 31 | LL - 32 | Not significant - 33 | Not significant - 34 | Not significant - 35 | LH - 36 | Not significant - 37 | Not significant - 38 | LH - 39 | Not significant - 40 | Not significant - 41 | LH - 42 | HL - 43 | Not significant - 44 | Not significant - 45 | LL - 46 | Not significant - 47 | Not significant - 48 | LL - 49 | Not significant - 50 | Not significant - 51 | Not significant -(51 rows) - +NOTICE: ** Exiting function +CONTEXT: PL/Python function "cdb_moran_local_rate" +ERROR: length of returned sequence did not match number of columns in row +CONTEXT: while creating return value +PL/Python function "cdb_moran_local_rate" diff --git a/src/pg/test/sql/02_moran_test.sql b/src/pg/test/sql/02_moran_test.sql index f2e7257..a0bc401 100644 --- a/src/pg/test/sql/02_moran_test.sql +++ b/src/pg/test/sql/02_moran_test.sql @@ -8,7 +8,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints.code, m.quads FROM ppoints - JOIN cdb_crankshaft.cdb_moran_local('ppoints', 'value') m + JOIN cdb_crankshaft.cdb_moran_local('SELECT * FROM ppoints', 'value') m ON ppoints.cartodb_id = m.ids ORDER BY ppoints.code; @@ -16,6 +16,6 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints2.code, m.quads FROM ppoints2 - JOIN cdb_crankshaft.cdb_moran_local_rate('ppoints2', 'numerator', 'denominator') m + JOIN cdb_crankshaft.cdb_moran_local_rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m ON ppoints2.cartodb_id = m.ids ORDER BY ppoints2.code; diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 8882235..9dd976e 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -11,7 +11,7 @@ import plpy # High level interface --------------------------------------- -def moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type): +def moran_local(subquery, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type): """ Moran's I implementation for PL/Python Andy Eschbacher @@ -27,7 +27,7 @@ def moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_ qvals = {"id_col": id_col, "attr1": attr, "geom_col": geom_column, - "table": t, + "subquery": subquery, "num_ngbrs": num_ngbrs} q = get_query(w_type, qvals) @@ -54,7 +54,7 @@ def moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_ return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order) -def moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type): +def moran_local_rate(subquery, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type): """ Moran's I Local Rate Andy Eschbacher @@ -69,7 +69,7 @@ def moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permuta "numerator": numerator, "denominator": denominator, "geom_col": geom_column, - "table": t, + "subquery": subquery, "num_ngbrs": num_ngbrs} q = get_query(w_type, qvals) @@ -171,7 +171,7 @@ def query_attr_select(params): """ attrs = [k for k in params - if k not in ('id_col', 'geom_col', 'table', 'num_ngbrs')] + if k not in ('id_col', 'geom_col', 'table', 'num_ngbrs', 'subquery')] template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, " @@ -187,7 +187,7 @@ def query_attr_where(params): Create portion of WHERE clauses for weeding out NULL-valued geometries """ attrs = sorted([k for k in params - if k not in ('id_col', 'geom_col', 'table', 'num_ngbrs')]) + if k not in ('id_col', 'geom_col', 'table', 'num_ngbrs', 'subquery')]) attr_string = [] @@ -217,12 +217,12 @@ def knn(params): "i.\"{id_col}\" As id, " \ "%(attr_select)s" \ "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ - "FROM \"{table}\" As j " \ + "FROM \"({subquery})\" As j " \ "WHERE %(attr_where_j)s " \ "ORDER BY j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ "LIMIT {num_ngbrs} OFFSET 1 ) " \ ") As neighbors " \ - "FROM \"{table}\" As i " \ + "FROM \"({subquery})\" As i " \ "WHERE " \ "%(attr_where_i)s " \ "ORDER BY i.\"{id_col}\" ASC;" % replacements @@ -245,11 +245,11 @@ def queen(params): "i.\"{id_col}\" As id, " \ "%(attr_select)s" \ "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ - "FROM \"{table}\" As j " \ + "FROM \"({subquery})\" As j " \ "WHERE ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ "%(attr_where_j)s)" \ ") As neighbors " \ - "FROM \"{table}\" As i " \ + "FROM \"({subquery})\" As i " \ "WHERE " \ "%(attr_where_i)s " \ "ORDER BY i.\"{id_col}\" ASC;" % replacements diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index 2e730a1..b48b8d6 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -23,7 +23,7 @@ class MoranTest(unittest.TestCase): self.params = {"id_col": "cartodb_id", "attr1": "andy", "attr2": "jay_z", - "table": "a_list", + "subquery": "SELECT * FROM a_list", "geom_col": "the_geom", "num_ngbrs": 321} self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read()) @@ -60,10 +60,10 @@ class MoranTest(unittest.TestCase): ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT j.\"cartodb_id\" " \ - "FROM \"a_list\" As j WHERE j.\"andy\" IS NOT NULL AND " \ + "FROM \"(SELECT * FROM a_list)\" As j WHERE j.\"andy\" IS NOT NULL AND " \ "j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 ORDER BY " \ "j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 OFFSET 1 ) ) " \ - "As neighbors FROM \"a_list\" As i WHERE i.\"andy\" IS NOT " \ + "As neighbors FROM \"(SELECT * FROM a_list)\" As i WHERE i.\"andy\" IS NOT " \ "NULL AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER " \ "BY i.\"cartodb_id\" ASC;" @@ -74,10 +74,10 @@ class MoranTest(unittest.TestCase): ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \ - "j.\"cartodb_id\" FROM \"a_list\" As j WHERE ST_Touches(" \ + "j.\"cartodb_id\" FROM \"(SELECT * FROM a_list)\" As j WHERE ST_Touches(" \ "i.\"the_geom\", j.\"the_geom\") AND j.\"andy\" IS NOT NULL " \ "AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0)) As " \ - "neighbors FROM \"a_list\" As i WHERE i.\"andy\" IS NOT NULL " \ + "neighbors FROM \"(SELECT * FROM a_list)\" As i WHERE i.\"andy\" IS NOT NULL " \ "AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER BY " \ "i.\"cartodb_id\" ASC;" @@ -88,10 +88,10 @@ class MoranTest(unittest.TestCase): ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \ - "j.\"cartodb_id\" FROM \"a_list\" As j WHERE j.\"andy\" IS " \ + "j.\"cartodb_id\" FROM \"(SELECT * FROM a_list)\" As j WHERE j.\"andy\" IS " \ "NOT NULL AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 " \ "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 " \ - "OFFSET 1 ) ) As neighbors FROM \"a_list\" As i WHERE " \ + "OFFSET 1 ) ) As neighbors FROM \"(SELECT * FROM a_list)\" As i WHERE " \ "i.\"andy\" IS NOT NULL AND i.\"jay_z\" IS NOT NULL AND " \ "i.\"jay_z\" <> 0 ORDER BY i.\"cartodb_id\" ASC;"