From 06452562b905f21dc0adb6a0fa403cda333bb737 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 26 Sep 2016 10:10:52 -0400 Subject: [PATCH 1/2] fix ordering problems in input columns --- src/py/crankshaft/test/test_pysal_utils.py | 81 ++++++++++------------ 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/src/py/crankshaft/test/test_pysal_utils.py b/src/py/crankshaft/test/test_pysal_utils.py index aad9b20..92b528b 100644 --- a/src/py/crankshaft/test/test_pysal_utils.py +++ b/src/py/crankshaft/test/test_pysal_utils.py @@ -9,23 +9,23 @@ class PysalUtilsTest(unittest.TestCase): """Testing class for utility functions related to PySAL integrations""" def setUp(self): - self.params = OrderedDict([("id_col", "cartodb_id"), - ("attr1", "andy"), - ("attr2", "jay_z"), - ("subquery", "SELECT * FROM a_list"), - ("geom_col", "the_geom"), - ("num_ngbrs", 321)]) - - self.params2 = OrderedDict([("id_col", "cartodb_id"), - ("attr1", "price"), - ("attr2", "sq_meters"), + self.params1 = OrderedDict([("id_col", "cartodb_id"), + ("attr1", "andy"), + ("attr2", "jay_z"), ("subquery", "SELECT * FROM a_list"), ("geom_col", "the_geom"), ("num_ngbrs", 321)]) + self.params2 = OrderedDict([("id_col", "cartodb_id"), + ("numerator", "price"), + ("denominator", "sq_meters"), + ("subquery", "SELECT * FROM pecan"), + ("geom_col", "the_geom"), + ("num_ngbrs", 321)]) + self.params3 = OrderedDict([("id_col", "cartodb_id"), - ("attr1", "numerator"), - ("attr2", "denominator"), + ("numerator", "sq_meters"), + ("denominator", "price"), ("subquery", "SELECT * FROM pecan"), ("geom_col", "the_geom"), ("num_ngbrs", 321)]) @@ -39,20 +39,20 @@ class PysalUtilsTest(unittest.TestCase): def test_query_attr_select(self): """Test query_attr_select""" - ans = "i.\"andy\"::numeric As attr1, " \ - "i.\"jay_z\"::numeric As attr2, " + ans1 = ("i.\"andy\"::numeric As attr1, " + "i.\"jay_z\"::numeric As attr2, ") - ans2 = "i.\"price\"::numeric As attr1, " \ - "i.\"sq_meters\"::numeric As attr2, " + ans2 = ("i.\"price\"::numeric As attr1, " + "i.\"sq_meters\"::numeric As attr2, ") - ans3 = "i.\"numerator\"::numeric As attr1, " \ - "i.\"denominator\"::numeric As attr2, " + ans3 = ("i.\"sq_meters\"::numeric As attr1, " + "i.\"price\"::numeric As attr2, ") - ans_array = "i.\"_2013_dec\"::numeric As attr1, " \ - "i.\"_2014_jan\"::numeric As attr2, " \ - "i.\"_2014_feb\"::numeric As attr3, " + ans_array = ("i.\"_2013_dec\"::numeric As attr1, " + "i.\"_2014_jan\"::numeric As attr2, " + "i.\"_2014_feb\"::numeric As attr3, ") - self.assertEqual(pu.query_attr_select(self.params), ans) + self.assertEqual(pu.query_attr_select(self.params1), ans1) self.assertEqual(pu.query_attr_select(self.params2), ans2) self.assertEqual(pu.query_attr_select(self.params3), ans3) self.assertEqual(pu.query_attr_select(self.params_array), ans_array) @@ -60,21 +60,20 @@ class PysalUtilsTest(unittest.TestCase): def test_query_attr_where(self): """Test pu.query_attr_where""" - ans = "idx_replace.\"andy\" IS NOT NULL AND " \ - "idx_replace.\"jay_z\" IS NOT NULL AND " \ - "idx_replace.\"jay_z\" <> 0" + ans1 = ("idx_replace.\"andy\" IS NOT NULL AND " + "idx_replace.\"jay_z\" IS NOT NULL") - ans_array = "idx_replace.\"_2013_dec\" IS NOT NULL AND " \ - "idx_replace.\"_2014_jan\" IS NOT NULL AND " \ - "idx_replace.\"_2014_feb\" IS NOT NULL" + ans_array = ("idx_replace.\"_2013_dec\" IS NOT NULL AND " + "idx_replace.\"_2014_jan\" IS NOT NULL AND " + "idx_replace.\"_2014_feb\" IS NOT NULL") - self.assertEqual(pu.query_attr_where(self.params), ans) + self.assertEqual(pu.query_attr_where(self.params1), ans1) self.assertEqual(pu.query_attr_where(self.params_array), ans_array) def test_knn(self): """Test knn neighbors constructor""" - ans = "SELECT i.\"cartodb_id\" As id, " \ + ans1 = "SELECT i.\"cartodb_id\" As id, " \ "i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, " \ "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ @@ -82,15 +81,13 @@ class PysalUtilsTest(unittest.TestCase): "WHERE " \ "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ "j.\"andy\" IS NOT NULL AND " \ - "j.\"jay_z\" IS NOT NULL AND " \ - "j.\"jay_z\" <> 0 " \ + "j.\"jay_z\" IS NOT NULL " \ "ORDER BY " \ "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ "LIMIT 321)) As neighbors " \ "FROM (SELECT * FROM a_list) As i " \ "WHERE i.\"andy\" IS NOT NULL AND " \ - "i.\"jay_z\" IS NOT NULL AND " \ - "i.\"jay_z\" <> 0 " \ + "i.\"jay_z\" IS NOT NULL " \ "ORDER BY i.\"cartodb_id\" ASC;" ans_array = "SELECT i.\"cartodb_id\" As id, " \ @@ -111,13 +108,13 @@ class PysalUtilsTest(unittest.TestCase): "i.\"_2014_feb\" IS NOT NULL "\ "ORDER BY i.\"cartodb_id\" ASC;" - self.assertEqual(pu.knn(self.params), ans) + self.assertEqual(pu.knn(self.params1), ans1) self.assertEqual(pu.knn(self.params_array), ans_array) def test_queen(self): """Test queen neighbors constructor""" - ans = "SELECT i.\"cartodb_id\" As id, " \ + ans1 = "SELECT i.\"cartodb_id\" As id, " \ "i.\"andy\"::numeric As attr1, " \ "i.\"jay_z\"::numeric As attr2, " \ "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ @@ -127,23 +124,21 @@ class PysalUtilsTest(unittest.TestCase): "ST_Touches(i.\"the_geom\", " \ "j.\"the_geom\") AND " \ "j.\"andy\" IS NOT NULL AND " \ - "j.\"jay_z\" IS NOT NULL AND " \ - "j.\"jay_z\" <> 0)" \ + "j.\"jay_z\" IS NOT NULL)" \ ") As neighbors " \ "FROM (SELECT * FROM a_list) As i " \ "WHERE i.\"andy\" IS NOT NULL AND " \ - "i.\"jay_z\" IS NOT NULL AND " \ - "i.\"jay_z\" <> 0 " \ + "i.\"jay_z\" IS NOT NULL " \ "ORDER BY i.\"cartodb_id\" ASC;" - self.assertEqual(pu.queen(self.params), ans) + self.assertEqual(pu.queen(self.params1), ans1) def test_construct_neighbor_query(self): """Test construct_neighbor_query""" # Compare to raw knn query - self.assertEqual(pu.construct_neighbor_query('knn', self.params), - pu.knn(self.params)) + self.assertEqual(pu.construct_neighbor_query('knn', self.params1), + pu.knn(self.params1)) def test_get_attributes(self): """Test get_attributes""" From f1d420a6f7fc83cfc58862065c3a845bf31a5ba6 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 26 Sep 2016 10:11:16 -0400 Subject: [PATCH 2/2] ordering fixes --- .../crankshaft/pysal_utils/pysal_utils.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py index f0c73ba..c0ec19d 100644 --- a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py +++ b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -42,15 +42,16 @@ def get_weight(query_res, w_type='knn', num_ngbrs=5): def query_attr_select(params): """ Create portion of SELECT statement for attributes inolved in query. + Defaults to order in the params @param params: dict of information used in query (column names, table name, etc.) Example: - OrderedDict([('attr1', 'numerator'), - ('attr2', 'denominator'), + OrderedDict([('numerator', 'price'), + ('denominator', 'sq_meters'), ('subquery', 'SELECT * FROM interesting_data')]) Output: - "i.\"numerator\"::numeric As attr1, " \ - "i.\"denominator\"::numeric As attr2, " + "i.\"price\"::numeric As attr1, " \ + "i.\"sq_meters\"::numeric As attr2, " """ attr_string = "" @@ -81,7 +82,8 @@ def query_attr_where(params): 'numerator': 'data1', 'denominator': 'data2', '': ...} - Output: 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL' + Output: + 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL' Input: {'subquery': ..., 'time_cols': ['time1', 'time2', 'time3'], @@ -102,14 +104,16 @@ def query_attr_where(params): ## moran where clauses # get keys - attrs = sorted([k for k in params - if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')]) + attrs = [k for k in params + if k not in ('id_col', 'geom_col', 'subquery', + 'num_ngbrs', 'subquery')] # add values to template for attr in attrs: attr_string.append(template % params[attr]) - if len(attrs) == 2: - attr_string.append("idx_replace.\"%s\" <> 0" % params[attrs[1]]) + if 'denominator' in attrs: + attr_string.append( + "idx_replace.\"%s\" <> 0" % params['denominator']) out = " AND ".join(attr_string)