Merge branch 'moran-query-ordering-fix' of https://github.com/CartoDB/crankshaft into moran-query-ordering-fix

This commit is contained in:
Andy Eschbacher 2016-09-26 10:15:49 -04:00
commit 803816f5c9
2 changed files with 52 additions and 54 deletions

View File

@ -45,15 +45,16 @@ def get_weight(query_res, w_type='knn', num_ngbrs=5):
def query_attr_select(params): def query_attr_select(params):
""" """
Create portion of SELECT statement for attributes involved in query. Create portion of SELECT statement for attributes involved in query.
Defaults to order in the params
@param params: dict of information used in query (column names, @param params: dict of information used in query (column names,
table name, etc.) table name, etc.)
Example: Example:
OrderedDict([('attr1', 'numerator'), OrderedDict([('numerator', 'price'),
('attr2', 'denominator'), ('denominator', 'sq_meters'),
('subquery', 'SELECT * FROM interesting_data')]) ('subquery', 'SELECT * FROM interesting_data')])
Output: Output:
"i.\"numerator\"::numeric As attr1, " \ "i.\"price\"::numeric As attr1, " \
"i.\"denominator\"::numeric As attr2, " "i.\"sq_meters\"::numeric As attr2, "
""" """
attr_string = "" attr_string = ""
@ -87,8 +88,8 @@ def query_attr_where(params):
'numerator': 'data1', 'numerator': 'data1',
'denominator': 'data2', 'denominator': 'data2',
'': ...} '': ...}
Output: 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" Output:
IS NOT NULL' 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL'
Input: Input:
{'subquery': ..., {'subquery': ...,
'time_cols': ['time1', 'time2', 'time3'], 'time_cols': ['time1', 'time2', 'time3'],
@ -109,15 +110,17 @@ def query_attr_where(params):
# moran where clauses # moran where clauses
# get keys # get keys
attrs = sorted([k for k in params attrs = [k for k in params
if k not in ('id_col', 'geom_col', 'subquery', if k not in ('id_col', 'geom_col', 'subquery',
'num_ngbrs', 'subquery')]) 'num_ngbrs', 'subquery')]
# add values to template # add values to template
for attr in attrs: for attr in attrs:
attr_string.append(template % params[attr]) attr_string.append(template % params[attr])
if len(attrs) == 2: if 'denominator' in attrs:
attr_string.append("idx_replace.\"%s\" <> 0" % params[attrs[1]]) attr_string.append(
"idx_replace.\"%s\" <> 0" % params['denominator'])
out = " AND ".join(attr_string) out = " AND ".join(attr_string)

View File

@ -9,7 +9,7 @@ class PysalUtilsTest(unittest.TestCase):
"""Testing class for utility functions related to PySAL integrations""" """Testing class for utility functions related to PySAL integrations"""
def setUp(self): def setUp(self):
self.params = OrderedDict([("id_col", "cartodb_id"), self.params1 = OrderedDict([("id_col", "cartodb_id"),
("attr1", "andy"), ("attr1", "andy"),
("attr2", "jay_z"), ("attr2", "jay_z"),
("subquery", "SELECT * FROM a_list"), ("subquery", "SELECT * FROM a_list"),
@ -17,15 +17,15 @@ class PysalUtilsTest(unittest.TestCase):
("num_ngbrs", 321)]) ("num_ngbrs", 321)])
self.params2 = OrderedDict([("id_col", "cartodb_id"), self.params2 = OrderedDict([("id_col", "cartodb_id"),
("attr1", "price"), ("numerator", "price"),
("attr2", "sq_meters"), ("denominator", "sq_meters"),
("subquery", "SELECT * FROM a_list"), ("subquery", "SELECT * FROM pecan"),
("geom_col", "the_geom"), ("geom_col", "the_geom"),
("num_ngbrs", 321)]) ("num_ngbrs", 321)])
self.params3 = OrderedDict([("id_col", "cartodb_id"), self.params3 = OrderedDict([("id_col", "cartodb_id"),
("attr1", "numerator"), ("numerator", "sq_meters"),
("attr2", "denominator"), ("denominator", "price"),
("subquery", "SELECT * FROM pecan"), ("subquery", "SELECT * FROM pecan"),
("geom_col", "the_geom"), ("geom_col", "the_geom"),
("num_ngbrs", 321)]) ("num_ngbrs", 321)])
@ -39,20 +39,20 @@ class PysalUtilsTest(unittest.TestCase):
def test_query_attr_select(self): def test_query_attr_select(self):
"""Test query_attr_select""" """Test query_attr_select"""
ans = "i.\"andy\"::numeric As attr1, " \ ans1 = ("i.\"andy\"::numeric As attr1, "
"i.\"jay_z\"::numeric As attr2, " "i.\"jay_z\"::numeric As attr2, ")
ans2 = "i.\"price\"::numeric As attr1, " \ ans2 = ("i.\"price\"::numeric As attr1, "
"i.\"sq_meters\"::numeric As attr2, " "i.\"sq_meters\"::numeric As attr2, ")
ans3 = "i.\"numerator\"::numeric As attr1, " \ ans3 = ("i.\"sq_meters\"::numeric As attr1, "
"i.\"denominator\"::numeric As attr2, " "i.\"price\"::numeric As attr2, ")
ans_array = "i.\"_2013_dec\"::numeric As attr1, " \ ans_array = ("i.\"_2013_dec\"::numeric As attr1, "
"i.\"_2014_jan\"::numeric As attr2, " \ "i.\"_2014_jan\"::numeric As attr2, "
"i.\"_2014_feb\"::numeric As attr3, " "i.\"_2014_feb\"::numeric As attr3, ")
self.assertEqual(pu.query_attr_select(self.params), ans) self.assertEqual(pu.query_attr_select(self.params1), ans1)
self.assertEqual(pu.query_attr_select(self.params2), ans2) self.assertEqual(pu.query_attr_select(self.params2), ans2)
self.assertEqual(pu.query_attr_select(self.params3), ans3) self.assertEqual(pu.query_attr_select(self.params3), ans3)
self.assertEqual(pu.query_attr_select(self.params_array), ans_array) self.assertEqual(pu.query_attr_select(self.params_array), ans_array)
@ -60,21 +60,20 @@ class PysalUtilsTest(unittest.TestCase):
def test_query_attr_where(self): def test_query_attr_where(self):
"""Test pu.query_attr_where""" """Test pu.query_attr_where"""
ans = "idx_replace.\"andy\" IS NOT NULL AND " \ ans1 = ("idx_replace.\"andy\" IS NOT NULL AND "
"idx_replace.\"jay_z\" IS NOT NULL AND " \ "idx_replace.\"jay_z\" IS NOT NULL")
"idx_replace.\"jay_z\" <> 0"
ans_array = "idx_replace.\"_2013_dec\" IS NOT NULL AND " \ ans_array = ("idx_replace.\"_2013_dec\" IS NOT NULL AND "
"idx_replace.\"_2014_jan\" IS NOT NULL AND " \ "idx_replace.\"_2014_jan\" IS NOT NULL AND "
"idx_replace.\"_2014_feb\" IS NOT NULL" "idx_replace.\"_2014_feb\" IS NOT NULL")
self.assertEqual(pu.query_attr_where(self.params), ans) self.assertEqual(pu.query_attr_where(self.params1), ans1)
self.assertEqual(pu.query_attr_where(self.params_array), ans_array) self.assertEqual(pu.query_attr_where(self.params_array), ans_array)
def test_knn(self): def test_knn(self):
"""Test knn neighbors constructor""" """Test knn neighbors constructor"""
ans = "SELECT i.\"cartodb_id\" As id, " \ ans1 = "SELECT i.\"cartodb_id\" As id, " \
"i.\"andy\"::numeric As attr1, " \ "i.\"andy\"::numeric As attr1, " \
"i.\"jay_z\"::numeric As attr2, " \ "i.\"jay_z\"::numeric As attr2, " \
"(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
@ -82,15 +81,13 @@ class PysalUtilsTest(unittest.TestCase):
"WHERE " \ "WHERE " \
"i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
"j.\"andy\" IS NOT NULL AND " \ "j.\"andy\" IS NOT NULL AND " \
"j.\"jay_z\" IS NOT NULL AND " \ "j.\"jay_z\" IS NOT NULL " \
"j.\"jay_z\" <> 0 " \
"ORDER BY " \ "ORDER BY " \
"j.\"the_geom\" <-> i.\"the_geom\" ASC " \ "j.\"the_geom\" <-> i.\"the_geom\" ASC " \
"LIMIT 321)) As neighbors " \ "LIMIT 321)) As neighbors " \
"FROM (SELECT * FROM a_list) As i " \ "FROM (SELECT * FROM a_list) As i " \
"WHERE i.\"andy\" IS NOT NULL AND " \ "WHERE i.\"andy\" IS NOT NULL AND " \
"i.\"jay_z\" IS NOT NULL AND " \ "i.\"jay_z\" IS NOT NULL " \
"i.\"jay_z\" <> 0 " \
"ORDER BY i.\"cartodb_id\" ASC;" "ORDER BY i.\"cartodb_id\" ASC;"
ans_array = "SELECT i.\"cartodb_id\" As id, " \ ans_array = "SELECT i.\"cartodb_id\" As id, " \
@ -111,13 +108,13 @@ class PysalUtilsTest(unittest.TestCase):
"i.\"_2014_feb\" IS NOT NULL "\ "i.\"_2014_feb\" IS NOT NULL "\
"ORDER BY i.\"cartodb_id\" ASC;" "ORDER BY i.\"cartodb_id\" ASC;"
self.assertEqual(pu.knn(self.params), ans) self.assertEqual(pu.knn(self.params1), ans1)
self.assertEqual(pu.knn(self.params_array), ans_array) self.assertEqual(pu.knn(self.params_array), ans_array)
def test_queen(self): def test_queen(self):
"""Test queen neighbors constructor""" """Test queen neighbors constructor"""
ans = "SELECT i.\"cartodb_id\" As id, " \ ans1 = "SELECT i.\"cartodb_id\" As id, " \
"i.\"andy\"::numeric As attr1, " \ "i.\"andy\"::numeric As attr1, " \
"i.\"jay_z\"::numeric As attr2, " \ "i.\"jay_z\"::numeric As attr2, " \
"(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
@ -127,23 +124,21 @@ class PysalUtilsTest(unittest.TestCase):
"ST_Touches(i.\"the_geom\", " \ "ST_Touches(i.\"the_geom\", " \
"j.\"the_geom\") AND " \ "j.\"the_geom\") AND " \
"j.\"andy\" IS NOT NULL AND " \ "j.\"andy\" IS NOT NULL AND " \
"j.\"jay_z\" IS NOT NULL AND " \ "j.\"jay_z\" IS NOT NULL)" \
"j.\"jay_z\" <> 0)" \
") As neighbors " \ ") As neighbors " \
"FROM (SELECT * FROM a_list) As i " \ "FROM (SELECT * FROM a_list) As i " \
"WHERE i.\"andy\" IS NOT NULL AND " \ "WHERE i.\"andy\" IS NOT NULL AND " \
"i.\"jay_z\" IS NOT NULL AND " \ "i.\"jay_z\" IS NOT NULL " \
"i.\"jay_z\" <> 0 " \
"ORDER BY i.\"cartodb_id\" ASC;" "ORDER BY i.\"cartodb_id\" ASC;"
self.assertEqual(pu.queen(self.params), ans) self.assertEqual(pu.queen(self.params1), ans1)
def test_construct_neighbor_query(self): def test_construct_neighbor_query(self):
"""Test construct_neighbor_query""" """Test construct_neighbor_query"""
# Compare to raw knn query # Compare to raw knn query
self.assertEqual(pu.construct_neighbor_query('knn', self.params), self.assertEqual(pu.construct_neighbor_query('knn', self.params1),
pu.knn(self.params)) pu.knn(self.params1))
def test_get_attributes(self): def test_get_attributes(self):
"""Test get_attributes""" """Test get_attributes"""