diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 3b407c0..7f402a8 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -31,15 +31,15 @@ def moran(subquery, attr_name, try: result = plpy.execute(query) - ## if there are no neighbors, exit + # if there are no neighbors, exit if len(result) == 0: - return zip([None], [None]) + return empty_zipped_array(2) plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - return zip([None], [None]) + return empty_zipped_array(2) ## collect attributes attr_vals = get_attributes(result) @@ -72,12 +72,13 @@ def moran_local(subquery, attr, try: result = plpy.execute(query) + # if there are no neighbors, exit if len(result) == 0: - return zip([None], [None], [None], [None], [None]) + return empty_zipped_array(5) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) - return zip([None], [None], [None], [None], [None]) + return empty_zipped_array(5) attr_vals = get_attributes(result) weight = get_weight(result, w_type) @@ -111,15 +112,15 @@ def moran_rate(subquery, numerator, denominator, try: result = plpy.execute(query) + # if there are no neighbors, exit if len(result) == 0: - ## if there are no values returned, exit - return zip([None], [None]) + return empty_zipped_array(2) plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - return zip([None], [None]) + return empty_zipped_array(2) ## collect attributes numer = 
get_attributes(result, 1) @@ -152,14 +153,14 @@ def moran_local_rate(subquery, numerator, denominator, try: result = plpy.execute(query) - plpy.notice('** Query returned with %d rows' % len(result)) + # if there are no neighbors, exit if len(result) == 0: - return zip([None], [None], [None], [None], [None]) + return empty_zipped_array(5) except plpy.SPIError: plpy.error('Error: areas of interest query failed, check input parameters') plpy.notice('** Query failed: "%s"' % query) plpy.notice('** Error: %s' % plpy.SPIError) - return zip([None], [None], [None], [None], [None]) + return empty_zipped_array(5) ## collect attributes numer = get_attributes(result, 1) @@ -194,13 +195,14 @@ def moran_local_bv(subquery, attr1, attr2, try: result = plpy.execute(query) - plpy.notice('** Query returned with %d rows' % len(result)) + # if there are no neighbors, exit if len(result) == 0: - return zip([None], [None], [None], [None]) + return empty_zipped_array(4) except plpy.SPIError: - plpy.error('Error: areas of interest query failed, check input parameters') + plpy.error("Error: areas of interest query failed, " \ + "check input parameters") plpy.notice('** Query failed: "%s"' % query) - return zip([None], [None], [None], [None]) + return empty_zipped_array(4) ## collect attributes attr1_vals = get_attributes(result, 1) @@ -222,7 +224,6 @@ def moran_local_bv(subquery, attr1, attr2, return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) - # Low level functions ---------------------------------------- def map_quads(coord): @@ -231,6 +232,8 @@ def map_quads(coord): HH=1, LH=2, LL=3, HL=4 Input: @param coord (int): quadrant of a specific measurement + Output: + classification (one of 'HH', 'LH', 'LL', or 'HL') """ if coord == 1: return 'HH' @@ -298,9 +301,10 @@ def knn(params): "%(attr_select)s" \ "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ "FROM ({subquery}) As j " \ - "WHERE %(attr_where_j)s " \ + "WHERE %(attr_where_j)s AND " \ + "i.\"{id_col}\" <> j.\"{id_col}\" " \ "ORDER BY 
def empty_zipped_array(num_nones):
    """
        Prepare the return value for cases of empty weights objects
        (no neighbors), i.e. a single row whose columns are all None.

        The Moran functions in this module and the test-suite call this
        helper as `empty_zipped_array`, so that is the canonical name.

        Input:
        @param num_nones int: number of columns (e.g., 4)
        Output:
        list holding one tuple of `num_nones` Nones,
        e.g. [(None, None, None, None)]
    """

    # one row, `num_nones` columns, every column None
    return [(None,) * num_nones]


# Backward-compatible alias: the helper was first introduced under this
# name, while every call site uses `empty_zipped_array`.
return_empty_zipped_array = empty_zipped_array
def test_knn(self):
    """Test knn neighbors constructor"""

    # Expected k-nearest-neighbours query. The neighbour subselect drops
    # the feature itself through the id inequality predicate and then
    # takes the first 321 rows ordered by distance.
    # NOTE(review): table/column names and the neighbour count come from
    # self.params, built in setUp (not visible here) - confirm there.
    ans = ("SELECT i.\"cartodb_id\" As id, "
           "i.\"andy\"::numeric As attr1, "
           "i.\"jay_z\"::numeric As attr2, "
           "(SELECT ARRAY(SELECT j.\"cartodb_id\" "
           "FROM (SELECT * FROM a_list) As j "
           "WHERE j.\"andy\" IS NOT NULL AND "
           "j.\"jay_z\" IS NOT NULL AND "
           "j.\"jay_z\" <> 0 AND "
           "i.\"cartodb_id\" <> j.\"cartodb_id\" "
           "ORDER BY "
           "j.\"the_geom\" <-> i.\"the_geom\" ASC "
           "LIMIT 321) ) As neighbors "
           "FROM (SELECT * FROM a_list) As i "
           "WHERE i.\"andy\" IS NOT NULL AND "
           "i.\"jay_z\" IS NOT NULL AND "
           "i.\"jay_z\" <> 0 "
           "ORDER BY i.\"cartodb_id\" ASC;")

    self.assertEqual(cc.knn(self.params), ans)


def test_queen(self):
    """Test queen neighbors constructor"""

    # Expected contiguity query: neighbours are the features whose
    # geometry satisfies ST_Touches against the current feature.
    # The fragment ending in "<> 0)" closes the inner SELECT; the next
    # fragment closes ARRAY(...), giving "<> 0)) As neighbors".
    ans = ("SELECT i.\"cartodb_id\" As id, "
           "i.\"andy\"::numeric As attr1, "
           "i.\"jay_z\"::numeric As attr2, "
           "(SELECT ARRAY(SELECT j.\"cartodb_id\" "
           "FROM (SELECT * FROM a_list) As j "
           "WHERE ST_Touches(i.\"the_geom\", "
           "j.\"the_geom\") AND "
           "j.\"andy\" IS NOT NULL AND "
           "j.\"jay_z\" IS NOT NULL AND "
           "j.\"jay_z\" <> 0)"
           ") As neighbors "
           "FROM (SELECT * FROM a_list) As i "
           "WHERE i.\"andy\" IS NOT NULL AND "
           "i.\"jay_z\" IS NOT NULL AND "
           "i.\"jay_z\" <> 0 "
           "ORDER BY i.\"cartodb_id\" ASC;")

    self.assertEqual(cc.queen(self.params), ans)


def test_construct_neighbor_query(self):
    """Test construct_neighbor_query"""

    # Compare to raw knn query: dispatching on 'knn' must yield exactly
    # what the knn constructor produces for the same params.
    self.assertEqual(cc.construct_neighbor_query('knn', self.params),
                     cc.knn(self.params))


def test_get_attributes(self):
    """Test get_attributes"""

    # Placeholder: need to add tests
    self.assertEqual(True, True)


def test_get_weight(self):
    """Test get_weight"""

    # Placeholder: no real assertions yet
    self.assertEqual(True, True)


def test_empty_zipped_array(self):
    """Test empty_zipped_array"""

    # The helper returns a single row of Nones, one per requested column.
    ans2 = [(None, None)]
    ans4 = [(None, None, None, None)]
    self.assertEqual(cc.empty_zipped_array(2), ans2)
    self.assertEqual(cc.empty_zipped_array(4), ans4)
np.int)