diff --git a/pg/sql/0.0.1/02_moran.sql b/pg/sql/0.0.1/02_moran.sql index da2cb64..d061b45 100644 --- a/pg/sql/0.0.1/02_moran.sql +++ b/pg/sql/0.0.1/02_moran.sql @@ -1,3 +1,4 @@ +-- Moran's I CREATE OR REPLACE FUNCTION cdb_moran_local ( t TEXT, @@ -7,12 +8,28 @@ CREATE OR REPLACE FUNCTION permutations INT DEFAULT 99, geom_column TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - random_seed INTEGER DEFAULT NULL - ) + w_type TEXT DEFAULT 'knn') RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) AS $$ from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary - return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type, random_seed) + return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate +CREATE OR REPLACE FUNCTION + cdb_moran_local_rate(t TEXT, + numerator TEXT, + denominator TEXT, + significance FLOAT DEFAULT 0.05, + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_column TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn') +RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type) $$ LANGUAGE plpythonu; diff --git a/python/crankshaft/crankshaft/clustering/moran.py b/python/crankshaft/crankshaft/clustering/moran.py index e6d9707..8882235 100644 --- a/python/crankshaft/crankshaft/clustering/moran.py +++ b/python/crankshaft/crankshaft/clustering/moran.py @@ -54,6 +54,95 @@ def moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_ return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order) +def moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type): + """ + Moran's I Local Rate + Andy Eschbacher + """ + + plpy.notice('** Constructing query') + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = {"id_col": id_col, + "numerator": numerator, + "denominator": denominator, + "geom_col": geom_column, + "table": t, + "num_ngbrs": num_ngbrs} + + q = get_query(w_type, qvals) + + try: + r = plpy.execute(q) + plpy.notice('** Query returned with %d rows' % len(r)) + except plpy.SPIError: + plpy.notice('** Query failed: "%s"' % q) + plpy.notice('** Error: %s' % plpy.SPIError) + plpy.notice('** Exiting function') + return zip([None], [None], [None], [None]) + + plpy.notice('r.nrows() = %d' % r.nrows()) + + ## collect attributes + numer = get_attributes(r, 1) + denom = get_attributes(r, 2) + + w = get_weight(r, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, w, permutations=permutations) + + # find units of significance + lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance) + + plpy.notice('** Finished calculations') + + ## TODO: Decide on which return values here + return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order, lisa.y) + +def moran_local_bv(t, attr1, attr2, significance, num_ngbrs, permutations, geom_column, id_col, w_type): + plpy.notice('** Constructing query') + + qvals = {"num_ngbrs": num_ngbrs, + "attr1": attr1, + "attr2": attr2, + "table": t, + "geom_col": geom_column, + "id_col": id_col} + + q = get_query(w_type, qvals) + + try: + r = plpy.execute(q) + plpy.notice('** Query returned with %d rows' % len(r)) + except plpy.SPIError: + plpy.notice('** Query failed: "%s"' % q) + plpy.notice('** Error: %s' % plpy.SPIError) + plpy.notice('** Exiting function') + return zip([None], [None], [None], [None]) + + ## collect attributes + attr1_vals = get_attributes(r, 1) + attr2_vals = get_attributes(r, 2) + + # create weights + w = get_weight(r, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, w) + + plpy.notice("len of Is: %d" % len(lisa.Is)) + + # find clustering of significance + lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance) + + plpy.notice('** Finished calculations') + + return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order) + + # Low level functions ---------------------------------------- def map_quads(coord): diff --git a/python/crankshaft/test/fixtures/moran.json b/python/crankshaft/test/fixtures/moran.json new file mode 100644 index 0000000..0530c18 --- /dev/null +++ b/python/crankshaft/test/fixtures/moran.json @@ -0,0 +1,52 @@ +[[0.9319096128346788, "HH"], +[-1.135787401862846, "HL"], +[0.11732030672508517, "Not significant"], +[0.6152779669180425, "Not significant"], +[-0.14657336660125297, "Not significant"], +[0.6967858120189607, "Not significant"], +[0.07949310115714454, "Not significant"], +[0.4703198759258987, "Not significant"], +[0.4421125200498064, "Not significant"], +[0.5724288737143592, "Not significant"], +[0.8970743435692062, "LL"], +[0.18327334401918674, "Not significant"], +[-0.01466729201304962, "Not significant"], +[0.3481559372544409, "Not significant"], +[0.06547094736902978, "Not significant"], +[0.15482141569329988, "HH"], +[0.4373841193538136, "Not significant"], +[0.15971286468915544, "Not significant"], +[1.0543588860308968, "Not significant"], +[1.7372866900020818, "HH"], +[1.091998586053999, "LL"], +[0.1171572584252222, "Not significant"], +[0.08438455015300014, "Not significant"], +[0.06547094736902978, "Not significant"], +[0.15482141569329985, "HH"], +[1.1627044812890683, "HH"], +[0.06547094736902978, "Not significant"], +[0.795275137550483, "Not significant"], +[0.18562939195219, "LL"], +[0.3010757406693439, "Not significant"], +[2.8205795942839376, "HH"], +[0.11259190602909264, "Not significant"], +[-0.07116352791516614, "Not significant"], +[-0.09945240794119009, "Not significant"], +[0.18562939195219, "LL"], +[0.1832733440191868, "Not significant"], +[-0.39054253768447705, "Not significant"], +[-0.1672071289487642, "HL"], +[0.3337669247916343, "Not significant"], +[0.2584386102554792, "Not significant"], +[-0.19733845476322634, "HL"], +[-0.9379282899805409, "LH"], +[-0.028770969951095866, "Not significant"], +[0.051367269430983485, "Not significant"], +[-0.2172548045913472, "LH"], +[0.05136726943098351, "Not significant"], +[0.04191046803899837, "Not significant"], +[0.7482357030403517, "HH"], +[-0.014585767863118111, "Not significant"], +[0.5410013139159929, "Not significant"], +[1.0223932668429925, "LL"], +[1.4179402898927476, "LL"]] diff --git a/python/crankshaft/test/fixtures/neighbors.json b/python/crankshaft/test/fixtures/neighbors.json new file mode 100644 index 0000000..055b359 --- /dev/null +++ b/python/crankshaft/test/fixtures/neighbors.json @@ -0,0 +1,54 @@ +[ + {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5}, + {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7}, + {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2}, + {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1}, + {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3}, + {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05}, + {"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4}, + {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7}, + {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5}, + {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04}, + {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08}, + {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2}, + {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4}, + {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2}, + {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3}, + {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4}, + {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6}, + {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3}, + {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7}, + {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8}, + {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1}, + {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4}, + {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1}, + {"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3}, + {"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4}, + {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6}, + {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3}, + {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8}, + {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3}, + {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1}, + {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9}, + {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3}, + {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4}, + {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3}, + {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3}, + {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2}, + {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5}, + {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4}, + {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6}, + {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5}, + {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4}, + {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2}, + {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3}, + {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2}, + {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3}, + {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2}, + {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3}, + {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5}, + {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2}, + {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6}, + {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01}, + {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01} + ] diff --git a/python/crankshaft/test/test_clustering_moran.py b/python/crankshaft/test/test_clustering_moran.py index 5a2b0d8..6f3fa14 100644 --- a/python/crankshaft/test/test_clustering_moran.py +++ b/python/crankshaft/test/test_clustering_moran.py @@ -11,9 +11,9 @@ import unittest # sys.modules['plpy'] = plpy from helper import plpy -# import crankshaft.clustering as cc import crankshaft.clustering as cc - +from crankshaft import random_seeds +import json class MoranTest(unittest.TestCase): """Testing class for Moran's I functions.""" @@ -26,6 +26,8 @@ class MoranTest(unittest.TestCase): "table": "a_list", "geom_col": "the_geom", "num_ngbrs": 321} + self.neighbors_data = json.loads(open('test/fixtures/neighbors.json').read()) + self.moran_data = json.loads(open('test/fixtures/moran.json').read()) def test_map_quads(self): """Test map_quads.""" @@ -119,17 +121,24 @@ class MoranTest(unittest.TestCase): self.assertTrue((test_ans == ans).all()) def test_moran_local(self): - """Test Moran's I local""" - plpy._define_result('select', [ - { 'id': 1, 'attr1': 100.0, 'neighbors': [2,4,5,7,8] }, - { 'id': 2, 'attr1': 110.0, 'neighbors': [1,4,5,6,7] }, - { 'id': 3, 'attr1': 90.0, 'neighbors': [1,4,5,7,8] }, - { 'id': 4, 'attr1': 100.0, 'neighbors': [1,2,5,7,8] }, - { 'id': 5, 'attr1': 100.0, 'neighbors': [1,2,3,7,8] }, - { 'id': 6, 'attr1': 105.0, 'neighbors': [1,2,3,7,8] }, - { 'id': 7, 'attr1': 105.0, 'neighbors': [1,2,3,6,8] }, - { 'id': 8, 'attr1': 105.0, 'neighbors': [1,2,3,6,7] }, - { 'id': 9, 'attr1': 120.0, 'neighbors': [1,2,5,6,7] } - ]) - result = cc.moran_local('table', 'value', 0.05, 5, 99, 'the_geom', 'cartodb_id', 'knn') - # TODO: check results! + """Test Moran's I local""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local('table', 'value', 0.05, 5, 99, 'the_geom', 'cartodb_id', 'knn') + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + self.assertEqual(res_quad, exp_quad) + + def test_moran_local_rate(self): + """Test Moran's I rate""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local_rate('table', 'numerator', 'denominator', 0.05, 5, 99, 'the_geom', 'cartodb_id', 'knn') + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val)