From f571e59a95da014f676eb00200bb02042f934584 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 20 May 2016 14:05:59 -0400 Subject: [PATCH 01/14] adding hot/cold/outlier getters --- src/pg/sql/10_moran.sql | 174 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 164 insertions(+), 10 deletions(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index 85d69f0..5b7c68f 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -10,6 +10,7 @@ CREATE OR REPLACE FUNCTION num_ngbrs INT DEFAULT 5) RETURNS TABLE (moran NUMERIC, significance NUMERIC) AS $$ + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary return moran(subquery, attr, num_ngbrs, permutations, geom_col, id_col, w_type) @@ -17,21 +18,93 @@ $$ LANGUAGE plpythonu; -- Moran's I Local CREATE OR REPLACE FUNCTION - CDB_AreasOfInterest_Local( + _CDB_AreasOfInterest_Local( subquery TEXT, attr TEXT, - permutations INT DEFAULT 99, - geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + permutations INT, + geom_col TEXT, + id_col TEXT, + w_type TEXT, + num_ngbrs INT) RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary return moran_local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) $$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterest_Local( + subquery TEXT, + attr TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +AS $$ + + SELECT moran, quads, significance, ids, y + 
FROM cdb_crankshaft._CDB_AreasOfInterest_Local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs); + +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + attr TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +AS $$ + + SELECT moran, quads, significance, ids, y + FROM cdb_crankshaft._CDB_AreasOfInterest_Local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +AS $$ + + SELECT moran, quads, significance, ids, y + FROM cdb_crankshaft._CDB_AreasOfInterest_Local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +AS $$ + + SELECT moran, quads, significance, ids, y + FROM cdb_crankshaft._CDB_AreasOfInterest_Local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + -- Moran's I Rate (global) CREATE OR REPLACE FUNCTION CDB_AreasOfInterest_Global_Rate( @@ -45,6 +118,7 @@ CREATE OR REPLACE FUNCTION num_ngbrs INT DEFAULT 5) RETURNS TABLE (moran FLOAT, significance FLOAT) AS 
$$ + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary return moran_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) @@ -52,6 +126,25 @@ $$ LANGUAGE plpythonu; -- Moran's I Local Rate +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterest_Local_Rate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + permutations INT, + geom_col TEXT, + id_col TEXT, + w_type TEXT, + num_ngbrs INT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +AS $$ + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) +$$ LANGUAGE plpythonu; + CREATE OR REPLACE FUNCTION CDB_AreasOfInterest_Local_Rate( subquery TEXT, @@ -65,10 +158,71 @@ CREATE OR REPLACE FUNCTION RETURNS TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ - from crankshaft.clustering import moran_local_rate - # TODO: use named parameters or a dictionary - return moran_local_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) -$$ LANGUAGE plpythonu; + + SELECT moran, quads, significance, ids, y + FROM cdb_crankshaft._CDB_AreasOfInterest_Local_Rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs); + +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots_Rate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +AS $$ + + SELECT moran, quads, significance, ids, y 
+ FROM cdb_crankshaft._CDB_AreasOfInterest_Local_Rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots_Rate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +AS $$ + + SELECT moran, quads, significance, ids, y + FROM cdb_crankshaft._CDB_AreasOfInterest_Local_Rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers_Rate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +AS $$ + + SELECT moran, quads, significance, ids, y + FROM cdb_crankshaft._CDB_AreasOfInterest_Local_Rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; -- -- Moran's I Local Bivariate -- CREATE OR REPLACE FUNCTION From c3913459d9a8a45d5b6c2af54ed1d21626e0f82f Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 20 May 2016 14:54:42 -0400 Subject: [PATCH 02/14] adding tests for hotspot, coldspot, and outlier functions --- src/pg/test/expected/02_moran_test.out | 432 +++++++++++-------------- src/pg/test/sql/02_moran_test.sql | 29 ++ 2 files changed, 220 insertions(+), 241 deletions(-) diff --git a/src/pg/test/expected/02_moran_test.out b/src/pg/test/expected/02_moran_test.out index eb2afa5..0b9267a 100644 --- 
a/src/pg/test/expected/02_moran_test.out +++ b/src/pg/test/expected/02_moran_test.out @@ -1,248 +1,198 @@ +\pset format unaligned +\set ECHO all \i test/fixtures/ppoints.sql --- test table (spanish province centroids with some invented values) -CREATE TABLE ppoints (cartodb_id integer, the_geom geometry, the_geom_webmercator geometry, code text, region_code text, value float); -INSERT INTO ppoints VALUES -( 1,'0101000020E6100000A8306DC0CBC305C051D14B6CE56A4540'::geometry,ST_Transform('0101000020E6100000A8306DC0CBC305C051D14B6CE56A4540'::geometry, 3857),'01','16',0.5), -( 4,'0101000020E6100000E220A4362DC202C0FD8AFA5119994240'::geometry,ST_Transform('0101000020E6100000E220A4362DC202C0FD8AFA5119994240'::geometry, 3857),'04','01',0.1), -( 5,'0101000020E610000004377E573AC813C0CB5871BB17494440'::geometry,ST_Transform('0101000020E610000004377E573AC813C0CB5871BB17494440'::geometry, 3857),'05','07',0.3), -( 2,'0101000020E610000000F49BE19BAFFFBF639958FDA6694340'::geometry,ST_Transform('0101000020E610000000F49BE19BAFFFBF639958FDA6694340'::geometry, 3857),'02','08',0.7), -( 3,'0101000020E61000005D0B7E63C832E2BFDB63EB00443D4340'::geometry,ST_Transform('0101000020E61000005D0B7E63C832E2BFDB63EB00443D4340'::geometry, 3857),'03','10',0.2), -( 6,'0101000020E61000006F3742B7FB9018C0DD967DC4D95A4340'::geometry,ST_Transform('0101000020E61000006F3742B7FB9018C0DD967DC4D95A4340'::geometry, 3857),'06','11',0.05), -( 7,'0101000020E6100000E4BB36995F4C0740EAC0E5CA9FC94340'::geometry,ST_Transform('0101000020E6100000E4BB36995F4C0740EAC0E5CA9FC94340'::geometry, 3857),'07','04',0.4), -( 8,'0101000020E61000003D43CC6CAFBEFF3F6B52E66F91DD4440'::geometry,ST_Transform('0101000020E61000003D43CC6CAFBEFF3F6B52E66F91DD4440'::geometry, 3857),'08','09',0.7), -( 9,'0101000020E61000003CC797BD99AF0CC0495A87FA312F4540'::geometry,ST_Transform('0101000020E61000003CC797BD99AF0CC0495A87FA312F4540'::geometry, 3857),'09','07',0.5), 
-(13,'0101000020E61000001CAA00A9F19F0EC05DF9267B7A764340'::geometry,ST_Transform('0101000020E61000001CAA00A9F19F0EC05DF9267B7A764340'::geometry, 3857),'13','08',0.4), -(16,'0101000020E6100000D8208F3CBC9001C065638DC1B1F24340'::geometry,ST_Transform('0101000020E6100000D8208F3CBC9001C065638DC1B1F24340'::geometry, 3857),'16','08',0.4), -(17,'0101000020E6100000E9E6A94A71630540AD7A0CB062104540'::geometry,ST_Transform('0101000020E6100000E9E6A94A71630540AD7A0CB062104540'::geometry, 3857),'17','09',0.6), -(18,'0101000020E6100000719792D59E240AC098AC548E00A84240'::geometry,ST_Transform('0101000020E6100000719792D59E240AC098AC548E00A84240'::geometry, 3857),'18','01',0.3), -(19,'0101000020E6100000972C878B50FD04C0123C881D1F684440'::geometry,ST_Transform('0101000020E6100000972C878B50FD04C0123C881D1F684440'::geometry, 3857),'19','08',0.7), -(21,'0101000020E6100000F7893E9934511BC0EAA4BF03E1C94240'::geometry,ST_Transform('0101000020E6100000F7893E9934511BC0EAA4BF03E1C94240'::geometry, 3857),'21','01',0.1), -(22,'0101000020E6100000572C2123B2A8B2BF7ED7FABAFD194540'::geometry,ST_Transform('0101000020E6100000572C2123B2A8B2BF7ED7FABAFD194540'::geometry, 3857),'22','02',0.4), -(25,'0101000020E6100000461B67D688C4F03FD990EEC3A0054540'::geometry,ST_Transform('0101000020E6100000461B67D688C4F03FD990EEC3A0054540'::geometry, 3857),'25','09',0.4), -(26,'0101000020E6100000A139FB06E82204C0539D84F62E234540'::geometry,ST_Transform('0101000020E6100000A139FB06E82204C0539D84F62E234540'::geometry, 3857),'26','17',0.6), -(27,'0101000020E6100000A92E54E618C91DC00D3A947B81814540'::geometry,ST_Transform('0101000020E6100000A92E54E618C91DC00D3A947B81814540'::geometry, 3857),'27','12',0.3), -(28,'0101000020E6100000971DC8B682BC0DC016D0E8055F3F4440'::geometry,ST_Transform('0101000020E6100000971DC8B682BC0DC016D0E8055F3F4440'::geometry, 3857),'28','13',0.8), -(30,'0101000020E6100000A2DC1964A8C5F7BF19299C994D004340'::geometry,ST_Transform('0101000020E6100000A2DC1964A8C5F7BF19299C994D004340'::geometry, 
3857),'30','14',0.1), -(31,'0101000020E6100000DCA1FCC87B56FABF9B88E9D866554540'::geometry,ST_Transform('0101000020E6100000DCA1FCC87B56FABF9B88E9D866554540'::geometry, 3857),'31','15',0.9), -(32,'0101000020E6100000E1517AFCD15E1EC0A18D8D4825194540'::geometry,ST_Transform('0101000020E6100000E1517AFCD15E1EC0A18D8D4825194540'::geometry, 3857),'32','12',0.3), -(33,'0101000020E6100000A7FF33825AF917C0FABE7DFB6BA54540'::geometry,ST_Transform('0101000020E6100000A7FF33825AF917C0FABE7DFB6BA54540'::geometry, 3857),'33','03',0.4), -(34,'0101000020E6100000FB4E4EBEB72412C0898E7240982F4540'::geometry,ST_Transform('0101000020E6100000FB4E4EBEB72412C0898E7240982F4540'::geometry, 3857),'34','07',0.3), -(35,'0101000020E6100000224682B01B1A2DC011091656CC5C3C40'::geometry,ST_Transform('0101000020E6100000224682B01B1A2DC011091656CC5C3C40'::geometry, 3857),'35','05',0.3), -(36,'0101000020E6100000F7C9447110EC20C04C5D4823C7374540'::geometry,ST_Transform('0101000020E6100000F7C9447110EC20C04C5D4823C7374540'::geometry, 3857),'36','12',0.2), -(37,'0101000020E610000053D6A26DFB4218C09D58FAE209674440'::geometry,ST_Transform('0101000020E610000053D6A26DFB4218C09D58FAE209674440'::geometry, 3857),'37','07',0.5), -(38,'0101000020E6100000B1D1B5FC910431C03C0C89BA03503C40'::geometry,ST_Transform('0101000020E6100000B1D1B5FC910431C03C0C89BA03503C40'::geometry, 3857),'38','05',0.4), -(39,'0101000020E610000086E6FEE1BD1E10C00417096748994540'::geometry,ST_Transform('0101000020E610000086E6FEE1BD1E10C00417096748994540'::geometry, 3857),'39','06',0.6), -(40,'0101000020E6100000FB51C33F733710C038D01729E4954440'::geometry,ST_Transform('0101000020E6100000FB51C33F733710C038D01729E4954440'::geometry, 3857),'40','07',0.5), -(41,'0101000020E6100000912D6FDA28BB16C031321F08C4B74240'::geometry,ST_Transform('0101000020E6100000912D6FDA28BB16C031321F08C4B74240'::geometry, 3857),'41','01',0.4), 
-(42,'0101000020E6100000554432EABEB504C069ECD78775CF4440'::geometry,ST_Transform('0101000020E6100000554432EABEB504C069ECD78775CF4440'::geometry, 3857),'42','07',0.2), -(43,'0101000020E6100000157F117C1A2EEA3F027CD1F2368B4440'::geometry,ST_Transform('0101000020E6100000157F117C1A2EEA3F027CD1F2368B4440'::geometry, 3857),'43','09',0.3), -(44,'0101000020E610000051AA5B1BD718EABFEE67613BA4544440'::geometry,ST_Transform('0101000020E610000051AA5B1BD718EABFEE67613BA4544440'::geometry, 3857),'44','02',0.2), -(45,'0101000020E610000022C5C01BB69710C08563BC1499E54340'::geometry,ST_Transform('0101000020E610000022C5C01BB69710C08563BC1499E54340'::geometry, 3857),'45','08',0.3), -(46,'0101000020E6100000D5FCF78A11A0E9BFDEA46F8E64AF4340'::geometry,ST_Transform('0101000020E6100000D5FCF78A11A0E9BFDEA46F8E64AF4340'::geometry, 3857),'46','10',0.2), -(47,'0101000020E61000003AE63525866313C02100050B2BD14440'::geometry,ST_Transform('0101000020E61000003AE63525866313C02100050B2BD14440'::geometry, 3857),'47','07',0.3), -(48,'0101000020E610000030F187FD1FD206C0C767E1496C9E4540'::geometry,ST_Transform('0101000020E610000030F187FD1FD206C0C767E1496C9E4540'::geometry, 3857),'48','16',0.5), -(49,'0101000020E61000009C22867B12EC17C006C5F40C14DD4440'::geometry,ST_Transform('0101000020E61000009C22867B12EC17C006C5F40C14DD4440'::geometry, 3857),'49','07',0.2), -(50,'0101000020E6100000F7D5EFC62D08F1BF69D1231D68CF4440'::geometry,ST_Transform('0101000020E6100000F7D5EFC62D08F1BF69D1231D68CF4440'::geometry, 3857),'50','02',0.6), -(51,'0101000020E61000005B0E1F8DAA5F15C0530BFE285BF24140'::geometry,ST_Transform('0101000020E61000005B0E1F8DAA5F15C0530BFE285BF24140'::geometry, 3857),'51','18',0.01), -(10,'0101000020E61000000FD65D82AEA418C06192D1351FDB4340'::geometry,ST_Transform('0101000020E61000000FD65D82AEA418C06192D1351FDB4340'::geometry, 3857),'10','11',0.04), -(11,'0101000020E6100000B305531DAB0A17C0DEAFCD4EE5464240'::geometry,ST_Transform('0101000020E6100000B305531DAB0A17C0DEAFCD4EE5464240'::geometry, 
3857),'11','01',0.08), -(12,'0101000020E610000059721A7297C9C2BF9EBE383BE51E4440'::geometry,ST_Transform('0101000020E610000059721A7297C9C2BF9EBE383BE51E4440'::geometry, 3857),'12','10',0.2), -(14,'0101000020E610000000C86313AF3C13C0E530879C10FF4240'::geometry,ST_Transform('0101000020E610000000C86313AF3C13C0E530879C10FF4240'::geometry, 3857),'14','01',0.2), -(15,'0101000020E61000002A475497B6ED20C06643D4131A904540'::geometry,ST_Transform('0101000020E61000002A475497B6ED20C06643D4131A904540'::geometry, 3857),'15','12',0.3), -(20,'0101000020E6100000F975566FAD8D01C0E840C33F67924540'::geometry,ST_Transform('0101000020E6100000F975566FAD8D01C0E840C33F67924540'::geometry, 3857),'20','16',0.8), -(23,'0101000020E610000025FA13E595880BC022BB07131D024340'::geometry,ST_Transform('0101000020E610000025FA13E595880BC022BB07131D024340'::geometry, 3857),'23','01',0.1), -(24,'0101000020E61000009C5F91C5095C17C0C78784B15A4F4540'::geometry,ST_Transform('0101000020E61000009C5F91C5095C17C0C78784B15A4F4540'::geometry, 3857),'24','07',0.3), -(29,'0101000020E6100000C34D4A5B48E712C092E680892C684240'::geometry,ST_Transform('0101000020E6100000C34D4A5B48E712C092E680892C684240'::geometry, 3857),'29','01',0.3), -(52,'0101000020E6100000406A545EB29A07C04E5F0BDA39A54140'::geometry,ST_Transform('0101000020E6100000406A545EB29A07C04E5F0BDA39A54140'::geometry, 3857),'52','19',0.01) -\i test/fixtures/ppoints2.sql --- test table (spanish province centroids with some invented values) -CREATE TABLE ppoints2 (cartodb_id integer, the_geom geometry, code text, region_code text, numerator float, denominator float); -INSERT INTO ppoints2 VALUES -( 1,'0101000020E6100000A8306DC0CBC305C051D14B6CE56A4540'::geometry,'01','16',0.5, 1.0), -( 4,'0101000020E6100000E220A4362DC202C0FD8AFA5119994240'::geometry,'04','01',0.1, 1.0), -( 5,'0101000020E610000004377E573AC813C0CB5871BB17494440'::geometry,'05','07',0.3, 1.0), -( 2,'0101000020E610000000F49BE19BAFFFBF639958FDA6694340'::geometry,'02','08',0.7, 1.0), -( 
3,'0101000020E61000005D0B7E63C832E2BFDB63EB00443D4340'::geometry,'03','10',0.2, 1.0), -( 6,'0101000020E61000006F3742B7FB9018C0DD967DC4D95A4340'::geometry,'06','11',0.05, 1.0), -( 7,'0101000020E6100000E4BB36995F4C0740EAC0E5CA9FC94340'::geometry,'07','04',0.4, 1.0), -( 8,'0101000020E61000003D43CC6CAFBEFF3F6B52E66F91DD4440'::geometry,'08','09',0.7, 1.0), -( 9,'0101000020E61000003CC797BD99AF0CC0495A87FA312F4540'::geometry,'09','07',0.5, 1.0), -(13,'0101000020E61000001CAA00A9F19F0EC05DF9267B7A764340'::geometry,'13','08',0.4, 1.0), -(16,'0101000020E6100000D8208F3CBC9001C065638DC1B1F24340'::geometry,'16','08',0.4, 1.0), -(17,'0101000020E6100000E9E6A94A71630540AD7A0CB062104540'::geometry,'17','09',0.6, 1.0), -(18,'0101000020E6100000719792D59E240AC098AC548E00A84240'::geometry,'18','01',0.3, 1.0), -(19,'0101000020E6100000972C878B50FD04C0123C881D1F684440'::geometry,'19','08',0.7, 1.0), -(21,'0101000020E6100000F7893E9934511BC0EAA4BF03E1C94240'::geometry,'21','01',0.1, 1.0), -(22,'0101000020E6100000572C2123B2A8B2BF7ED7FABAFD194540'::geometry,'22','02',0.4, 1.0), -(25,'0101000020E6100000461B67D688C4F03FD990EEC3A0054540'::geometry,'25','09',0.4, 1.0), -(26,'0101000020E6100000A139FB06E82204C0539D84F62E234540'::geometry,'26','17',0.6, 1.0), -(27,'0101000020E6100000A92E54E618C91DC00D3A947B81814540'::geometry,'27','12',0.3, 1.0), -(28,'0101000020E6100000971DC8B682BC0DC016D0E8055F3F4440'::geometry,'28','13',0.8, 1.0), -(30,'0101000020E6100000A2DC1964A8C5F7BF19299C994D004340'::geometry,'30','14',0.1, 1.0), -(31,'0101000020E6100000DCA1FCC87B56FABF9B88E9D866554540'::geometry,'31','15',0.9, 1.0), -(32,'0101000020E6100000E1517AFCD15E1EC0A18D8D4825194540'::geometry,'32','12',0.3, 1.0), -(33,'0101000020E6100000A7FF33825AF917C0FABE7DFB6BA54540'::geometry,'33','03',0.4, 1.0), -(34,'0101000020E6100000FB4E4EBEB72412C0898E7240982F4540'::geometry,'34','07',0.3, 1.0), -(35,'0101000020E6100000224682B01B1A2DC011091656CC5C3C40'::geometry,'35','05',0.3, 1.0), 
-(36,'0101000020E6100000F7C9447110EC20C04C5D4823C7374540'::geometry,'36','12',0.2, 1.0), -(37,'0101000020E610000053D6A26DFB4218C09D58FAE209674440'::geometry,'37','07',0.5, 1.0), -(38,'0101000020E6100000B1D1B5FC910431C03C0C89BA03503C40'::geometry,'38','05',0.4, 1.0), -(39,'0101000020E610000086E6FEE1BD1E10C00417096748994540'::geometry,'39','06',0.6, 1.0), -(40,'0101000020E6100000FB51C33F733710C038D01729E4954440'::geometry,'40','07',0.5, 1.0), -(41,'0101000020E6100000912D6FDA28BB16C031321F08C4B74240'::geometry,'41','01',0.4, 1.0), -(42,'0101000020E6100000554432EABEB504C069ECD78775CF4440'::geometry,'42','07',0.2, 1.0), -(43,'0101000020E6100000157F117C1A2EEA3F027CD1F2368B4440'::geometry,'43','09',0.3, 1.0), -(44,'0101000020E610000051AA5B1BD718EABFEE67613BA4544440'::geometry,'44','02',0.2, 1.0), -(45,'0101000020E610000022C5C01BB69710C08563BC1499E54340'::geometry,'45','08',0.3, 1.0), -(46,'0101000020E6100000D5FCF78A11A0E9BFDEA46F8E64AF4340'::geometry,'46','10',0.2, 1.0), -(47,'0101000020E61000003AE63525866313C02100050B2BD14440'::geometry,'47','07',0.3, 1.0), -(48,'0101000020E610000030F187FD1FD206C0C767E1496C9E4540'::geometry,'48','16',0.5, 1.0), -(49,'0101000020E61000009C22867B12EC17C006C5F40C14DD4440'::geometry,'49','07',0.2, 1.0), -(50,'0101000020E6100000F7D5EFC62D08F1BF69D1231D68CF4440'::geometry,'50','02',0.6, 1.0), -(51,'0101000020E61000005B0E1F8DAA5F15C0530BFE285BF24140'::geometry,'51','18',0.01, 1.0), -(10,'0101000020E61000000FD65D82AEA418C06192D1351FDB4340'::geometry,'10','11',0.04, 1.0), -(11,'0101000020E6100000B305531DAB0A17C0DEAFCD4EE5464240'::geometry,'11','01',0.08, 1.0), -(12,'0101000020E610000059721A7297C9C2BF9EBE383BE51E4440'::geometry,'12','10',0.2, 1.0), -(14,'0101000020E610000000C86313AF3C13C0E530879C10FF4240'::geometry,'14','01',0.2, 1.0), -(15,'0101000020E61000002A475497B6ED20C06643D4131A904540'::geometry,'15','12',0.3, 1.0), -(20,'0101000020E6100000F975566FAD8D01C0E840C33F67924540'::geometry,'20','16',0.8, 1.0), 
-(23,'0101000020E610000025FA13E595880BC022BB07131D024340'::geometry,'23','01',0.1, 1.0), -(24,'0101000020E61000009C5F91C5095C17C0C78784B15A4F4540'::geometry,'24','07',0.3, 1.0), -(29,'0101000020E6100000C34D4A5B48E712C092E680892C684240'::geometry,'29','01',0.3, 1.0), -(52,'0101000020E6100000406A545EB29A07C04E5F0BDA39A54140'::geometry,'52','19',0.0, 1.01) --- Areas of Interest functions perform some nondeterministic computations --- (to estimate the significance); we will set the seeds for the RNGs --- that affect those results to have repeateble results -SELECT cdb_crankshaft._cdb_random_seeds(1234); - _cdb_random_seeds -------------------- - -(1 row) +SET client_min_messages TO WARNING; +\set ECHO none +_cdb_random_seeds -SELECT ppoints.code, m.quads - FROM ppoints - JOIN cdb_crankshaft.CDB_AreasOfInterest_Local('SELECT * FROM ppoints', 'value') m - ON ppoints.cartodb_id = m.ids - ORDER BY ppoints.code; - code | quads -------+------- - 01 | HH - 02 | HL - 03 | LL - 04 | LL - 05 | LH - 06 | LL - 07 | HH - 08 | HH - 09 | HH - 10 | LL - 11 | LL - 12 | LL - 13 | HL - 14 | LL - 15 | LL - 16 | HH - 17 | HH - 18 | LL - 19 | HH - 20 | HH - 21 | LL - 22 | HH - 23 | LL - 24 | LL - 25 | HH - 26 | HH - 27 | LL - 28 | HH - 29 | LL - 30 | LL - 31 | HH - 32 | LL - 33 | HL - 34 | LH - 35 | LL - 36 | LL - 37 | HL - 38 | HL - 39 | HH - 40 | HH - 41 | HL - 42 | LH - 43 | LH - 44 | LL - 45 | LH - 46 | LL - 47 | LL - 48 | HH - 49 | LH - 50 | HH - 51 | LL - 52 | LL +(1 row) +code|quads +01|HH +02|HL +03|LL +04|LL +05|LH +06|LL +07|HH +08|HH +09|HH +10|LL +11|LL +12|LL +13|HL +14|LL +15|LL +16|HH +17|HH +18|LL +19|HH +20|HH +21|LL +22|HH +23|LL +24|LL +25|HH +26|HH +27|LL +28|HH +29|LL +30|LL +31|HH +32|LL +33|HL +34|LH +35|LL +36|LL +37|HL +38|HL +39|HH +40|HH +41|HL +42|LH +43|LH +44|LL +45|LH +46|LL +47|LL +48|HH +49|LH +50|HH +51|LL +52|LL (52 rows) +_cdb_random_seeds -SELECT cdb_crankshaft._cdb_random_seeds(1234); - _cdb_random_seeds -------------------- - (1 row) - -SELECT 
ppoints2.code, m.quads - FROM ppoints2 - JOIN cdb_crankshaft.CDB_AreasOfInterest_Local_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m - ON ppoints2.cartodb_id = m.ids - ORDER BY ppoints2.code; - code | quads -------+------- - 01 | LL - 02 | LH - 03 | HH - 04 | HH - 05 | LL - 06 | HH - 07 | LL - 08 | LL - 09 | LL - 10 | HH - 11 | HH - 12 | HL - 13 | LL - 14 | HH - 15 | LL - 16 | LL - 17 | LL - 18 | LH - 19 | LL - 20 | LL - 21 | HH - 22 | LL - 23 | HL - 24 | LL - 25 | LL - 26 | LL - 27 | LL - 28 | LL - 29 | LH - 30 | HH - 31 | LL - 32 | LL - 33 | LL - 34 | LL - 35 | LH - 36 | HL - 37 | LH - 38 | LH - 39 | LL - 40 | LL - 41 | LH - 42 | HL - 43 | LL - 44 | HL - 45 | LL - 46 | HL - 47 | LL - 48 | LL - 49 | HL - 50 | LL - 51 | HH +code|quads +01|LL +02|LH +03|HH +04|HH +05|LL +06|HH +07|LL +08|LL +09|LL +10|HH +11|HH +12|HL +13|LL +14|HH +15|LL +16|LL +17|LL +18|LH +19|LL +20|LL +21|HH +22|LL +23|HL +24|LL +25|LL +26|LL +27|LL +28|LL +29|LH +30|HH +31|LL +32|LL +33|LL +34|LL +35|LH +36|HL +37|LH +38|LH +39|LL +40|LL +41|LH +42|HL +43|LL +44|HL +45|LL +46|HL +47|LL +48|LL +49|HL +50|LL +51|HH (51 rows) +_cdb_random_seeds +(1 row) +code|quads +03|HH +04|HH +06|HH +10|HH +11|HH +12|HL +14|HH +21|HH +23|HL +30|HH +36|HL +42|HL +44|HL +46|HL +49|HL +51|HH +(16 rows) +_cdb_random_seeds + +(1 row) +code|quads +01|LL +02|LH +05|LL +07|LL +08|LL +09|LL +13|LL +15|LL +16|LL +17|LL +18|LH +19|LL +20|LL +22|LL +24|LL +25|LL +26|LL +27|LL +28|LL +29|LH +31|LL +32|LL +33|LL +34|LL +35|LH +37|LH +38|LH +39|LL +40|LL +41|LH +43|LL +45|LL +47|LL +48|LL +50|LL +(35 rows) +_cdb_random_seeds + +(1 row) +code|quads +02|LH +12|HL +18|LH +23|HL +29|LH +35|LH +36|HL +37|LH +38|LH +41|LH +42|HL +44|HL +46|HL +49|HL +(14 rows) diff --git a/src/pg/test/sql/02_moran_test.sql b/src/pg/test/sql/02_moran_test.sql index de9c6cf..fd8dd58 100644 --- a/src/pg/test/sql/02_moran_test.sql +++ b/src/pg/test/sql/02_moran_test.sql @@ -1,3 +1,5 @@ +\pset format unaligned +\set ECHO all \i 
test/fixtures/ppoints.sql \i test/fixtures/ppoints2.sql @@ -19,3 +21,30 @@ SELECT ppoints2.code, m.quads JOIN cdb_crankshaft.CDB_AreasOfInterest_Local_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m ON ppoints2.cartodb_id = m.ids ORDER BY ppoints2.code; + +SELECT cdb_crankshaft._cdb_random_seeds(1234); + +-- Spatial Hotspots (rate) +SELECT ppoints2.code, m.quads + FROM ppoints2 + JOIN cdb_crankshaft.CDB_GetSpatialHotspots_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m + ON ppoints2.cartodb_id = m.ids + ORDER BY ppoints2.code; + +SELECT cdb_crankshaft._cdb_random_seeds(1234); + +-- Spatial Coldspots (rate) +SELECT ppoints2.code, m.quads + FROM ppoints2 + JOIN cdb_crankshaft.CDB_GetSpatialColdspots_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m + ON ppoints2.cartodb_id = m.ids + ORDER BY ppoints2.code; + +SELECT cdb_crankshaft._cdb_random_seeds(1234); + +-- Spatial Outliers (rate) +SELECT ppoints2.code, m.quads + FROM ppoints2 + JOIN cdb_crankshaft.CDB_GetSpatialOutliers_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m + ON ppoints2.cartodb_id = m.ids + ORDER BY ppoints2.code; \ No newline at end of file From bc8055a12b50d8fea2025f5f77613a21f141e37f Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 20 May 2016 14:55:16 -0400 Subject: [PATCH 03/14] adds pset format unaligned to reduce output for tests --- src/pg/test/fixtures/polyg_values.sql | 2 ++ src/pg/test/fixtures/ppoints.sql | 2 ++ src/pg/test/fixtures/ppoints2.sql | 2 ++ 3 files changed, 6 insertions(+) diff --git a/src/pg/test/fixtures/polyg_values.sql b/src/pg/test/fixtures/polyg_values.sql index 7aaf4a0..a76ca7c 100644 --- a/src/pg/test/fixtures/polyg_values.sql +++ b/src/pg/test/fixtures/polyg_values.sql @@ -1,3 +1,5 @@ +SET client_min_messages TO WARNING; +\set ECHO none CREATE TABLE values (cartodb_id integer, value float, the_geom geometry); INSERT INTO values(cartodb_id, value, the_geom) VALUES 
(1,10,'0106000020E61000000100000001030000000100000005000000E5AF3500C03608C08068629111374440C7BC0A00C00F02C0AC0551523B414440C7BC0A00C0A700C0CAF23B6E74FB4340A7267FFFFF5206C0FBB7E41B7EE74340E5AF3500C03608C08068629111374440'::geometry), diff --git a/src/pg/test/fixtures/ppoints.sql b/src/pg/test/fixtures/ppoints.sql index 10bc0c1..d0749e0 100644 --- a/src/pg/test/fixtures/ppoints.sql +++ b/src/pg/test/fixtures/ppoints.sql @@ -1,3 +1,5 @@ +SET client_min_messages TO WARNING; +\set ECHO none -- test table (spanish province centroids with some invented values) CREATE TABLE ppoints (cartodb_id integer, the_geom geometry, the_geom_webmercator geometry, code text, region_code text, value float); INSERT INTO ppoints VALUES diff --git a/src/pg/test/fixtures/ppoints2.sql b/src/pg/test/fixtures/ppoints2.sql index 873fd3b..3363955 100644 --- a/src/pg/test/fixtures/ppoints2.sql +++ b/src/pg/test/fixtures/ppoints2.sql @@ -1,3 +1,5 @@ +SET client_min_messages TO WARNING; +\set ECHO none -- test table (spanish province centroids with some invented values) CREATE TABLE ppoints2 (cartodb_id integer, the_geom geometry, code text, region_code text, numerator float, denominator float); INSERT INTO ppoints2 VALUES From b05ad98ed95345d7410ad625ebd6748757f084f2 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 20 May 2016 15:15:19 -0400 Subject: [PATCH 04/14] adding tests for hot/cold/outlier for normal moran functions --- src/pg/test/expected/02_moran_test.out | 79 ++++++++++++++++++++++++++ src/pg/test/sql/02_moran_test.sql | 29 ++++++++++ 2 files changed, 108 insertions(+) diff --git a/src/pg/test/expected/02_moran_test.out b/src/pg/test/expected/02_moran_test.out index 0b9267a..3ba5bfd 100644 --- a/src/pg/test/expected/02_moran_test.out +++ b/src/pg/test/expected/02_moran_test.out @@ -62,6 +62,85 @@ code|quads (52 rows) _cdb_random_seeds +(1 row) +code|quads +01|HH +02|HL +07|HH +08|HH +09|HH +13|HL +16|HH +17|HH +19|HH +20|HH +22|HH +25|HH +26|HH +28|HH +31|HH +33|HL +37|HL 
+38|HL +39|HH +40|HH +41|HL +48|HH +50|HH +(23 rows) +_cdb_random_seeds + +(1 row) +code|quads +03|LL +04|LL +05|LH +06|LL +10|LL +11|LL +12|LL +14|LL +15|LL +18|LL +21|LL +23|LL +24|LL +27|LL +29|LL +30|LL +32|LL +34|LH +35|LL +36|LL +42|LH +43|LH +44|LL +45|LH +46|LL +47|LL +49|LH +51|LL +52|LL +(29 rows) +_cdb_random_seeds + +(1 row) +code|quads +02|HL +05|LH +13|HL +33|HL +34|LH +37|HL +38|HL +41|HL +42|LH +43|LH +45|LH +49|LH +(12 rows) +_cdb_random_seeds + (1 row) code|quads 01|LL diff --git a/src/pg/test/sql/02_moran_test.sql b/src/pg/test/sql/02_moran_test.sql index fd8dd58..e984b0b 100644 --- a/src/pg/test/sql/02_moran_test.sql +++ b/src/pg/test/sql/02_moran_test.sql @@ -16,6 +16,35 @@ SELECT ppoints.code, m.quads SELECT cdb_crankshaft._cdb_random_seeds(1234); +-- Spatial Hotspots +SELECT ppoints.code, m.quads + FROM ppoints + JOIN cdb_crankshaft.CDB_GetSpatialHotspots('SELECT * FROM ppoints', 'value') m + ON ppoints.cartodb_id = m.ids + ORDER BY ppoints.code; + +SELECT cdb_crankshaft._cdb_random_seeds(1234); + +-- Spatial Coldspots +SELECT ppoints.code, m.quads + FROM ppoints + JOIN cdb_crankshaft.CDB_GetSpatialColdspots('SELECT * FROM ppoints', 'value') m + ON ppoints.cartodb_id = m.ids + ORDER BY ppoints.code; + +SELECT cdb_crankshaft._cdb_random_seeds(1234); + + -- Spatial Outliers +SELECT ppoints.code, m.quads + FROM ppoints + JOIN cdb_crankshaft.CDB_GetSpatialOutliers('SELECT * FROM ppoints', 'value') m + ON ppoints.cartodb_id = m.ids + ORDER BY ppoints.code; + + +SELECT cdb_crankshaft._cdb_random_seeds(1234); + +-- Areas of Interest (rate) SELECT ppoints2.code, m.quads FROM ppoints2 JOIN cdb_crankshaft.CDB_AreasOfInterest_Local_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m From 4e870e4393d61d64ee52ecfa7aeec1a4f177459e Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 20 May 2016 15:15:41 -0400 Subject: [PATCH 05/14] adapt other test for new settings in fixtures --- src/pg/test/expected/03_overlap_sum_test.out | 14 
++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/pg/test/expected/03_overlap_sum_test.out b/src/pg/test/expected/03_overlap_sum_test.out index 58f02cb..7638301 100644 --- a/src/pg/test/expected/03_overlap_sum_test.out +++ b/src/pg/test/expected/03_overlap_sum_test.out @@ -1,21 +1,11 @@ \i test/fixtures/polyg_values.sql -CREATE TABLE values (cartodb_id integer, value float, the_geom geometry); -INSERT INTO values(cartodb_id, value, the_geom) VALUES -(1,10,'0106000020E61000000100000001030000000100000005000000E5AF3500C03608C08068629111374440C7BC0A00C00F02C0AC0551523B414440C7BC0A00C0A700C0CAF23B6E74FB4340A7267FFFFF5206C0FBB7E41B7EE74340E5AF3500C03608C08068629111374440'::geometry), -(2,20,'0106000020E610000001000000010300000001000000050000002439EC00804AF7BF07D6CCB5C3064440C7BC0A00C0A700C0CAF23B6E74FB4340C7BC0A00C00F02C0AC0551523B414440E20CD5FFFF30FABFBE4F76AFEA4B44402439EC00804AF7BF07D6CCB5C3064440'::geometry) -SELECT round(cdb_crankshaft.cdb_overlap_sum( - '0106000020E61000000100000001030000000100000004000000FFFFFFFFFF3604C09A0B9ECEC42E444000000000C060FBBF30C7FD70E01D44400000000040AD02C06481F1C8CD034440FFFFFFFFFF3604C09A0B9ECEC42E4440'::geometry, - 'values', 'value' -), 2); +SET client_min_messages TO WARNING; +\set ECHO none round ------- 4.42 (1 row) -SELECT round(cdb_crankshaft.cdb_overlap_sum( - '0106000020E61000000100000001030000000100000004000000FFFFFFFFFF3604C09A0B9ECEC42E444000000000C060FBBF30C7FD70E01D44400000000040AD02C06481F1C8CD034440FFFFFFFFFF3604C09A0B9ECEC42E4440'::geometry, - 'values', 'value', schema_name := 'public' -), 2); round ------- 4.42 From 1b0d1cc82c562cd821c04d79393f89bbeaeeb27f Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 24 May 2016 17:51:29 -0400 Subject: [PATCH 06/14] updating function names --- src/pg/sql/10_moran.sql | 52 +++++++++++++------------------ src/pg/test/sql/02_moran_test.sql | 10 +++--- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/src/pg/sql/10_moran.sql 
b/src/pg/sql/10_moran.sql index 5b7c68f..b6dfc17 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -1,6 +1,6 @@ --- Moran's I (global) +-- Moran's I Global Measure (public-facing) CREATE OR REPLACE FUNCTION - CDB_AreasOfInterest_Global ( + CDB_AreasOfInterestGlobal ( subquery TEXT, attr_name TEXT, permutations INT DEFAULT 99, @@ -16,9 +16,9 @@ AS $$ return moran(subquery, attr, num_ngbrs, permutations, geom_col, id_col, w_type) $$ LANGUAGE plpythonu; --- Moran's I Local +-- Moran's I Local (internal function) CREATE OR REPLACE FUNCTION - _CDB_AreasOfInterest_Local( + _CDB_AreasOfInterestLocal( subquery TEXT, attr TEXT, permutations INT, @@ -34,8 +34,9 @@ AS $$ return moran_local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) $$ LANGUAGE plpythonu; +-- Moran's I Local (public-facing function) CREATE OR REPLACE FUNCTION - CDB_AreasOfInterest_Local( + CDB_AreasOfInterestLocal( subquery TEXT, attr TEXT, permutations INT DEFAULT 99, @@ -51,6 +52,7 @@ AS $$ $$ LANGUAGE SQL; +-- Moran's I only for HH and HL (public-facing function) CREATE OR REPLACE FUNCTION CDB_GetSpatialHotspots( subquery TEXT, @@ -69,6 +71,7 @@ AS $$ $$ LANGUAGE SQL; +-- Moran's I only for LL and LH (public-facing function) CREATE OR REPLACE FUNCTION CDB_GetSpatialColdspots( subquery TEXT, @@ -87,6 +90,7 @@ AS $$ $$ LANGUAGE SQL; +-- Moran's I only for LH and HL (public-facing function) CREATE OR REPLACE FUNCTION CDB_GetSpatialOutliers( subquery TEXT, @@ -105,9 +109,9 @@ AS $$ $$ LANGUAGE SQL; --- Moran's I Rate (global) +-- Moran's I Global Rate (public-facing function) CREATE OR REPLACE FUNCTION - CDB_AreasOfInterest_Global_Rate( + CDB_AreasOfInterestGlobalRate( subquery TEXT, numerator TEXT, denominator TEXT, @@ -125,9 +129,9 @@ AS $$ $$ LANGUAGE plpythonu; --- Moran's I Local Rate +-- Moran's I Local Rate (internal function) CREATE OR REPLACE FUNCTION - _CDB_AreasOfInterest_Local_Rate( + _CDB_AreasOfInterestLocalRate( subquery TEXT, numerator TEXT, 
denominator TEXT, @@ -145,8 +149,9 @@ AS $$ return moran_local_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) $$ LANGUAGE plpythonu; +-- Moran's I Local Rate (public-facing function) CREATE OR REPLACE FUNCTION - CDB_AreasOfInterest_Local_Rate( + CDB_AreasOfInterestLocalRate( subquery TEXT, numerator TEXT, denominator TEXT, @@ -164,8 +169,9 @@ AS $$ $$ LANGUAGE SQL; +-- Moran's I Local Rate only for HH and HL (public-facing function) CREATE OR REPLACE FUNCTION - CDB_GetSpatialHotspots_Rate( + CDB_GetSpatialHotspotsRate( subquery TEXT, numerator TEXT, denominator TEXT, @@ -184,8 +190,9 @@ AS $$ $$ LANGUAGE SQL; +-- Moran's I Local Rate only for LL and LH (public-facing function) CREATE OR REPLACE FUNCTION - CDB_GetSpatialColdspots_Rate( + CDB_GetSpatialColdspotsRate( subquery TEXT, numerator TEXT, denominator TEXT, @@ -204,8 +211,9 @@ AS $$ $$ LANGUAGE SQL; +-- Moran's I Local Rate only for LH and HL (public-facing function) CREATE OR REPLACE FUNCTION - CDB_GetSpatialOutliers_Rate( + CDB_GetSpatialOutliersRate( subquery TEXT, numerator TEXT, denominator TEXT, @@ -223,21 +231,3 @@ AS $$ WHERE quads IN ('HL', 'LH'); $$ LANGUAGE SQL; - --- -- Moran's I Local Bivariate --- CREATE OR REPLACE FUNCTION --- cdb_moran_local_bv( --- subquery TEXT, --- attr1 TEXT, --- attr2 TEXT, --- permutations INT DEFAULT 99, --- geom_col TEXT DEFAULT 'the_geom', --- id_col TEXT DEFAULT 'cartodb_id', --- w_type TEXT DEFAULT 'knn', --- num_ngbrs INT DEFAULT 5) --- RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric) --- AS $$ --- from crankshaft.clustering import moran_local_bv --- # TODO: use named parameters or a dictionary --- return moran_local_bv(t, attr1, attr2, permutations, geom_col, id_col, w_type, num_ngbrs) --- $$ LANGUAGE plpythonu; diff --git a/src/pg/test/sql/02_moran_test.sql b/src/pg/test/sql/02_moran_test.sql index e984b0b..ca56186 100644 --- a/src/pg/test/sql/02_moran_test.sql +++ 
b/src/pg/test/sql/02_moran_test.sql @@ -10,7 +10,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints.code, m.quads FROM ppoints - JOIN cdb_crankshaft.CDB_AreasOfInterest_Local('SELECT * FROM ppoints', 'value') m + JOIN cdb_crankshaft.CDB_AreasOfInterestLocal('SELECT * FROM ppoints', 'value') m ON ppoints.cartodb_id = m.ids ORDER BY ppoints.code; @@ -47,7 +47,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); -- Areas of Interest (rate) SELECT ppoints2.code, m.quads FROM ppoints2 - JOIN cdb_crankshaft.CDB_AreasOfInterest_Local_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m + JOIN cdb_crankshaft.CDB_AreasOfInterestLocalRate('SELECT * FROM ppoints2', 'numerator', 'denominator') m ON ppoints2.cartodb_id = m.ids ORDER BY ppoints2.code; @@ -56,7 +56,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); -- Spatial Hotspots (rate) SELECT ppoints2.code, m.quads FROM ppoints2 - JOIN cdb_crankshaft.CDB_GetSpatialHotspots_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m + JOIN cdb_crankshaft.CDB_GetSpatialHotspotsRate('SELECT * FROM ppoints2', 'numerator', 'denominator') m ON ppoints2.cartodb_id = m.ids ORDER BY ppoints2.code; @@ -65,7 +65,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); -- Spatial Coldspots (rate) SELECT ppoints2.code, m.quads FROM ppoints2 - JOIN cdb_crankshaft.CDB_GetSpatialColdspots_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m + JOIN cdb_crankshaft.CDB_GetSpatialColdspotsRate('SELECT * FROM ppoints2', 'numerator', 'denominator') m ON ppoints2.cartodb_id = m.ids ORDER BY ppoints2.code; @@ -74,6 +74,6 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); -- Spatial Outliers (rate) SELECT ppoints2.code, m.quads FROM ppoints2 - JOIN cdb_crankshaft.CDB_GetSpatialOutliers_Rate('SELECT * FROM ppoints2', 'numerator', 'denominator') m + JOIN cdb_crankshaft.CDB_GetSpatialOutliersRate('SELECT * FROM ppoints2', 'numerator', 'denominator') m ON ppoints2.cartodb_id = m.ids ORDER BY ppoints2.code; \ No newline at end 
of file From b5c6b42081060c8cfc89f643620efdb8aced53d3 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 1 Jun 2016 11:33:02 -0400 Subject: [PATCH 07/14] update names to align with CamelCase convention --- src/pg/sql/10_moran.sql | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index b6dfc17..a6e9ec6 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -1,6 +1,6 @@ -- Moran's I Global Measure (public-facing) CREATE OR REPLACE FUNCTION - CDB_AreasOfInterestGlobal ( + CDB_AreasOfInterestGlobal( subquery TEXT, attr_name TEXT, permutations INT DEFAULT 99, @@ -48,7 +48,7 @@ RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMER AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterest_Local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs); + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs); $$ LANGUAGE SQL; @@ -66,7 +66,7 @@ CREATE OR REPLACE FUNCTION AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterest_Local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) WHERE quads IN ('HH', 'HL'); $$ LANGUAGE SQL; @@ -85,7 +85,7 @@ CREATE OR REPLACE FUNCTION AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterest_Local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) WHERE quads IN ('LL', 'LH'); $$ LANGUAGE SQL; @@ -104,7 +104,7 @@ CREATE OR REPLACE FUNCTION AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterest_Local(subquery, attr, permutations, geom_col, id_col, 
w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) WHERE quads IN ('HL', 'LH'); $$ LANGUAGE SQL; @@ -165,7 +165,7 @@ TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterest_Local_Rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs); + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs); $$ LANGUAGE SQL; @@ -185,7 +185,7 @@ TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterest_Local_Rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) WHERE quads IN ('HH', 'HL'); $$ LANGUAGE SQL; @@ -206,7 +206,7 @@ TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterest_Local_Rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) WHERE quads IN ('LL', 'LH'); $$ LANGUAGE SQL; @@ -227,7 +227,7 @@ TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterest_Local_Rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) WHERE quads IN ('HL', 'LH'); $$ 
LANGUAGE SQL; From 2b8adb744d24d4f28a202730b19cca06ecea0319 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 1 Jun 2016 12:09:43 -0400 Subject: [PATCH 08/14] switched signature to put more common options in the front --- src/pg/sql/10_moran.sql | 96 +++++++++---------- .../crankshaft/crankshaft/clustering/moran.py | 8 +- .../crankshaft/test/test_clustering_moran.py | 6 +- 3 files changed, 55 insertions(+), 55 deletions(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index a6e9ec6..fdc2d0d 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -3,17 +3,17 @@ CREATE OR REPLACE FUNCTION CDB_AreasOfInterestGlobal( subquery TEXT, attr_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran NUMERIC, significance NUMERIC) AS $$ plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary - return moran(subquery, attr, num_ngbrs, permutations, geom_col, id_col, w_type) + return moran(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local (internal function) @@ -21,17 +21,17 @@ CREATE OR REPLACE FUNCTION _CDB_AreasOfInterestLocal( subquery TEXT, attr TEXT, + w_type TEXT, + num_ngbrs INT, permutations INT, geom_col TEXT, - id_col TEXT, - w_type TEXT, - num_ngbrs INT) + id_col TEXT) RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary - return moran_local(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) + return moran_local(subquery, attr, w_type, 
num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local (public-facing function) @@ -39,16 +39,16 @@ CREATE OR REPLACE FUNCTION CDB_AreasOfInterestLocal( subquery TEXT, attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs); + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col); $$ LANGUAGE SQL; @@ -57,16 +57,16 @@ CREATE OR REPLACE FUNCTION CDB_GetSpatialHotspots( subquery TEXT, attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HH', 'HL'); $$ LANGUAGE SQL; @@ -76,16 +76,16 @@ CREATE OR REPLACE FUNCTION CDB_GetSpatialColdspots( subquery TEXT, attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, 
ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('LL', 'LH'); $$ LANGUAGE SQL; @@ -95,16 +95,16 @@ CREATE OR REPLACE FUNCTION CDB_GetSpatialOutliers( subquery TEXT, attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HL', 'LH'); $$ LANGUAGE SQL; @@ -115,17 +115,17 @@ CREATE OR REPLACE FUNCTION subquery TEXT, numerator TEXT, denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran FLOAT, significance FLOAT) AS $$ plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary - return moran_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; @@ -135,18 +135,18 @@ CREATE OR REPLACE FUNCTION subquery TEXT, numerator TEXT, denominator TEXT, + w_type TEXT, 
+ num_ngbrs INT, permutations INT, geom_col TEXT, - id_col TEXT, - w_type TEXT, - num_ngbrs INT) + id_col TEXT) RETURNS TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local_rate # TODO: use named parameters or a dictionary - return moran_local_rate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local Rate (public-facing function) @@ -155,17 +155,17 @@ CREATE OR REPLACE FUNCTION subquery TEXT, numerator TEXT, denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs); + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); $$ LANGUAGE SQL; @@ -175,17 +175,17 @@ CREATE OR REPLACE FUNCTION subquery TEXT, numerator TEXT, denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, 
permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HH', 'HL'); $$ LANGUAGE SQL; @@ -196,17 +196,17 @@ CREATE OR REPLACE FUNCTION subquery TEXT, numerator TEXT, denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('LL', 'LH'); $$ LANGUAGE SQL; @@ -217,17 +217,17 @@ CREATE OR REPLACE FUNCTION subquery TEXT, numerator TEXT, denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', - id_col TEXT DEFAULT 'cartodb_id', - w_type TEXT DEFAULT 'knn', - num_ngbrs INT DEFAULT 5) + id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) AS $$ SELECT moran, quads, significance, ids, y - FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, permutations, geom_col, id_col, w_type, num_ngbrs) + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HL', 'LH'); $$ LANGUAGE SQL; diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 2a043c3..39b3ff6 100644 --- 
a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -14,7 +14,7 @@ import crankshaft.pysal_utils as pu # High level interface --------------------------------------- def moran(subquery, attr_name, - permutations, geom_col, id_col, w_type, num_ngbrs): + w_type, num_ngbrs, permutations, geom_col, id_col): """ Moran's I (global) Implementation building neighbors with a PostGIS database and Moran's I @@ -56,7 +56,7 @@ def moran(subquery, attr_name, return zip([moran_global.I], [moran_global.EI]) def moran_local(subquery, attr, - permutations, geom_col, id_col, w_type, num_ngbrs): + w_type, num_ngbrs, permutations, geom_col, id_col): """ Moran's I implementation for PL/Python Andy Eschbacher @@ -96,7 +96,7 @@ def moran_local(subquery, attr, return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) def moran_rate(subquery, numerator, denominator, - permutations, geom_col, id_col, w_type, num_ngbrs): + w_type, num_ngbrs, permutations, geom_col, id_col): """ Moran's I Rate (global) Andy Eschbacher @@ -137,7 +137,7 @@ def moran_rate(subquery, numerator, denominator, return zip([lisa_rate.I], [lisa_rate.EI]) def moran_local_rate(subquery, numerator, denominator, - permutations, geom_col, id_col, w_type, num_ngbrs): + w_type, num_ngbrs, permutations, geom_col, id_col): """ Moran's I Local Rate Andy Eschbacher diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index 29c5bde..393e93b 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -52,7 +52,7 @@ class MoranTest(unittest.TestCase): data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.moran_local('subquery', 'value', 99, 'the_geom', 'cartodb_id', 'knn', 5) + result = cc.moran_local('subquery', 'value', 
'knn', 5, 99, 'the_geom', 'cartodb_id') result = [(row[0], row[1]) for row in result] expected = self.moran_data for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): @@ -64,7 +64,7 @@ class MoranTest(unittest.TestCase): data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data] plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 99, 'the_geom', 'cartodb_id', 'knn', 5) + result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id') print 'result == None? ', result == None result = [(row[0], row[1]) for row in result] expected = self.moran_data @@ -76,7 +76,7 @@ class MoranTest(unittest.TestCase): data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] plpy._define_result('select', data) random_seeds.set_random_seeds(1235) - result = cc.moran('table', 'value', 99, 'the_geom', 'cartodb_id', 'knn', 5) + result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') print 'result == None?', result == None result_moran = result[0][0] expected_moran = np.array([row[0] for row in self.moran_data]).mean() From 3013998e1b64d4688bd3fec8a4cf23980e50efe5 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 1 Jun 2016 12:19:08 -0400 Subject: [PATCH 09/14] make functions more flexible to case of weight type --- src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py index fa06e26..02b5e35 100644 --- a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py +++ b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -11,7 +11,7 @@ def construct_neighbor_query(w_type, query_vals): @param query_vals dict: values used 
to construct the query """ - if w_type == 'knn': + if w_type.lower() == 'knn': return knn(query_vals) else: return queen(query_vals) @@ -22,7 +22,7 @@ def get_weight(query_res, w_type='knn', num_ngbrs=5): Construct PySAL weight from return value of query @param query_res: query results with attributes and neighbors """ - if w_type == 'knn': + if w_type.lower() == 'knn': row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs weights = {x['id']: row_normed_weights for x in query_res} else: From 90c3e21c0dce970b9781ebc9c4dd0251dccefb73 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 1 Jun 2016 12:43:33 -0400 Subject: [PATCH 10/14] renaming output id to rowid --- src/pg/sql/10_moran.sql | 36 +++++++++++++++---------------- src/pg/test/sql/02_moran_test.sql | 20 ++++++++--------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index fdc2d0d..d0730e9 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -26,7 +26,7 @@ CREATE OR REPLACE FUNCTION permutations INT, geom_col TEXT, id_col TEXT) -RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) AS $$ plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local @@ -44,10 +44,10 @@ CREATE OR REPLACE FUNCTION permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') -RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) AS $$ - SELECT moran, quads, significance, ids, y + SELECT moran, quads, significance, rowid, y FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col); $$ LANGUAGE SQL; @@ -62,10 +62,10 @@ CREATE OR REPLACE FUNCTION permutations INT 
DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') - RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) AS $$ - SELECT moran, quads, significance, ids, y + SELECT moran, quads, significance, rowid, y FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HH', 'HL'); @@ -81,10 +81,10 @@ CREATE OR REPLACE FUNCTION permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') - RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) AS $$ - SELECT moran, quads, significance, ids, y + SELECT moran, quads, significance, rowid, y FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('LL', 'LH'); @@ -100,10 +100,10 @@ CREATE OR REPLACE FUNCTION permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') - RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) AS $$ - SELECT moran, quads, significance, ids, y + SELECT moran, quads, significance, rowid, y FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HL', 'LH'); @@ -141,7 +141,7 @@ CREATE OR REPLACE FUNCTION geom_col TEXT, id_col TEXT) RETURNS -TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) AS $$ plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local_rate @@ -161,10 +161,10 @@ CREATE OR 
REPLACE FUNCTION geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') RETURNS -TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) AS $$ - SELECT moran, quads, significance, ids, y + SELECT moran, quads, significance, rowid, y FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); $$ LANGUAGE SQL; @@ -181,10 +181,10 @@ CREATE OR REPLACE FUNCTION geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') RETURNS -TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) AS $$ - SELECT moran, quads, significance, ids, y + SELECT moran, quads, significance, rowid, y FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HH', 'HL'); @@ -202,10 +202,10 @@ CREATE OR REPLACE FUNCTION geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') RETURNS -TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) AS $$ - SELECT moran, quads, significance, ids, y + SELECT moran, quads, significance, rowid, y FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('LL', 'LH'); @@ -223,10 +223,10 @@ CREATE OR REPLACE FUNCTION geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') RETURNS -TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, ids INT, y NUMERIC) +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) AS $$ - SELECT moran, quads, significance, ids, y + SELECT moran, quads, significance, rowid, y FROM 
cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HL', 'LH'); diff --git a/src/pg/test/sql/02_moran_test.sql b/src/pg/test/sql/02_moran_test.sql index ca56186..5545a4a 100644 --- a/src/pg/test/sql/02_moran_test.sql +++ b/src/pg/test/sql/02_moran_test.sql @@ -11,7 +11,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints.code, m.quads FROM ppoints JOIN cdb_crankshaft.CDB_AreasOfInterestLocal('SELECT * FROM ppoints', 'value') m - ON ppoints.cartodb_id = m.ids + ON ppoints.cartodb_id = m.rowid ORDER BY ppoints.code; SELECT cdb_crankshaft._cdb_random_seeds(1234); @@ -20,7 +20,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints.code, m.quads FROM ppoints JOIN cdb_crankshaft.CDB_GetSpatialHotspots('SELECT * FROM ppoints', 'value') m - ON ppoints.cartodb_id = m.ids + ON ppoints.cartodb_id = m.rowid ORDER BY ppoints.code; SELECT cdb_crankshaft._cdb_random_seeds(1234); @@ -29,7 +29,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints.code, m.quads FROM ppoints JOIN cdb_crankshaft.CDB_GetSpatialColdspots('SELECT * FROM ppoints', 'value') m - ON ppoints.cartodb_id = m.ids + ON ppoints.cartodb_id = m.rowid ORDER BY ppoints.code; SELECT cdb_crankshaft._cdb_random_seeds(1234); @@ -38,7 +38,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints.code, m.quads FROM ppoints JOIN cdb_crankshaft.CDB_GetSpatialOutliers('SELECT * FROM ppoints', 'value') m - ON ppoints.cartodb_id = m.ids + ON ppoints.cartodb_id = m.rowid ORDER BY ppoints.code; @@ -48,16 +48,16 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints2.code, m.quads FROM ppoints2 JOIN cdb_crankshaft.CDB_AreasOfInterestLocalRate('SELECT * FROM ppoints2', 'numerator', 'denominator') m - ON ppoints2.cartodb_id = m.ids + ON ppoints2.cartodb_id = m.rowid ORDER BY ppoints2.code; - + SELECT cdb_crankshaft._cdb_random_seeds(1234); -- Spatial Hotspots (rate) SELECT 
ppoints2.code, m.quads FROM ppoints2 JOIN cdb_crankshaft.CDB_GetSpatialHotspotsRate('SELECT * FROM ppoints2', 'numerator', 'denominator') m - ON ppoints2.cartodb_id = m.ids + ON ppoints2.cartodb_id = m.rowid ORDER BY ppoints2.code; SELECT cdb_crankshaft._cdb_random_seeds(1234); @@ -66,7 +66,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints2.code, m.quads FROM ppoints2 JOIN cdb_crankshaft.CDB_GetSpatialColdspotsRate('SELECT * FROM ppoints2', 'numerator', 'denominator') m - ON ppoints2.cartodb_id = m.ids + ON ppoints2.cartodb_id = m.rowid ORDER BY ppoints2.code; SELECT cdb_crankshaft._cdb_random_seeds(1234); @@ -75,5 +75,5 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints2.code, m.quads FROM ppoints2 JOIN cdb_crankshaft.CDB_GetSpatialOutliersRate('SELECT * FROM ppoints2', 'numerator', 'denominator') m - ON ppoints2.cartodb_id = m.ids - ORDER BY ppoints2.code; \ No newline at end of file + ON ppoints2.cartodb_id = m.rowid + ORDER BY ppoints2.code; From 59c520da165d8a232a470452c4eab6105aff557c Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 1 Jun 2016 13:40:32 -0400 Subject: [PATCH 11/14] renaming output value to vals --- src/pg/sql/10_moran.sql | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index d0730e9..9e7adf2 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -26,7 +26,7 @@ CREATE OR REPLACE FUNCTION permutations INT, geom_col TEXT, id_col TEXT) -RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local @@ -44,10 +44,10 @@ CREATE OR REPLACE FUNCTION permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') -RETURNS TABLE 
(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - SELECT moran, quads, significance, rowid, y + SELECT moran, quads, significance, rowid, vals FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col); $$ LANGUAGE SQL; @@ -62,10 +62,10 @@ CREATE OR REPLACE FUNCTION permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') - RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - SELECT moran, quads, significance, rowid, y + SELECT moran, quads, significance, rowid, vals FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HH', 'HL'); @@ -81,10 +81,10 @@ CREATE OR REPLACE FUNCTION permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') - RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - SELECT moran, quads, significance, rowid, y + SELECT moran, quads, significance, rowid, vals FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('LL', 'LH'); @@ -100,10 +100,10 @@ CREATE OR REPLACE FUNCTION permutations INT DEFAULT 99, geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') - RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - SELECT moran, quads, significance, rowid, y + SELECT moran, quads, significance, rowid, vals FROM 
cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HL', 'LH'); @@ -141,7 +141,7 @@ CREATE OR REPLACE FUNCTION geom_col TEXT, id_col TEXT) RETURNS -TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local_rate @@ -161,10 +161,10 @@ CREATE OR REPLACE FUNCTION geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') RETURNS -TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - SELECT moran, quads, significance, rowid, y + SELECT moran, quads, significance, rowid, vals FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); $$ LANGUAGE SQL; @@ -181,10 +181,10 @@ CREATE OR REPLACE FUNCTION geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') RETURNS -TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - SELECT moran, quads, significance, rowid, y + SELECT moran, quads, significance, rowid, vals FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HH', 'HL'); @@ -202,10 +202,10 @@ CREATE OR REPLACE FUNCTION geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') RETURNS -TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - SELECT moran, quads, significance, rowid, y + SELECT moran, quads, significance, rowid, vals FROM 
cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('LL', 'LH'); @@ -223,10 +223,10 @@ CREATE OR REPLACE FUNCTION geom_col TEXT DEFAULT 'the_geom', id_col TEXT DEFAULT 'cartodb_id') RETURNS -TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, y NUMERIC) +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - SELECT moran, quads, significance, rowid, y + SELECT moran, quads, significance, rowid, vals FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HL', 'LH'); From 0e3970f52c12009492e47322ce9b70ba5e9c027a Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 1 Jun 2016 13:46:12 -0400 Subject: [PATCH 12/14] adds docs for areasofinterestlocal --- doc/02_moran.md | 127 ++++++++++++++++++------------------------------ 1 file changed, 46 insertions(+), 81 deletions(-) diff --git a/doc/02_moran.md b/doc/02_moran.md index c91eb3f..095c44d 100644 --- a/doc/02_moran.md +++ b/doc/02_moran.md @@ -1,8 +1,50 @@ -## Name +## Areas of Interest Functions -CDB_AreasOfInterest -- returns a table with a cluster/outlier classification, the significance of a classification, an autocorrelation statistic (Local Moran's I), and the geometry id for each geometry in the original dataset. -## Synopsis + +### CDB_AreasOfInterestLocal(subquery text, column_name text) + +This function classifies your data as being part of a cluster, as an outlier, or or not part of a pattern based the significance of a classification. The classification happens through an autocorrelation statistic called Local Moran's I. + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). 
This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments | +| column_name | TEXT | Name of column (e.g., should `'interesting_value'` instead of `interesting_value` without single quotes) used for the analysis. | +| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySal's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). | +| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. | +| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. | +| geom_col | TEXT | The column name for the geometries. Defaults to `'the_geom'` | +| id_col | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. | + +#### Returns + +A table with the following columns. + +| Column Name | Type | Description | +|-------------|------|-------------| +| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the geometry with id of `rowid` | +| quads | TEXT | Classification of geometry. Result is one of 'HH' (a high value with neighbors high on average), 'LL' (opposite of 'HH'), 'HL' (a high value surrounded by lows on average), and 'LH' (opposite of 'HL'). Null values are returned when nulls exist in the original data. | +| significance | NUMERIC | The statistical significance (from 0 to 1) of a cluster or outlier classification. Lower numbers are more significant. | +| rowid | INT | Row id of the values which correspond to the input rows. | +| vals | NUMERIC | Values from `'column_name'`. 
| + + +#### Example Usage + +```sql +SELECT + c.the_geom, + aoi.quads, + aoi.significance, + c.cyclists_per_total_population +FROM CDB_GetAreasOfInterestLocal('SELECT * FROM commute_data' + 'cyclists_per_total_population') As aoi +JOIN commute_data As c +ON c.cartodb_id = aoi.rowid; +``` + ```sql table(numeric moran_val, text quadrant, numeric significance, int ids, numeric column_values) CDB_AreasOfInterest(text query, text column_name) @@ -89,81 +131,4 @@ crankshaft's areas of interest functions: * [CDB_AreasOfInterest_Rate_Local]() * [CDB_AreasOfInterest_Rate_Global]() - -PostGIS clustering functions: - -* [ST_ClusterIntersecting](http://postgis.net/docs/manual-2.2/ST_ClusterIntersecting.html) -* [ST_ClusterWithin](http://postgis.net/docs/manual-2.2/ST_ClusterWithin.html) - - --- removing below, working into above - -#### What is Moran's I and why is it significant for CartoDB? - -Moran's I is a geostatistical calculation which gives a measure of the global -clustering and presence of outliers within the geographies in a map. Here global -means over all of the geographies in a dataset. Imagine mapping the incidence -rates of cancer in neighborhoods of a city. If there were areas covering several -neighborhoods with abnormally low rates of cancer, those areas are positively -spatially correlated with one another and would be considered a cluster. If -there was a single neighborhood with a high rate but with all neighbors on -average having a low rate, it would be considered a spatial outlier. - -While Moran's I gives a global snapshot, there are local indicators for -clustering called Local Indicators of Spatial Autocorrelation. Clustering is a -process related to autocorrelation -- i.e., a process that compares a -geography's attribute to the attribute in neighbor geographies. 
- -For the example of cancer rates in neighborhoods, since these neighborhoods have -a high value for rate of cancer, and all of their neighbors do as well, they are -designated as "High High" or simply **HH**. For areas with multiple neighborhoods -with low rates of cancer, they are designated as "Low Low" or **LL**. HH and LL -naturally fit into the concept of clustering and are in the correlated -variables. - -"Anticorrelated" geogs are in **LH** and **HL** regions -- that is, regions -where a geog has a high value and it's neighbors, on average, have a low value -(or vice versa). An example of this is a "gated community" or placement of a -city housing project in a rich region. These deliberate developments have -opposite median income as compared to the neighbors around them. They have a -high (or low) value while their neighbors have a low (or high) value. They exist -typically as islands, and in rare circumstances can extend as chains dividing -**LL** or **HH**. - -Strong policies such as rent stabilization (probably) tend to prevent the -clustering of high rent areas as they integrate middle class incomes. Luxury -apartment buildings, which are a kind of gated community, probably tend to skew -an area's median income upwards while housing projects have the opposite effect. -What are the nuggets in the analysis? - -Two functions are available to compute Moran I statistics: - -* `cdb_moran_local` computes Moran I measures, quad classification and - significance values from numerial values associated to geometry entities - in an input table. The geometries should be contiguous polygons When - then `queen` `w_type` is used. -* `cdb_moran_local_rate` computes the same statistics using a ratio between - numerator and denominator columns of a table. 
- -The parameters for `cdb_moran_local` are: - -* `table` name of the table that contains the data values -* `attr` name of the column -* `signficance` significance threshold for the quads values -* `num_ngbrs` number of neighbors to consider (default: 5) -* `permutations` number of random permutations for calculation of - pseudo-p values (default: 99) -* `geom_column` number of the geometry column (default: "the_geom") -* `id_col` PK column of the table (default: "cartodb_id") -* `w_type` Weight types: can be "knn" for k-nearest neighbor weights - or "queen" for contiguity based weights. - -The function returns a table with the following columns: - -* `moran` Moran's value -* `quads` quad classification ('HH', 'LL', 'HL', 'LH' or 'Not significant') -* `significance` significance value -* `ids` id of the corresponding record in the input table - -Function `cdb_moran_local_rate` only differs in that the `attr` input -parameter is substituted by `numerator` and `denominator`. +## Hotspot, Coldspot, and Outlier Functions From ae1bb703a74ff29700d87c2a6041405ff151540a Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 1 Jun 2016 15:06:58 -0400 Subject: [PATCH 13/14] descriptions for all functions --- doc/02_moran.md | 237 +++++++++++++++++++++++++++++------------------- 1 file changed, 144 insertions(+), 93 deletions(-) diff --git a/doc/02_moran.md b/doc/02_moran.md index 095c44d..2d9ae99 100644 --- a/doc/02_moran.md +++ b/doc/02_moran.md @@ -1,22 +1,20 @@ ## Areas of Interest Functions - - ### CDB_AreasOfInterestLocal(subquery text, column_name text) -This function classifies your data as being part of a cluster, as an outlier, or or not part of a pattern based the significance of a classification. The classification happens through an autocorrelation statistic called Local Moran's I. +This function classifies your data as being part of a cluster, as an outlier, or not part of a pattern based on the significance of a classification. 
The classification happens through an autocorrelation statistic called Local Moran's I. #### Arguments | Name | Type | Description | |------|------|-------------| | subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments | -| column_name | TEXT | Name of column (e.g., should `'interesting_value'` instead of `interesting_value` without single quotes) used for the analysis. | -| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySal's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). | +| column_name | TEXT | Name of column (e.g., should be `'interesting_value'` instead of `interesting_value` without single quotes) used for the analysis. | +| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). | | num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. | | permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. | -| geom_col | TEXT | The column name for the geometries. Defaults to `'the_geom'` | -| id_col | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. | +| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` | +| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. 
Defaults to `'cartodb_id'`. | #### Returns @@ -24,8 +22,84 @@ A table with the following columns. | Column Name | Type | Description | |-------------|------|-------------| -| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the geometry with id of `rowid` | -| quads | TEXT | Classification of geometry. Result is one of 'HH' (a high value with neighbors high on average), 'LL' (opposite of 'HH'), 'HL' (a high value surrounded by lows on average), and 'LH' (opposite of 'HL'). Null values are returned when nulls exist in the original data. | +| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the geometry with id of `rowid` | +| quads | TEXT | Classification of geometry. Result is one of 'HH' (a high value with neighbors high on average), 'LL' (opposite of 'HH'), 'HL' (a high value surrounded by lows on average), and 'LH' (opposite of 'HL'). Null values are returned when nulls exist in the original data. | +| significance | NUMERIC | The statistical significance (from 0 to 1) of a cluster or outlier classification. Lower numbers are more significant. | +| rowid | INT | Row id of the values which correspond to the input rows. | +| vals | NUMERIC | Values from `'column_name'`. | + + +#### Example Usage + +```sql +SELECT + c.the_geom, + aoi.quads, + aoi.significance, + c.num_cyclists_per_total_population +FROM CDB_AreasOfInterestLocal('SELECT * FROM commute_data', + 'num_cyclists_per_total_population') As aoi +JOIN commute_data As c +ON c.cartodb_id = aoi.rowid; +``` + +### CDB_AreasOfInterestGlobal(subquery text, column_name text) + +This function identifies the extent to which geometries cluster (the groupings of geometries with similarly high or low values relative to the mean) or form outliers (areas where geometries have values opposite of their neighbors). The output of this function gives values between -1 and 1 as well as a significance of that classification. 
Values close to 0 mean that there is little to no distribution of values as compared to what one would see in a randomly distributed collection of geometries and values. + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments | +| column_name | TEXT | Name of column (e.g., should be `'interesting_value'` instead of `interesting_value` without single quotes) used for the analysis. | +| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). | +| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. | +| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. | +| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` | +| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. | + +#### Returns + +A table with the following columns. + +| Column Name | Type | Description | +|-------------|------|-------------| +| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the entire dataset. Values closer to one indicate cluster, closer to -1 mean more outliers, and near zero indicates a random distribution of data. | +| significance | NUMERIC | The statistical significance of the `moran` measure. 
| + +#### Examples + +```sql +SELECT * +FROM CDB_AreasOfInterestGlobal('SELECT * FROM commute_data', 'num_cyclists_per_total_population') +``` + +### CDB_AreasOfInterestLocalRate(subquery text, numerator_column text, denominator_column text) + +Just like `CDB_AreasOfInterestLocal`, this function classifies your data as being part of a cluster, as an outlier, or not part of a pattern based on the significance of a classification. This function differs in that it calculates the classifications based on input `numerator` and `denominator` columns for finding the areas where there are clusters and outliers for the resulting rate of those two values. + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments | +| numerator | TEXT | Name of the numerator for forming a rate to be used in analysis. | +| denominator | TEXT | Name of the denominator for forming a rate to be used in analysis. | +| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). | +| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. | +| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. | +| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` | +| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. 
| + +#### Returns + +A table with the following columns. + +| Column Name | Type | Description | +|-------------|------|-------------| +| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the geometry with id of `rowid` | +| quads | TEXT | Classification of geometry. Result is one of 'HH' (a high value with neighbors high on average), 'LL' (opposite of 'HH'), 'HL' (a high value surrounded by lows on average), and 'LH' (opposite of 'HL'). Null values are returned when nulls exist in the original data. | | significance | NUMERIC | The statistical significance (from 0 to 1) of a cluster or outlier classification. Lower numbers are more significant. | | rowid | INT | Row id of the values which correspond to the input rows. | | vals | NUMERIC | Values from `'column_name'`. | @@ -39,96 +113,73 @@ SELECT aoi.quads, aoi.significance, c.cyclists_per_total_population -FROM CDB_GetAreasOfInterestLocal('SELECT * FROM commute_data' - 'cyclists_per_total_population') As aoi +FROM CDB_AreasOfInterestLocalRate('SELECT * FROM commute_data', + 'num_cyclists', + 'total_population') As aoi JOIN commute_data As c ON c.cartodb_id = aoi.rowid; ``` +### CDB_AreasOfInterestGlobalRate(subquery text, numerator_column text, denominator_column text) + +This function identifies the extent to which geometries cluster (the groupings of geometries with similarly high or low values relative to the mean) or form outliers (areas where geometries have values opposite of their neighbors). The output of this function gives values between -1 and 1 as well as a significance of that classification. Values close to 0 mean that there is little to no distribution of values as compared to what one would see in a randomly distributed collection of geometries and values. + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). 
This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments | +| numerator | TEXT | Name of the numerator for forming a rate to be used in analysis. | +| denominator | TEXT | Name of the denominator for forming a rate to be used in analysis. | +| weight type (optional) | TEXT | Type of weight to use when finding neighbors. Currently available options are 'knn' (default) and 'queen'. Read more about weight types in [PySAL's weights documentation](https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/weights.html). | +| num_ngbrs (optional) | INT | Number of neighbors if using k-nearest neighbors weight type. Defaults to 5. | +| permutations (optional) | INT | Number of permutations to check against a random arrangement of the values in `column_name`. This influences the accuracy of the output field `significance`. Defaults to 99. | +| geom_col (optional) | TEXT | The column name for the geometries. Defaults to `'the_geom'` | +| id_col (optional) | TEXT | The column name for the unique ID of each geometry/value pair. Defaults to `'cartodb_id'`. | + +#### Returns + +A table with the following columns. + +| Column Name | Type | Description | +|-------------|------|-------------| +| moran | NUMERIC | Value of Moran's I (spatial autocorrelation measure) for the entire dataset. Values closer to one indicate cluster, closer to -1 mean more outliers, and near zero indicates a random distribution of data. | +| significance | NUMERIC | The statistical significance of the `moran` measure. 
| + +#### Examples ```sql -table(numeric moran_val, text quadrant, numeric significance, int ids, numeric column_values) CDB_AreasOfInterest(text query, text column_name) - -table(numeric moran_val, text quadrant, numeric significance, int ids, numeric column_values) CDB_AreasOfInterest(text query, text column_name, int permutations, text geom_column, text id_column, text weight_type, int num_ngbrs) +SELECT * +FROM CDB_AreasOfInterestGlobalRate('SELECT * FROM commute_data', + 'num_cyclists', + 'total_population') ``` -## Description - -CDB_AreasOfInterest is a table-returning function that classifies the geometries in a table by an attribute and gives a significance for that classification. This information can be used to find "Areas of Interest" by using the correlation of a geometry's attribute with that of its neighbors. Areas can be clusters, outliers, or neither (depending on which significance value is used). - -Inputs: - -* `query` (required): an arbitrary query against tables you have access to (e.g., in your account, shared in your organization, or through the Data Observatory). This string must contain the following columns: an id `INT` (e.g., `cartodb_id`), geometry (e.g., `the_geom`), and the numeric attribute which is specified in `column_name` -* `column_name` (required): column to perform the area of interest analysis tool on. The data must be numeric (e.g., `float`, `int`, etc.) -* `permutations` (optional): used to calculate the significance of a classification. Defaults to 99, which is sufficient in most situations. -* `geom_column` (optional): the name of the geometry column. Data must be of type `geometry`. -* `id_column` (optional): the name of the id column (e.g., `cartodb_id`). Data must be of type `int` or `bigint` and have a unique condition on the data. -* `weight_type` (optional): the type of weight used for determining what defines a neighborhood. Options are `knn` or `queen`. 
-* `num_ngbrs` (optional): the number of neighbors in a neighborhood around a geometry. Only used if `knn` is chosen above. - -Outputs: - -* `moran_val`: underlying correlation statistic used in analysis -* `quadrant`: human-readable interpretation of classification -* `significance`: significance of classification (closer to 0 is more significant) -* `ids`: id of original geometry (used for joining against original table if desired -- see examples) -* `column_values`: original column values from `column_name` - -Availability: crankshaft v0.0.1 and above - -## Examples - -```sql -SELECT - t.the_geom_webmercator, - t.cartodb_id, - aoi.significance, - aoi.quadrant As aoi_quadrant -FROM - observatory.acs2013 As t -JOIN - crankshaft.CDB_AreasOfInterest('SELECT * FROM observatory.acs2013', - 'gini_index') -``` - -## API Usage - -Example - -```text -http://eschbacher.cartodb.com/api/v2/sql?q=SELECT * FROM crankshaft.CDB_AreasOfInterest('SELECT * FROM observatory.acs2013','gini_index') -``` - -Result -```json -{ - time: 0.120, - total_rows: 100, - rows: [{ - moran_vals: 0.7213, - quadrant: 'High area', - significance: 0.03, - ids: 1, - column_value: 0.22 - }, - { - moran_vals: -0.7213, - quadrant: 'Low outlier', - significance: 0.13, - ids: 2, - column_value: 0.03 - }, - ... - ] -} -``` - -## See Also - -crankshaft's areas of interest functions: - -* [CDB_AreasOfInterest_Global]() -* [CDB_AreasOfInterest_Rate_Local]() -* [CDB_AreasOfInterest_Rate_Global]() - ## Hotspot, Coldspot, and Outlier Functions + +These functions are convenience functions for extracting only information that you are interested in exposing based on the outputs of the `CDB_AreasOfInterest` functions. For instance, you can use `CDB_GetSpatialHotspots` to output only the classifications of `HH` and `HL`. 
+ +### Non-rate functions + +#### CDB_GetSpatialHotspots +This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocal` except that the outputs are filtered to be only 'HH' and 'HL' (areas of high values). For more information about this function's use, see `CDB_AreasOfInterestLocal`. + +#### CDB_GetSpatialColdspots +This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocal` except that the outputs are filtered to be only 'LL' and 'LH' (areas of low values). For more information about this function's use, see `CDB_AreasOfInterestLocal`. + +#### CDB_GetSpatialOutliers +This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocal` except that the outputs are filtered to be only 'HL' and 'LH' (areas where highs or lows are surrounded by opposite values on average). For more information about this function's use, see `CDB_AreasOfInterestLocal`. + +### Rate functions + +#### CDB_GetSpatialHotspotsRate + +This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocalRate` except that the outputs are filtered to be only 'HH' and 'HL' (areas of high values). For more information about this function's use, see `CDB_AreasOfInterestLocalRate`. + +#### CDB_GetSpatialColdspotsRate + +This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocalRate` except that the outputs are filtered to be only 'LL' and 'LH' (areas of low values). For more information about this function's use, see `CDB_AreasOfInterestLocalRate`. + +#### CDB_GetSpatialOutliersRate + +This function's inputs and outputs exactly mirror `CDB_AreasOfInterestLocalRate` except that the outputs are filtered to be only 'HL' and 'LH' (areas where highs or lows are surrounded by opposite values on average). For more information about this function's use, see `CDB_AreasOfInterestLocalRate`. 
From c04e15ef810c8fadc151dffe277597670e5ce1f2 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 1 Jun 2016 15:07:16 -0400 Subject: [PATCH 14/14] rename some variables --- src/pg/sql/10_moran.sql | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index 9e7adf2..a336867 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -2,7 +2,7 @@ CREATE OR REPLACE FUNCTION CDB_AreasOfInterestGlobal( subquery TEXT, - attr_name TEXT, + column_name TEXT, w_type TEXT DEFAULT 'knn', num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, @@ -13,14 +13,14 @@ AS $$ plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary - return moran(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local (internal function) CREATE OR REPLACE FUNCTION _CDB_AreasOfInterestLocal( subquery TEXT, - attr TEXT, + column_name TEXT, w_type TEXT, num_ngbrs INT, permutations INT, @@ -31,14 +31,14 @@ AS $$ plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary - return moran_local(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; -- Moran's I Local (public-facing function) CREATE OR REPLACE FUNCTION CDB_AreasOfInterestLocal( subquery TEXT, - attr TEXT, + column_name TEXT, w_type TEXT DEFAULT 'knn', num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, @@ -48,7 +48,7 @@ RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals AS $$ SELECT moran, quads, significance, rowid, vals - FROM 
cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col); + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); $$ LANGUAGE SQL; @@ -56,7 +56,7 @@ $$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION CDB_GetSpatialHotspots( subquery TEXT, - attr TEXT, + column_name TEXT, w_type TEXT DEFAULT 'knn', num_ngbrs INT DEFAULT 5, permutations INT DEFAULT 99, @@ -66,7 +66,7 @@ CREATE OR REPLACE FUNCTION AS $$ SELECT moran, quads, significance, rowid, vals - FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) WHERE quads IN ('HH', 'HL'); $$ LANGUAGE SQL;