Add existing release 0.0.1
The existing 0.0.1 files are placed into their location in release/
This commit is contained in:
parent
692014d694
commit
4903af6cdc
148
release/crankshaft--0.0.1.sql
Normal file
148
release/crankshaft--0.0.1.sql
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES
|
||||||
|
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||||
|
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
||||||
|
-- Internal function.
|
||||||
|
-- Set the seeds of the RNGs (Random Number Generators)
|
||||||
|
-- used internally.
|
||||||
|
CREATE OR REPLACE FUNCTION
|
||||||
|
_cdb_random_seeds (seed_value INTEGER) RETURNS VOID
|
||||||
|
AS $$
|
||||||
|
from crankshaft import random_seeds
|
||||||
|
random_seeds.set_random_seeds(seed_value)
|
||||||
|
$$ LANGUAGE plpythonu;
|
||||||
|
-- Moran's I
|
||||||
|
CREATE OR REPLACE FUNCTION
|
||||||
|
cdb_moran_local (
|
||||||
|
t TEXT,
|
||||||
|
attr TEXT,
|
||||||
|
significance float DEFAULT 0.05,
|
||||||
|
num_ngbrs INT DEFAULT 5,
|
||||||
|
permutations INT DEFAULT 99,
|
||||||
|
geom_column TEXT DEFAULT 'the_geom',
|
||||||
|
id_col TEXT DEFAULT 'cartodb_id',
|
||||||
|
w_type TEXT DEFAULT 'knn')
|
||||||
|
RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT)
|
||||||
|
AS $$
|
||||||
|
from crankshaft.clustering import moran_local
|
||||||
|
# TODO: use named parameters or a dictionary
|
||||||
|
return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
|
||||||
|
$$ LANGUAGE plpythonu;
|
||||||
|
|
||||||
|
-- Moran's I Local Rate
|
||||||
|
CREATE OR REPLACE FUNCTION
|
||||||
|
cdb_moran_local_rate(t TEXT,
|
||||||
|
numerator TEXT,
|
||||||
|
denominator TEXT,
|
||||||
|
significance FLOAT DEFAULT 0.05,
|
||||||
|
num_ngbrs INT DEFAULT 5,
|
||||||
|
permutations INT DEFAULT 99,
|
||||||
|
geom_column TEXT DEFAULT 'the_geom',
|
||||||
|
id_col TEXT DEFAULT 'cartodb_id',
|
||||||
|
w_type TEXT DEFAULT 'knn')
|
||||||
|
RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric)
|
||||||
|
AS $$
|
||||||
|
from crankshaft.clustering import moran_local_rate
|
||||||
|
# TODO: use named parameters or a dictionary
|
||||||
|
return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type)
|
||||||
|
$$ LANGUAGE plpythonu;
|
||||||
|
-- Function by Stuart Lynn for a simple interpolation of a value
|
||||||
|
-- from a polygon table over an arbitrary polygon
|
||||||
|
-- (weighted by the area proportion overlapped)
|
||||||
|
-- Aereal weighting is a very simple form of aereal interpolation.
|
||||||
|
--
|
||||||
|
-- Parameters:
|
||||||
|
-- * geom a Polygon geometry which defines the area where a value will be
|
||||||
|
-- estimated as the area-weighted sum of a given table/column
|
||||||
|
-- * target_table_name table name of the table that provides the values
|
||||||
|
-- * target_column column name of the column that provides the values
|
||||||
|
-- * schema_name optional parameter to defina the schema the target table
|
||||||
|
-- belongs to, which is necessary if its not in the search_path.
|
||||||
|
-- Note that target_table_name should never include the schema in it.
|
||||||
|
-- Return value:
|
||||||
|
-- Aereal-weighted interpolation of the column values over the geometry
|
||||||
|
CREATE OR REPLACE
|
||||||
|
FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL)
|
||||||
|
RETURNS numeric AS
|
||||||
|
$$
|
||||||
|
DECLARE
|
||||||
|
result numeric;
|
||||||
|
qualified_name text;
|
||||||
|
BEGIN
|
||||||
|
IF schema_name IS NULL THEN
|
||||||
|
qualified_name := Format('%I', target_table_name);
|
||||||
|
ELSE
|
||||||
|
qualified_name := Format('%I.%s', schema_name, target_table_name);
|
||||||
|
END IF;
|
||||||
|
EXECUTE Format('
|
||||||
|
SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom))
|
||||||
|
FROM %s AS a
|
||||||
|
WHERE $1 && a.the_geom
|
||||||
|
', target_column, qualified_name)
|
||||||
|
USING geom
|
||||||
|
INTO result;
|
||||||
|
RETURN result;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
--
|
||||||
|
-- Creates N points randomly distributed arround the polygon
|
||||||
|
--
|
||||||
|
-- @param g - the geometry to be turned in to points
|
||||||
|
--
|
||||||
|
-- @param no_points - the number of points to generate
|
||||||
|
--
|
||||||
|
-- @params max_iter_per_point - the function generates points in the polygon's bounding box
|
||||||
|
-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many
|
||||||
|
-- misses per point the funciton accepts before giving up.
|
||||||
|
--
|
||||||
|
-- Returns: Multipoint with the requested points
|
||||||
|
CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000)
|
||||||
|
RETURNS GEOMETRY AS $$
|
||||||
|
DECLARE
|
||||||
|
extent GEOMETRY;
|
||||||
|
test_point Geometry;
|
||||||
|
width NUMERIC;
|
||||||
|
height NUMERIC;
|
||||||
|
x0 NUMERIC;
|
||||||
|
y0 NUMERIC;
|
||||||
|
xp NUMERIC;
|
||||||
|
yp NUMERIC;
|
||||||
|
no_left INTEGER;
|
||||||
|
remaining_iterations INTEGER;
|
||||||
|
points GEOMETRY[];
|
||||||
|
bbox_line GEOMETRY;
|
||||||
|
intersection_line GEOMETRY;
|
||||||
|
BEGIN
|
||||||
|
extent := ST_Envelope(geom);
|
||||||
|
width := ST_XMax(extent) - ST_XMIN(extent);
|
||||||
|
height := ST_YMax(extent) - ST_YMIN(extent);
|
||||||
|
x0 := ST_XMin(extent);
|
||||||
|
y0 := ST_YMin(extent);
|
||||||
|
no_left := no_points;
|
||||||
|
|
||||||
|
LOOP
|
||||||
|
if(no_left=0) THEN
|
||||||
|
EXIT;
|
||||||
|
END IF;
|
||||||
|
yp = y0 + height*random();
|
||||||
|
bbox_line = ST_MakeLine(
|
||||||
|
ST_SetSRID(ST_MakePoint(yp, x0),4326),
|
||||||
|
ST_SetSRID(ST_MakePoint(yp, x0+width),4326)
|
||||||
|
);
|
||||||
|
intersection_line = ST_Intersection(bbox_line,geom);
|
||||||
|
test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random());
|
||||||
|
points := points || test_point;
|
||||||
|
no_left = no_left - 1 ;
|
||||||
|
END LOOP;
|
||||||
|
RETURN ST_Collect(points);
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql VOLATILE;
|
||||||
|
-- Make sure by default there are no permissions for publicuser
|
||||||
|
-- NOTE: this happens at extension creation time, as part of an implicit transaction.
|
||||||
|
-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE;
|
||||||
|
|
||||||
|
-- Grant permissions on the schema to publicuser (but just the schema)
|
||||||
|
GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser;
|
||||||
|
|
||||||
|
-- Revoke execute permissions on all functions in the schema by default
|
||||||
|
-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser;
|
5
release/crankshaft.control
Normal file
5
release/crankshaft.control
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
comment = 'CartoDB Spatial Analysis extension'
|
||||||
|
default_version = '0.0.1'
|
||||||
|
requires = 'plpythonu, postgis, cartodb'
|
||||||
|
superuser = true
|
||||||
|
schema = cdb_crankshaft
|
2
release/python/0.0.1/crankshaft/crankshaft/__init__.py
Normal file
2
release/python/0.0.1/crankshaft/crankshaft/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
import random_seeds
|
||||||
|
import clustering
|
@ -0,0 +1 @@
|
|||||||
|
from moran import *
|
321
release/python/0.0.1/crankshaft/crankshaft/clustering/moran.py
Normal file
321
release/python/0.0.1/crankshaft/crankshaft/clustering/moran.py
Normal file
@ -0,0 +1,321 @@
|
|||||||
|
"""
|
||||||
|
Moran's I geostatistics (global clustering & outliers presence)
|
||||||
|
"""
|
||||||
|
|
||||||
|
# TODO: Fill in local neighbors which have null/NoneType values with the
|
||||||
|
# average of the their neighborhood
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pysal as ps
|
||||||
|
import plpy
|
||||||
|
|
||||||
|
# High level interface ---------------------------------------
|
||||||
|
|
||||||
|
def moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type):
|
||||||
|
"""
|
||||||
|
Moran's I implementation for PL/Python
|
||||||
|
Andy Eschbacher
|
||||||
|
"""
|
||||||
|
# TODO: ensure that the significance output can be smaller that 1e-3 (0.001)
|
||||||
|
# TODO: make a wishlist of output features (zscores, pvalues, raw local lisa, what else?)
|
||||||
|
|
||||||
|
plpy.notice('** Constructing query')
|
||||||
|
|
||||||
|
# geometries with attributes that are null are ignored
|
||||||
|
# resulting in a collection of not as near neighbors
|
||||||
|
|
||||||
|
qvals = {"id_col": id_col,
|
||||||
|
"attr1": attr,
|
||||||
|
"geom_col": geom_column,
|
||||||
|
"table": t,
|
||||||
|
"num_ngbrs": num_ngbrs}
|
||||||
|
|
||||||
|
q = get_query(w_type, qvals)
|
||||||
|
|
||||||
|
try:
|
||||||
|
r = plpy.execute(q)
|
||||||
|
plpy.notice('** Query returned with %d rows' % len(r))
|
||||||
|
except plpy.SPIError:
|
||||||
|
plpy.notice('** Query failed: "%s"' % q)
|
||||||
|
plpy.notice('** Exiting function')
|
||||||
|
return zip([None], [None], [None], [None])
|
||||||
|
|
||||||
|
y = get_attributes(r, 1)
|
||||||
|
w = get_weight(r, w_type)
|
||||||
|
|
||||||
|
# calculate LISA values
|
||||||
|
lisa = ps.Moran_Local(y, w)
|
||||||
|
|
||||||
|
# find units of significance
|
||||||
|
lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance)
|
||||||
|
|
||||||
|
plpy.notice('** Finished calculations')
|
||||||
|
|
||||||
|
return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order)
|
||||||
|
|
||||||
|
|
||||||
|
def moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type):
|
||||||
|
"""
|
||||||
|
Moran's I Local Rate
|
||||||
|
Andy Eschbacher
|
||||||
|
"""
|
||||||
|
|
||||||
|
plpy.notice('** Constructing query')
|
||||||
|
|
||||||
|
# geometries with attributes that are null are ignored
|
||||||
|
# resulting in a collection of not as near neighbors
|
||||||
|
|
||||||
|
qvals = {"id_col": id_col,
|
||||||
|
"numerator": numerator,
|
||||||
|
"denominator": denominator,
|
||||||
|
"geom_col": geom_column,
|
||||||
|
"table": t,
|
||||||
|
"num_ngbrs": num_ngbrs}
|
||||||
|
|
||||||
|
q = get_query(w_type, qvals)
|
||||||
|
|
||||||
|
try:
|
||||||
|
r = plpy.execute(q)
|
||||||
|
plpy.notice('** Query returned with %d rows' % len(r))
|
||||||
|
except plpy.SPIError:
|
||||||
|
plpy.notice('** Query failed: "%s"' % q)
|
||||||
|
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||||
|
plpy.notice('** Exiting function')
|
||||||
|
return zip([None], [None], [None], [None])
|
||||||
|
|
||||||
|
plpy.notice('r.nrows() = %d' % r.nrows())
|
||||||
|
|
||||||
|
## collect attributes
|
||||||
|
numer = get_attributes(r, 1)
|
||||||
|
denom = get_attributes(r, 2)
|
||||||
|
|
||||||
|
w = get_weight(r, w_type, num_ngbrs)
|
||||||
|
|
||||||
|
# calculate LISA values
|
||||||
|
lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, w, permutations=permutations)
|
||||||
|
|
||||||
|
# find units of significance
|
||||||
|
lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance)
|
||||||
|
|
||||||
|
plpy.notice('** Finished calculations')
|
||||||
|
|
||||||
|
## TODO: Decide on which return values here
|
||||||
|
return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order, lisa.y)
|
||||||
|
|
||||||
|
def moran_local_bv(t, attr1, attr2, significance, num_ngbrs, permutations, geom_column, id_col, w_type):
|
||||||
|
plpy.notice('** Constructing query')
|
||||||
|
|
||||||
|
qvals = {"num_ngbrs": num_ngbrs,
|
||||||
|
"attr1": attr1,
|
||||||
|
"attr2": attr2,
|
||||||
|
"table": t,
|
||||||
|
"geom_col": geom_column,
|
||||||
|
"id_col": id_col}
|
||||||
|
|
||||||
|
q = get_query(w_type, qvals)
|
||||||
|
|
||||||
|
try:
|
||||||
|
r = plpy.execute(q)
|
||||||
|
plpy.notice('** Query returned with %d rows' % len(r))
|
||||||
|
except plpy.SPIError:
|
||||||
|
plpy.notice('** Query failed: "%s"' % q)
|
||||||
|
plpy.notice('** Error: %s' % plpy.SPIError)
|
||||||
|
plpy.notice('** Exiting function')
|
||||||
|
return zip([None], [None], [None], [None])
|
||||||
|
|
||||||
|
## collect attributes
|
||||||
|
attr1_vals = get_attributes(r, 1)
|
||||||
|
attr2_vals = get_attributes(r, 2)
|
||||||
|
|
||||||
|
# create weights
|
||||||
|
w = get_weight(r, w_type, num_ngbrs)
|
||||||
|
|
||||||
|
# calculate LISA values
|
||||||
|
lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, w)
|
||||||
|
|
||||||
|
plpy.notice("len of Is: %d" % len(lisa.Is))
|
||||||
|
|
||||||
|
# find clustering of significance
|
||||||
|
lisa_sig = lisa_sig_vals(lisa.p_sim, lisa.q, significance)
|
||||||
|
|
||||||
|
plpy.notice('** Finished calculations')
|
||||||
|
|
||||||
|
return zip(lisa.Is, lisa_sig, lisa.p_sim, w.id_order)
|
||||||
|
|
||||||
|
|
||||||
|
# Low level functions ----------------------------------------
|
||||||
|
|
||||||
|
def map_quads(coord):
|
||||||
|
"""
|
||||||
|
Map a quadrant number to Moran's I designation
|
||||||
|
HH=1, LH=2, LL=3, HL=4
|
||||||
|
Input:
|
||||||
|
:param coord (int): quadrant of a specific measurement
|
||||||
|
"""
|
||||||
|
if coord == 1:
|
||||||
|
return 'HH'
|
||||||
|
elif coord == 2:
|
||||||
|
return 'LH'
|
||||||
|
elif coord == 3:
|
||||||
|
return 'LL'
|
||||||
|
elif coord == 4:
|
||||||
|
return 'HL'
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def query_attr_select(params):
|
||||||
|
"""
|
||||||
|
Create portion of SELECT statement for attributes inolved in query.
|
||||||
|
:param params: dict of information used in query (column names,
|
||||||
|
table name, etc.)
|
||||||
|
"""
|
||||||
|
|
||||||
|
attrs = [k for k in params
|
||||||
|
if k not in ('id_col', 'geom_col', 'table', 'num_ngbrs')]
|
||||||
|
|
||||||
|
template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, "
|
||||||
|
|
||||||
|
attr_string = ""
|
||||||
|
|
||||||
|
for idx, val in enumerate(sorted(attrs)):
|
||||||
|
attr_string += template % {"col": val, "alias_num": idx + 1}
|
||||||
|
|
||||||
|
return attr_string
|
||||||
|
|
||||||
|
def query_attr_where(params):
|
||||||
|
"""
|
||||||
|
Create portion of WHERE clauses for weeding out NULL-valued geometries
|
||||||
|
"""
|
||||||
|
attrs = sorted([k for k in params
|
||||||
|
if k not in ('id_col', 'geom_col', 'table', 'num_ngbrs')])
|
||||||
|
|
||||||
|
attr_string = []
|
||||||
|
|
||||||
|
for attr in attrs:
|
||||||
|
attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr)
|
||||||
|
|
||||||
|
if len(attrs) == 2:
|
||||||
|
attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1])
|
||||||
|
|
||||||
|
out = " AND ".join(attr_string)
|
||||||
|
|
||||||
|
return out
|
||||||
|
|
||||||
|
def knn(params):
|
||||||
|
"""SQL query for k-nearest neighbors.
|
||||||
|
:param vars: dict of values to fill template
|
||||||
|
"""
|
||||||
|
|
||||||
|
attr_select = query_attr_select(params)
|
||||||
|
attr_where = query_attr_where(params)
|
||||||
|
|
||||||
|
replacements = {"attr_select": attr_select,
|
||||||
|
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||||
|
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||||
|
|
||||||
|
query = "SELECT " \
|
||||||
|
"i.\"{id_col}\" As id, " \
|
||||||
|
"%(attr_select)s" \
|
||||||
|
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||||
|
"FROM \"{table}\" As j " \
|
||||||
|
"WHERE %(attr_where_j)s " \
|
||||||
|
"ORDER BY j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \
|
||||||
|
"LIMIT {num_ngbrs} OFFSET 1 ) " \
|
||||||
|
") As neighbors " \
|
||||||
|
"FROM \"{table}\" As i " \
|
||||||
|
"WHERE " \
|
||||||
|
"%(attr_where_i)s " \
|
||||||
|
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||||
|
|
||||||
|
return query.format(**params)
|
||||||
|
|
||||||
|
## SQL query for finding queens neighbors (all contiguous polygons)
|
||||||
|
def queen(params):
|
||||||
|
"""SQL query for queen neighbors.
|
||||||
|
:param params: dict of information to fill query
|
||||||
|
"""
|
||||||
|
attr_select = query_attr_select(params)
|
||||||
|
attr_where = query_attr_where(params)
|
||||||
|
|
||||||
|
replacements = {"attr_select": attr_select,
|
||||||
|
"attr_where_i": attr_where.replace("idx_replace", "i"),
|
||||||
|
"attr_where_j": attr_where.replace("idx_replace", "j")}
|
||||||
|
|
||||||
|
query = "SELECT " \
|
||||||
|
"i.\"{id_col}\" As id, " \
|
||||||
|
"%(attr_select)s" \
|
||||||
|
"(SELECT ARRAY(SELECT j.\"{id_col}\" " \
|
||||||
|
"FROM \"{table}\" As j " \
|
||||||
|
"WHERE ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \
|
||||||
|
"%(attr_where_j)s)" \
|
||||||
|
") As neighbors " \
|
||||||
|
"FROM \"{table}\" As i " \
|
||||||
|
"WHERE " \
|
||||||
|
"%(attr_where_i)s " \
|
||||||
|
"ORDER BY i.\"{id_col}\" ASC;" % replacements
|
||||||
|
|
||||||
|
return query.format(**params)
|
||||||
|
|
||||||
|
## to add more weight methods open a ticket or pull request
|
||||||
|
|
||||||
|
def get_query(w_type, query_vals):
|
||||||
|
"""Return requested query.
|
||||||
|
:param w_type: type of neighbors to calculate (knn or queen)
|
||||||
|
:param query_vals: values used to construct the query
|
||||||
|
"""
|
||||||
|
|
||||||
|
if w_type == 'knn':
|
||||||
|
return knn(query_vals)
|
||||||
|
else:
|
||||||
|
return queen(query_vals)
|
||||||
|
|
||||||
|
def get_attributes(query_res, attr_num):
|
||||||
|
"""
|
||||||
|
:param query_res: query results with attributes and neighbors
|
||||||
|
:param attr_num: attribute number (1, 2, ...)
|
||||||
|
"""
|
||||||
|
return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float)
|
||||||
|
|
||||||
|
## Build weight object
|
||||||
|
def get_weight(query_res, w_type='queen', num_ngbrs=5):
|
||||||
|
"""
|
||||||
|
Construct PySAL weight from return value of query
|
||||||
|
:param query_res: query results with attributes and neighbors
|
||||||
|
"""
|
||||||
|
if w_type == 'knn':
|
||||||
|
row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs
|
||||||
|
weights = {x['id']: row_normed_weights for x in query_res}
|
||||||
|
elif w_type == 'queen':
|
||||||
|
weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors'])
|
||||||
|
if len(x['neighbors']) > 0
|
||||||
|
else [] for x in query_res}
|
||||||
|
|
||||||
|
neighbors = {x['id']: x['neighbors'] for x in query_res}
|
||||||
|
|
||||||
|
return ps.W(neighbors, weights)
|
||||||
|
|
||||||
|
def quad_position(quads):
|
||||||
|
"""
|
||||||
|
Produce Moran's I classification based of n
|
||||||
|
"""
|
||||||
|
|
||||||
|
lisa_sig = np.array([map_quads(q) for q in quads])
|
||||||
|
|
||||||
|
return lisa_sig
|
||||||
|
|
||||||
|
def lisa_sig_vals(pvals, quads, threshold):
|
||||||
|
"""
|
||||||
|
Produce Moran's I classification based of n
|
||||||
|
"""
|
||||||
|
|
||||||
|
sig = (pvals <= threshold)
|
||||||
|
|
||||||
|
lisa_sig = np.empty(len(sig), np.chararray)
|
||||||
|
|
||||||
|
for idx, val in enumerate(sig):
|
||||||
|
if val:
|
||||||
|
lisa_sig[idx] = map_quads(quads[idx])
|
||||||
|
else:
|
||||||
|
lisa_sig[idx] = 'Not significant'
|
||||||
|
|
||||||
|
return lisa_sig
|
10
release/python/0.0.1/crankshaft/crankshaft/random_seeds.py
Normal file
10
release/python/0.0.1/crankshaft/crankshaft/random_seeds.py
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
import random
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
def set_random_seeds(value):
|
||||||
|
"""
|
||||||
|
Set the seeds of the RNGs (Random Number Generators)
|
||||||
|
used internally.
|
||||||
|
"""
|
||||||
|
random.seed(value)
|
||||||
|
numpy.random.seed(value)
|
48
release/python/0.0.1/crankshaft/setup.py
Normal file
48
release/python/0.0.1/crankshaft/setup.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
|
||||||
|
"""
|
||||||
|
CartoDB Spatial Analysis Python Library
|
||||||
|
See:
|
||||||
|
https://github.com/CartoDB/crankshaft
|
||||||
|
"""
|
||||||
|
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name='crankshaft',
|
||||||
|
|
||||||
|
version='0.0.01',
|
||||||
|
|
||||||
|
description='CartoDB Spatial Analysis Python Library',
|
||||||
|
|
||||||
|
url='https://github.com/CartoDB/crankshaft',
|
||||||
|
|
||||||
|
author='Data Services Team - CartoDB',
|
||||||
|
author_email='dataservices@cartodb.com',
|
||||||
|
|
||||||
|
license='MIT',
|
||||||
|
|
||||||
|
classifiers=[
|
||||||
|
'Development Status :: 3 - Alpha',
|
||||||
|
'Intended Audience :: Mapping comunity',
|
||||||
|
'Topic :: Maps :: Mapping Tools',
|
||||||
|
'License :: OSI Approved :: MIT License',
|
||||||
|
'Programming Language :: Python :: 2.7',
|
||||||
|
],
|
||||||
|
|
||||||
|
keywords='maps mapping tools spatial analysis geostatistics',
|
||||||
|
|
||||||
|
packages=find_packages(exclude=['contrib', 'docs', 'tests']),
|
||||||
|
|
||||||
|
extras_require={
|
||||||
|
'dev': ['unittest'],
|
||||||
|
'test': ['unittest', 'nose', 'mock'],
|
||||||
|
},
|
||||||
|
|
||||||
|
# The choice of component versions is dictated by what's
|
||||||
|
# provisioned in the production servers.
|
||||||
|
install_requires=['pysal==1.11.0','numpy==1.6.1','scipy==0.17.0'],
|
||||||
|
|
||||||
|
requires=['pysal', 'numpy'],
|
||||||
|
|
||||||
|
test_suite='test'
|
||||||
|
)
|
52
release/python/0.0.1/crankshaft/test/fixtures/moran.json
vendored
Normal file
52
release/python/0.0.1/crankshaft/test/fixtures/moran.json
vendored
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
[[0.9319096128346788, "HH"],
|
||||||
|
[-1.135787401862846, "HL"],
|
||||||
|
[0.11732030672508517, "Not significant"],
|
||||||
|
[0.6152779669180425, "Not significant"],
|
||||||
|
[-0.14657336660125297, "Not significant"],
|
||||||
|
[0.6967858120189607, "Not significant"],
|
||||||
|
[0.07949310115714454, "Not significant"],
|
||||||
|
[0.4703198759258987, "Not significant"],
|
||||||
|
[0.4421125200498064, "Not significant"],
|
||||||
|
[0.5724288737143592, "Not significant"],
|
||||||
|
[0.8970743435692062, "LL"],
|
||||||
|
[0.18327334401918674, "Not significant"],
|
||||||
|
[-0.01466729201304962, "Not significant"],
|
||||||
|
[0.3481559372544409, "Not significant"],
|
||||||
|
[0.06547094736902978, "Not significant"],
|
||||||
|
[0.15482141569329988, "HH"],
|
||||||
|
[0.4373841193538136, "Not significant"],
|
||||||
|
[0.15971286468915544, "Not significant"],
|
||||||
|
[1.0543588860308968, "Not significant"],
|
||||||
|
[1.7372866900020818, "HH"],
|
||||||
|
[1.091998586053999, "LL"],
|
||||||
|
[0.1171572584252222, "Not significant"],
|
||||||
|
[0.08438455015300014, "Not significant"],
|
||||||
|
[0.06547094736902978, "Not significant"],
|
||||||
|
[0.15482141569329985, "HH"],
|
||||||
|
[1.1627044812890683, "HH"],
|
||||||
|
[0.06547094736902978, "Not significant"],
|
||||||
|
[0.795275137550483, "Not significant"],
|
||||||
|
[0.18562939195219, "LL"],
|
||||||
|
[0.3010757406693439, "Not significant"],
|
||||||
|
[2.8205795942839376, "HH"],
|
||||||
|
[0.11259190602909264, "Not significant"],
|
||||||
|
[-0.07116352791516614, "Not significant"],
|
||||||
|
[-0.09945240794119009, "Not significant"],
|
||||||
|
[0.18562939195219, "LL"],
|
||||||
|
[0.1832733440191868, "Not significant"],
|
||||||
|
[-0.39054253768447705, "Not significant"],
|
||||||
|
[-0.1672071289487642, "HL"],
|
||||||
|
[0.3337669247916343, "Not significant"],
|
||||||
|
[0.2584386102554792, "Not significant"],
|
||||||
|
[-0.19733845476322634, "HL"],
|
||||||
|
[-0.9379282899805409, "LH"],
|
||||||
|
[-0.028770969951095866, "Not significant"],
|
||||||
|
[0.051367269430983485, "Not significant"],
|
||||||
|
[-0.2172548045913472, "LH"],
|
||||||
|
[0.05136726943098351, "Not significant"],
|
||||||
|
[0.04191046803899837, "Not significant"],
|
||||||
|
[0.7482357030403517, "HH"],
|
||||||
|
[-0.014585767863118111, "Not significant"],
|
||||||
|
[0.5410013139159929, "Not significant"],
|
||||||
|
[1.0223932668429925, "LL"],
|
||||||
|
[1.4179402898927476, "LL"]]
|
54
release/python/0.0.1/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
54
release/python/0.0.1/crankshaft/test/fixtures/neighbors.json
vendored
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
[
|
||||||
|
{"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
|
||||||
|
{"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
|
||||||
|
{"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
|
||||||
|
{"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
|
||||||
|
{"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
|
||||||
|
{"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
|
||||||
|
{"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
|
||||||
|
{"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
|
||||||
|
{"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
|
||||||
|
{"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
|
||||||
|
{"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
|
||||||
|
{"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
|
||||||
|
{"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
|
||||||
|
{"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
|
||||||
|
{"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
|
||||||
|
{"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
|
||||||
|
{"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
|
||||||
|
{"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
|
||||||
|
{"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
|
||||||
|
{"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
|
||||||
|
{"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
|
||||||
|
{"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
|
||||||
|
{"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
|
||||||
|
{"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
|
||||||
|
{"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
|
||||||
|
{"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
|
||||||
|
{"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
|
||||||
|
{"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
|
||||||
|
{"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
|
||||||
|
{"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
|
||||||
|
{"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
|
||||||
|
{"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
|
||||||
|
{"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
|
||||||
|
{"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
|
||||||
|
{"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
|
||||||
|
{"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
|
||||||
|
{"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
|
||||||
|
{"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
|
||||||
|
{"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
|
||||||
|
{"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
|
||||||
|
{"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
|
||||||
|
{"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
|
||||||
|
{"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
|
||||||
|
{"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
|
||||||
|
{"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
|
||||||
|
{"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
|
||||||
|
{"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
|
||||||
|
{"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
|
||||||
|
{"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
|
||||||
|
{"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
|
||||||
|
{"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
|
||||||
|
{"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
|
||||||
|
]
|
13
release/python/0.0.1/crankshaft/test/helper.py
Normal file
13
release/python/0.0.1/crankshaft/test/helper.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
import unittest
|
||||||
|
|
||||||
|
from mock_plpy import MockPlPy
|
||||||
|
plpy = MockPlPy()
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.modules['plpy'] = plpy
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
def fixture_file(name):
|
||||||
|
dir = os.path.dirname(os.path.realpath(__file__))
|
||||||
|
return os.path.join(dir, 'fixtures', name)
|
34
release/python/0.0.1/crankshaft/test/mock_plpy.py
Normal file
34
release/python/0.0.1/crankshaft/test/mock_plpy.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
class MockPlPy:
|
||||||
|
def __init__(self):
|
||||||
|
self._reset()
|
||||||
|
|
||||||
|
def _reset(self):
|
||||||
|
self.infos = []
|
||||||
|
self.notices = []
|
||||||
|
self.debugs = []
|
||||||
|
self.logs = []
|
||||||
|
self.warnings = []
|
||||||
|
self.errors = []
|
||||||
|
self.fatals = []
|
||||||
|
self.executes = []
|
||||||
|
self.results = []
|
||||||
|
self.prepares = []
|
||||||
|
self.results = []
|
||||||
|
|
||||||
|
def _define_result(self, query, result):
|
||||||
|
pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
|
||||||
|
self.results.append([pattern, result])
|
||||||
|
|
||||||
|
def notice(self, msg):
|
||||||
|
self.notices.append(msg)
|
||||||
|
|
||||||
|
def info(self, msg):
|
||||||
|
self.infos.append(msg)
|
||||||
|
|
||||||
|
def execute(self, query): # TODO: additional arguments
|
||||||
|
for result in self.results:
|
||||||
|
if result[0].match(query):
|
||||||
|
return result[1]
|
||||||
|
return []
|
144
release/python/0.0.1/crankshaft/test/test_clustering_moran.py
Normal file
144
release/python/0.0.1/crankshaft/test/test_clustering_moran.py
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
import unittest
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
|
||||||
|
# from mock_plpy import MockPlPy
|
||||||
|
# plpy = MockPlPy()
|
||||||
|
#
|
||||||
|
# import sys
|
||||||
|
# sys.modules['plpy'] = plpy
|
||||||
|
from helper import plpy, fixture_file
|
||||||
|
|
||||||
|
import crankshaft.clustering as cc
|
||||||
|
from crankshaft import random_seeds
|
||||||
|
import json
|
||||||
|
|
||||||
|
class MoranTest(unittest.TestCase):
|
||||||
|
"""Testing class for Moran's I functions."""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
plpy._reset()
|
||||||
|
self.params = {"id_col": "cartodb_id",
|
||||||
|
"attr1": "andy",
|
||||||
|
"attr2": "jay_z",
|
||||||
|
"table": "a_list",
|
||||||
|
"geom_col": "the_geom",
|
||||||
|
"num_ngbrs": 321}
|
||||||
|
self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read())
|
||||||
|
self.moran_data = json.loads(open(fixture_file('moran.json')).read())
|
||||||
|
|
||||||
|
def test_map_quads(self):
|
||||||
|
"""Test map_quads."""
|
||||||
|
self.assertEqual(cc.map_quads(1), 'HH')
|
||||||
|
self.assertEqual(cc.map_quads(2), 'LH')
|
||||||
|
self.assertEqual(cc.map_quads(3), 'LL')
|
||||||
|
self.assertEqual(cc.map_quads(4), 'HL')
|
||||||
|
self.assertEqual(cc.map_quads(33), None)
|
||||||
|
self.assertEqual(cc.map_quads('andy'), None)
|
||||||
|
|
||||||
|
def test_query_attr_select(self):
|
||||||
|
"""Test query_attr_select."""
|
||||||
|
|
||||||
|
ans = "i.\"{attr1}\"::numeric As attr1, " \
|
||||||
|
"i.\"{attr2}\"::numeric As attr2, "
|
||||||
|
|
||||||
|
self.assertEqual(cc.query_attr_select(self.params), ans)
|
||||||
|
|
||||||
|
def test_query_attr_where(self):
|
||||||
|
"""Test query_attr_where."""
|
||||||
|
|
||||||
|
ans = "idx_replace.\"{attr1}\" IS NOT NULL AND "\
|
||||||
|
"idx_replace.\"{attr2}\" IS NOT NULL AND "\
|
||||||
|
"idx_replace.\"{attr2}\" <> 0"
|
||||||
|
|
||||||
|
self.assertEqual(cc.query_attr_where(self.params), ans)
|
||||||
|
|
||||||
|
def test_knn(self):
|
||||||
|
"""Test knn function."""
|
||||||
|
|
||||||
|
ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \
|
||||||
|
"i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT j.\"cartodb_id\" " \
|
||||||
|
"FROM \"a_list\" As j WHERE j.\"andy\" IS NOT NULL AND " \
|
||||||
|
"j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 ORDER BY " \
|
||||||
|
"j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 OFFSET 1 ) ) " \
|
||||||
|
"As neighbors FROM \"a_list\" As i WHERE i.\"andy\" IS NOT " \
|
||||||
|
"NULL AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER " \
|
||||||
|
"BY i.\"cartodb_id\" ASC;"
|
||||||
|
|
||||||
|
self.assertEqual(cc.knn(self.params), ans)
|
||||||
|
|
||||||
|
def test_queen(self):
|
||||||
|
"""Test queen neighbors function."""
|
||||||
|
|
||||||
|
ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \
|
||||||
|
"i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \
|
||||||
|
"j.\"cartodb_id\" FROM \"a_list\" As j WHERE ST_Touches(" \
|
||||||
|
"i.\"the_geom\", j.\"the_geom\") AND j.\"andy\" IS NOT NULL " \
|
||||||
|
"AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0)) As " \
|
||||||
|
"neighbors FROM \"a_list\" As i WHERE i.\"andy\" IS NOT NULL " \
|
||||||
|
"AND i.\"jay_z\" IS NOT NULL AND i.\"jay_z\" <> 0 ORDER BY " \
|
||||||
|
"i.\"cartodb_id\" ASC;"
|
||||||
|
|
||||||
|
self.assertEqual(cc.queen(self.params), ans)
|
||||||
|
|
||||||
|
def test_get_query(self):
|
||||||
|
"""Test get_query."""
|
||||||
|
|
||||||
|
ans = "SELECT i.\"cartodb_id\" As id, i.\"andy\"::numeric As attr1, " \
|
||||||
|
"i.\"jay_z\"::numeric As attr2, (SELECT ARRAY(SELECT " \
|
||||||
|
"j.\"cartodb_id\" FROM \"a_list\" As j WHERE j.\"andy\" IS " \
|
||||||
|
"NOT NULL AND j.\"jay_z\" IS NOT NULL AND j.\"jay_z\" <> 0 " \
|
||||||
|
"ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC LIMIT 321 " \
|
||||||
|
"OFFSET 1 ) ) As neighbors FROM \"a_list\" As i WHERE " \
|
||||||
|
"i.\"andy\" IS NOT NULL AND i.\"jay_z\" IS NOT NULL AND " \
|
||||||
|
"i.\"jay_z\" <> 0 ORDER BY i.\"cartodb_id\" ASC;"
|
||||||
|
|
||||||
|
self.assertEqual(cc.get_query('knn', self.params), ans)
|
||||||
|
|
||||||
|
def test_get_attributes(self):
|
||||||
|
"""Test get_attributes."""
|
||||||
|
|
||||||
|
## need to add tests
|
||||||
|
|
||||||
|
self.assertEqual(True, True)
|
||||||
|
|
||||||
|
def test_get_weight(self):
|
||||||
|
"""Test get_weight."""
|
||||||
|
|
||||||
|
self.assertEqual(True, True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_quad_position(self):
|
||||||
|
"""Test lisa_sig_vals."""
|
||||||
|
|
||||||
|
quads = np.array([1, 2, 3, 4], np.int)
|
||||||
|
|
||||||
|
ans = np.array(['HH', 'LH', 'LL', 'HL'])
|
||||||
|
test_ans = cc.quad_position(quads)
|
||||||
|
|
||||||
|
self.assertTrue((test_ans == ans).all())
|
||||||
|
|
||||||
|
def test_moran_local(self):
|
||||||
|
"""Test Moran's I local"""
|
||||||
|
data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||||
|
plpy._define_result('select', data)
|
||||||
|
random_seeds.set_random_seeds(1234)
|
||||||
|
result = cc.moran_local('table', 'value', 0.05, 5, 99, 'the_geom', 'cartodb_id', 'knn')
|
||||||
|
result = [(row[0], row[1]) for row in result]
|
||||||
|
expected = self.moran_data
|
||||||
|
for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected):
|
||||||
|
self.assertAlmostEqual(res_val, exp_val)
|
||||||
|
self.assertEqual(res_quad, exp_quad)
|
||||||
|
|
||||||
|
def test_moran_local_rate(self):
|
||||||
|
"""Test Moran's I rate"""
|
||||||
|
data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data]
|
||||||
|
plpy._define_result('select', data)
|
||||||
|
random_seeds.set_random_seeds(1234)
|
||||||
|
result = cc.moran_local_rate('table', 'numerator', 'denominator', 0.05, 5, 99, 'the_geom', 'cartodb_id', 'knn')
|
||||||
|
result = [(row[0], row[1]) for row in result]
|
||||||
|
expected = self.moran_data
|
||||||
|
for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected):
|
||||||
|
self.assertAlmostEqual(res_val, exp_val)
|
Loading…
Reference in New Issue
Block a user