mirror of
https://github.com/CartoDB/crankshaft.git
synced 2024-11-01 10:20:48 +08:00
259 lines
8.1 KiB
MySQL
259 lines
8.1 KiB
MySQL
|
--DO NOT MODIFY THIS FILE, IT IS GENERATED FROM SOURCES
|
|||
|
|
|||
|
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
|
|||
|
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
|
|||
|
|
|||
|
--------------------------------------------------------------------------------
|
|||
|
|
|||
|
-- Version number of the extension release
|
|||
|
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
|
|||
|
RETURNS text AS $$
|
|||
|
SELECT '0.1.0'::text;
|
|||
|
$$ language 'sql' STABLE STRICT;
|
|||
|
|
|||
|
--------------------------------------------------------------------------------
|
|||
|
|
|||
|
-- PyAgg stuff
|
|||
|
CREATE OR REPLACE FUNCTION
|
|||
|
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
|
|||
|
returns NUMERIC[] as $$
|
|||
|
BEGIN
|
|||
|
if array_upper(current_state,1) is null then
|
|||
|
RAISE NOTICE 'setting state %',array_upper(current_row,1);
|
|||
|
current_state[1] = array_upper(current_row,1);
|
|||
|
end if;
|
|||
|
return array_cat(current_state,current_row) ;
|
|||
|
END
|
|||
|
$$ LANGUAGE plpgsql;
|
|||
|
|
|||
|
|
|||
|
CREATE AGGREGATE CDB_PyAgg(NUMERIC[])(
|
|||
|
SFUNC = CDB_PyAggS,
|
|||
|
STYPE = Numeric[],
|
|||
|
INITCOND = "{}"
|
|||
|
);
|
|||
|
|
|||
|
--------------------------------------------------------------------------------
|
|||
|
|
|||
|
-- Segmentation stuff
|
|||
|
CREATE OR REPLACE FUNCTION
|
|||
|
CDB_CreateAndPredictSegment(
|
|||
|
target NUMERIC[],
|
|||
|
features NUMERIC[],
|
|||
|
target_features NUMERIC[],
|
|||
|
target_ids NUMERIC[],
|
|||
|
n_estimators INTEGER DEFAULT 1200,
|
|||
|
max_depth INTEGER DEFAULT 3,
|
|||
|
subsample DOUBLE PRECISION DEFAULT 0.5,
|
|||
|
learning_rate DOUBLE PRECISION DEFAULT 0.01,
|
|||
|
min_samples_leaf INTEGER DEFAULT 1)
|
|||
|
RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC)
|
|||
|
AS $$
|
|||
|
import numpy as np
|
|||
|
import plpy
|
|||
|
|
|||
|
from crankshaft.segmentation import create_and_predict_segment_agg
|
|||
|
model_params = {'n_estimators': n_estimators,
|
|||
|
'max_depth': max_depth,
|
|||
|
'subsample': subsample,
|
|||
|
'learning_rate': learning_rate,
|
|||
|
'min_samples_leaf': min_samples_leaf}
|
|||
|
|
|||
|
def unpack2D(data):
|
|||
|
dimension = data.pop(0)
|
|||
|
a = np.array(data, dtype=float)
|
|||
|
return a.reshape(len(a)/dimension, dimension)
|
|||
|
|
|||
|
return create_and_predict_segment_agg(np.array(target, dtype=float),
|
|||
|
unpack2D(features),
|
|||
|
unpack2D(target_features),
|
|||
|
target_ids,
|
|||
|
model_params)
|
|||
|
|
|||
|
$$ LANGUAGE plpythonu;
|
|||
|
|
|||
|
CREATE OR REPLACE FUNCTION
|
|||
|
CDB_CreateAndPredictSegment (
|
|||
|
query TEXT,
|
|||
|
variable_name TEXT,
|
|||
|
target_table TEXT,
|
|||
|
n_estimators INTEGER DEFAULT 1200,
|
|||
|
max_depth INTEGER DEFAULT 3,
|
|||
|
subsample DOUBLE PRECISION DEFAULT 0.5,
|
|||
|
learning_rate DOUBLE PRECISION DEFAULT 0.01,
|
|||
|
min_samples_leaf INTEGER DEFAULT 1)
|
|||
|
RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
|
|||
|
AS $$
|
|||
|
from crankshaft.segmentation import create_and_predict_segment
|
|||
|
model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf}
|
|||
|
return create_and_predict_segment(query,variable_name,target_table, model_params)
|
|||
|
$$ LANGUAGE plpythonu;
|
|||
|
|
|||
|
--------------------------------------------------------------------------------
|
|||
|
|
|||
|
-- Spatial interpolation
|
|||
|
|
|||
|
-- 0: nearest neighbor
|
|||
|
-- 1: barymetric
|
|||
|
-- 2: IDW
|
|||
|
|
|||
|
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
|
|||
|
IN query text,
|
|||
|
IN point geometry,
|
|||
|
IN method integer DEFAULT 1,
|
|||
|
IN p1 numeric DEFAULT 0,
|
|||
|
IN p2 numeric DEFAULT 0
|
|||
|
)
|
|||
|
RETURNS numeric AS
|
|||
|
$$
|
|||
|
DECLARE
|
|||
|
gs geometry[];
|
|||
|
vs numeric[];
|
|||
|
output numeric;
|
|||
|
BEGIN
|
|||
|
EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs;
|
|||
|
SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a;
|
|||
|
|
|||
|
RETURN output;
|
|||
|
END;
|
|||
|
$$
|
|||
|
language plpgsql IMMUTABLE;
|
|||
|
|
|||
|
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
|
|||
|
IN geomin geometry[],
|
|||
|
IN colin numeric[],
|
|||
|
IN point geometry,
|
|||
|
IN method integer DEFAULT 1,
|
|||
|
IN p1 numeric DEFAULT 0,
|
|||
|
IN p2 numeric DEFAULT 0
|
|||
|
)
|
|||
|
RETURNS numeric AS
|
|||
|
$$
|
|||
|
DECLARE
|
|||
|
gs geometry[];
|
|||
|
vs numeric[];
|
|||
|
gs2 geometry[];
|
|||
|
vs2 numeric[];
|
|||
|
g geometry;
|
|||
|
vertex geometry[];
|
|||
|
sg numeric;
|
|||
|
sa numeric;
|
|||
|
sb numeric;
|
|||
|
sc numeric;
|
|||
|
va numeric;
|
|||
|
vb numeric;
|
|||
|
vc numeric;
|
|||
|
output numeric;
|
|||
|
BEGIN
|
|||
|
output := -999.999;
|
|||
|
-- nearest
|
|||
|
IF method = 0 THEN
|
|||
|
|
|||
|
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v)
|
|||
|
SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1;
|
|||
|
RETURN output;
|
|||
|
|
|||
|
-- barymetric
|
|||
|
ELSIF method = 1 THEN
|
|||
|
WITH a as (SELECT unnest(geomin) AS e),
|
|||
|
b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
|
|||
|
c as (SELECT (ST_Dump(t)).geom as v FROM b),
|
|||
|
d as (SELECT v FROM c WHERE ST_Within(point, v))
|
|||
|
SELECT v INTO g FROM d;
|
|||
|
IF g is null THEN
|
|||
|
-- out of the realm of the input data
|
|||
|
RETURN -888.888;
|
|||
|
END IF;
|
|||
|
-- vertex of the selected cell
|
|||
|
WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
|
|||
|
SELECT array_agg(v) INTO vertex FROM a;
|
|||
|
|
|||
|
-- retrieve the value of each vertex
|
|||
|
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
|||
|
SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
|
|||
|
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
|||
|
SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
|
|||
|
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
|
|||
|
SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
|
|||
|
|
|||
|
SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
|
|||
|
|
|||
|
output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg);
|
|||
|
RETURN output;
|
|||
|
|
|||
|
-- IDW
|
|||
|
-- p1: limit the number of neighbors, 0->no limit
|
|||
|
-- p2: order of distance decay, 0-> order 1
|
|||
|
ELSIF method = 2 THEN
|
|||
|
|
|||
|
IF p2 = 0 THEN
|
|||
|
p2 := 1;
|
|||
|
END IF;
|
|||
|
|
|||
|
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v),
|
|||
|
b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
|
|||
|
SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;
|
|||
|
IF p1::integer>0 THEN
|
|||
|
gs2:=gs;
|
|||
|
vs2:=vs;
|
|||
|
FOR i IN 1..p1
|
|||
|
LOOP
|
|||
|
gs2 := gs2 || gs[i];
|
|||
|
vs2 := vs2 || vs[i];
|
|||
|
END LOOP;
|
|||
|
ELSE
|
|||
|
gs2:=gs;
|
|||
|
vs2:=vs;
|
|||
|
END IF;
|
|||
|
|
|||
|
WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
|
|||
|
b as (
|
|||
|
SELECT
|
|||
|
(1/ST_distance(point, a.g)^p2::integer) as k,
|
|||
|
(a.v/ST_distance(point, a.g)^p2::integer) as f
|
|||
|
FROM a
|
|||
|
)
|
|||
|
SELECT sum(b.f)/sum(b.k) INTO output FROM b;
|
|||
|
RETURN output;
|
|||
|
|
|||
|
END IF;
|
|||
|
|
|||
|
RETURN -777.777;
|
|||
|
|
|||
|
END;
|
|||
|
$$
|
|||
|
language plpgsql IMMUTABLE;
|
|||
|
|
|||
|
|
|||
|
--------------------------------------------------------------------------------
|
|||
|
|
|||
|
-- Spatial Markov
|
|||
|
|
|||
|
-- input table format:
|
|||
|
-- id | geom | date_1 | date_2 | date_3
|
|||
|
-- 1 | Pt1 | 12.3 | 13.1 | 14.2
|
|||
|
-- 2 | Pt2 | 11.0 | 13.2 | 12.5
|
|||
|
-- ...
|
|||
|
-- Sample Function call:
|
|||
|
-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate',
|
|||
|
-- Array['date_1', 'date_2', 'date_3'])
|
|||
|
|
|||
|
CREATE OR REPLACE FUNCTION
|
|||
|
CDB_SpatialMarkovTrend (
|
|||
|
subquery TEXT,
|
|||
|
time_cols TEXT[],
|
|||
|
num_classes INT DEFAULT 7,
|
|||
|
w_type TEXT DEFAULT 'knn',
|
|||
|
num_ngbrs INT DEFAULT 5,
|
|||
|
permutations INT DEFAULT 99,
|
|||
|
geom_col TEXT DEFAULT 'the_geom',
|
|||
|
id_col TEXT DEFAULT 'cartodb_id')
|
|||
|
RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
|
|||
|
AS $$
|
|||
|
|
|||
|
from crankshaft.space_time_dynamics import spatial_markov_trend
|
|||
|
|
|||
|
## TODO: use named parameters or a dictionary
|
|||
|
return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
|
|||
|
$$ LANGUAGE plpythonu;
|