You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
crankshaft/release/crankshaft--0.0.4--0.1.0.sql

259 lines
8.1 KiB

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

--DO NOT MODIFY THIS FILE, IT IS GENERATED FROM SOURCES
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit
--------------------------------------------------------------------------------
-- Version number of the extension release
CREATE OR REPLACE FUNCTION cdb_crankshaft_version()
RETURNS text AS $$
SELECT '0.1.0'::text;
$$ language 'sql' STABLE STRICT;
--------------------------------------------------------------------------------
-- PyAgg stuff
CREATE OR REPLACE FUNCTION
CDB_PyAggS(current_state Numeric[], current_row Numeric[])
returns NUMERIC[] as $$
BEGIN
if array_upper(current_state,1) is null then
RAISE NOTICE 'setting state %',array_upper(current_row,1);
current_state[1] = array_upper(current_row,1);
end if;
return array_cat(current_state,current_row) ;
END
$$ LANGUAGE plpgsql;
CREATE AGGREGATE CDB_PyAgg(NUMERIC[])(
SFUNC = CDB_PyAggS,
STYPE = Numeric[],
INITCOND = "{}"
);
--------------------------------------------------------------------------------
-- Segmentation stuff
CREATE OR REPLACE FUNCTION
CDB_CreateAndPredictSegment(
target NUMERIC[],
features NUMERIC[],
target_features NUMERIC[],
target_ids NUMERIC[],
n_estimators INTEGER DEFAULT 1200,
max_depth INTEGER DEFAULT 3,
subsample DOUBLE PRECISION DEFAULT 0.5,
learning_rate DOUBLE PRECISION DEFAULT 0.01,
min_samples_leaf INTEGER DEFAULT 1)
RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC)
AS $$
import numpy as np
import plpy
from crankshaft.segmentation import create_and_predict_segment_agg
model_params = {'n_estimators': n_estimators,
'max_depth': max_depth,
'subsample': subsample,
'learning_rate': learning_rate,
'min_samples_leaf': min_samples_leaf}
def unpack2D(data):
dimension = data.pop(0)
a = np.array(data, dtype=float)
return a.reshape(len(a)/dimension, dimension)
return create_and_predict_segment_agg(np.array(target, dtype=float),
unpack2D(features),
unpack2D(target_features),
target_ids,
model_params)
$$ LANGUAGE plpythonu;
CREATE OR REPLACE FUNCTION
CDB_CreateAndPredictSegment (
query TEXT,
variable_name TEXT,
target_table TEXT,
n_estimators INTEGER DEFAULT 1200,
max_depth INTEGER DEFAULT 3,
subsample DOUBLE PRECISION DEFAULT 0.5,
learning_rate DOUBLE PRECISION DEFAULT 0.01,
min_samples_leaf INTEGER DEFAULT 1)
RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
AS $$
from crankshaft.segmentation import create_and_predict_segment
model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf}
return create_and_predict_segment(query,variable_name,target_table, model_params)
$$ LANGUAGE plpythonu;
--------------------------------------------------------------------------------
-- Spatial interpolation
-- 0: nearest neighbor
-- 1: barymetric
-- 2: IDW
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
IN query text,
IN point geometry,
IN method integer DEFAULT 1,
IN p1 numeric DEFAULT 0,
IN p2 numeric DEFAULT 0
)
RETURNS numeric AS
$$
DECLARE
gs geometry[];
vs numeric[];
output numeric;
BEGIN
EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs;
SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a;
RETURN output;
END;
$$
language plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
IN geomin geometry[],
IN colin numeric[],
IN point geometry,
IN method integer DEFAULT 1,
IN p1 numeric DEFAULT 0,
IN p2 numeric DEFAULT 0
)
RETURNS numeric AS
$$
DECLARE
gs geometry[];
vs numeric[];
gs2 geometry[];
vs2 numeric[];
g geometry;
vertex geometry[];
sg numeric;
sa numeric;
sb numeric;
sc numeric;
va numeric;
vb numeric;
vc numeric;
output numeric;
BEGIN
output := -999.999;
-- nearest
IF method = 0 THEN
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v)
SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1;
RETURN output;
-- barymetric
ELSIF method = 1 THEN
WITH a as (SELECT unnest(geomin) AS e),
b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
c as (SELECT (ST_Dump(t)).geom as v FROM b),
d as (SELECT v FROM c WHERE ST_Within(point, v))
SELECT v INTO g FROM d;
IF g is null THEN
-- out of the realm of the input data
RETURN -888.888;
END IF;
-- vertex of the selected cell
WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
SELECT array_agg(v) INTO vertex FROM a;
-- retrieve the value of each vertex
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg);
RETURN output;
-- IDW
-- p1: limit the number of neighbors, 0->no limit
-- p2: order of distance decay, 0-> order 1
ELSIF method = 2 THEN
IF p2 = 0 THEN
p2 := 1;
END IF;
WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v),
b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;
IF p1::integer>0 THEN
gs2:=gs;
vs2:=vs;
FOR i IN 1..p1
LOOP
gs2 := gs2 || gs[i];
vs2 := vs2 || vs[i];
END LOOP;
ELSE
gs2:=gs;
vs2:=vs;
END IF;
WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
b as (
SELECT
(1/ST_distance(point, a.g)^p2::integer) as k,
(a.v/ST_distance(point, a.g)^p2::integer) as f
FROM a
)
SELECT sum(b.f)/sum(b.k) INTO output FROM b;
RETURN output;
END IF;
RETURN -777.777;
END;
$$
language plpgsql IMMUTABLE;
--------------------------------------------------------------------------------
-- Spatial Markov
-- input table format:
-- id | geom | date_1 | date_2 | date_3
-- 1 | Pt1 | 12.3 | 13.1 | 14.2
-- 2 | Pt2 | 11.0 | 13.2 | 12.5
-- ...
-- Sample Function call:
-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate',
-- Array['date_1', 'date_2', 'date_3'])
CREATE OR REPLACE FUNCTION
CDB_SpatialMarkovTrend (
subquery TEXT,
time_cols TEXT[],
num_classes INT DEFAULT 7,
w_type TEXT DEFAULT 'knn',
num_ngbrs INT DEFAULT 5,
permutations INT DEFAULT 99,
geom_col TEXT DEFAULT 'the_geom',
id_col TEXT DEFAULT 'cartodb_id')
RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT)
AS $$
from crankshaft.space_time_dynamics import spatial_markov_trend
## TODO: use named parameters or a dictionary
return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col)
$$ LANGUAGE plpythonu;