_OBS_Precheck and OBS_MetadataValidator

New functions to perform a precheck for the user data and metadata
parameters that are going to be used to execute a OBS_GetData operation.

Doing this before the OBD_GetData we are able to check if we're going to
have problems with the current request. Is useful because right now
we're using the *_Exception_Safe functions which return NULL in case we
have an exception, this leads to execute the whole request even if we
know beforehand that it's going to fail.
This commit is contained in:
Mario de Frutos 2017-08-08 18:55:26 +02:00
parent c982c6e4fb
commit 3eee68c56e
12 changed files with 291 additions and 14 deletions

View File

@ -6,3 +6,117 @@
SET search_path = "$user",cartodb,public,cdb_dataservices_client;
-- HERE goes your code to upgrade/downgrade
CREATE OR REPLACE FUNCTION cdb_dataservices_client._OBS_PreCheck(
source_query text,
parameters json
) RETURNS boolean AS $$
DECLARE
errors text[];
validator_errors text[];
validator_error text;
valid boolean;
geoms record;
BEGIN
errors := (ARRAY[])::TEXT[];
FOR geoms IN EXECUTE format('SELECT ST_GeometryType(the_geom) as geom_type,
bool_and(st_isvalid(the_geom)) as valid,
avg(st_npoints(the_geom)) as avg_vertex,
ST_SetSRID(ST_Extent(the_geom), 4326) as extent,
count(*)::INT as numgeoms
FROM (%s) as _source
GROUP BY ST_GeometryType(the_geom)', source_query)
LOOP
IF geoms.geom_type NOT IN ('ST_Polygon', 'ST_MultiPolygon', 'ST_Point') THEN
errors := array_append(errors, format($data$'Geometry type %s not supported'$data$, geoms.geom_type));
END IF;
IF geoms.valid IS FALSE THEN
errors := array_append(errors, 'There are invalid geometries in the input data, please review them');
END IF;
-- 1000 vertex for a geometry is a limit we have in the obs_getdata function. You can check here
-- https://github.com/CartoDB/observatory-extension/blob/1.6.0/src/pg/sql/41_observatory_augmentation.sql#L813
IF geoms.avg_vertex > 1000 THEN
errors := array_append(errors, 'The average number of geometries vertex is greater than 1000, please try to simplify them');
END IF;
-- OBS specific part
EXECUTE 'SELECT valid, errors
FROM cdb_dataservices_client.OBS_MetadataValidation($1, $2, $3, $4)'
INTO valid, validator_errors
USING geoms.extent, geoms.geom_type, parameters, geoms.numgeoms;
IF valid is FALSE THEN
FOR validator_error IN EXECUTE 'SELECT unnest($1)' USING validator_errors
LOOP
errors := array_append(errors, validator_error);
END LOOP;
END IF;
END LOOP;
IF CARDINALITY(errors) > 0 THEN
RAISE EXCEPTION '%', errors;
END IF;
RETURN TRUE;
END;
$$ LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION cdb_dataservices_client._obs_metadatavalidation (username text, orgname text, geom_extent Geometry(Geometry, 4326), geom_type text, params json, target_geoms integer DEFAULT NULL)
RETURNS TABLE(valid boolean, errors text[]) AS $$
CONNECT cdb_dataservices_client._server_conn_str();
SELECT * FROM cdb_dataservices_server.obs_metadatavalidation (username, orgname, geom_extent, geom_type, params, target_geoms);
$$ LANGUAGE plproxy;
CREATE OR REPLACE FUNCTION cdb_dataservices_client.obs_metadatavalidation (geom_extent Geometry(Geometry, 4326) ,geom_type text ,params json ,target_geoms integer DEFAULT NULL)
RETURNS TABLE(valid boolean, errors text[]) AS $$
DECLARE
username text;
orgname text;
BEGIN
IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
RAISE EXCEPTION 'The api_key must be provided';
END IF;
SELECT u, o INTO username, orgname FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text);
-- JSON value stored "" is taken as literal
IF username IS NULL OR username = '' OR username = '""' THEN
RAISE EXCEPTION 'Username is a mandatory argument, check it out';
END IF;
RETURN QUERY SELECT * FROM cdb_dataservices_client._obs_metadatavalidation(username, orgname, geom_extent, geom_type, params, target_geoms);
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER;
CREATE OR REPLACE FUNCTION cdb_dataservices_client._obs_metadatavalidation_exception_safe (geom_extent Geometry(Geometry, 4326) ,geom_type text ,params json ,target_geoms integer DEFAULT NULL)
RETURNS TABLE(valid boolean, errors text[]) AS $$
DECLARE
username text;
orgname text;
_returned_sqlstate TEXT;
_message_text TEXT;
_pg_exception_context TEXT;
BEGIN
IF session_user = 'publicuser' OR session_user ~ 'cartodb_publicuser_*' THEN
RAISE EXCEPTION 'The api_key must be provided';
END IF;
SELECT u, o INTO username, orgname FROM cdb_dataservices_client._cdb_entity_config() AS (u text, o text);
-- JSON value stored "" is taken as literal
IF username IS NULL OR username = '' OR username = '""' THEN
RAISE EXCEPTION 'Username is a mandatory argument, check it out';
END IF;
BEGIN
RETURN QUERY SELECT * FROM cdb_dataservices_client._obs_metadatavalidation(username, orgname, geom_extent, geom_type, params, target_geoms);
EXCEPTION
WHEN OTHERS THEN
GET STACKED DIAGNOSTICS _returned_sqlstate = RETURNED_SQLSTATE,
_message_text = MESSAGE_TEXT,
_pg_exception_context = PG_EXCEPTION_CONTEXT;
RAISE WARNING USING ERRCODE = _returned_sqlstate, MESSAGE = _message_text, DETAIL = _pg_exception_context;
END;
END;
$$ LANGUAGE 'plpgsql' SECURITY DEFINER;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._OBS_PreCheck(source_query text, params JSON) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client.obs_metadatavalidation(geom_extent Geometry(Geometry, 4326), geom_type text, params json, target_geoms integer) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._obs_metadatavalidation_exception_safe(geom_extent Geometry(Geometry, 4326), geom_type text, params json, target_geoms integer ) TO publicuser;

View File

@ -6,3 +6,7 @@
SET search_path = "$user",cartodb,public,cdb_dataservices_client;
-- HERE goes your code to upgrade/downgrade
DROP FUNCTION IF EXISTS _OBS_PreCheck(text, json);
DROP FUNCTION IF EXISTS cdb_dataservices_client.obs_metadatavalidation (Geometry(Geometry, 4326), text, json);
DROP FUNCTION IF EXISTS cdb_dataservices_client._obs_metadatavalidation_exception_safe (Geometry(Geometry, 4326), text, json);
DROP FUNCTION IF EXISTS cdb_dataservices_client._obs_metadatavalidation (text, text, Geometry(Geometry, 4326), text, json);

View File

@ -305,6 +305,16 @@
- { name: max_score_rank, type: integer, default: 'NULL' }
- { name: target_geoms, type: integer, default: 'NULL' }
- name: obs_metadatavalidation
return_type: TABLE(valid boolean, errors text[])
multi_row: true
multi_field: true
params:
- { name: geom_extent, type: "Geometry(Geometry, 4326)" }
- { name: geom_type, type: text }
- { name: params, type: json }
- { name: target_geoms, type: integer, default: 'NULL' }
- name: obs_getcategory
return_type: text
params:

View File

@ -1,37 +1,52 @@
CREATE OR REPLACE FUNCTION cdb_dataservices_client._OBS_PreCheck(
source_query text,
parameters jsonb
parameters json
) RETURNS boolean AS $$
DECLARE
username text;
orgname text;
errors text[];
validator_errors text[];
validator_error text;
valid boolean;
geoms record;
BEGIN
errors := (ARRAY[])::TEXT[];
FOR geoms IN
EXECUTE FORMAT('SELECT ST_GeometryType(the_geom) as geom_type,
bool_and(st_isvalid(the_geom)) as valid,
avg(st_npoints(the_geom)) as avg_vertex
FROM (%s) as _source GROUP BY ST_GeometryType(the_geom)', source_query)
FOR geoms IN EXECUTE format('SELECT ST_GeometryType(the_geom) as geom_type,
bool_and(st_isvalid(the_geom)) as valid,
avg(st_npoints(the_geom)) as avg_vertex,
ST_SetSRID(ST_Extent(the_geom), 4326) as extent,
count(*)::INT as numgeoms
FROM (%s) as _source
GROUP BY ST_GeometryType(the_geom)', source_query)
LOOP
IF geoms.geom_type NOT IN ('ST_Polygon', 'ST_MultiPolygon', 'ST_Point') THEN
errors := array_append(errors, format($data$'Geometry type %s not supported'$data$, geoms.geom_type));
END IF;
IF geoms.valid IS FALSE THEN
errors := array_append(errors, 'There are invalid geometries in the input data, please review them');
errors := array_append(errors, 'There are invalid geometries in the input data, please try to fix them');
END IF;
-- 1000 vertex for a geometry is a limit we have in the obs_getdata function. You can check here
-- https://github.com/CartoDB/observatory-extension/blob/1.6.0/src/pg/sql/41_observatory_augmentation.sql#L813
IF geoms.avg_vertex > 1000 THEN
errors := array_append(errors, 'The average number of geometries vertex is greater than 1000, please try to simplify them');
errors := array_append(errors, 'The average number of vertices per geometry is greater than 1000, please try to simplify them');
END IF;
-- OBS specific part
EXECUTE 'SELECT valid, errors
FROM cdb_dataservices_client.OBS_MetadataValidation($1, $2, $3, $4)'
INTO valid, validator_errors
USING geoms.extent, geoms.geom_type, parameters, geoms.numgeoms;
IF valid is FALSE THEN
FOR validator_error IN EXECUTE 'SELECT unnest($1)' USING validator_errors
LOOP
errors := array_append(errors, validator_error);
END LOOP;
END IF;
END LOOP;
IF CARDINALITY(errors) > 0 THEN
RAISE EXCEPTION '%', format('%s', errors);
RAISE EXCEPTION '%', errors;
END IF;
RETURN TRUE;

View File

@ -1,3 +1,3 @@
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._DST_PrepareTableOBS_GetMeasure(output_table_name text, params json) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._DST_PopulateTableOBS_GetMeasure(table_name text, output_table_name text, params json) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._OBS_PreCheck(source_query text, params JSONB) TO publicuser;
GRANT EXECUTE ON FUNCTION cdb_dataservices_client._OBS_PreCheck(source_query text, params JSON) TO publicuser;

View File

@ -194,6 +194,13 @@ BEGIN
RETURN QUERY SELECT '36047'::TEXT AS id, '[{"value" : 10349.1547875017}]'::JSON AS data;
END;
$$ LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION cdb_dataservices_server.obs_metadatavalidation(username text, orgname text, geom_extent geometry(Geometry, 4326), geom_type text, params JSON, target_geoms INTEGER DEFAULT NULL)
RETURNS TABLE (valid boolean, errors text[]) AS $$
BEGIN
RAISE NOTICE 'cdb_dataservices_server.obs_metadatavalidation invoked with params (%, %, %, %, %, %)', username, orgname, geom_extent, geom_type, params, target_geoms;
RETURN QUERY SELECT true AS valid, ARRAY[]::TEXT[] AS errors;
END;
$$ LANGUAGE 'plpgsql';
-- Exercise the public and the proxied function
SELECT obs_get_demographic_snapshot(ST_SetSRID(ST_Point(-73.936669 , 40.704512), 4326), '2009 - 2013'::text, '"us.census.tiger".block_group'::text);
NOTICE: cdb_dataservices_client._obs_get_demographic_snapshot(5): [contrib_regression] REMOTE NOTICE: cdb_dataservices_server.obs_get_demographic_snapshot invoked with params (test_user, <NULL>, 0101000020E6100000548B8862F27B52C0DDD1FF722D5A4440, 2009 - 2013, "us.census.tiger".block_group)
@ -351,3 +358,10 @@ NOTICE: cdb_dataservices_client._obs_getdata(5): [contrib_regression] REMOTE NO
(36047,"[{""value"" : 10349.1547875017}]")
(1 row)
SELECT obs_metadatavalidation(ST_SetSRID(ST_Point(-73.9, 40.7), 4326), 'ST_Polygon', '[{"numer_id": "us.census.acs.B01003001"}]', 1000);
NOTICE: cdb_dataservices_client._obs_metadatavalidation(6): [contrib_regression] REMOTE NOTICE: cdb_dataservices_server.obs_metadatavalidation invoked with params (test_user, <NULL>, 0101000020E61000009A999999997952C09A99999999594440, ST_Polygon, [{"numer_id": "us.census.acs.B01003001"}], 1000)
obs_metadatavalidation
------------------------
(t,{})
(1 row)

View File

@ -218,6 +218,14 @@ BEGIN
END;
$$ LANGUAGE 'plpgsql';
CREATE OR REPLACE FUNCTION cdb_dataservices_server.obs_metadatavalidation(username text, orgname text, geom_extent geometry(Geometry, 4326), geom_type text, params JSON, target_geoms INTEGER DEFAULT NULL)
RETURNS TABLE (valid boolean, errors text[]) AS $$
BEGIN
RAISE NOTICE 'cdb_dataservices_server.obs_metadatavalidation invoked with params (%, %, %, %, %, %)', username, orgname, geom_extent, geom_type, params, target_geoms;
RETURN QUERY SELECT true AS valid, ARRAY[]::TEXT[] AS errors;
END;
$$ LANGUAGE 'plpgsql';
-- Exercise the public and the proxied function
SELECT obs_get_demographic_snapshot(ST_SetSRID(ST_Point(-73.936669 , 40.704512), 4326), '2009 - 2013'::text, '"us.census.tiger".block_group'::text);
SELECT obs_get_segment_snapshot(ST_SetSRID(ST_Point(-73.936669 , 40.704512), 4326), '"us.census.tiger".block_group'::text);
@ -241,4 +249,5 @@ SELECT obs_getavailableboundaries(ST_SetSRID(ST_Point(-73.936669 , 40.704512), 4
SELECT obs_getmeta(ST_SetSRID(ST_Point(-73.9, 40.7), 4326), '[{"numer_id": "us.census.acs.B01003001"}]', 1, 1, 1000);
SELECT obs_getdata(ARRAY['36047'], obs_getmeta(st_setsrid(st_point(-73.9, 40.7), 4326), '[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]', 1, 1, 1000));
SELECT obs_getdata(ARRAY[(ST_SetSRID(ST_Point(-73.9, 40.7), 4326), 1)::geomval], obs_getmeta(st_setsrid(st_point(-73.9, 40.7), 4326), '[{"numer_id": "us.census.acs.B01003001"}]'));
SELECT obs_metadatavalidation(ST_SetSRID(ST_Point(-73.9, 40.7), 4326), 'ST_Polygon', '[{"numer_id": "us.census.acs.B01003001"}]', 1000);

View File

@ -2,4 +2,49 @@
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION cdb_dataservices_server UPDATE TO '0.27.0'" to load this file. \quit
-- HERE goes your code to upgrade/downgrade
-- HERE goes your code to upgrade/downgrade
CREATE OR REPLACE FUNCTION cdb_dataservices_server._OBS_MetadataValidation(
username TEXT,
orgname TEXT,
geometry_extent Geometry(Geometry, 4326),
geometry_type text,
params JSON,
target_geoms INTEGER DEFAULT NULL)
RETURNS TABLE(valid boolean, errors text[]) AS $$
CONNECT cdb_dataservices_server._obs_server_conn_str(username, orgname);
SELECT * FROM cdb_observatory.OBS_MetadataValidation(geometry_extent, geometry_type, params, target_geoms);
$$ LANGUAGE plproxy;
CREATE OR REPLACE FUNCTION cdb_dataservices_server.OBS_MetadataValidation(
username TEXT,
orgname TEXT,
geometry_extent Geometry(Geometry, 4326),
geometry_type text,
params JSON,
target_geoms INTEGER DEFAULT NULL)
RETURNS TABLE(valid boolean, errors text[]) AS $$
from cartodb_services.metrics import metrics
from cartodb_services.tools import Logger,LoggerConfig
plpy.execute("SELECT cdb_dataservices_server._connect_to_redis('{0}')".format(username))
redis_conn = GD["redis_connection_{0}".format(username)]['redis_metrics_connection']
plpy.execute("SELECT cdb_dataservices_server._get_obs_config({0}, {1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname)))
user_obs_config = GD["user_obs_config_{0}".format(username)]
plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
logger_config = GD["logger_config"]
logger = Logger(logger_config)
with metrics('obs_metadatavalidation', user_obs_config, logger):
try:
obs_plan = plpy.prepare("SELECT * FROM cdb_dataservices_server._OBS_MetadataValidation($1, $2, $3, $4, $5, $6);", ["text", "text", "Geometry (Geometry, 4326)", "text", "json", "integer"])
result = plpy.execute(obs_plan, [username, orgname, geometry_extent, geometry_type, params, target_geoms])
if result:
return result
else:
return []
except BaseException as e:
import sys
logger.error('Error trying to OBS_MetadataValidation', sys.exc_info(), data={"username": username, "orgname": orgname})
raise Exception('Error trying to OBS_MetadataValidation')
$$ LANGUAGE plpythonu;

View File

@ -2,4 +2,6 @@
-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION cdb_dataservices_server UPDATE TO '0.26.0'" to load this file. \quit
-- HERE goes your code to upgrade/downgrade
-- HERE goes your code to upgrade/downgrade
DROP FUNCTION IF EXISTS cdb_dataservices_server.OBS_MetadataValidation(TEXT, TEXT, Geometry(Geometry, 4326), text, JSON, INTEGER);
DROP FUNCTION IF EXISTS cdb_dataservices_server._OBS_MetadataValidation(Geometry(Geometry, 4326), text, JSON, INTEGER);

View File

@ -737,3 +737,49 @@ RETURNS JSON AS $$
logger.error('Error trying to OBS_GetMeta', sys.exc_info(), data={"username": username, "orgname": orgname})
raise Exception('Error trying to OBS_GetMeta')
$$ LANGUAGE plpythonu;
CREATE OR REPLACE FUNCTION cdb_dataservices_server._OBS_MetadataValidation(
username TEXT,
orgname TEXT,
geometry_extent Geometry(Geometry, 4326),
geometry_type text,
params JSON,
target_geoms INTEGER DEFAULT NULL)
RETURNS TABLE(valid boolean, errors text[]) AS $$
CONNECT cdb_dataservices_server._obs_server_conn_str(username, orgname);
SELECT * FROM cdb_observatory.OBS_MetadataValidation(geometry_extent, geometry_type, params, target_geoms);
$$ LANGUAGE plproxy;
CREATE OR REPLACE FUNCTION cdb_dataservices_server.OBS_MetadataValidation(
username TEXT,
orgname TEXT,
geometry_extent Geometry(Geometry, 4326),
geometry_type text,
params JSON,
target_geoms INTEGER DEFAULT NULL)
RETURNS TABLE(valid boolean, errors text[]) AS $$
from cartodb_services.metrics import metrics
from cartodb_services.tools import Logger,LoggerConfig
plpy.execute("SELECT cdb_dataservices_server._connect_to_redis('{0}')".format(username))
redis_conn = GD["redis_connection_{0}".format(username)]['redis_metrics_connection']
plpy.execute("SELECT cdb_dataservices_server._get_obs_config({0}, {1})".format(plpy.quote_nullable(username), plpy.quote_nullable(orgname)))
user_obs_config = GD["user_obs_config_{0}".format(username)]
plpy.execute("SELECT cdb_dataservices_server._get_logger_config()")
logger_config = GD["logger_config"]
logger = Logger(logger_config)
with metrics('obs_metadatavalidation', user_obs_config, logger):
try:
obs_plan = plpy.prepare("SELECT * FROM cdb_dataservices_server._OBS_MetadataValidation($1, $2, $3, $4, $5, $6);", ["text", "text", "Geometry (Geometry, 4326)", "text", "json", "integer"])
result = plpy.execute(obs_plan, [username, orgname, geometry_extent, geometry_type, params, target_geoms])
if result:
return result
else:
return []
except BaseException as e:
import sys
logger.error('Error trying to OBS_MetadataValidation', sys.exc_info(), data={"username": username, "orgname": orgname})
raise Exception('Error trying to OBS_MetadataValidation')
$$ LANGUAGE plpythonu;

View File

@ -273,3 +273,14 @@ SELECT exists(SELECT *
t
(1 row)
SELECT exists(SELECT *
FROM pg_proc p
INNER JOIN pg_namespace ns ON (p.pronamespace = ns.oid)
WHERE ns.nspname = 'cdb_dataservices_server'
AND proname = 'obs_metadatavalidation'
AND oidvectortypes(p.proargtypes) = 'text, text, geometry, text, json, integer');
exists
--------
t
(1 row)

View File

@ -172,3 +172,10 @@ SELECT exists(SELECT *
WHERE ns.nspname = 'cdb_dataservices_server'
AND proname = 'obs_legacybuildermetadata'
AND oidvectortypes(p.proargtypes) = 'text, text, text');
SELECT exists(SELECT *
FROM pg_proc p
INNER JOIN pg_namespace ns ON (p.pronamespace = ns.oid)
WHERE ns.nspname = 'cdb_dataservices_server'
AND proname = 'obs_metadatavalidation'
AND oidvectortypes(p.proargtypes) = 'text, text, geometry, text, json, integer');