Merge pull request #121 from CartoDB/text_geom_columns

Handle text 'the_geom' columns as desired in #117
This commit is contained in:
Rafa de la Torre 2015-08-24 14:47:27 +02:00
commit 7ed3c29f82
3 changed files with 238 additions and 76 deletions

View File

@ -654,17 +654,29 @@ END;
$$ LANGUAGE 'plpgsql'; $$ LANGUAGE 'plpgsql';
DROP FUNCTION IF EXISTS _CDB_Has_Usable_Geom(regclass);
CREATE OR REPLACE FUNCTION _CDB_Has_Usable_Geom(reloid REGCLASS) CREATE OR REPLACE FUNCTION _CDB_Has_Usable_Geom(reloid REGCLASS)
RETURNS BOOLEAN RETURNS RECORD
AS $$ AS $$
DECLARE DECLARE
r1 RECORD; r1 RECORD;
r2 RECORD; r2 RECORD;
rv RECORD;
const RECORD; const RECORD;
has_geom BOOLEAN := false; has_geom BOOLEAN := false;
has_mercgeom BOOLEAN := false; has_mercgeom BOOLEAN := false;
has_geom_name TEXT; has_geom_name TEXT;
has_mercgeom_name TEXT; has_mercgeom_name TEXT;
-- In case 'the_geom' is a text column
text_geom_column BOOLEAN := false;
text_geom_column_name TEXT := '';
text_geom_column_srid BOOLEAN := true;
-- Utility variables
srid INTEGER;
str TEXT; str TEXT;
sql TEXT; sql TEXT;
BEGIN BEGIN
@ -694,13 +706,50 @@ BEGIN
RAISE DEBUG 'CDB(_CDB_Has_Usable_Geom): %', Format('checking column ''%s''', r1.attname); RAISE DEBUG 'CDB(_CDB_Has_Usable_Geom): %', Format('checking column ''%s''', r1.attname);
-- Name collision: right name but wrong type, rename it! -- Name collision: right name (the_geom, the_geomwebmercator?) but wrong type...
IF r1.typname != 'geometry' AND r1.attname = r1.desired_attname THEN IF r1.typname != 'geometry' AND r1.attname = r1.desired_attname THEN
-- Maybe it's a geometry column hiding in a text column?
IF r1.typname IN ('text','varchar','char') THEN
RAISE DEBUG 'CDB(_CDB_Has_Usable_Geom): %', Format('column ''%s'' is a text column', r1.attname);
BEGIN
sql := Format('SELECT Max(ST_SRID(%I::geometry)) AS srid FROM %I', r1.attname, reloid::text);
EXECUTE sql INTO srid;
-- This gets skipped if EXCEPTION happens
-- Let the table writer know we need to convert from text
RAISE DEBUG 'CDB(_CDB_Has_Usable_Geom): %', Format('column ''%s'' can be cast from text to geometry', r1.attname);
text_geom_column := true;
text_geom_column_name := r1.attname;
-- Let the table writer know we need to force an SRID
IF srid = 0 THEN
text_geom_column_srid := false;
END IF;
-- Nope, the text in the column can't be converted into geometry
-- so rename it out of the way
EXCEPTION
WHEN others THEN
IF SQLERRM = 'parse error - invalid geometry' THEN
text_geom_column := false;
str := _CDB_Unique_Column_Name(reloid, r1.attname);
sql := Format('ALTER TABLE %s RENAME COLUMN %s TO %I', reloid::text, r1.attname, str);
PERFORM _CDB_SQL(sql,'_CDB_Has_Usable_Geom');
RAISE DEBUG 'CDB(_CDB_Has_Usable_Geom): %',
Format('Text column %s is not convertible to geometry, renamed to %s', r1.attname, str);
ELSE
RAISE EXCEPTION 'CDB(_CDB_Has_Usable_Geom) UNEXPECTED ERROR';
END IF;
END;
-- Just change its name so we can write a new column into that name.
ELSE
str := _CDB_Unique_Column_Name(reloid, r1.attname); str := _CDB_Unique_Column_Name(reloid, r1.attname);
sql := Format('ALTER TABLE %s RENAME COLUMN %s TO %I', reloid::text, r1.attname, str); sql := Format('ALTER TABLE %s RENAME COLUMN %s TO %I', reloid::text, r1.attname, str);
PERFORM _CDB_SQL(sql,'_CDB_Has_Usable_Geom'); PERFORM _CDB_SQL(sql,'_CDB_Has_Usable_Geom');
RAISE DEBUG 'CDB(_CDB_Has_Usable_Geom): %', RAISE DEBUG 'CDB(_CDB_Has_Usable_Geom): %',
Format('%s is the wrong type, renamed to %s', r1.attname, str); Format('%s is the wrong type, renamed to %s', r1.attname, str);
END IF;
-- Found a geometry column! -- Found a geometry column!
ELSIF r1.typname = 'geometry' THEN ELSIF r1.typname = 'geometry' THEN
@ -719,6 +768,12 @@ BEGIN
has_mercgeom_name := r1.attname; has_mercgeom_name := r1.attname;
END IF; END IF;
-- If it's an unknown SRID, we need to know that too
ELSIF r1.srid = 0 OR _CDB_Geometry_SRID(reloid, r1.attname) = 0 THEN
-- Unknown SRID, we'll have to fill it in later
text_geom_column_srid := true;
END IF; END IF;
END IF; END IF;
@ -737,13 +792,20 @@ BEGIN
PERFORM _CDB_SQL(sql,'_CDB_Has_Usable_Geom'); PERFORM _CDB_SQL(sql,'_CDB_Has_Usable_Geom');
END IF; END IF;
SELECT
-- If table is perfect (no transforms required), return TRUE! -- If table is perfect (no transforms required), return TRUE!
RETURN has_geom AND has_mercgeom; has_geom AND has_mercgeom AS has_usable_geoms,
-- If the geometry column is hiding in a text field, return enough info to deal w/ it.
text_geom_column, text_geom_column_name, text_geom_column_srid
INTO rv;
RAISE DEBUG 'CDB(_CDB_Has_Usable_Geom): %', Format('returning %s', rv);
RETURN rv;
END; END;
$$ LANGUAGE 'plpgsql'; $$ LANGUAGE 'plpgsql';
-- Create a copy of the table. Assumes that the "Has usable" functions -- Create a copy of the table. Assumes that the "Has usable" functions
-- have already been run, so that if there is a 'cartodb_id' column, it is -- have already been run, so that if there is a 'cartodb_id' column, it is
-- a "good" one, and the same for the geometry columns. If all the required -- a "good" one, and the same for the geometry columns. If all the required
@ -771,12 +833,13 @@ DECLARE
rec RECORD; rec RECORD;
const RECORD; const RECORD;
gc RECORD;
sql TEXT; sql TEXT;
str TEXT; str TEXT;
table_srid INTEGER; table_srid INTEGER;
geom_srid INTEGER;
has_usable_primary_key BOOLEAN; has_usable_primary_key BOOLEAN;
has_usable_geoms BOOLEAN;
BEGIN BEGIN
@ -799,7 +862,7 @@ BEGIN
-- See if there is a primary key column we need to carry along to the -- See if there is a primary key column we need to carry along to the
-- new table. If this is true, it implies there is an indexed -- new table. If this is true, it implies there is an indexed
-- primary key of integer type named (by default) cartodb_id -- primary key of integer type named (by default) cartodb_id
SELECT _CDB_Has_Usable_Primary_ID(reloid) AS has_usable_primary_key SELECT _CDB_Has_Usable_Primary_ID(reloid)
INTO STRICT has_usable_primary_key; INTO STRICT has_usable_primary_key;
RAISE DEBUG 'CDB(_CDB_Rewrite_Table): has_usable_primary_key %', has_usable_primary_key; RAISE DEBUG 'CDB(_CDB_Rewrite_Table): has_usable_primary_key %', has_usable_primary_key;
@ -808,24 +871,29 @@ BEGIN
-- on the table. If they are, we don't need to do any bulk -- on the table. If they are, we don't need to do any bulk
-- transformation of the table, we can just ensure proper -- transformation of the table, we can just ensure proper
-- indexes are in place and apply a rename -- indexes are in place and apply a rename
SELECT _CDB_Has_Usable_Geom(reloid) AS has_usable_geoms SELECT *
INTO STRICT has_usable_geoms; FROM _CDB_Has_Usable_Geom(reloid)
AS (has_usable_geoms boolean,
text_geom_column boolean,
text_geom_column_name text,
text_geom_column_srid boolean)
INTO STRICT gc;
RAISE DEBUG 'CDB(_CDB_Rewrite_Table): has_usable_geoms %', has_usable_geoms; RAISE DEBUG 'CDB(_CDB_Rewrite_Table): has_usable_geoms %', gc.has_usable_geoms;
-- We can only avoid a rewrite if both the key and -- We can only avoid a rewrite if both the key and
-- geometry are usable -- geometry are usable
-- No table re-write is required, BUT a rename is required to -- No table re-write is required, BUT a rename is required to
-- a destination schema, so do that now -- a destination schema, so do that now
IF has_usable_primary_key AND has_usable_geoms AND destschema != relschema THEN IF has_usable_primary_key AND gc.has_usable_geoms AND destschema != relschema THEN
RAISE DEBUG 'CDB(_CDB_Rewrite_Table): perfect table needs to be moved to schema (%)', destschema; RAISE DEBUG 'CDB(_CDB_Rewrite_Table): perfect table needs to be moved to schema (%)', destschema;
PERFORM _CDB_SQL(Format('ALTER TABLE %s SET SCHEMA %I', reloid::text, destschema), '_CDB_Rewrite_Table'); PERFORM _CDB_SQL(Format('ALTER TABLE %s SET SCHEMA %I', reloid::text, destschema), '_CDB_Rewrite_Table');
RETURN true; RETURN true;
-- Don't move anything, just make sure our destination information is set right -- Don't move anything, just make sure our destination information is set right
ELSIF has_usable_primary_key AND has_usable_geoms AND destschema = relschema THEN ELSIF has_usable_primary_key AND gc.has_usable_geoms AND destschema = relschema THEN
RAISE DEBUG 'CDB(_CDB_Rewrite_Table): perfect table in the perfect place'; RAISE DEBUG 'CDB(_CDB_Rewrite_Table): perfect table in the perfect place';
RETURN true; RETURN true;
@ -848,7 +916,6 @@ BEGIN
IF destschema = relschema THEN IF destschema = relschema THEN
copyname := Format('%I.%I', destschema, Format('%s_%s', destname, salt)); copyname := Format('%I.%I', destschema, Format('%s_%s', destname, salt));
ELSE ELSE
--copyname := destschema || '.' || destname;
copyname := Format('%I.%I', destschema, destname); copyname := Format('%I.%I', destschema, destname);
END IF; END IF;
@ -863,14 +930,57 @@ BEGIN
END IF; END IF;
-- Add the geometry columns! -- Add the geometry columns!
IF has_usable_geoms THEN IF gc.has_usable_geoms THEN
sql := sql || ',' || const.geomcol || ',' || const.mercgeomcol; sql := sql || ',' || const.geomcol || ',' || const.mercgeomcol;
ELSE ELSE
-- This gets complicated: we have to make sure the -- Arg, this "geometry" column is actually text!!
-- geometry column we are using can be transformed into -- OK, we tested back in our geometry column research that it could
-- geographics, which means it needs to have a valid -- be safely cast to geometry, so let's do that.
-- SRID. IF gc.text_geom_column THEN
WITH t AS (
SELECT
a.attname,
CASE WHEN NOT gc.text_geom_column_srid THEN 'ST_SetSRID(' ELSE '' END AS missing_srid_start,
CASE WHEN NOT gc.text_geom_column_srid THEN ',4326)' ELSE '' END AS missing_srid_end
FROM pg_class c
JOIN pg_attribute a ON a.attrelid = c.oid
JOIN pg_type t ON a.atttypid = t.oid
WHERE c.oid = reloid
AND t.typname IN ('text','varchar','char')
AND a.attnum > 0
AND a.attname = gc.text_geom_column_name
AND NOT a.attisdropped
ORDER BY a.attnum
LIMIT 1
)
SELECT ', ST_Transform('
|| t.missing_srid_start || t.attname || '::geometry' || t.missing_srid_end
|| ',4326)::Geometry(GEOMETRY,4326) AS '
|| const.geomcol
|| ', cartodb.CDB_TransformToWebmercator('
|| t.missing_srid_start || t.attname || '::geometry' || t.missing_srid_end
|| ')::Geometry(GEOMETRY,3857) AS '
|| const.mercgeomcol,
t.attname
INTO geom_transform_sql, geom_column_source
FROM t;
IF NOT FOUND THEN
-- We checked that this column existed already, it bloody well
-- better be found.
RAISE EXCEPTION 'CDB(_CDB_Rewrite_Table): Text column % is missing!', gc.text_geom_column_name;
ELSE
sql := sql || geom_transform_sql;
END IF;
-- There is at least one true geometry column in here, we'll
-- reproject that into the projections we need.
ELSE
-- Find the column we are going to be working with (the first
-- column with type "geometry")
SELECT a.attname SELECT a.attname
INTO rec INTO rec
FROM pg_class c FROM pg_class c
@ -883,20 +993,16 @@ BEGIN
ORDER BY a.attnum ORDER BY a.attnum
LIMIT 1; LIMIT 1;
IF NOT FOUND THEN -- The SRID could be undeclared at the table level, but still
-- If there is no geometry column, we continue making a -- exist in the geometries themselves. We first find our geometry
-- non-spatial table. This is important for folks who want -- column and read the first SRID off it it, if there is a row
-- their tables to invalidate the SQL API -- to read.
-- cache on update/insert/delete. IF FOUND THEN
geom_column_source := '';
sql := sql || ',NULL::geometry(Geometry,4326) AS ' || const.geomcol;
sql := sql || ',NULL::geometry(Geometry,3857) AS ' || const.mercgeomcol;
ELSE
-- table_srid = _CDB_Geometry_SRID(reloid, rec.attname);
EXECUTE Format('SELECT ST_SRID(%s) AS srid FROM %s LIMIT 1', rec.attname, reloid::text) EXECUTE Format('SELECT ST_SRID(%s) AS srid FROM %s LIMIT 1', rec.attname, reloid::text)
INTO rec; INTO geom_srid;
ELSE
geom_srid := 0;
END IF;
-- The geometry columns weren't in the right projection, -- The geometry columns weren't in the right projection,
-- so we need to find the first decent geometry column -- so we need to find the first decent geometry column
@ -904,32 +1010,48 @@ BEGIN
-- and another to 3857. Then remember its name so we can -- and another to 3857. Then remember its name so we can
-- ignore it when we build the list of other columns to -- ignore it when we build the list of other columns to
-- add to the output table -- add to the output table
SELECT ',ST_Transform(' WITH t AS (
|| a.attname SELECT
|| ',4326)::Geometry(' a.attname,
|| postgis_typmod_type(a.atttypmod) postgis_typmod_type(a.atttypmod) AS geomtype,
|| ', 4326) AS ' CASE WHEN postgis_typmod_srid(a.atttypmod) = 0 AND srid.srid = 0 THEN 'ST_SetSRID(' ELSE '' END AS missing_srid_start,
|| const.geomcol CASE WHEN postgis_typmod_srid(a.atttypmod) = 0 AND srid.srid = 0 THEN ',4326)' ELSE '' END AS missing_srid_end
|| ', cartodb.CDB_TransformToWebmercator(' || a.attname || ')::Geometry('
|| postgis_typmod_type(a.atttypmod)
|| ', 3857) AS '
|| const.mercgeomcol,
a.attname
INTO geom_transform_sql, geom_column_source
FROM pg_class c FROM pg_class c
JOIN pg_attribute a ON a.attrelid = c.oid JOIN pg_attribute a ON a.attrelid = c.oid
JOIN pg_type t ON a.atttypid = t.oid, JOIN pg_type t ON a.atttypid = t.oid,
( SELECT rec.srid AS srid ) AS srid ( SELECT geom_srid AS srid ) AS srid
-- ( SELECT table_srid AS srid ) AS srid
WHERE c.oid = reloid WHERE c.oid = reloid
AND t.typname = 'geometry' AND t.typname = 'geometry'
AND a.attnum > 0 AND a.attnum > 0
AND NOT a.attisdropped AND NOT a.attisdropped
AND (postgis_typmod_srid(a.atttypmod) > 0 OR srid.srid > 0)
ORDER BY a.attnum ORDER BY a.attnum
LIMIT 1; LIMIT 1
)
SELECT ', ST_Transform('
|| t.missing_srid_start || t.attname || t.missing_srid_end
|| ',4326)::Geometry('
|| t.geomtype
|| ',4326) AS '
|| const.geomcol
|| ', cartodb.CDB_TransformToWebmercator('
|| t.missing_srid_start || t.attname || t.missing_srid_end
|| ')::Geometry('
|| t.geomtype
|| ',3857) AS '
|| const.mercgeomcol,
t.attname
INTO geom_transform_sql, geom_column_source
FROM t;
IF FOUND THEN IF NOT FOUND THEN
-- If there are no geometry columns, we continue making a
-- non-spatial table. This is important for folks who want
-- their tables to invalidate the SQL API
-- cache on update/insert/delete.
geom_column_source := '';
sql := sql || ',NULL::geometry(Geometry,4326) AS ' || const.geomcol;
sql := sql || ',NULL::geometry(Geometry,3857) AS ' || const.mercgeomcol;
ELSE
sql := sql || geom_transform_sql; sql := sql || geom_transform_sql;
END IF; END IF;

View File

@ -181,7 +181,31 @@ DROP TABLE t;
-- table with existing cartodb_id field of type int4 not sequenced -- table with existing cartodb_id field of type int4 not sequenced
CREATE TABLE t AS SELECT 1::int4 as cartodb_id; CREATE TABLE t AS SELECT 1::int4 as cartodb_id;
SELECT CDB_CartodbfyTableCheck('t', 'unsequenced cartodb_id'); SELECT CDB_CartodbfyTableCheck('t', 'unsequenced cartodb_id');
select cartodb_id FROM t; SELECT cartodb_id FROM t;
DROP TABLE t;
-- table with text geometry column
CREATE TABLE t AS SELECT 'SRID=4326;POINT(1 1)'::text AS the_geom, 1::int4 as cartodb_id;
SELECT CDB_CartodbfyTableCheck('t', 'text the_geom column');
SELECT cartodb_id FROM t;
DROP TABLE t;
-- table with text geometry column, no SRS
CREATE TABLE t AS SELECT 'POINT(1 1)'::text AS the_geom, 1::int4 as cartodb_id;
SELECT CDB_CartodbfyTableCheck('t', 'text the_geom column, no srs');
SELECT cartodb_id FROM t;
DROP TABLE t;
-- table with text geometry column, unusual SRS
CREATE TABLE t AS SELECT 'SRID=26910;POINT(1 1)'::text AS the_geom, 1::int4 as cartodb_id;
SELECT CDB_CartodbfyTableCheck('t', 'text the_geom column, srs = 26819');
SELECT cartodb_id FROM t;
DROP TABLE t;
-- table with text unparseable geometry column
CREATE TABLE t AS SELECT 'SRID=26910;PONT(1 1)'::text AS the_geom, 1::int4 as cartodb_id;
SELECT CDB_CartodbfyTableCheck('t', 'text the_geom column, unparseable content');
SELECT cartodb_id FROM t;
DROP TABLE t; DROP TABLE t;
-- table with existing cartodb_id serial primary key -- table with existing cartodb_id serial primary key

View File

@ -31,6 +31,22 @@ SELECT 1
unsequenced cartodb_id cartodbfied fine unsequenced cartodb_id cartodbfied fine
1 1
DROP TABLE DROP TABLE
SELECT 1
text the_geom column cartodbfied fine
1
DROP TABLE
SELECT 1
text the_geom column, no srs cartodbfied fine
1
DROP TABLE
SELECT 1
text the_geom column, srs = 26819 cartodbfied fine
1
DROP TABLE
SELECT 1
text the_geom column, unparseable content cartodbfied fine
1
DROP TABLE
CREATE TABLE CREATE TABLE
cartodb_id serial primary key cartodbfied fine cartodb_id serial primary key cartodbfied fine
t_pkey|cartodb_id t_pkey|cartodb_id