Merge pull request #298 from CartoDB/295-estimate-row-count

Add CDB_EstimateRowCount function
This commit is contained in:
Javier Goizueta 2017-04-11 11:01:31 +02:00 committed by GitHub
commit 4193ff3874
12 changed files with 120 additions and 9 deletions

View File

@ -1,14 +1,41 @@
language: c
addons:
postgresql: 9.3
postgresql: 9.5
before_install:
# Add custom PPAs from cartodb
- sudo add-apt-repository -y ppa:cartodb/postgresql-9.5
- sudo add-apt-repository -y ppa:cartodb/gis
- sudo add-apt-repository -y ppa:cartodb/gis-testing
- sudo apt-get update
#- sudo apt-get install -q postgresql-9.3-postgis-2.1
- sudo apt-get update
- sudo apt-get install -q postgresql-server-dev-9.3
- sudo apt-get install -q postgresql-plpython-9.3
# Force instalation of libgeos-3.5.0 (presumably needed because of existing version of postgis)
- sudo apt-get -y install libgeos-3.5.0=3.5.0-1cdb2
# Install postgres db and build deps
- sudo /etc/init.d/postgresql stop # stop travis default instance
- sudo apt-get -y remove --purge postgresql-9.1
- sudo apt-get -y remove --purge postgresql-9.2
- sudo apt-get -y remove --purge postgresql-9.3
- sudo apt-get -y remove --purge postgresql-9.4
- sudo apt-get -y remove --purge postgresql-9.5
- sudo rm -rf /var/lib/postgresql/
- sudo rm -rf /var/log/postgresql/
- sudo rm -rf /etc/postgresql/
- sudo apt-get -y remove --purge postgis-2.2
- sudo apt-get -y autoremove
- sudo apt-get -y install postgresql-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-server-dev-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-plpython-9.5=9.5.2-3cdb2
- sudo apt-get -y install postgresql-9.5-postgis-scripts=2.2.2.0-cdb2
- sudo apt-get -y install postgresql-9.5-postgis-2.2=2.2.2.0-cdb2
# configure it to accept local connections from postgres
- echo -e "# TYPE DATABASE USER ADDRESS METHOD \nlocal all postgres trust\nlocal all all trust\nhost all all 127.0.0.1/32 trust" \
| sudo tee /etc/postgresql/9.5/main/pg_hba.conf
- sudo /etc/init.d/postgresql restart 9.5
script:
- make

View File

@ -1,3 +1,7 @@
0.19.0 (2017-04-XX)
* Add new function `CDB_EstimateRowCount` #295
0.18.5 (2016-11-30)
* Add to new overview creation strategies #290

View File

@ -0,0 +1,25 @@
Estimate the number of rows of a query.
#### Using the function
```sql
SELECT CDB_EstimateRowCount($$
UPDATE addresses SET the_geom = cdb_geocode_street_point(addr, city, state, 'US');
$$) AS row_count;
```
Result:
```
row_count
-----------
5
(1 row)
```
#### Arguments
CDB_EstimateRowCount(query)
* **query** text: the SQL query to estimate the row count for.

View File

@ -0,0 +1,31 @@
-- Internal function to generate stats for a table if they don't exist
CREATE OR REPLACE FUNCTION _CDB_GenerateStats(reloid REGCLASS)
RETURNS VOID
AS $$
DECLARE
has_stats BOOLEAN;
BEGIN
SELECT EXISTS (
SELECT * FROM pg_catalog.pg_statistic WHERE starelid = reloid
) INTO has_stats;
IF NOT has_stats THEN
EXECUTE Format('ANALYZE %s;', reloid);
END IF;
END
$$ LANGUAGE 'plpgsql' VOLATILE STRICT SECURITY DEFINER;
-- Return a row count estimate of the result of a query using statistics
CREATE OR REPLACE FUNCTION CDB_EstimateRowCount(query text)
RETURNS Numeric
AS $$
DECLARE
plan JSON;
BEGIN
-- Make sure statistics exist for all the tables of the query
PERFORM _CDB_GenerateStats(tabname) FROM unnest(CDB_QueryTablesText(query)) AS tabname;
-- Use the query planner to obtain an estimate of the number of result rows
EXECUTE 'EXPLAIN (FORMAT JSON) ' || query INTO STRICT plan;
RETURN plan->0->'Plan'->'Plan Rows';
END
$$ LANGUAGE 'plpgsql' VOLATILE STRICT;

View File

@ -0,0 +1 @@
../scripts-available/CDB_EstimateRowCount.sql

View File

@ -1,5 +1,6 @@
BEGIN
CREATE TABLE
COPY 3
none||
only_com_dec|.|,
only_dot_dec|,|.

View File

@ -0,0 +1,10 @@
SET client_min_messages TO error;
\set VERBOSITY terse
CREATE TABLE tmptab1(id INT);
INSERT INTO tmptab1(id) VALUES (1), (2), (3);
CREATE TABLE tmptab2(id INT, value NUMERIC);
INSERT INTO tmptab2(id, value) VALUES (1, 10.0), (2, 20.0);
SELECT CDB_EstimateRowCount('SELECT SUM(value) FROM tmptab1 INNER JOIN tmptab2 ON (tmptab1.id = tmptab2.id);') AS row_count;
SELECT CDB_EstimateRowCount('UPDATE tmptab2 SET value = 30 WHERE id=2;') AS row_count;
DROP TABLE tmptab2;
DROP TABLE tmptab1;

View File

@ -0,0 +1,9 @@
SET
CREATE TABLE
INSERT 0 3
CREATE TABLE
INSERT 0 2
1
1
DROP TABLE
DROP TABLE

View File

@ -1,3 +1,6 @@
SET client_min_messages TO error;
\set VERBOSITY terse
WITH q AS ( SELECT CDB_QueryStatements('
SELECT * FROM geometry_columns;
') as statement )

View File

@ -1,3 +1,4 @@
SET
1|1|SELECT * FROM geometry_columns
2|1|SELECT * FROM geometry_columns
3|1|SELECT * FROM geometry_columns

View File

@ -1,3 +1,5 @@
SET client_min_messages TO warning;
\set VERBOSITY terse
WITH inp AS ( select 'SELECT * FROM geometry_columns'::text as q )
SELECT q, CDB_QueryTables(q) from inp;

View File

@ -1,17 +1,14 @@
SET
SELECT * FROM geometry_columns|{pg_catalog.pg_attribute,pg_catalog.pg_class,pg_catalog.pg_constraint,pg_catalog.pg_namespace,pg_catalog.pg_type}
SELECT a.attname FROM pg_class c JOIN pg_attribute a on (a.attrelid = c.oid)|{pg_catalog.pg_attribute,pg_catalog.pg_class}
CREATE table "my'tab;le" as select 1|{}
SELECT a.oid, b.oid FROM pg_class a, pg_class b|{pg_catalog.pg_class}
SELECT 1 as col1; select 2 as col2|{}
WARNING: CDB_QueryTables cannot explain query: select 1 from nonexistant (42P01: relation "nonexistant" does not exist)
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
ERROR: relation "nonexistant" does not exist
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
begin; select * from pg_class; commit;|{pg_catalog.pg_class}
WARNING: CDB_QueryTables cannot explain query: select * from test (42P01: relation "test" does not exist)
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
ERROR: relation "test" does not exist
CONTEXT: PL/pgSQL function cdb_querytables(text) line 3 at RETURN
WITH a AS (select * from pg_class) select * from a|{pg_catalog.pg_class}
CREATE SCHEMA
CREATE TABLE