Add CDB_EstimateRowCount function

See #295
This commit is contained in:
Javier Goizueta 2017-04-07 15:35:48 +02:00
parent 795d92da8d
commit a486eed2e3
6 changed files with 78 additions and 0 deletions

View File

@ -1,3 +1,7 @@
0.19.0 (2017-04-XX)
* Add new function `CDB_EstimateRowCount` #295
0.18.5 (2016-11-30) 0.18.5 (2016-11-30)
* Add to new overview creation strategies #290 * Add to new overview creation strategies #290

View File

@ -0,0 +1,25 @@
Estimate the number of rows of a query.
#### Using the function
```sql
SELECT CDB_EstimateRowCount($$
UPDATE addresses SET the_geom = cdb_geocode_street_point(addr, city, state, 'US');
$$) AS row_count;
```
Result:
```
row_count
-----------
5
(1 row)
```
#### Arguments
CDB_EstimateRowCount(query)
* **query** text: the SQL query to estimate the row count for.

View File

@ -0,0 +1,29 @@
-- Internal function to generate stats for a table if they don't exist
CREATE OR REPLACE FUNCTION _CDB_GenerateStats(reloid REGCLASS)
RETURNS VOID
AS $$
DECLARE
num_cols INTEGER;
BEGIN
SELECT COUNT(*) FROM pg_catalog.pg_statistic WHERE starelid = reloid INTO num_cols;
IF num_cols = 0 THEN
EXECUTE Format('ANALYZE %s;', reloid);
END IF;
END
$$ LANGUAGE 'plpgsql' VOLATILE STRICT SECURITY DEFINER;
-- Return a row count estimate of the result of a query using statistics
CREATE OR REPLACE FUNCTION CDB_EstimateRowCount(query text)
RETURNS Numeric
AS $$
DECLARE
plan JSON;
BEGIN
-- Make sure statistics exist for all the tables of the query
PERFORM _CDB_GenerateStats(tabname) FROM unnest(CDB_QueryTablesText(query)) AS tabname;
-- Use the query planner to obtain an estimate of the number of result rows
EXECUTE 'EXPLAIN (FORMAT JSON) ' || query INTO STRICT plan;
RETURN plan->0->'Plan'->'Plan Rows';
END
$$ LANGUAGE 'plpgsql' VOLATILE STRICT;

View File

@ -0,0 +1 @@
../scripts-available/CDB_EstimateRowCount.sql

View File

@ -0,0 +1,10 @@
SET client_min_messages TO error;
\set VERBOSITY terse
CREATE TABLE tmptab1(id INT);
INSERT INTO tmptab1(id) VALUES (1), (2), (3);
CREATE TABLE tmptab2(id INT, value NUMERIC);
INSERT INTO tmptab2(id, value) VALUES (1, 10.0), (2, 20.0);
SELECT CDB_EstimateRowCount('SELECT SUM(value) FROM tmptab1 INNER JOIN tmptab2 ON (tmptab1.id = tmptab2.id);') AS row_count;
SELECT CDB_EstimateRowCount('UPDATE tmptab2 SET value = 30 WHERE id=2;') AS row_count;
DROP TABLE tmptab2;
DROP TABLE tmptab1;

View File

@ -0,0 +1,9 @@
SET
CREATE TABLE
INSERT 0 3
CREATE TABLE
INSERT 0 2
1
1
DROP TABLE
DROP TABLE