Merge pull request #588 from CartoDB/copy-cancel

Improve copy[to|from] cancel mechanism
This commit is contained in:
Daniel G. Aubert 2019-05-30 13:39:53 +02:00 committed by GitHub
commit 2848de8593
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 294 additions and 79 deletions

View File

@ -7,6 +7,8 @@ Announcements:
* Upgrade devel dependency `sqlite3` to version `4.0.6`
* Log queries (https://github.com/CartoDB/CartoDB-SQL-API/pull/574)
* Improve batch-queries draining while exiting the process #582
* Implement a mechanism to short out hung connections in copy-from endpoints.
## 3.0.0
Released 2019-02-22

View File

@ -11,6 +11,7 @@ const { RATE_LIMIT_ENDPOINTS_GROUPS } = rateLimitsMiddleware;
const errorHandlerFactory = require('../services/error_handler_factory');
const StreamCopy = require('../services/stream_copy');
const StreamCopyMetrics = require('../services/stream_copy_metrics');
const Throttler = require('../services/throttler-stream');
const zlib = require('zlib');
const { PassThrough } = require('stream');
const handleQueryMiddleware = require('../middlewares/handle-query');
@ -36,7 +37,7 @@ CopyController.prototype.route = function (app) {
dbQuotaMiddleware(),
handleQueryMiddleware(),
handleCopyFrom(this.logger),
errorHandler(),
errorHandler(this.logger),
errorMiddleware()
];
};
@ -51,7 +52,7 @@ CopyController.prototype.route = function (app) {
validateCopyQuery(),
handleQueryMiddleware(),
handleCopyTo(this.logger),
errorHandler(),
errorHandler(this.logger),
errorMiddleware()
];
};
@ -71,7 +72,7 @@ function handleCopyTo (logger) {
// https://github.com/CartoDB/CartoDB-SQL-API/issues/515
const isGzip = req.get('accept-encoding') && req.get('accept-encoding').includes('gzip');
const streamCopy = new StreamCopy(sql, userDbParams);
const streamCopy = new StreamCopy(sql, userDbParams, logger);
const metrics = new StreamCopyMetrics(logger, 'copyto', sql, user, isGzip);
res.header("Content-Disposition", `attachment; filename=${encodeURIComponent(filename)}`);
@ -86,20 +87,13 @@ function handleCopyTo (logger) {
.on('data', data => metrics.addSize(data.length))
.on('error', err => {
metrics.end(null, err);
pgstream.unpipe(res);
return next(err);
})
.on('end', () => metrics.end( streamCopy.getRowCount(StreamCopy.ACTION_TO) ))
.pipe(res)
.on('close', () => {
const err = new Error('Connection closed by client');
pgstream.emit('cancelQuery', err);
pgstream.emit('error', err);
})
.on('error', err => {
pgstream.emit('error', err);
});
.on('end', () => metrics.end(streamCopy.getRowCount()))
.pipe(res)
.on('close', () => pgstream.emit('error', new Error('Connection closed by client')))
.on('error', err => pgstream.emit('error', err));
});
};
}
@ -111,7 +105,8 @@ function handleCopyFrom (logger) {
const COPY_FROM_MAX_POST_SIZE = global.settings.copy_from_max_post_size || 2 * 1024 * 1024 * 1024; // 2 GB
const COPY_FROM_MAX_POST_SIZE_PRETTY = global.settings.copy_from_max_post_size_pretty || '2 GB';
const streamCopy = new StreamCopy(sql, userDbParams);
const streamCopy = new StreamCopy(sql, userDbParams, logger);
const decompress = isGzip ? zlib.createGunzip() : new PassThrough();
const metrics = new StreamCopyMetrics(logger, 'copyfrom', sql, user, isGzip);
streamCopy.getPGStream(StreamCopy.ACTION_FROM, (err, pgstream) => {
@ -119,47 +114,43 @@ function handleCopyFrom (logger) {
return next(err);
}
const throttle = new Throttler(pgstream);
req
.on('data', data => isGzip ? metrics.addGzipSize(data.length) : undefined)
.on('error', err => {
metrics.end(null, err);
pgstream.emit('error', err);
})
.on('close', () => {
const err = new Error('Connection closed by client');
pgstream.emit('cancelQuery', err);
pgstream.emit('error', err);
.on('close', () => pgstream.emit('error', new Error('Connection closed by client')))
.pipe(throttle)
.pipe(decompress)
.on('data', data => {
metrics.addSize(data.length);
if(metrics.size > dbRemainingQuota) {
return pgstream.emit('error', new Error('DB Quota exceeded'));
}
if((metrics.gzipSize || metrics.size) > COPY_FROM_MAX_POST_SIZE) {
return pgstream.emit('error', new Error(
`COPY FROM maximum POST size of ${COPY_FROM_MAX_POST_SIZE_PRETTY} exceeded`
));
}
})
.pipe(isGzip ? zlib.createGunzip() : new PassThrough())
.on('error', err => {
err.message = `Error while gunzipping: ${err.message}`;
metrics.end(null, err);
pgstream.emit('error', err);
})
.on('data', data => {
metrics.addSize(data.length);
if(metrics.size > dbRemainingQuota) {
const quotaError = new Error('DB Quota exceeded');
pgstream.emit('cancelQuery', err);
pgstream.emit('error', quotaError);
}
if((metrics.gzipSize || metrics.size) > COPY_FROM_MAX_POST_SIZE) {
const maxPostSizeError = new Error(
`COPY FROM maximum POST size of ${COPY_FROM_MAX_POST_SIZE_PRETTY} exceeded`
);
pgstream.emit('cancelQuery', err);
pgstream.emit('error', maxPostSizeError);
}
})
.pipe(pgstream)
.pipe(pgstream)
.on('error', err => {
metrics.end(null, err);
req.unpipe(pgstream);
return next(err);
})
.on('end', () => {
metrics.end( streamCopy.getRowCount(StreamCopy.ACTION_FROM) );
metrics.end(streamCopy.getRowCount());
const { time, rows } = metrics;
@ -192,10 +183,10 @@ function validateCopyQuery () {
};
}
function errorHandler () {
function errorHandler (logger) {
return function errorHandlerMiddleware (err, req, res, next) {
if (res.headersSent) {
console.error("EXCEPTION REPORT: " + err.stack);
logger.error(err);
const errorHandler = errorHandlerFactory(err);
res.write(JSON.stringify(errorHandler.getResponse()));
res.end();

View File

@ -3,22 +3,24 @@
const PSQL = require('cartodb-psql');
const copyTo = require('pg-copy-streams').to;
const copyFrom = require('pg-copy-streams').from;
const { Client } = require('pg');
const ACTION_TO = 'to';
const ACTION_FROM = 'from';
const DEFAULT_TIMEOUT = "'5h'";
module.exports = class StreamCopy {
const cancelQuery = pid => `SELECT pg_cancel_backend(${pid}) as cancelled`;
const terminateQuery = pid => `SELECT pg_terminate_backend(${pid}) as terminated`;
const timeoutQuery = timeout => `SET statement_timeout=${timeout}`;
constructor(sql, userDbParams) {
const dbParams = Object.assign({}, userDbParams, {
module.exports = class StreamCopy {
constructor(sql, userDbParams, logger) {
this.dbParams = Object.assign({}, userDbParams, {
port: global.settings.db_batch_port || userDbParams.port
});
this.pg = new PSQL(dbParams);
this.sql = sql;
this.stream = null;
this.timeout = global.settings.copy_timeout || DEFAULT_TIMEOUT;
this.logger = logger;
}
static get ACTION_TO() {
@ -29,44 +31,36 @@ module.exports = class StreamCopy {
return ACTION_FROM;
}
getPGStream(action, cb) {
this.pg.connect((err, client, done) => {
getPGStream(action, callback) {
const pg = new PSQL(this.dbParams);
pg.connect((err, client, done) => {
if (err) {
return cb(err);
return callback(err);
}
client.query('SET statement_timeout=' + this.timeout, (err) => {
client.query(timeoutQuery(this.timeout), (err) => {
if (err) {
return cb(err);
return callback(err);
}
const streamMaker = action === ACTION_TO ? copyTo : copyFrom;
this.stream = streamMaker(this.sql);
this.clientProcessID = client.processID;
this.stream = action === ACTION_TO ? copyTo(this.sql) : copyFrom(this.sql);
const pgstream = client.query(this.stream);
pgstream
.on('end', () => {
if(action === ACTION_TO) {
pgstream.connection.stream.resume();
}
done();
})
.on('error', err => done(err))
.on('cancelQuery', err => {
if(action === ACTION_TO) {
// See https://www.postgresql.org/docs/11/protocol-flow.html#PROTOCOL-COPY
const cancelingClient = new Client(client.connectionParameters);
cancelingClient.cancel(client, pgstream);
if (action === ACTION_TO) {
pgstream.on('end', () => done());
pgstream.on('error', () => this._cancel(client.processID, action));
} else if (action === ACTION_FROM) {
pgstream.on('finish', () => done());
pgstream.on('error', err => client.connection.sendCopyFail(err.message));
}
// see https://node-postgres.com/api/pool#releasecallback
return done(err);
} else if (action === ACTION_FROM) {
client.connection.sendCopyFail('CARTO SQL API: Connection closed by client');
}
});
pgstream.on('error', err => done(err));
cb(null, pgstream);
callback(null, pgstream);
});
});
}
@ -74,4 +68,37 @@ module.exports = class StreamCopy {
getRowCount() {
return this.stream.rowCount;
}
_cancel (pid, action) {
const pg = new PSQL(this.dbParams);
const actionType = action === ACTION_TO ? ACTION_TO : ACTION_FROM;
pg.query(cancelQuery(pid), (err, result) => {
if (err) {
return this.logger.error(err);
}
const isCancelled = result.rows.length && result.rows[0].cancelled;
if (isCancelled) {
return this.logger.info(`Canceled "copy ${actionType}" stream query successfully (pid: ${pid})`);
}
return pg.query(terminateQuery(pid), (err, result) => {
if (err) {
return this.logger.error(err);
}
const isTerminated = result.rows.length && result.rows[0].terminated;
if (!isTerminated) {
return this.logger.error(
new Error(`Unable to terminate "copy ${actionType}" stream query (pid: ${pid})`)
);
}
return this.logger.info(`Terminated "copy ${actionType}" stream query successfully (pid: ${pid})`);
});
});
}
};

View File

@ -0,0 +1,50 @@
'use strict';
const { Transform } = require('stream');
module.exports = class Throttler extends Transform {
constructor (pgstream, ...args) {
super(...args);
this.pgstream = pgstream;
this.sampleLength = global.settings.copy_from_maximum_slow_input_speed_interval || 15;
this.minimunBytesPerSecondThershold = global.settings.copy_from_minimum_input_speed || 0;
this.byteCount = 0;
this.bytesPerSecondHistory = [];
this._interval = setInterval(this._updateMetrics.bind(this), 1000);
}
_updateMetrics () {
this.bytesPerSecondHistory.push(this.byteCount);
this.byteCount = 0;
if (this.bytesPerSecondHistory > this.sampleLength) {
this.bytesPerSecondHistory.shift();
}
const doesNotReachThreshold = [];
for (const bytesPerSecond of this.bytesPerSecondHistory) {
if (bytesPerSecond <= this.minimunBytesPerSecondThershold) {
doesNotReachThreshold.push(true);
}
}
if (doesNotReachThreshold.length >= this.sampleLength) {
clearInterval(this._interval);
this.pgstream.emit('error', new Error('Connection closed by server: input data too slow'));
}
}
_transform (chunk, encoding, callback) {
this.byteCount += chunk.length;
callback(null, chunk);
}
_flush (callback) {
clearInterval(this._interval);
callback();
}
};

View File

@ -38,6 +38,8 @@ module.exports.batch_log_filename = 'logs/batch-queries.log';
module.exports.copy_timeout = "'5h'";
module.exports.copy_from_max_post_size = 2 * 1024 * 1024 * 1024 // 2 GB;
module.exports.copy_from_max_post_size_pretty = '2 GB';
module.exports.copy_from_minimum_input_speed = 0; // 1 byte per second
module.exports.copy_from_maximum_slow_input_speed_interval = 15 // 15 seconds
// Max number of queued jobs a user can have at a given time
module.exports.batch_max_queued_jobs = 64;
// Capacity strategy to use.

View File

@ -39,6 +39,8 @@ module.exports.batch_log_filename = 'logs/batch-queries.log';
module.exports.copy_timeout = "'5h'";
module.exports.copy_from_max_post_size = 2 * 1024 * 1024 * 1024 // 2 GB;
module.exports.copy_from_max_post_size_pretty = '2 GB';
module.exports.copy_from_minimum_input_speed = 0; // 1 byte per second
module.exports.copy_from_maximum_slow_input_speed_interval = 15 // 15 seconds
// Max number of queued jobs a user can have at a given time
module.exports.batch_max_queued_jobs = 64;
// Capacity strategy to use.

View File

@ -39,6 +39,8 @@ module.exports.batch_log_filename = 'logs/batch-queries.log';
module.exports.copy_timeout = "'5h'";
module.exports.copy_from_max_post_size = 2 * 1024 * 1024 * 1024 // 2 GB;
module.exports.copy_from_max_post_size_pretty = '2 GB';
module.exports.copy_from_minimum_input_speed = 0; // 1 byte per second
module.exports.copy_from_maximum_slow_input_speed_interval = 15 // 15 seconds
// Max number of queued jobs a user can have at a given time
module.exports.batch_max_queued_jobs = 64;
// Capacity strategy to use.

View File

@ -36,6 +36,8 @@ module.exports.batch_log_filename = 'logs/batch-queries.log';
module.exports.copy_timeout = "'5h'";
module.exports.copy_from_max_post_size = 2 * 1024 * 1024 * 1024 // 2 GB;
module.exports.copy_from_max_post_size_pretty = '2 GB';
module.exports.copy_from_minimum_input_speed = 0; // 1 byte per second
module.exports.copy_from_maximum_slow_input_speed_interval = 15 // 15 seconds
// Max number of queued jobs a user can have at a given time
module.exports.batch_max_queued_jobs = 64;
// Capacity strategy to use.

4
package-lock.json generated
View File

@ -2397,8 +2397,8 @@
"integrity": "sha1-2hhHsglA5C7hSSvq9l1J2RskXfc="
},
"pg-copy-streams": {
"version": "github:cartodb/node-pg-copy-streams#94c8d89abaea54bd4e6f61f63f47ef81cdc44253",
"from": "github:cartodb/node-pg-copy-streams#v1.2.0-carto.3"
"version": "github:cartodb/node-pg-copy-streams#be101502d9a7ee116ae3a0b3c94487869f0cada9",
"from": "github:cartodb/node-pg-copy-streams#v2.x-carto"
},
"pg-int8": {
"version": "1.0.1",

View File

@ -34,7 +34,7 @@
"node-statsd": "~0.0.7",
"node-uuid": "^1.4.7",
"oauth-client": "0.3.0",
"pg-copy-streams": "github:cartodb/node-pg-copy-streams#v1.2.0-carto.3",
"pg-copy-streams": "github:cartodb/node-pg-copy-streams#v2.x-carto",
"qs": "~6.2.1",
"queue-async": "~1.0.7",
"redis-mpool": "0.7.0",

View File

@ -43,7 +43,7 @@ describe('Batch API callback templates', function () {
};
self.testClient.getResult(
'create table test_batch_errors (job_id text, error_message text)', function (err) {
'BEGIN;CREATE TABLE test_batch_errors (job_id text, error_message text);COMMIT', function (err) {
if (err) {
return done(err);
}

View File

@ -0,0 +1,137 @@
'use strict';
const querystring = require('querystring');
const StatsClient = require('../../app/stats/client');
const statsClient = StatsClient.getInstance(global.settings.statsd);
const server = require('../../app/server')(statsClient);
const request = require('request');
const assert = require('assert');
const { Readable } = require('stream');
const createTableQuery = `CREATE TABLE copy_from_throttle AS (SELECT 0::integer AS counter)`;
const dropTableQuery = `DROP TABLE copy_from_throttle`;
function * counterGenerator (timeout) {
let counter = 0;
while (true) {
yield new Promise(resolve => setTimeout(() => resolve(`${counter++}`), timeout)); // jshint ignore:line
}
}
class CounterStream extends Readable {
constructor(generator, ...args) {
super(...args);
this.generator = generator;
}
_read () {
const res = this.generator.next();
res.value.then(value => res.done ? this.push(null) : this.push(value));
}
}
describe('COPY FROM throttle', function () {
before(function() {
this.copy_from_minimum_input_speed = global.settings.copy_from_minimum_input_speed;
global.settings.copy_from_minimum_input_speed = 1;
this.copy_from_maximum_slow_input_speed_interval = global.settings.copy_from_maximum_slow_input_speed_interval;
global.settings.copy_from_maximum_slow_input_speed_interval = 1;
});
after(function() {
global.settings.copy_from_first_chunk_timeout = this.copy_from_first_chunk_timeout;
global.settings.copy_from_maximum_slow_input_speed_interval = this.copy_from_maximum_slow_input_speed_interval;
});
beforeEach(function (done) {
this.listener = server.listen(0, '127.0.0.1');
this.listener.on('error', done);
this.listener.on('listening', () => {
const { address, port } = this.listener.address();
this.host = address;
this.port = port;
done();
});
});
beforeEach(function (done) {
const { host, port } = this;
const createTable = querystring.stringify({ q: createTableQuery, api_key: 1234});
const createTableOptions = {
url: `http://${host}:${port}/api/v1/sql?${createTable}`,
headers: { host: 'vizzuality.cartodb.com' },
method: 'GET'
};
request(createTableOptions, function (err, res) {
if (err) {
return done(err);
}
assert.equal(res.statusCode, 200);
done();
});
});
afterEach(function (done) {
const { host, port } = this;
const dropTable = querystring.stringify({ q: dropTableQuery, api_key: 1234 });
const dropTableOptions = {
url: `http://${host}:${port}/api/v1/sql?${dropTable}`,
headers: { host: 'vizzuality.cartodb.com' },
method: 'GET'
};
request(dropTableOptions, function (err) {
if (err) {
return done(err);
}
done();
});
});
afterEach(function (done) {
this.listener.close(done);
});
it('hang while sendind data', function (done) {
const { host, port } = this;
const copy = querystring.stringify({
q: "COPY copy_from_throttle (counter) FROM STDIN WITH (FORMAT CSV, DELIMITER ',')",
api_key: 1234
});
const options = {
url: `http://${host}:${port}/api/v1/sql/copyfrom?${copy}`,
headers: { host: 'vizzuality.cartodb.com' },
body: new CounterStream(counterGenerator(1000)),
method: 'POST'
};
request(options, (err, res, body) => {
if (err) {
return done(err);
}
assert.equal(res.statusCode, 400);
body = JSON.parse(body);
assert.deepEqual(body, { error: ["Connection closed by server: input data too slow"] });
done();
});
});
});

View File

@ -15,7 +15,7 @@ describe('stream copy', function() {
};
const sql = 'COPY dummy_table FROM STDIN';
const streamCopy = new StreamCopy(sql, userDbParams);
assert.equal(streamCopy.pg.dbopts.port, global.settings.db_batch_port);
assert.equal(streamCopy.dbParams.port, global.settings.db_batch_port);
done();
});
});