Merge pull request #589 from CartoDB/improve-copy-error-handling

Improve copy error handling
Daniel G. Aubert 2019-05-30 13:25:37 +02:00 committed by GitHub
commit d9f668048c
10 changed files with 246 additions and 45 deletions

View File

@@ -11,6 +11,7 @@ const { RATE_LIMIT_ENDPOINTS_GROUPS } = rateLimitsMiddleware;
 const errorHandlerFactory = require('../services/error_handler_factory');
 const StreamCopy = require('../services/stream_copy');
 const StreamCopyMetrics = require('../services/stream_copy_metrics');
+const Throttler = require('../services/throttler-stream');
 const zlib = require('zlib');
 const { PassThrough } = require('stream');
 const handleQueryMiddleware = require('../middlewares/handle-query');
@@ -86,20 +87,13 @@ function handleCopyTo (logger) {
                 .on('data', data => metrics.addSize(data.length))
                 .on('error', err => {
                     metrics.end(null, err);
-                    pgstream.unpipe(res);
                     return next(err);
                 })
-                .on('end', () => metrics.end( streamCopy.getRowCount(StreamCopy.ACTION_TO) ))
+                .on('end', () => metrics.end(streamCopy.getRowCount()))
                 .pipe(res)
-                .on('close', () => {
-                    const err = new Error('Connection closed by client');
-                    pgstream.emit('cancelQuery');
-                    pgstream.emit('error', err);
-                })
-                .on('error', err => {
-                    pgstream.emit('error', err);
-                });
+                .on('close', () => pgstream.emit('error', new Error('Connection closed by client')))
+                .on('error', err => pgstream.emit('error', err));
         });
     };
 }
@@ -120,27 +114,25 @@ function handleCopyFrom (logger) {
                 return next(err);
             }

+            const throttle = new Throttler(pgstream);
+
             req
                 .on('data', data => isGzip ? metrics.addGzipSize(data.length) : undefined)
                 .on('error', err => {
                     metrics.end(null, err);
                     pgstream.emit('error', err);
                 })
-                .on('close', () => {
-                    pgstream.emit('cancelQuery');
-                    pgstream.emit('error', new Error('Connection closed by client'));
-                })
+                .on('close', () => pgstream.emit('error', new Error('Connection closed by client')))
+                .pipe(throttle)
                 .pipe(decompress)
                 .on('data', data => {
                     metrics.addSize(data.length);

                     if(metrics.size > dbRemainingQuota) {
-                        pgstream.emit('cancelQuery');
                         return pgstream.emit('error', new Error('DB Quota exceeded'));
                     }

                     if((metrics.gzipSize || metrics.size) > COPY_FROM_MAX_POST_SIZE) {
-                        pgstream.emit('cancelQuery');
                         return pgstream.emit('error', new Error(
                             `COPY FROM maximum POST size of ${COPY_FROM_MAX_POST_SIZE_PRETTY} exceeded`
                         ));
@@ -149,18 +141,16 @@ function handleCopyFrom (logger) {
                 .on('error', err => {
                     err.message = `Error while gunzipping: ${err.message}`;
                     metrics.end(null, err);
-                    pgstream.emit('cancelQuery');
                     pgstream.emit('error', err);
                 })
                 .pipe(pgstream)
                 .on('error', err => {
                     metrics.end(null, err);
-                    req.unpipe(decompress);
-                    decompress.unpipe(pgstream);
                     return next(err);
                 })
                 .on('end', () => {
-                    metrics.end( streamCopy.getRowCount(StreamCopy.ACTION_FROM) );
+                    metrics.end(streamCopy.getRowCount());
                     const { time, rows } = metrics;
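The controller changes above replace the custom 'cancelQuery' event and the manual unpipe calls with a single convention: every failure path (client disconnect, pipe error, quota exceeded) is forwarded as an 'error' event on the PostgreSQL stream, and the StreamCopy service decides what to do with it. Below is a minimal standalone sketch of that pattern, with PassThrough streams standing in for pgstream and the HTTP response; it is an illustration, not code from this PR.

'use strict';

const { PassThrough } = require('stream');

const pgstream = new PassThrough();   // stands in for the COPY TO stream
const res = new PassThrough();        // stands in for the HTTP response

// The owner of pgstream (StreamCopy in the real code) reacts to the error.
pgstream.on('error', err => console.error('copy stream failed:', err.message));

pgstream
    .pipe(res)
    .on('close', () => pgstream.emit('error', new Error('Connection closed by client')))
    .on('error', err => pgstream.emit('error', err));

// Simulate the client going away mid-transfer: 'close' fires on res and the
// error is funnelled back into pgstream.
res.destroy();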

View File

@@ -8,7 +8,8 @@ const ACTION_TO = 'to';
 const ACTION_FROM = 'from';
 const DEFAULT_TIMEOUT = "'5h'";

-const cancelQuery = pid => `SELECT pg_cancel_backend(${pid})`;
+const cancelQuery = pid => `SELECT pg_cancel_backend(${pid}) as cancelled`;
+const terminateQuery = pid => `SELECT pg_terminate_backend(${pid}) as terminated`;
 const timeoutQuery = timeout => `SET statement_timeout=${timeout}`;

 module.exports = class StreamCopy {
@@ -43,19 +44,21 @@ module.exports = class StreamCopy {
                 return callback(err);
             }

-            const streamMaker = action === ACTION_TO ? copyTo : copyFrom;
-            this.stream = streamMaker(this.sql);
+            this.clientProcessID = client.processID;
+
+            this.stream = action === ACTION_TO ? copyTo(this.sql) : copyFrom(this.sql);
             const pgstream = client.query(this.stream);

-            pgstream
-                .on('end', () => {
-                    if(action === ACTION_TO) {
-                        pgstream.connection.stream.resume();
-                    }
-                    done();
-                })
-                .on('error', err => done(err))
-                .on('cancelQuery', () => this.cancel(client.processID, action));
+            if (action === ACTION_TO) {
+                pgstream.on('end', () => done());
+                pgstream.on('error', () => this._cancel(client.processID, action));
+            } else if (action === ACTION_FROM) {
+                pgstream.on('finish', () => done());
+                pgstream.on('error', err => client.connection.sendCopyFail(err.message));
+            }
+
+            pgstream.on('error', err => done(err));

             callback(null, pgstream);
         });
@@ -66,22 +69,36 @@ module.exports = class StreamCopy {
         return this.stream.rowCount;
     }

-    cancel (pid, action) {
+    _cancel (pid, action) {
         const pg = new PSQL(this.dbParams);
+        const actionType = action === ACTION_TO ? ACTION_TO : ACTION_FROM;
+
         pg.query(cancelQuery(pid), (err, result) => {
             if (err) {
                 return this.logger.error(err);
             }

-            const actionType = action === ACTION_TO ? ACTION_TO : ACTION_FROM;
-            const isCancelled = result.rows[0].pg_cancel_backend;
+            const isCancelled = result.rows.length && result.rows[0].cancelled;

-            if (!isCancelled) {
-                return this.logger.error(new Error(`Unable to cancel "copy ${actionType}" stream query (pid: ${pid})`));
+            if (isCancelled) {
+                return this.logger.info(`Canceled "copy ${actionType}" stream query successfully (pid: ${pid})`);
             }

-            return this.logger.info(`Canceled "copy ${actionType}" stream query successfully (pid: ${pid})`);
+            return pg.query(terminateQuery(pid), (err, result) => {
+                if (err) {
+                    return this.logger.error(err);
+                }
+
+                const isTerminated = result.rows.length && result.rows[0].terminated;
+
+                if (!isTerminated) {
+                    return this.logger.error(
+                        new Error(`Unable to terminate "copy ${actionType}" stream query (pid: ${pid})`)
+                    );
+                }
+
+                return this.logger.info(`Terminated "copy ${actionType}" stream query successfully (pid: ${pid})`);
+            });
         });
     }
 };
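The reworked _cancel() first asks PostgreSQL to cancel the running COPY with pg_cancel_backend and, only when that reports false, falls back to pg_terminate_backend, which kills the backend connection outright. Below is a minimal standalone sketch of that two-step fallback using the plain pg driver instead of the internal PSQL wrapper; the connection settings (taken from PG* environment variables) and the pid are illustrative assumptions.

'use strict';

const { Client } = require('pg');

function cancelOrTerminate (pid, callback) {
    const client = new Client();    // reads PGHOST, PGUSER, PGDATABASE, etc.

    client.connect(err => {
        if (err) {
            return callback(err);
        }

        // Step 1: politely cancel the query running in the target backend.
        client.query('SELECT pg_cancel_backend($1) AS cancelled', [pid], (err, result) => {
            if (err) {
                return client.end(() => callback(err));
            }

            if (result.rows.length && result.rows[0].cancelled) {
                return client.end(() => callback(null, 'cancelled'));
            }

            // Step 2: cancel failed, terminate the backend connection instead.
            client.query('SELECT pg_terminate_backend($1) AS terminated', [pid], (err, result) => {
                if (err) {
                    return client.end(() => callback(err));
                }

                const outcome = result.rows.length && result.rows[0].terminated ? 'terminated' : 'failed';

                client.end(() => callback(null, outcome));
            });
        });
    });
}

// Example: try to stop the backend running a stuck COPY (pid is illustrative).
cancelOrTerminate(12345, (err, outcome) => console.log(err || outcome));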

View File

@@ -0,0 +1,50 @@
'use strict';

const { Transform } = require('stream');

module.exports = class Throttler extends Transform {
    constructor (pgstream, ...args) {
        super(...args);

        this.pgstream = pgstream;

        // Size of the sliding window (in one-second samples) and the minimum
        // throughput the client must sustain, both read from global settings.
        this.sampleLength = global.settings.copy_from_maximum_slow_input_speed_interval || 15;
        this.minimumBytesPerSecondThreshold = global.settings.copy_from_minimum_input_speed || 0;

        this.byteCount = 0;
        this.bytesPerSecondHistory = [];

        this._interval = setInterval(this._updateMetrics.bind(this), 1000);
    }

    _updateMetrics () {
        this.bytesPerSecondHistory.push(this.byteCount);
        this.byteCount = 0;

        if (this.bytesPerSecondHistory.length > this.sampleLength) {
            this.bytesPerSecondHistory.shift();
        }

        const doesNotReachThreshold = [];

        for (const bytesPerSecond of this.bytesPerSecondHistory) {
            if (bytesPerSecond <= this.minimumBytesPerSecondThreshold) {
                doesNotReachThreshold.push(true);
            }
        }

        // Every sample in the window was at or below the threshold: give up.
        if (doesNotReachThreshold.length >= this.sampleLength) {
            clearInterval(this._interval);
            this.pgstream.emit('error', new Error('Connection closed by server: input data too slow'));
        }
    }

    _transform (chunk, encoding, callback) {
        this.byteCount += chunk.length;
        callback(null, chunk);
    }

    _flush (callback) {
        clearInterval(this._interval);
        callback();
    }
};
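The throttler counts incoming bytes once per second; when every sample in the configured window stays at or below copy_from_minimum_input_speed, it emits "Connection closed by server: input data too slow" on the stream it was handed (pgstream in the controller), which triggers the cancel/terminate path above. A small standalone sketch of that behaviour; the settings values and the require path are assumptions for illustration.

'use strict';

const { PassThrough } = require('stream');

// The Throttler reads its thresholds from global settings at construction time.
global.settings = {
    copy_from_minimum_input_speed: 1,               // bytes per second
    copy_from_maximum_slow_input_speed_interval: 3  // seconds of slow input tolerated
};

const Throttler = require('./app/services/throttler-stream');   // path assumed

const pgstream = new PassThrough();   // stands in for the COPY FROM stream
const input = new PassThrough();      // stands in for the request stream

pgstream.on('error', err => {
    console.error(err.message);       // "Connection closed by server: input data too slow"
    process.exit(0);
});

input.pipe(new Throttler(pgstream)).pipe(pgstream);

// Send one chunk, then go silent: after a few seconds of samples below the
// threshold the Throttler aborts the transfer.
input.write('1,foo\n');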

View File

@@ -38,6 +38,8 @@ module.exports.batch_log_filename = 'logs/batch-queries.log';
 module.exports.copy_timeout = "'5h'";
 module.exports.copy_from_max_post_size = 2 * 1024 * 1024 * 1024 // 2 GB;
 module.exports.copy_from_max_post_size_pretty = '2 GB';
+module.exports.copy_from_minimum_input_speed = 0; // 1 byte per second
+module.exports.copy_from_maximum_slow_input_speed_interval = 15 // 15 seconds
 // Max number of queued jobs a user can have at a given time
 module.exports.batch_max_queued_jobs = 64;
 // Capacity strategy to use.

View File

@@ -39,6 +39,8 @@ module.exports.batch_log_filename = 'logs/batch-queries.log';
 module.exports.copy_timeout = "'5h'";
 module.exports.copy_from_max_post_size = 2 * 1024 * 1024 * 1024 // 2 GB;
 module.exports.copy_from_max_post_size_pretty = '2 GB';
+module.exports.copy_from_minimum_input_speed = 0; // 1 byte per second
+module.exports.copy_from_maximum_slow_input_speed_interval = 15 // 15 seconds
 // Max number of queued jobs a user can have at a given time
 module.exports.batch_max_queued_jobs = 64;
 // Capacity strategy to use.

View File

@@ -39,6 +39,8 @@ module.exports.batch_log_filename = 'logs/batch-queries.log';
 module.exports.copy_timeout = "'5h'";
 module.exports.copy_from_max_post_size = 2 * 1024 * 1024 * 1024 // 2 GB;
 module.exports.copy_from_max_post_size_pretty = '2 GB';
+module.exports.copy_from_minimum_input_speed = 0; // 1 byte per second
+module.exports.copy_from_maximum_slow_input_speed_interval = 15 // 15 seconds
 // Max number of queued jobs a user can have at a given time
 module.exports.batch_max_queued_jobs = 64;
 // Capacity strategy to use.

View File

@@ -36,6 +36,8 @@ module.exports.batch_log_filename = 'logs/batch-queries.log';
 module.exports.copy_timeout = "'5h'";
 module.exports.copy_from_max_post_size = 2 * 1024 * 1024 * 1024 // 2 GB;
 module.exports.copy_from_max_post_size_pretty = '2 GB';
+module.exports.copy_from_minimum_input_speed = 0; // 1 byte per second
+module.exports.copy_from_maximum_slow_input_speed_interval = 15 // 15 seconds
 // Max number of queued jobs a user can have at a given time
 module.exports.batch_max_queued_jobs = 64;
 // Capacity strategy to use.
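The same two settings are added to every environment example: copy_from_minimum_input_speed is the minimum bytes-per-second rate a COPY FROM client must sustain, and copy_from_maximum_slow_input_speed_interval is how many consecutive one-second samples may fall at or below that rate before the server drops the connection. A hypothetical override for a stricter deployment (values are illustrative, not defaults from this PR):

// Hypothetical override in a deployment's environment config: require at least
// 1 byte per second and only give up after 30 consecutive slow one-second samples.
module.exports.copy_from_minimum_input_speed = 1;
module.exports.copy_from_maximum_slow_input_speed_interval = 30;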

package-lock.json (generated): 5 changed lines
View File

@@ -2397,9 +2397,8 @@
       "integrity": "sha1-2hhHsglA5C7hSSvq9l1J2RskXfc="
     },
     "pg-copy-streams": {
-      "version": "2.2.0",
-      "resolved": "https://registry.npmjs.org/pg-copy-streams/-/pg-copy-streams-2.2.0.tgz",
-      "integrity": "sha512-w1gy/KAD49vaCCUfqcjh4fy1V4TF6fvtZ+EaRAU0QKtADMxihb+qYRgbWTDbi6Lh6v31CsWC0ru8vCDHofrRQQ=="
+      "version": "github:cartodb/node-pg-copy-streams#be101502d9a7ee116ae3a0b3c94487869f0cada9",
+      "from": "github:cartodb/node-pg-copy-streams#v2.x-carto"
     },
     "pg-int8": {
       "version": "1.0.1",

View File

@@ -34,7 +34,7 @@
     "node-statsd": "~0.0.7",
     "node-uuid": "^1.4.7",
     "oauth-client": "0.3.0",
-    "pg-copy-streams": "^2.2.0",
+    "pg-copy-streams": "github:cartodb/node-pg-copy-streams#v2.x-carto",
     "qs": "~6.2.1",
     "queue-async": "~1.0.7",
     "redis-mpool": "0.7.0",

View File

@@ -0,0 +1,137 @@
'use strict';

const querystring = require('querystring');
const StatsClient = require('../../app/stats/client');
const statsClient = StatsClient.getInstance(global.settings.statsd);
const server = require('../../app/server')(statsClient);
const request = require('request');
const assert = require('assert');
const { Readable } = require('stream');

const createTableQuery = `CREATE TABLE copy_from_throttle AS (SELECT 0::integer AS counter)`;
const dropTableQuery = `DROP TABLE copy_from_throttle`;

function * counterGenerator (timeout) {
    let counter = 0;

    while (true) {
        yield new Promise(resolve => setTimeout(() => resolve(`${counter++}`), timeout)); // jshint ignore:line
    }
}

class CounterStream extends Readable {
    constructor (generator, ...args) {
        super(...args);
        this.generator = generator;
    }

    _read () {
        const res = this.generator.next();
        res.value.then(value => res.done ? this.push(null) : this.push(value));
    }
}

describe('COPY FROM throttle', function () {
    before(function () {
        this.copy_from_minimum_input_speed = global.settings.copy_from_minimum_input_speed;
        global.settings.copy_from_minimum_input_speed = 1;

        this.copy_from_maximum_slow_input_speed_interval = global.settings.copy_from_maximum_slow_input_speed_interval;
        global.settings.copy_from_maximum_slow_input_speed_interval = 1;
    });

    after(function () {
        global.settings.copy_from_minimum_input_speed = this.copy_from_minimum_input_speed;
        global.settings.copy_from_maximum_slow_input_speed_interval = this.copy_from_maximum_slow_input_speed_interval;
    });

    beforeEach(function (done) {
        this.listener = server.listen(0, '127.0.0.1');

        this.listener.on('error', done);

        this.listener.on('listening', () => {
            const { address, port } = this.listener.address();

            this.host = address;
            this.port = port;

            done();
        });
    });

    beforeEach(function (done) {
        const { host, port } = this;

        const createTable = querystring.stringify({ q: createTableQuery, api_key: 1234 });

        const createTableOptions = {
            url: `http://${host}:${port}/api/v1/sql?${createTable}`,
            headers: { host: 'vizzuality.cartodb.com' },
            method: 'GET'
        };

        request(createTableOptions, function (err, res) {
            if (err) {
                return done(err);
            }

            assert.equal(res.statusCode, 200);

            done();
        });
    });

    afterEach(function (done) {
        const { host, port } = this;

        const dropTable = querystring.stringify({ q: dropTableQuery, api_key: 1234 });

        const dropTableOptions = {
            url: `http://${host}:${port}/api/v1/sql?${dropTable}`,
            headers: { host: 'vizzuality.cartodb.com' },
            method: 'GET'
        };

        request(dropTableOptions, function (err) {
            if (err) {
                return done(err);
            }

            done();
        });
    });

    afterEach(function (done) {
        this.listener.close(done);
    });

    it('hangs while sending data', function (done) {
        const { host, port } = this;

        const copy = querystring.stringify({
            q: "COPY copy_from_throttle (counter) FROM STDIN WITH (FORMAT CSV, DELIMITER ',')",
            api_key: 1234
        });

        const options = {
            url: `http://${host}:${port}/api/v1/sql/copyfrom?${copy}`,
            headers: { host: 'vizzuality.cartodb.com' },
            body: new CounterStream(counterGenerator(1000)),
            method: 'POST'
        };

        request(options, (err, res, body) => {
            if (err) {
                return done(err);
            }

            assert.equal(res.statusCode, 400);

            body = JSON.parse(body);

            assert.deepEqual(body, { error: ["Connection closed by server: input data too slow"] });

            done();
        });
    });
});