CartoDB-SQL-API/app/controllers/app.js
2015-12-03 15:00:35 +01:00

648 lines
23 KiB
JavaScript
Executable File

// CartoDB SQL API
//
// all requests expect the following URL args:
// - `sql` {String} SQL to execute
//
// for private (read/write) queries:
// - OAuth. Must have proper OAuth 1.1 headers. For OAuth 1.1 spec see Google
//
// eg. /api/v1/?sql=SELECT 1 as one (with a load of OAuth headers or URL arguments)
//
// for public (read only) queries:
// - sql only, provided the subdomain exists in CartoDB and the table's sharing options are public
//
// eg. vizzuality.cartodb.com/api/v1/?sql=SELECT * from my_table
//
//
var express = require('express');
var path = require('path');
var step = require('step');
var crypto = require('crypto');
var os = require('os');
var Profiler = require('step-profiler');
var StatsD = require('node-statsd').StatsD;
var PSQL = require('cartodb-psql');
var _ = require('underscore');
var LRU = require('lru-cache');
var assert = require('assert');
var CdbRequest = require('../models/cartodb_request');
var AuthApi = require('../auth/auth_api');
var formats = require('../models/formats');
var HealthCheck = require('../monitoring/health_check');
var PgErrorHandler = require('../postgresql/error_handler');
process.env.PGAPPNAME = process.env.PGAPPNAME || 'cartodb_sqlapi';
// jshint ignore:start
function pad(n) { return n < 10 ? '0' + n : n; }
Date.prototype.toJSON = function() {
var s = this.getFullYear() + '-' + pad(this.getMonth() + 1) + '-' + pad(this.getDate()) + 'T' +
pad(this.getHours()) + ':' + pad(this.getMinutes()) + ':' + pad(this.getSeconds());
var offset = this.getTimezoneOffset();
if (offset === 0) {
s += 'Z';
} else {
s += ( offset < 0 ? '+' : '-' ) + pad(Math.abs(offset / 60)) + pad(Math.abs(offset % 60));
}
return s;
};
// jshint ignore:end
// jshint maxcomplexity:21
function App() {
var app = express.createServer();
var metadataBackend = require('cartodb-redis')({
host: global.settings.redis_host,
port: global.settings.redis_port,
max: global.settings.redisPool,
idleTimeoutMillis: global.settings.redisIdleTimeoutMillis,
reapIntervalMillis: global.settings.redisReapIntervalMillis
});
var cdbReq = new CdbRequest();
// Set default configuration
global.settings.db_pubuser = global.settings.db_pubuser || "publicuser";
global.settings.bufferedRows = global.settings.bufferedRows || 1000;
var tableCache = LRU({
// store no more than these many items in the cache
max: global.settings.tableCacheMax || 8192,
// consider entries expired after these many milliseconds (10 minutes by default)
maxAge: global.settings.tableCacheMaxAge || 1000*60*10
});
// Size based on https://github.com/CartoDB/cartodb.js/blob/3.15.2/src/geo/layer_definition.js#L72
var SQL_QUERY_BODY_LOG_MAX_LENGTH = 2000;
app.getSqlQueryFromRequestBody = function(req) {
var sqlQuery = req.body && req.body.q;
if (!sqlQuery) {
return '';
}
if (sqlQuery.length > SQL_QUERY_BODY_LOG_MAX_LENGTH) {
sqlQuery = sqlQuery.substring(0, SQL_QUERY_BODY_LOG_MAX_LENGTH) + ' [...]';
}
return JSON.stringify({q: sqlQuery});
};
if ( global.log4js ) {
var loggerOpts = {
buffer: true,
// log4js provides a tokens solution as expess but in does not provide the request/response in the callback.
// Thus it is not possible to extract relevant information from them.
// This is a workaround to be able to access request/response.
format: function(req, res, format) {
var logFormat = global.settings.log_format ||
':remote-addr :method :req[Host]:url :status :response-time ms -> :res[Content-Type]';
logFormat = logFormat.replace(/:sql/, app.getSqlQueryFromRequestBody(req));
return format(logFormat);
}
};
app.use(global.log4js.connectLogger(global.log4js.getLogger(), _.defaults(loggerOpts, {level:'info'})));
} else {
// Express logger uses tokens as described here: http://www.senchalabs.org/connect/logger.html
express.logger.token('sql', function(req) {
return app.getSqlQueryFromRequestBody(req);
});
app.use(express.logger({
buffer: true,
format: global.settings.log_format ||
':remote-addr :method :req[Host]:url :status :response-time ms -> :res[Content-Type]'
}));
}
// Initialize statsD client if requested
var statsd_client;
if ( global.settings.statsd ) {
// Perform keyword substitution in statsd
if ( global.settings.statsd.prefix ) {
var host_token = os.hostname().split('.').reverse().join('.');
global.settings.statsd.prefix = global.settings.statsd.prefix.replace(/:host/, host_token);
}
statsd_client = new StatsD(global.settings.statsd);
statsd_client.last_error = { msg:'', count:0 };
statsd_client.socket.on('error', function(err) {
var last_err = statsd_client.last_error;
var last_msg = last_err.msg;
var this_msg = ''+err;
if ( this_msg !== last_msg ) {
console.error("statsd client socket error: " + err);
statsd_client.last_error.count = 1;
statsd_client.last_error.msg = this_msg;
} else {
++last_err.count;
if ( ! last_err.interval ) {
//console.log("Installing interval");
statsd_client.last_error.interval = setInterval(function() {
var count = statsd_client.last_error.count;
if ( count > 1 ) {
console.error("last statsd client socket error repeated " + count + " times");
statsd_client.last_error.count = 1;
//console.log("Clearing interval");
clearInterval(statsd_client.last_error.interval);
statsd_client.last_error.interval = null;
}
}, 1000);
}
}
});
}
// Use step-profiler
if ( global.settings.useProfiler ) {
app.use(function(req, res, next) {
req.profiler = new Profiler({statsd_client:statsd_client});
next();
});
}
// Set connection timeout
if ( global.settings.hasOwnProperty('node_socket_timeout') ) {
var timeout = parseInt(global.settings.node_socket_timeout);
app.use(function(req, res, next) {
req.connection.setTimeout(timeout);
next();
});
}
// Version extracting function
// function getVersion() {
// var version = {};
// version.cartodb_sql_api = require(__dirname + '/../../package.json').version;
// return version;
// }
app.use(express.bodyParser());
app.enable('jsonp callback');
app.set("trust proxy", true);
// basic routing
app.options('*', function(req,res) { setCrossDomain(res); res.end(); });
app.all(global.settings.base_url + '/sql', handleQuery);
app.all(global.settings.base_url + '/sql.:f', handleQuery);
var CacheStatusController = require('./cache_status_controller');
var cacheStatusController = new CacheStatusController(tableCache);
cacheStatusController.register(app);
var HealthCheckController = require('./health_check_controller');
var healthCheckController = new HealthCheckController();
healthCheckController.register(app);
var VersionController = require('./version_controller');
var versionController = new VersionController();
versionController.register(app);
var sqlQueryMayWriteRegex = new RegExp("\\b(alter|insert|update|delete|create|drop|reindex|truncate|refresh)\\b", "i");
/**
* This is a fuzzy check, the return could be true even if the query doesn't really write anything. But you can be
* pretty sure of a false return.
*
* @param sql The SQL statement to check against
* @returns {boolean} Return true of the given query may write to the database
*/
function queryMayWrite(sql) {
return sqlQueryMayWriteRegex.test(sql);
}
function sanitize_filename(filename) {
filename = path.basename(filename, path.extname(filename));
filename = filename.replace(/[;()\[\]<>'"\s]/g, '_');
//console.log("Sanitized: " + filename);
return filename;
}
function handleQuery(req, res) {
// extract input
var body = (req.body) ? req.body : {};
var params = _.extend({}, req.query, body); // clone so don't modify req.params or req.body so oauth is not broken
var sql = params.q;
var limit = parseInt(params.rows_per_page);
var offset = parseInt(params.page);
var orderBy = params.order_by;
var sortOrder = params.sort_order;
var requestedFormat = params.format;
var format = _.isArray(requestedFormat) ? _.last(requestedFormat) : requestedFormat;
var requestedFilename = params.filename;
var filename = requestedFilename;
var requestedSkipfields = params.skipfields;
var cdbUsername = cdbReq.userByReq(req);
var skipfields;
var dp = params.dp; // decimal point digits (defaults to 6)
var gn = "the_geom"; // TODO: read from configuration file
var tableCacheItem;
if ( req.profiler ) {
req.profiler.start('sqlapi.query');
}
req.aborted = false;
req.on("close", function() {
if (req.formatter && _.isFunction(req.formatter.cancel)) {
req.formatter.cancel();
}
req.aborted = true; // TODO: there must be a builtin way to check this
});
function checkAborted(step) {
if ( req.aborted ) {
var err = new Error("Request aborted during " + step);
// We'll use status 499, same as ngnix in these cases
// see http://en.wikipedia.org/wiki/List_of_HTTP_status_codes#4xx_Client_Error
err.http_status = 499;
throw err;
}
}
try {
// sanitize and apply defaults to input
dp = (dp === "" || _.isUndefined(dp)) ? '6' : dp;
format = (format === "" || _.isUndefined(format)) ? 'json' : format.toLowerCase();
filename = (filename === "" || _.isUndefined(filename)) ? 'cartodb-query' : sanitize_filename(filename);
sql = (sql === "" || _.isUndefined(sql)) ? null : sql;
limit = (!_.isNaN(limit)) ? limit : null;
offset = (!_.isNaN(offset)) ? offset * limit : null;
// Accept both comma-separated string or array of comma-separated strings
if ( requestedSkipfields ) {
if ( _.isString(requestedSkipfields) ) {
skipfields = requestedSkipfields.split(',');
} else if ( _.isArray(requestedSkipfields) ) {
skipfields = [];
_.each(requestedSkipfields, function(ele) {
skipfields = skipfields.concat(ele.split(','));
});
}
} else {
skipfields = [];
}
//if ( -1 === supportedFormats.indexOf(format) )
if ( ! formats.hasOwnProperty(format) ) {
throw new Error("Invalid format: " + format);
}
if (!_.isString(sql)) {
throw new Error("You must indicate a sql query");
}
// initialise MD5 key of sql for cache lookups
var sql_md5 = generateMD5(sql);
// placeholder for connection
var pg;
// Database options
var dbopts = {
port: global.settings.db_port,
pass: global.settings.db_pubuser_pass
};
var authenticated = false;
var formatter;
var authApi = new AuthApi(req, params),
dbParams;
if ( req.profiler ) {
req.profiler.done('init');
}
// 1. Get database from redis via the username stored in the host header subdomain
// 2. Run the request through OAuth to get R/W user id if signed
// 3. Get the list of tables affected by the query
// 4. Setup headers
// 5. Send formatted results back
step(
function getDatabaseConnectionParams() {
checkAborted('getDatabaseConnectionParams');
// If the request is providing credentials it may require every DB parameters
if (authApi.hasCredentials()) {
metadataBackend.getAllUserDBParams(cdbUsername, this);
} else {
metadataBackend.getUserDBPublicConnectionParams(cdbUsername, this);
}
},
function authenticate(err, userDBParams) {
if (err) {
err.http_status = 404;
err.message = "Sorry, we can't find CartoDB user '" + cdbUsername + "'. " +
"Please check that you have entered the correct domain.";
throw err;
}
if ( req.profiler ) {
req.profiler.done('getDBParams');
}
dbParams = userDBParams;
dbopts.host = dbParams.dbhost;
dbopts.dbname = dbParams.dbname;
dbopts.user = (!!dbParams.dbpublicuser) ? dbParams.dbpublicuser : global.settings.db_pubuser;
authApi.verifyCredentials({
metadataBackend: metadataBackend,
apiKey: dbParams.apikey
}, this);
},
function setDBAuth(err, isAuthenticated) {
if (err) {
throw err;
}
if ( req.profiler ) {
req.profiler.done('authenticate');
}
if (_.isBoolean(isAuthenticated) && isAuthenticated) {
authenticated = isAuthenticated;
dbopts.user = _.template(global.settings.db_user, {user_id: dbParams.dbuser});
if ( global.settings.hasOwnProperty('db_user_pass') ) {
dbopts.pass = _.template(global.settings.db_user_pass, {
user_id: dbParams.dbuser,
user_password: dbParams.dbpass
});
} else {
delete dbopts.pass;
}
}
return null;
},
function queryExplain(err){
var self = this;
assert.ifError(err);
if ( req.profiler ) {
req.profiler.done('setDBAuth');
}
checkAborted('queryExplain');
pg = new PSQL(dbopts, {}, { destroyOnError: true });
// get all the tables from Cache or SQL
tableCacheItem = tableCache.get(sql_md5);
if (tableCacheItem) {
tableCacheItem.hits++;
return false;
} else {
var affectedTablesAndLastUpdatedTimeQuery = [
'WITH querytables AS (',
'SELECT * FROM CDB_QueryTablesText($quotesql$' + sql + '$quotesql$) as tablenames',
')',
'SELECT (SELECT tablenames FROM querytables), EXTRACT(EPOCH FROM max(updated_at)) as max',
'FROM CDB_TableMetadata m',
'WHERE m.tabname = any ((SELECT tablenames from querytables)::regclass[])'
].join(' ');
pg.query(affectedTablesAndLastUpdatedTimeQuery, function (err, resultSet) {
var tableNames = [];
var lastUpdatedTime = Date.now();
if (!err && resultSet.rowCount === 1) {
var result = resultSet.rows[0];
// This is an Array, so no need to split into parts
tableNames = result.tablenames;
if (Number.isFinite(result.max)) {
lastUpdatedTime = result.max * 1000;
}
} else {
var errorMessage = (err && err.message) || 'unknown error';
console.error("Error on query explain '%s': %s", sql, errorMessage);
}
return self(null, {
affectedTables: tableNames,
lastUpdatedTime: lastUpdatedTime
});
});
}
},
function setHeaders(err, result) {
assert.ifError(err);
if ( req.profiler ) {
req.profiler.done('queryExplain');
}
checkAborted('setHeaders');
// store explain result in local Cache
if ( ! tableCacheItem && result && result.affectedTables ) {
tableCacheItem = {
affected_tables: result.affectedTables,
last_modified: result.lastUpdatedTime,
// check if query may possibly write
may_write: queryMayWrite(sql),
// initialise hit counter
hits: 1
};
tableCache.set(sql_md5, tableCacheItem);
}
if ( !authenticated && tableCacheItem ) {
var affected_tables = tableCacheItem.affected_tables;
for ( var i = 0; i < affected_tables.length; ++i ) {
var t = affected_tables[i];
if ( t.match(/\bpg_/) ) {
var e = new SyntaxError("system tables are forbidden");
e.http_status = 403;
throw(e);
}
}
}
var FormatClass = formats[format];
formatter = new FormatClass();
req.formatter = formatter;
// configure headers for given format
var use_inline = !requestedFormat && !requestedFilename;
res.header("Content-Disposition", getContentDisposition(formatter, filename, use_inline));
res.header("Content-Type", formatter.getContentType());
// allow cross site post
setCrossDomain(res);
// set cache headers
var ttl = 31536000; // 1 year time to live by default
var cache_policy = req.query.cache_policy;
if ( cache_policy === 'persist' ) {
res.header('Cache-Control', 'public,max-age=' + ttl);
} else {
if ( ! tableCacheItem || tableCacheItem.may_write ) {
// Tell clients this response is already expired
// TODO: prevent cache_policy from overriding this ?
ttl = 0;
}
res.header('Cache-Control', 'no-cache,max-age='+ttl+',must-revalidate,public');
}
// Only set an X-Cache-Channel for responses we want Varnish to cache.
if ( tableCacheItem && tableCacheItem.affected_tables.length > 0 && !tableCacheItem.may_write ) {
res.header('X-Cache-Channel', generateCacheKey(dbopts.dbname, tableCacheItem, authenticated));
}
var lastModified = (tableCacheItem && tableCacheItem.last_modified) ?
tableCacheItem.last_modified :
Date.now();
res.header('Last-Modified', new Date(lastModified).toUTCString());
return null;
},
function generateFormat(err){
assert.ifError(err);
checkAborted('generateFormat');
// TODO: drop this, fix UI!
sql = new PSQL.QueryWrapper(sql).orderBy(orderBy, sortOrder).window(limit, offset).query();
var opts = {
username: cdbUsername,
dbopts: dbopts,
sink: res,
gn: gn,
dp: dp,
skipfields: skipfields,
sql: sql,
filename: filename,
bufferedRows: global.settings.bufferedRows,
callback: params.callback,
abortChecker: checkAborted
};
if ( req.profiler ) {
opts.profiler = req.profiler;
opts.beforeSink = function() {
req.profiler.done('beforeSink');
res.header('X-SQLAPI-Profiler', req.profiler.toJSONString());
};
}
if (global.settings.api_hostname) {
res.header('X-Served-By-Host', global.settings.api_hostname);
}
if (dbopts.host) {
res.header('X-Served-By-DB-Host', dbopts.host);
}
formatter.sendResponse(opts, this);
},
function errorHandle(err){
formatter = null;
if ( err ) {
handleException(err, res);
}
if ( req.profiler ) {
req.profiler.sendStats(); // TODO: do on nextTick ?
}
if (statsd_client) {
if ( err ) {
statsd_client.increment('sqlapi.query.error');
} else {
statsd_client.increment('sqlapi.query.success');
}
}
}
);
} catch (err) {
handleException(err, res);
if (statsd_client) {
statsd_client.increment('sqlapi.query.error');
}
}
}
function getContentDisposition(formatter, filename, inline) {
var ext = formatter.getFileExtension();
var time = new Date().toUTCString();
return ( inline ? 'inline' : 'attachment' ) + '; filename=' + filename + '.' + ext + '; ' +
'modification-date="' + time + '";';
}
function setCrossDomain(res){
res.header("Access-Control-Allow-Origin", "*");
res.header("Access-Control-Allow-Headers", "X-Requested-With, X-Prototype-Version, X-CSRF-Token");
}
function generateCacheKey(database, query_info, is_authenticated){
if ( ! query_info || ( is_authenticated && query_info.may_write ) ) {
return "NONE";
} else {
return database + ":" + query_info.affected_tables.join(',');
}
}
function generateMD5(data){
var hash = crypto.createHash('md5');
hash.update(data);
return hash.digest('hex');
}
function handleException(err, res) {
var pgErrorHandler = new PgErrorHandler(err);
var msg = {
error: [pgErrorHandler.getMessage()]
};
_.defaults(msg, pgErrorHandler.getFields());
if (global.settings.environment === 'development') {
msg.stack = err.stack;
}
if (global.settings.environment !== 'test'){
// TODO: email this Exception report
console.error("EXCEPTION REPORT: " + err.stack);
}
// allow cross site post
setCrossDomain(res);
// Force inline content disposition
res.header("Content-Disposition", 'inline');
if ( res.req && res.req.profiler ) {
res.req.profiler.done('finish');
res.header('X-SQLAPI-Profiler', res.req.profiler.toJSONString());
}
res.send(msg, getStatusError(pgErrorHandler, res.req));
if ( res.req && res.req.profiler ) {
res.req.profiler.sendStats();
}
}
function getStatusError(pgErrorHandler, req) {
var statusError = pgErrorHandler.getStatus();
// JSONP has to return 200 status error
if (req && req.query && req.query.callback) {
statusError = 200;
}
return statusError;
}
return app;
}
module.exports = App;