2011-08-24 04:42:27 +08:00
// CartoDB SQL API
2011-06-13 11:23:02 +08:00
// all requests expect the following URL args:
// - `q` {String} SQL to execute
// for private (read/write) queries:
2011-08-24 04:42:27 +08:00
// - OAuth. Must have proper OAuth 1.1 headers. For OAuth 1.1 spec see Google
2011-06-13 11:23:02 +08:00
2011-08-24 04:42:27 +08:00
// eg. /api/v1/?q=SELECT 1 as one (with a load of OAuth headers or URL arguments)
2011-06-13 11:23:02 +08:00
// for public (read only) queries:
2011-08-24 04:42:27 +08:00
// - sql only, provided the subdomain exists in CartoDB and the table's sharing options are public
2011-06-13 11:23:02 +08:00
2011-08-24 04:42:27 +08:00
// eg. vizzuality.cartodb.com/api/v1/?q=SELECT * from my_table
2012-07-24 16:29:47 +08:00
2012-11-12 19:37:34 +08:00
2014-01-31 17:55:30 +08:00
function App() {
2012-11-12 19:37:34 +08:00
var path = require('path');
2012-04-13 22:37:09 +08:00
var express = require('express')
2014-02-11 00:34:27 +08:00
, app = express.createServer()
2012-05-01 23:46:30 +08:00
, Step = require('step')
, crypto = require('crypto')
2012-10-15 16:13:39 +08:00
, fs = require('fs')
, zlib = require('zlib')
2012-10-25 18:38:45 +08:00
, util = require('util')
2012-10-15 16:13:39 +08:00
, spawn = require('child_process').spawn
2013-11-16 01:36:49 +08:00
, Meta = require('cartodb-redis')({
host: global.settings.redis_host,
port: global.settings.redis_port
// global.settings.app_root + '/app/models/metadata')
2012-05-01 23:46:30 +08:00
, oAuth = require(global.settings.app_root + '/app/models/oauth')
, PSQL = require(global.settings.app_root + '/app/models/psql')
2013-12-18 18:57:46 +08:00
, CdbRequest = require(global.settings.app_root + '/app/models/cartodb_request')
2012-05-01 23:46:30 +08:00
, ApiKeyAuth = require(global.settings.app_root + '/app/models/apikey_auth')
, _ = require('underscore')
2013-02-14 01:57:14 +08:00
, LRU = require('lru-cache')
2013-05-27 17:21:56 +08:00
, formats = require(global.settings.app_root + '/app/models/formats')
2013-02-14 01:57:14 +08:00
2013-12-18 18:57:46 +08:00
var cdbReq = new CdbRequest(Meta);
var apiKeyAuth = new ApiKeyAuth(Meta, cdbReq);
2013-11-06 00:49:10 +08:00
// Set default configuration
global.settings.db_pubuser = global.settings.db_pubuser || "publicuser";
2013-02-14 01:57:14 +08:00
var tableCache = LRU({
// store no more than these many items in the cache
max: global.settings.tableCacheMax || 8192,
// consider entries expired after these many milliseconds (10 minutes by default)
maxAge: global.settings.tableCacheMaxAge || 1000*60*10
2011-08-24 04:42:27 +08:00
2013-06-19 18:24:04 +08:00
// Left-pad a number below 10 with a single '0' so that date/time components
// take two characters. Values of 10 or more are returned unchanged (still as
// numbers); the result is only meant to be used in string concatenation.
function pad(n) { return n < 10 ? '0' + n : n; }

// Serialize dates as local time followed by the numeric UTC offset
// (e.g. 2014-03-13T18:52:40+0800) rather than the default UTC ISO string.
// A zero offset is rendered as 'Z'.
Date.prototype.toJSON = function() {
    var s = this.getFullYear() + '-'
        + pad(this.getMonth() + 1) + '-'
        + pad(this.getDate()) + 'T'
        + pad(this.getHours()) + ':'
        + pad(this.getMinutes()) + ':'
        + pad(this.getSeconds());
    var offset = this.getTimezoneOffset();
    if (offset === 0) {
        s += 'Z';
    } else {
        // getTimezoneOffset() is the number of minutes local time is *behind*
        // UTC, so a negative value means a positive (+hhmm) offset.
        var absOffset = Math.abs(offset);
        s += ( offset < 0 ? '+' : '-' )
            // floor the hours: offsets such as 330 minutes are not whole hours
            + pad(Math.floor(absOffset / 60))
            + pad(absOffset % 60);
    }
    return s;
};
2014-03-13 18:52:40 +08:00
var loggerOpts = {
buffer: true,
format: global.settings.log_format ||
':req[X-Real-IP] :method :req[Host]:url :status :response-time ms -> :res[Content-Type]'
if ( global.log4js ) {
app.use(log4js.connectLogger(log4js.getLogger(), _.defaults(loggerOpts, {level:'auto'})));
} else {
2014-02-11 00:34:27 +08:00
2014-01-31 17:55:30 +08:00
// Set connection timeout
if ( global.settings.hasOwnProperty('node_socket_timeout') ) {
var timeout = parseInt(global.settings.node_socket_timeout);
app.use(function(req, res, next) {
2014-03-13 20:40:56 +08:00
// Version extracting function
//
// Returns an object whose `cartodb_sql_api` property is the `version` field
// of the package.json found two directories above this file.
function getVersion() {
    var version = {};
    version.cartodb_sql_api = require(__dirname + '/../../package.json').version;
    return version;
}
2011-09-07 19:05:10 +08:00
2011-08-24 04:42:27 +08:00
app.enable('jsonp callback');
2013-05-30 17:56:43 +08:00
app.set("trust proxy", true);
2011-09-07 19:05:10 +08:00
2012-05-01 23:46:30 +08:00
// basic routing
2013-05-24 20:21:13 +08:00
app.options('*', function(req,res) { setCrossDomain(res); res.end(); });
2013-05-23 17:49:23 +08:00
app.all(global.settings.base_url+'/sql', function(req, res) { handleQuery(req, res) } );
app.all(global.settings.base_url+'/sql.:f', function(req, res) { handleQuery(req, res) } );
app.get(global.settings.base_url+'/cachestatus', function(req, res) { handleCacheStatus(req, res) } );
2014-03-13 20:40:56 +08:00
app.get(global.settings.base_url+'/version', function(req, res) {
2012-05-01 23:46:30 +08:00
2012-10-15 19:20:37 +08:00
// Return true if the given query may write to the database
//
// NOTE: this is a fuzzy check, the return could be true even
//       if the query doesn't really write anything.
//       But you can be pretty sure of a false return.
//
// The check is a case-insensitive whole-word match against a fixed list of
// SQL keywords, so a keyword appearing inside a string literal or a comment
// also counts as a possible write.
function queryMayWrite(sql) {
    var pattern = /\b(alter|insert|update|delete|create|drop|reindex|truncate)\b/i;
    return pattern.test(sql);
}
2012-11-12 19:37:34 +08:00
// Reduce a user-supplied filename to a safe base name:
//  - directory components and the extension are dropped
//  - semicolons, parentheses, square/angle brackets, quotes and whitespace
//    are each replaced by an underscore
function sanitize_filename(filename) {
    var stem = path.basename(filename, path.extname(filename));
    return stem.replace(/[;()\[\]<>'"\s]/g, '_');
}
2012-10-18 17:33:35 +08:00
2012-05-01 23:46:30 +08:00
// request handlers
2012-06-30 07:54:53 +08:00
// Request handler behind the `/sql` and `/sql.:f` routes.
//
// Inputs are read from the query string merged with the request body
// (body values win): q, api_key, database (deprecated), rows_per_page,
// page, format, filename, cache_policy, skipfields, dp.
// Defaults applied below: dp -> '6', format -> 'json',
// filename -> 'cartodb-query'.
//
// Errors thrown synchronously are caught at the bottom and passed to
// handleException(), as are errors reaching the final errorHandle step.
//
// NOTE(review): this listing appears to be missing lines (closing braces,
// the call that chains the named step functions together, and a few
// statements). The comments below only describe what is visible.
function handleQuery(req, res) {
2011-07-04 23:28:39 +08:00
2012-06-07 02:47:21 +08:00
// extract input
2011-09-07 19:05:10 +08:00
var body = (req.body) ? req.body : {};
2013-07-15 20:39:15 +08:00
var params = _.extend({}, req.query, body); // clone so don't modify req.params or req.body so oauth is not broken
var sql = params.q;
var api_key = params.api_key;
var database = params.database; // TODO: Deprecate
var limit = parseInt(params.rows_per_page);
var offset = parseInt(params.page);
var requestedFormat = params.format;
2012-11-13 23:47:04 +08:00
// when `format` is given several times, the last occurrence wins
var format = _.isArray(requestedFormat) ? _.last(requestedFormat) : requestedFormat;
2013-07-15 20:39:15 +08:00
var requestedFilename = params.filename;
var cache_policy = params.cache_policy;
2012-11-13 23:47:04 +08:00
var filename = requestedFilename;
2013-07-15 20:39:15 +08:00
var requestedSkipfields = params.skipfields;
2013-05-06 18:30:32 +08:00
var skipfields;
2013-07-15 20:39:15 +08:00
var dp = params.dp; // decimal point digits (defaults to 6)
2012-11-13 23:47:04 +08:00
var gn = "the_geom"; // TODO: read from configuration file
2012-10-15 16:13:39 +08:00
var user_id;
2013-02-14 01:57:14 +08:00
var tableCacheItem;
2013-05-15 00:01:37 +08:00
var requestProtocol = req.protocol;
2012-09-11 18:22:27 +08:00
2011-08-24 04:42:27 +08:00
try {
2012-10-12 18:17:35 +08:00
2013-05-06 18:30:32 +08:00
// sanitize and apply defaults to input
dp = (dp === "" || _.isUndefined(dp)) ? '6' : dp;
format = (format === "" || _.isUndefined(format)) ? 'json' : format.toLowerCase();
filename = (filename === "" || _.isUndefined(filename)) ? 'cartodb-query' : sanitize_filename(filename);
sql = (sql === "" || _.isUndefined(sql)) ? null : sql;
database = (database === "" || _.isUndefined(database)) ? null : database;
2013-05-23 19:21:23 +08:00
// rows_per_page / page that did not parse to a number become null
limit = (!_.isNaN(limit)) ? limit : null;
// NOTE(review): if `page` is given without `rows_per_page`, limit is null
// at this point and `offset * limit` evaluates to 0 - confirm this is intended
offset = (!_.isNaN(offset)) ? offset * limit : null;
2013-05-06 18:30:32 +08:00
// Accept both comma-separated string or array of comma-separated strings
if ( requestedSkipfields ) {
if ( _.isString(requestedSkipfields) ) skipfields = requestedSkipfields.split(',');
else if ( _.isArray(requestedSkipfields) ) {
skipfields = [];
_.each(requestedSkipfields, function(ele) {
skipfields = skipfields.concat(ele.split(','));
} else {
skipfields = [];
2013-05-17 17:44:50 +08:00
//if ( -1 === supportedFormats.indexOf(format) )
// `formats` maps a format name to the class used to render it (see fClass below)
if ( ! formats.hasOwnProperty(format) )
2012-10-12 18:17:35 +08:00
throw new Error("Invalid format: " + format);
2011-08-24 04:42:27 +08:00
if (!_.isString(sql)) throw new Error("You must indicate a sql query");
2012-05-08 22:28:22 +08:00
// initialise MD5 key of sql for cache lookups
var sql_md5 = generateMD5(sql);
// placeholder for connection
2012-05-08 22:25:19 +08:00
var pg;
2011-08-24 04:42:27 +08:00
2013-11-18 18:42:43 +08:00
// Database options
2013-11-18 19:21:30 +08:00
var dbopts = {
2013-11-18 20:31:11 +08:00
port: global.settings.db_port,
pass: global.settings.db_pubuser_pass
2013-11-18 19:21:30 +08:00
2013-11-18 18:42:43 +08:00
2012-10-15 19:20:37 +08:00
var authenticated;
2013-05-27 17:21:56 +08:00
var formatter;
2013-12-18 18:57:46 +08:00
var cdbuser = cdbReq.userByReq(req);
2011-08-24 04:42:27 +08:00
// 1. Get database from redis via the username stored in the host header subdomain
// 2. Run the request through OAuth to get R/W user id if signed
2012-04-21 02:34:18 +08:00
// 3. Get the list of tables affected by the query
2013-05-27 17:21:56 +08:00
// 4. Setup headers
// 5. Send formatted results back
2011-08-24 04:42:27 +08:00
2012-06-30 07:54:53 +08:00
function getDatabaseName() {
if (_.isNull(database)) {
2013-12-18 18:57:46 +08:00
Meta.getUserDBName(cdbuser, this);
2012-06-30 07:54:53 +08:00
} else {
// database hardcoded in query string (deprecated??): don't use redis
return database;
2011-08-24 04:42:27 +08:00
function setDBGetUser(err, data) {
2013-11-16 01:36:49 +08:00
if (err) {
// If the database could not be found, the user is non-existent
if ( err.message.match('missing') ) {
2013-12-18 19:02:51 +08:00
err.message = "Sorry, we can't find CartoDB user '" + cdbuser
+ "'. Please check that you have entered the correct domain.";
2012-06-07 02:47:21 +08:00
err.http_status = 404;
2013-11-16 01:36:49 +08:00
throw err;
2012-05-12 04:46:45 +08:00
2012-05-08 22:25:19 +08:00
2013-11-16 01:36:49 +08:00
// keep the previously known database name when the lookup returned nothing
database = (data === "" || _.isNull(data) || _.isUndefined(data)) ? database : data;
2013-11-18 19:21:30 +08:00
dbopts.dbname = database;
2013-11-16 01:36:49 +08:00
2011-12-27 02:16:41 +08:00
// an api_key parameter selects API-key verification; otherwise OAuth is used
if(api_key) {
2013-12-18 18:57:46 +08:00
apiKeyAuth.verifyRequest(req, this);
2011-12-27 02:16:41 +08:00
} else {
2013-05-15 00:01:37 +08:00
oAuth.verifyRequest(req, this, requestProtocol);
2011-12-27 02:16:41 +08:00
2011-08-24 04:42:27 +08:00
2013-11-18 19:21:30 +08:00
function setUserGetDBHost(err, data){
2011-08-24 04:42:27 +08:00
if (err) throw err;
2013-11-18 20:31:11 +08:00
user_id = data;
// a null user id from the verification step means the request is unauthenticated
authenticated = ! _.isNull(user_id);
2013-11-18 18:42:43 +08:00
// NOTE(review): the alternative branch of this conditional is not visible in this listing
var dbuser = user_id ?
_.template(global.settings.db_user, {user_id: user_id})
2013-11-18 19:21:30 +08:00
dbopts.user = dbuser;
2013-12-18 18:57:46 +08:00
Meta.getUserDBHost(cdbuser, this);
2013-11-18 19:21:30 +08:00
2013-11-18 20:31:11 +08:00
function setDBHostGetPassword(err, data){
2013-11-18 19:21:30 +08:00
if (err) throw err;
// fall back to the configured host when none was returned for this user
dbopts.host = data || global.settings.db_host;
2013-11-18 20:31:11 +08:00
// by-pass redis lookup for password if not authenticated
if ( ! authenticated ) return null;
2011-10-28 19:11:18 +08:00
2013-12-18 18:57:46 +08:00
Meta.getUserDBPass(cdbuser, this);
2013-11-18 20:31:11 +08:00
function queryExplain(err, data){
if (err) throw err;
if ( authenticated ) {
2014-01-17 20:14:30 +08:00
// db_user_pass, when configured, is a template filled with the user id and the stored password
if ( global.settings.hasOwnProperty('db_user_pass') ) {
dbopts.pass = _.template(global.settings.db_user_pass, {
user_id: user_id,
user_password: data
else delete dbopts.pass;
2013-11-18 20:31:11 +08:00
pg = new PSQL(dbopts);
2012-10-15 19:20:37 +08:00
2012-05-01 23:46:30 +08:00
// get all the tables from Cache or SQL
2013-02-14 01:57:14 +08:00
tableCacheItem = tableCache.get(sql_md5);
if (tableCacheItem) {
2013-02-13 23:43:04 +08:00
return false;
2012-10-15 19:20:37 +08:00
} else {
2013-05-24 16:22:17 +08:00
// NOTE(review): the raw SQL is concatenated inside a $quotesql$ dollar-quoted
// literal; a query containing that same tag would end the literal early.
pg.query("SELECT CDB_QueryTables($quotesql$" + sql + "$quotesql$)", this);
2012-05-01 23:46:30 +08:00
2012-04-21 02:34:18 +08:00
2013-05-27 17:21:56 +08:00
function setHeaders(err, result){
2012-04-21 02:34:18 +08:00
if (err) throw err;
2012-04-21 04:09:31 +08:00
2012-05-01 23:46:30 +08:00
// store explain result in local Cache
2013-02-14 01:57:14 +08:00
if ( ! tableCacheItem ) {
2013-02-13 23:43:04 +08:00
if ( result.rowCount === 1 ) {
2013-02-14 01:57:14 +08:00
tableCacheItem = {
2013-02-13 23:43:04 +08:00
affected_tables: result.rows[0].cdb_querytables,
// check if query may possibly write
may_write: queryMayWrite(sql),
// initialise hit counter
hits: 1
2013-02-14 01:57:14 +08:00
tableCache.set(sql_md5, tableCacheItem);
2013-02-13 23:43:04 +08:00
} else {
2014-03-13 19:14:11 +08:00
console.error("Unexpected result from CDB_QueryTables($quotesql$" + sql + "$quotesql$): " + util.inspect(result));
2013-02-13 23:43:04 +08:00
2012-05-01 23:46:30 +08:00
2012-04-21 02:34:18 +08:00
2013-04-09 18:36:37 +08:00
if ( tableCacheItem ) {
// affected_tables is parsed as a `{a,b,...}` string: strip the braces, split on commas
var affected_tables = tableCacheItem.affected_tables.split(/^\{(.*)\}$/)[1].split(',');
for ( var i=0; i<affected_tables.length; ++i ) {
var t = affected_tables[i];
2013-11-07 17:16:58 +08:00
if ( t.match(/\bpg_/) ) {
2013-04-09 18:36:37 +08:00
// NOTE(review): no statement throwing `e` is visible in this listing
var e = new SyntaxError("system tables are forbidden");
e.http_status = 403;
2013-05-16 17:24:52 +08:00
2013-05-27 17:21:56 +08:00
var fClass = formats[format]
formatter = new fClass();
2011-10-28 19:11:18 +08:00
2012-04-13 22:37:09 +08:00
2012-10-25 19:40:21 +08:00
// configure headers for given format
2012-11-14 23:30:18 +08:00
// serve inline only when the client asked for neither a format nor a filename
var use_inline = !requestedFormat && !requestedFilename;
2013-09-05 23:07:11 +08:00
res.header("Content-Disposition", getContentDisposition(formatter, filename, use_inline));
2013-05-27 17:21:56 +08:00
res.header("Content-Type", formatter.getContentType());
2012-04-13 22:37:09 +08:00
// allow cross site post
// set cache headers
2013-07-22 23:21:38 +08:00
var ttl = 31536000; // 1 year time to live by default
2012-11-13 02:14:20 +08:00
// NOTE(review): this re-declares the `cache_policy` read from the merged
// params at the top of the function, using the query string only.
var cache_policy = req.query.cache_policy;
2013-07-15 20:39:15 +08:00
if ( cache_policy === 'persist' ) {
2013-07-22 23:21:38 +08:00
res.header('Cache-Control', 'public,max-age=' + ttl);
2012-11-13 02:14:20 +08:00
} else {
2013-07-23 00:20:26 +08:00
if ( ! tableCacheItem || tableCacheItem.may_write ) {
2013-10-18 19:29:06 +08:00
// Tell clients this response is already expired
// TODO: prevent cache_policy from overriding this ?
2013-07-22 23:21:38 +08:00
ttl = 0;
2013-10-18 19:29:06 +08:00
2013-07-10 13:58:35 +08:00
res.header('Cache-Control', 'no-cache,max-age='+ttl+',must-revalidate,public');
2012-11-13 02:14:20 +08:00
2013-10-18 19:29:06 +08:00
// Only set an X-Cache-Channel for responses we want Varnish to cache.
if ( tableCacheItem && ! tableCacheItem.may_write ) {
res.header('X-Cache-Channel', generateCacheKey(database, tableCacheItem, authenticated));
2013-07-15 19:13:19 +08:00
// Set Last-Modified header
// Currently sets it to NOW
// TODO: use a real value, querying for most recent change in
// any of the source tables
res.header('Last-Modified', new Date().toUTCString());
2012-04-13 22:37:09 +08:00
return result;
2013-05-27 17:21:56 +08:00
function generateFormat(err, result){
2011-08-24 04:42:27 +08:00
if (err) throw err;
2011-10-28 19:11:18 +08:00
2013-05-27 17:21:56 +08:00
// TODO: drop this, fix UI!
2013-06-14 17:18:16 +08:00
sql = PSQL.window_sql(sql,limit,offset);
2013-05-27 17:21:56 +08:00
// everything the formatter needs; the HTTP response itself is the output sink
var opts = {
2013-11-18 18:42:43 +08:00
dbopts: dbopts,
2013-05-27 17:21:56 +08:00
sink: res,
gn: gn,
dp: dp,
skipfields: skipfields,
sql: sql,
filename: filename
2012-11-13 00:10:16 +08:00
2013-05-27 17:21:56 +08:00
formatter.sendResponse(opts, this);
2011-08-24 04:42:27 +08:00
2013-05-27 17:21:56 +08:00
function errorHandle(err){
if ( err ) handleException(err, res);
2011-08-24 04:42:27 +08:00
} catch (err) {
2011-08-18 01:42:19 +08:00
handleException(err, res);
2011-08-24 04:42:27 +08:00
2011-09-07 19:05:10 +08:00
2012-05-01 23:46:30 +08:00
// Request handler reporting statistics about the per-process table cache:
// the process id, the sum of the `hits` counters of all cached entries and
// the number of cached entries.
function handleCacheStatus(req, res){
    var cachedItems = tableCache.values();
    // the reducer argument is named `item` so it does not shadow the response object
    var totalExplainHits = cachedItems.reduce(function(memo, item) { return memo + item.hits; }, 0);
    var totalExplainKeys = cachedItems.length;
    res.send({explain: {pid: process.pid, hits: totalExplainHits, keys : totalExplainKeys }});
}
2012-05-01 23:46:30 +08:00
2013-03-28 02:40:37 +08:00
2013-09-05 23:07:11 +08:00
// Build the value of the Content-Disposition response header.
//
// formatter - object exposing getFileExtension(), used for the file suffix
// filename  - base name of the download, without extension
// inline    - truthy for an 'inline' disposition, falsy for 'attachment'
//
// The current time is included as the modification-date parameter.
function getContentDisposition(formatter, filename, inline) {
    var ext = formatter.getFileExtension();
    var time = new Date().toUTCString();
    var disposition = inline ? 'inline' : 'attachment';
    return disposition + '; filename=' + filename + '.' + ext + '; modification-date="' + time + '";';
}
2011-10-28 19:11:18 +08:00
2011-09-07 19:05:10 +08:00
2012-04-13 07:30:45 +08:00
// Add the CORS headers to a response: any origin is allowed, together with
// the X-Requested-With, X-Prototype-Version and X-CSRF-Token request headers.
function setCrossDomain(res){
    res.header("Access-Control-Allow-Origin", "*");
    res.header("Access-Control-Allow-Headers", "X-Requested-With, X-Prototype-Version, X-CSRF-Token");
}
2012-04-13 07:30:45 +08:00
2013-02-13 23:43:04 +08:00
// Build the cache channel identifier for a query.
//
// database         - database name, used as the key prefix
// query_info       - cached query description with `affected_tables`
//                    (a `{a,b,...}` string) and `may_write`
// is_authenticated - whether the request was authenticated
//
// Returns "NONE" when there is no query info or when an authenticated
// request may write; otherwise "<database>:<comma separated tables>".
function generateCacheKey(database, query_info, is_authenticated){
    if ( ! query_info || ( is_authenticated && query_info.may_write ) ) {
        return "NONE";
    }
    // strip the surrounding braces from the table list
    return database + ":" + query_info.affected_tables.split(/^\{(.*)\}$/)[1];
}
2012-04-21 02:34:18 +08:00
2012-05-01 23:46:30 +08:00
// Return the hexadecimal MD5 digest of `data`.
// Used to derive cache keys from SQL text; not meant for security purposes.
function generateMD5(data){
    var hash = crypto.createHash('md5');
    // feed the input into the hash: without this every call would return the
    // digest of the empty string and all cache keys would collide
    hash.update(data);
    return hash.digest('hex');
}
2013-05-16 17:24:52 +08:00
2011-08-18 00:27:45 +08:00
// Send an error response for `err` on `res`.
//
// The body is `{error: [message]}`; in the 'development' environment the
// stack trace is added as `stack`. Outside the 'test' environment the stack
// is also written to stderr. The status code is `err.http_status` when the
// error defines one, 400 otherwise.
function handleException(err, res){
    var msg = (global.settings.environment === 'development') ? {error:[err.message], stack: err.stack} : {error:[err.message]};
    if (global.settings.environment !== 'test'){
        // TODO: email this Exception report
        console.error("EXCEPTION REPORT: " + err.stack);
    }

    // allow cross site post
    setCrossDomain(res);

    // Force inline content disposition
    res.header("Content-Disposition", 'inline');

    // if the exception defines a http status code, use that, else a 400
    if (err.http_status !== undefined){
        res.send(msg, err.http_status);
    } else {
        res.send(msg, 400);
    }
}
2014-01-31 17:55:30 +08:00
return app;
module.exports = App;