// CartoDB SQL API
//
// all requests expect the following argument (URL query string or POST body):
// - `q` {String} SQL to execute
//
// for private (read/write) queries, either:
// - OAuth. Must have proper OAuth 1.1 headers. For OAuth 1.1 spec see Google
// - an `api_key` argument
//
// eg. /api/v1/sql?q=SELECT 1 as one (with a load of OAuth headers or URL arguments)
//
// for public (read only) queries:
// - `q` only, provided the subdomain exists in CartoDB and the table's sharing options are public
//
// eg. vizzuality.cartodb.com/api/v1/sql?q=SELECT * from my_table
//
//
var path = require('path');
var express = require('express')
, app = express.createServer(
express.logger({
buffer: true,
format: '[:date] :req[X-Real-IP] \033[90m:method\033[0m \033[36m:req[Host]:url\033[0m \033[90m:status :response-time ms -> :res[Content-Type]\033[0m'
}))
, Step = require('step')
, crypto = require('crypto')
, fs = require('fs')
, zlib = require('zlib')
, util = require('util')
, spawn = require('child_process').spawn
, Meta = require(global.settings.app_root + '/app/models/metadata')
, oAuth = require(global.settings.app_root + '/app/models/oauth')
, PSQL = require(global.settings.app_root + '/app/models/psql')
, ApiKeyAuth = require(global.settings.app_root + '/app/models/apikey_auth')
, _ = require('underscore')
, LRU = require('lru-cache')
;
// In-memory LRU cache of per-query metadata (affected tables, may-write flag,
// hit counter). Entries are looked up and stored under the MD5 of the SQL text.
var tableCache = LRU({
// store no more than these many items in the cache
max: global.settings.tableCacheMax || 8192,
// consider entries expired after these many milliseconds (10 minutes by default)
maxAge: global.settings.tableCacheMaxAge || 1000*60*10
});
// Keeps track of the exports currently being generated ("baking").
// Keyed by request key; each value is { req: [ExportRequest, ...] }, the
// list of requests waiting for that same export to be ready.
var bakingExports = {};
// Parse request bodies so POSTed parameters are available as req.body
app.use(express.bodyParser());
app.enable('jsonp callback');
// basic routing
// The handlers are wrapped in closures, so they are looked up by name at
// request time rather than at registration time.
app.all('/api/v1/sql', function(req, res) { handleQuery(req, res) } );
// Same handler, with an extension captured as the `f` route parameter
app.all('/api/v1/sql.:f', function(req, res) { handleQuery(req, res) } );
// NOTE(review): handleCacheStatus is not defined in the visible part of this file — confirm it exists
app.get('/api/v1/cachestatus', function(req, res) { handleCacheStatus(req, res) } );
// Tell whether the given query may write to the database.
//
// NOTE: the check is deliberately fuzzy: it only looks for a
//       data-modifying keyword anywhere in the text (case-insensitive),
//       so it can answer true for a query that writes nothing.
//       A false answer, on the other hand, is pretty reliable.
//
// @param sql {String} the SQL text to inspect
// @returns {Boolean} true if one of the keywords is found
//
function queryMayWrite(sql) {
  return /(alter|insert|update|delete|create|drop|truncate)/i.test(sql);
}
// Map a user_id to the name of the database user to connect as.
//
// A string user_id is expanded through the `db_user` template found in
// the global settings; anything else (notably null) is a request to use
// the public user.
//
// @param user_id {String|null}
// @returns {String} database username
//
function userid_to_dbuser(user_id) {
  var wantsPublicUser = ! _.isString(user_id);
  if ( wantsPublicUser ) {
    return "publicuser"; // FIXME: make configurable
  }
  return _.template(global.settings.db_user, {user_id: user_id});
}
// Reduce a requested filename to something safe to embed in a header or path:
// directory components and the extension are dropped, then semicolons,
// brackets, quotes and whitespace are each replaced by an underscore.
//
// @param filename {String} filename as requested by the client
// @returns {String} sanitized base name (without extension)
//
function sanitize_filename(filename) {
  var extension = path.extname(filename);
  var stem = path.basename(filename, extension);
  return stem.replace(/[;()\[\]<>'"\s]/g, '_');
}
// request handlers
function handleQuery(req, res) {
var supportedFormats = ['json', 'geojson', 'topojson', 'csv', 'svg', 'shp', 'kml'];
var svg_width = 1024.0;
var svg_height = 768.0;
// extract input
var body = (req.body) ? req.body : {};
var sql = req.query.q || body.q; // HTTP GET and POST store in different vars
var api_key = req.query.api_key || body.api_key;
var database = req.query.database; // TODO: Deprecate
var limit = parseInt(req.query.rows_per_page);
var offset = parseInt(req.query.page);
var requestedFormat = req.query.format || body.format;
var format = _.isArray(requestedFormat) ? _.last(requestedFormat) : requestedFormat;
var requestedFilename = req.query.filename || body.filename
var filename = requestedFilename;
var requestedSkipfields = req.query.skipfields || body.skipfields;
var skipfields;
var dp = req.query.dp || body.dp; // decimal point digits (defaults to 6)
var gn = "the_geom"; // TODO: read from configuration file
var user_id;
var tableCacheItem;
try {
// sanitize and apply defaults to input
dp = (dp === "" || _.isUndefined(dp)) ? '6' : dp;
format = (format === "" || _.isUndefined(format)) ? 'json' : format.toLowerCase();
filename = (filename === "" || _.isUndefined(filename)) ? 'cartodb-query' : sanitize_filename(filename);
sql = (sql === "" || _.isUndefined(sql)) ? null : sql;
database = (database === "" || _.isUndefined(database)) ? null : database;
limit = (_.isNumber(limit)) ? limit : null;
offset = (_.isNumber(offset)) ? offset * limit : null;
// Accept both comma-separated string or array of comma-separated strings
if ( requestedSkipfields ) {
if ( _.isString(requestedSkipfields) ) skipfields = requestedSkipfields.split(',');
else if ( _.isArray(requestedSkipfields) ) {
skipfields = [];
_.each(requestedSkipfields, function(ele) {
skipfields = skipfields.concat(ele.split(','));
});
}
} else {
skipfields = [];
}
// setup step run
var start = new Date().getTime();
if ( -1 === supportedFormats.indexOf(format) )
throw new Error("Invalid format: " + format);
if (!_.isString(sql)) throw new Error("You must indicate a sql query");
// initialise MD5 key of sql for cache lookups
var sql_md5 = generateMD5(sql);
// placeholder for connection
var pg;
var authenticated;
// 1. Get database from redis via the username stored in the host header subdomain
// 2. Run the request through OAuth to get R/W user id if signed
// 3. Get the list of tables affected by the query
// 4. Run query with r/w or public user
// 5. package results and send back
Step(
function getDatabaseName() {
if (_.isNull(database)) {
Meta.getDatabase(req, this);
} else {
// database hardcoded in query string (deprecated??): don't use redis
return database;
}
},
function setDBGetUser(err, data) {
if (err) throw err;
database = (data === "" || _.isNull(data) || _.isUndefined(data)) ? database : data;
// If the database could not be found, the user is non-existant
if (_.isNull(database)) {
var msg = "Sorry, we can't find this CartoDB. Please check that you have entered the correct domain.";
err = new Error(msg);
err.http_status = 404;
throw err;
}
if(api_key) {
ApiKeyAuth.verifyRequest(req, this);
} else {
oAuth.verifyRequest(req, this);
}
},
function queryExplain(err, data){
if (err) throw err;
user_id = data;
// store postgres connection
pg = new PSQL(user_id, database, limit, offset);
authenticated = ! _.isNull(user_id);
// get all the tables from Cache or SQL
tableCacheItem = tableCache.get(sql_md5);
if (tableCacheItem) {
tableCacheItem.hits++;
return false;
} else {
pg.query("SELECT CDB_QueryTables($quotesql$" + sql + "$quotesql$)", this, true);
}
},
function queryResult(err, result){
if (err) throw err;
// store explain result in local Cache
if ( ! tableCacheItem ) {
if ( result.rowCount === 1 ) {
tableCacheItem = {
affected_tables: result.rows[0].cdb_querytables,
// check if query may possibly write
may_write: queryMayWrite(sql),
// initialise hit counter
hits: 1
};
tableCache.set(sql_md5, tableCacheItem);
} else {
console.log("[ERROR] Unexpected result from CDB_QueryTables($quotesql$" + sql + "$quotesql$)");
console.dir(result);
}
}
if ( tableCacheItem ) {
var affected_tables = tableCacheItem.affected_tables.split(/^\{(.*)\}$/)[1].split(',');
for ( var i=0; i');
} else if ( gdims == '1' ) {
// Avoid filling closed linestrings
var linetag = '';
lines.push(linetag);
} else if ( gdims == '2' ) {
polys.push('');
}
if ( ! bbox ) {
// Parse layer extent: "BOX(x y, X Y)"
// NOTE: the name of the extent field is
// determined by the same code adding the
// ST_AsSVG call (in queryResult)
//
bbox = ele[gn + '_box'];
bbox = bbox.match(/BOX\(([^ ]*) ([^ ,]*),([^ ]*) ([^)]*)\)/);
bbox = {
xmin: parseFloat(bbox[1]),
ymin: parseFloat(bbox[2]),
xmax: parseFloat(bbox[3]),
ymax: parseFloat(bbox[4])
};
}
});
// Set point radius
for (var i=0; i',
'',
];
var root_tag = '');
// return payload
callback(null, out.join("\n"));
}
// CSV export: delegates to the single-file OGR exporter using the
// 'CSV' driver and a '.csv' file extension.
function toCSV(dbname, user_id, gcol, sql, skipfields, res, callback) {
  var ogrDriver = 'CSV';
  var fileExtension = 'csv';
  toOGR_SingleFile(dbname, user_id, gcol, sql, skipfields, ogrDriver, fileExtension, res, callback);
}
// Internal function usable by all OGR-driven outputs
//
// Dumps the result of a query to `out_filename` by running the `ogr2ogr`
// command line tool against the database.
//
// @param dbname       name of the database to connect to
// @param user_id      user id, mapped to a database user by userid_to_dbuser()
// @param gcol         geometry column name (accepted for symmetry, not used here)
// @param sql          query to export; a trailing semicolon is dropped
// @param skipfields   array of column names to leave out of the output
// @param out_format   OGR driver name (e.g. 'CSV', 'KML', 'ESRI Shapefile')
// @param out_filename path ogr2ogr should write to
// @param callback     function(err), invoked exactly once when the dump ends
//
function toOGR(dbname, user_id, gcol, sql, skipfields, out_format, out_filename, callback) {
  var ogr2ogr = 'ogr2ogr'; // FIXME: make configurable
  var dbhost = global.settings.db_host;
  var dbport = global.settings.db_port;
  var dbuser = userid_to_dbuser(user_id);
  var dbpass = ''; // turn into a parameter..
  var columns = [];

  // Drop ending semicolon (ogr doesn't like it)
  sql = sql.replace(/;\s*$/, '');

  Step (

    function fetchColumns() {
      // Fetch a single row just to learn the column names
      var colsql = 'SELECT * FROM (' + sql + ') as _cartodbsqlapi LIMIT 1';
      var pg = new PSQL(user_id, dbname, 1, 0);
      pg.query(colsql, this);
    },

    function spawnDumper(err, result) {
      if (err) throw err;

      // Skip system columns
      if ( result.rows.length ) {
        for (var k in result.rows[0]) {
          if ( skipfields.indexOf(k) != -1 ) continue;
          if ( out_format != 'CSV' && k == "the_geom_webmercator" ) continue; // TODO: drop ?
          // Double any embedded quote so the identifier stays well-formed
          var quoted = '"' + k.replace(/"/g, '""') + '"';
          columns.push( out_format == 'CSV' ? quoted + '::text' : quoted );
        }
      } else columns.push('*');

      // Make sure the next step is reached once only, whichever of the
      // 'error' / 'exit' events comes first
      var next = this;
      var finished = false;
      var done = function(e) {
        if ( finished ) return;
        finished = true;
        next(e);
      };

      sql = 'SELECT ' + columns.join(',')
          + ' FROM (' + sql + ') as _cartodbsqlapi';

      var conninfo = "PG:host=" + dbhost
          // only mention the port when one is configured
          + ( dbport ? " port=" + dbport : "" )
          + " user=" + dbuser
          + " dbname=" + dbname
          + " password=" + dbpass
          + " tables=fake"; // trick to skip query to geometry_columns

      var child = spawn(ogr2ogr, [
        '-f', out_format,
        '-lco', 'ENCODING=UTF-8',
        '-lco', 'LINEFORMAT=CRLF',
        out_filename,
        conninfo,
        '-sql', sql
      ]);

      // The output is of no use, but it must be consumed or the child
      // could block on a full pipe
      child.stdout.on('data', function() {});

      var stderr;
      var logErrPat = /^ERROR/;
      child.stderr.on('data', function(data) {
        data = data.toString(); // know of a faster way ?
        // Store only the first ERROR line
        if ( ! stderr && data.match(logErrPat) ) stderr = data;
        console.log('ogr2ogr stderr: ' + data);
      });

      // The command could not be started at all (e.g. not installed)
      child.on('error', function(e) {
        done(e);
      });

      child.on('exit', function(code, signal) {
        // A null code with a signal means the command was killed
        if ( code || signal ) {
          // No ERROR line may have been seen on stderr: fall back
          // to a generic message rather than failing on undefined
          var emsg = stderr
            ? stderr.split('\n')[0]
            : 'ogr2ogr exited with ' + ( signal ? 'signal ' + signal : 'code ' + code );
          // TODO: add more info about this error ?
          done(new Error(emsg));
        } else {
          done(null);
        }
      });
    },

    function finish(err) {
      callback(err);
    }
  );
}
// Shapefile export.
//
// Dumps the query to an ESRI Shapefile inside a temporary directory, zips
// the directory, removes it and streams the zip file to every request
// waiting for this same export. Concurrent requests for the same export
// (same database, user, geometry column, query and skipfields) are queued
// on the one being baked rather than starting a new dump.
//
// @param dbname     name of the database to connect to
// @param user_id    user id the query runs as
// @param gcol       geometry column name
// @param sql        query to export
// @param skipfields array of column names to leave out
// @param filename   base name of the shapefile inside the zip
// @param res        response stream the zip is sent to
// @param callback   function(err) handed to the ExportRequest for this request
//
function toSHP(dbname, user_id, gcol, sql, skipfields, filename, res, callback) {
  var zip = 'zip'; // FIXME: make configurable
  var tmpdir = global.settings.tmpDir || '/tmp';
  var reqKey = [ 'shp', dbname, user_id, gcol, generateMD5(sql) ].concat(skipfields).join(':');
  var outdirpath = tmpdir + '/sqlapi-' + reqKey;
  var zipfile = outdirpath + '.zip';
  var shapefile = outdirpath + '/' + filename + '.shp';

  // TODO: following tests:
  //  - fetch query with no "the_geom" column

  var qElem = new ExportRequest(res, callback);
  var baking = bakingExports[reqKey];
  if ( baking ) {
    // Same export is already being generated: just wait for it
    baking.req.push( qElem );
    return;
  }
  baking = bakingExports[reqKey] = { req: [ qElem ] };

  Step (

    function createOutDir() {
      // rwxrwxrwx, spelled without a legacy octal literal (which is
      // rejected in strict mode)
      fs.mkdir(outdirpath, parseInt('0777', 8), this);
    },

    function spawnDumper(err) {
      if ( err ) throw err;
      toOGR(dbname, user_id, gcol, sql, skipfields, 'ESRI Shapefile', shapefile, this);
    },

    function doZip(err) {
      if ( err ) throw err;
      var next = this;
      var child = spawn(zip, ['-qrj', zipfile, outdirpath ]);
      child.on('exit', function(code) {
        if (code) {
          res.statusCode = 500;
          // report the failure and stop: the success path must not run too
          next(new Error("Zip command return code " + code));
          return;
        }
        next(null);
      });
    },

    // Always runs, with the error of the previous steps (if any), so the
    // temporary directory does not outlive the request
    function cleanupDir(topError) {
      var next = this;

      // Unlink the dir content, one file after the other
      var unlinkall = function(dir, files, finish) {
        var f = files.shift();
        if ( ! f ) { finish(null); return; }
        var fn = dir + '/' + f;
        fs.unlink(fn, function(err) {
          if ( err ) {
            console.log("Unlinking " + fn + ": " + err);
            finish(err);
          }
          else unlinkall(dir, files, finish)
        });
      }

      fs.readdir(outdirpath, function(err, files) {
        if ( err ) {
          // A missing directory is fine: there is nothing to clean up
          if ( err.code != 'ENOENT' ) {
            next(new Error([topError, err].join('\n')));
          } else {
            next(topError);
          }
        } else {
          unlinkall(outdirpath, files, function(err) {
            fs.rmdir(outdirpath, function(err) {
              if ( err ) console.log("Removing dir " + outdirpath + ": " + err);
              next(topError);
            });
          });
        }
      });
    },

    function sendResults(err) {
      // Stream the zip to the waiting requests, one at a time
      var nextPipe = function(finish) {
        var r = baking.req.shift();
        if ( ! r ) { finish(null); return; }
        r.sendFile(err, zipfile, function() {
          nextPipe(finish);
        });
      }

      if ( ! err ) nextPipe(this);
      else {
        // Let every waiting request know about the failure
        _.each(baking.req, function(r) {
          r.cb(err);
        });
        return true;
      }
    },

    function cleanup(err) {
      delete bakingExports[reqKey];

      // unlink dump file (sync to avoid race condition)
      try { fs.unlinkSync(zipfile); }
      catch (e) {
        if ( e.code != 'ENOENT' ) {
          console.log("Could not unlink zipfile " + zipfile + ": " + e);
        }
      }
    }
  );
}
// A request waiting for an export file to be ready.
//
// Remembers the output stream and the callback of the request, and
// watches the output stream for a 'close' event: when that happens the
// request is flagged as canceled and any input stream being piped to
// it is destroyed.
//
// @param ostream  stream the export has to be written to
// @param callback function stored as `cb` for later notification
//
function ExportRequest(ostream, callback) {
  var self = this;

  self.cb = callback;
  self.ostream = ostream;
  self.istream = null;
  self.canceled = false;

  ostream.on('close', function onOutputClosed() {
    self.canceled = true;
    if ( ! self.istream ) return;
    self.istream.destroy();
  });
}
// Stream the given file to the output stream of this request.
//
// Nothing is streamed when the request was canceled in the meantime.
// The request callback (`cb`) is invoked, with no arguments, before
// this function returns.
//
// @param err      not used by this function
// @param filename path of the file to send
// @param callback function() invoked exactly once: as soon as the file
//                 has been opened and piping has started, when the file
//                 cannot be read, or right away for a canceled request
//
ExportRequest.prototype.sendFile = function (err, filename, callback) {
  var that = this;

  // An 'error' event may follow a successful 'open' (read failure while
  // streaming): make sure the caller is still notified once only
  var notified = false;
  var notifyOnce = function() {
    if ( notified ) return;
    notified = true;
    callback();
  };

  if ( ! this.canceled ) {
    this.istream = fs.createReadStream(filename)
    .on('open', function(fd) {
      that.istream.pipe(that.ostream);
      notifyOnce();
    })
    .on('error', function(e) {
      console.log("Can't send response: " + e);
      that.ostream.end();
      notifyOnce();
    });
  } else {
    // Response was canceled, not streaming the file
    notifyOnce();
  }
  this.cb();
};
// Export through OGR to a format made of a single file.
//
// The query is dumped to a temporary file which is then streamed to every
// request waiting for it, and finally removed. Requests for an export
// which is already being generated (same format, database, user, geometry
// column, query and skipfields) are queued on it rather than starting
// another dump.
//
// @param dbname     name of the database to connect to
// @param user_id    user id the query runs as
// @param gcol       geometry column name
// @param sql        query to export
// @param skipfields array of column names to leave out
// @param fmt        OGR driver name
// @param ext        extension of the temporary dump file
// @param res        response stream the file is sent to
// @param callback   function(err) handed to the ExportRequest for this request
//
function toOGR_SingleFile(dbname, user_id, gcol, sql, skipfields, fmt, ext, res, callback) {
  var baseDir = global.settings.tmpDir || '/tmp';
  var keyParts = [ fmt, dbname, user_id, gcol, generateMD5(sql) ].concat(skipfields);
  var reqKey = keyParts.join(':');
  var dumpfile = baseDir + '/sqlapi-' + reqKey + ':cartodb-query.' + ext;

  // TODO: following tests:
  //  - fetch query with no "the_geom" column

  var pending = new ExportRequest(res, callback);
  var job = bakingExports[reqKey];
  if ( job ) {
    // Already baking: queue up and wait
    job.req.push(pending);
    return;
  }

  // Register the resource as baking
  job = { req: [ pending ] };
  bakingExports[reqKey] = job;

  Step (

    function spawnDumper() {
      toOGR(dbname, user_id, gcol, sql, skipfields, fmt, dumpfile, this);
    },

    function sendResults(err) {
      if ( err ) {
        // Nothing to send: hand the error to every waiting request
        _.each(job.req, function(waiting) {
          waiting.cb(err);
        });
        return true;
      }

      // Serve the waiting requests one after the other
      var allSent = this;
      var pipeNext = function() {
        var waiting = job.req.shift();
        if ( ! waiting ) { allSent(null); return; }
        waiting.sendFile(err, dumpfile, function() {
          pipeNext();
        });
      };
      pipeNext();
    },

    function cleanup(err) {
      delete bakingExports[reqKey];

      // unlink dump file (sync to avoid race condition)
      try {
        fs.unlinkSync(dumpfile);
      } catch (e) {
        if ( e.code != 'ENOENT' ) {
          console.log("Could not unlink dumpfile " + dumpfile + ": " + e);
        }
      }
    }
  );
}
// KML export: delegates to the single-file OGR exporter using the
// 'KML' driver and a '.kml' file extension.
function toKML(dbname, user_id, gcol, sql, skipfields, res, callback) {
  var ogrDriver = 'KML';
  var fileExtension = 'kml';
  toOGR_SingleFile(dbname, user_id, gcol, sql, skipfields, ogrDriver, fileExtension, res, callback);
}
// Build the value of the Content-Disposition header for a response.
//
// @param format   output format; picks the file extension ('shp' is
//                 delivered as 'zip', unknown formats as 'json')
// @param filename file name, without extension
// @param inline   truthy for an 'inline' disposition, 'attachment' otherwise
// @returns {String} header value, stamped with the current UTC time as
//                   modification date
//
function getContentDisposition(format, filename, inline) {
  var ext;
  switch (format) {
    case 'geojson':
    case 'topojson':
    case 'csv':
    case 'svg':
    case 'kml':
      // extension is the same as the format name
      ext = format;
      break;
    case 'shp':
      ext = 'zip';
      break;
    default:
      ext = 'json';
  }
  var disposition = inline ? 'inline' : 'attachment';
  var time = new Date().toUTCString();
  return disposition + '; filename=' + filename + '.' + ext + '; modification-date="' + time + '";';
}
// Return the value of the Content-Type header for the given output format.
// Any format without a dedicated type is served as JSON.
//
// @param format output format name
// @returns {String} content type, charset included
//
function getContentType(format){
  switch (format) {
    case 'csv':
      return "text/csv; charset=utf-8; header=present";
    case 'svg':
      return "image/svg+xml; charset=utf-8";
    case 'shp':
      return "application/zip; charset=utf-8";
    case 'kml':
      return "application/kml; charset=utf-8";
    default:
      return "application/json; charset=utf-8";
  }
}
// Add the headers allowing cross-origin requests to the response:
// any origin is accepted, along with a fixed list of request headers.
//
// @param res response object exposing a header(name, value) method
//
function setCrossDomain(res){
  var allowedHeaders = ["X-Requested-With", "X-Prototype-Version", "X-CSRF-Token"];
  res.header("Access-Control-Allow-Origin", "*");
  res.header("Access-Control-Allow-Headers", allowedHeaders.join(", "));
}
// Compute the cache key of a query result.
//
// The key is "NONE" when nothing is known about the query, or when the
// request is authenticated and the query may write. Otherwise it is the
// database name followed by the list of affected tables, taken from
// between the curly braces of `affected_tables`.
//
// @param database         database name
// @param query_info       object with `affected_tables` and `may_write`, or a falsy value
// @param is_authenticated whether the request is authenticated
// @returns {String} cache key
//
function generateCacheKey(database, query_info, is_authenticated){
  var cacheable = query_info && ! ( is_authenticated && query_info.may_write );
  if ( ! cacheable ) return "NONE";

  var tables = query_info.affected_tables.split(/^\{(.*)\}$/)[1];
  return database + ":" + tables;
}
// Return the MD5 digest of the given data, as a hexadecimal string.
//
// @param data {String|Buffer} data to hash
// @returns {String} 32 hexadecimal characters
//
function generateMD5(data){
  return crypto.createHash('md5').update(data).digest('hex');
}
// Report an error to the client as a JSON payload.
//
// The payload holds the error message and, in the 'development'
// environment only, the stack trace. The error is also logged to the
// console, except in the 'test' environment. The response is sent with
// the `http_status` of the error when it defines one, 400 otherwise.
//
// @param err error to report; may carry an `http_status` property
// @param res response object
//
function handleException(err, res){
  var environment = global.settings.environment;

  var payload = { error: [err.message] };
  if ( environment == 'development' ) payload.stack = err.stack;

  if ( environment !== 'test' ) {
    // TODO: email this Exception report
    console.log("EXCEPTION REPORT")
    console.log(err.message);
    console.log(err.stack);
  }

  // allow cross site post
  setCrossDomain(res);

  // Force inline content disposition
  res.header("Content-Disposition", 'inline');

  // use the http status code of the exception when it defines one, else a 400
  var status = _.isUndefined(err.http_status) ? 400 : err.http_status;
  res.send(payload, status);
}
// Expose the configured express application (routes registered above) to whoever requires this module.
module.exports = app;