This commit is contained in:
zhongjin 2023-05-11 13:16:50 +08:00
commit e3ebcdcabe
6 changed files with 661 additions and 5 deletions

View File

@ -34,10 +34,45 @@ RUN cd cartodb && bundle exec grunt
# /bin/bash -l -c 'bundle exec grunt'
# && \
#rm -rf .git /root/.cache/pip node_modules
RUN cd cartodb/config && \
cp app_config.yml.sample app_config.yml && \
cp database.yml.sample database.yml && \
cp grunt_production.json.sample grunt_production.json
# Copy confs
ADD ./config/CartoDB-dev.js \
/CartoDB-SQL-API/config/environments/development.js
ADD ./config/WS-dev.js \
/Windshaft-cartodb/config/environments/development.js
ADD ./config/app_config.yml /cartodb/config/app_config.yml
ADD ./config/database.yml /cartodb/config/database.yml
ADD ./create_dev_user /cartodb/script/create_dev_user
ADD ./setup_organization.sh /cartodb/script/setup_organization.sh
ADD ./config/cartodb.nginx.proxy.conf /etc/nginx/nginx.conf
ADD ./config/varnish.vcl /etc/varnish.vcl
ADD ./geocoder.sh /cartodb/script/geocoder.sh
ADD ./geocoder_server.sql /cartodb/script/geocoder_server.sql
ADD ./fill_geocoder.sh /cartodb/script/fill_geocoder.sh
ADD ./sync_tables_trigger.sh /cartodb/script/sync_tables_trigger.sh
ENV PATH /usr/local/rvm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
RUN mkdir -p /cartodb/log && touch /cartodb/log/users_modifications && \
/opt/varnish/sbin/varnishd -a :6081 -T localhost:6082 -s malloc,256m -f /etc/varnish.vcl && \
perl -pi.bak -e 's/^bind 127.0.0.1 ::1$/bind 0.0.0.0/' /etc/redis/redis.conf && \
service postgresql start && service redis-server start && \
perl -pi -e 's/0\.22\.0/0.22.2/' /cartodb/app/models/user/db_service.rb && \
bash -l -c "cd /cartodb && bash script/create_dev_user && \
bash script/setup_organization.sh && bash script/geocoder.sh" && \
service postgresql stop && service redis-server stop && \
chmod +x /cartodb/script/fill_geocoder.sh && \
chmod +x /cartodb/script/sync_tables_trigger.sh
EXPOSE 80
ENV GDAL_DATA /usr/share/gdal/2.2
# Number of seconds between a sync tables task is run
# Default interval is an hour, use `docker run -e SYNC_TABLES_INTERVAL=60 ...` to change it
ENV SYNC_TABLES_INTERVAL 3600
ADD ./startup.sh /opt/startup.sh
CMD ["/bin/bash", "/opt/startup.sh"]
HEALTHCHECK CMD curl -f http://localhost || exit 1

113
config/CartoDB-dev.js Normal file
View File

@ -0,0 +1,113 @@
// In case the base_url has a :user param the username will be the one specified in the URL,
// otherwise it will fallback to extract the username from the host header.
module.exports.base_url = '(?:/api/:version|/user/:user/api/:version)';
// If useProfiler is true every response will be served with an
// X-SQLAPI-Profile header containing elapsed timing for various
// steps taken for producing the response.
module.exports.useProfiler = true;
module.exports.log_format = '[:date] :remote-addr :method :req[Host]:url :status :response-time ms -> :res[Content-Type] (:res[X-SQLAPI-Profiler])';
// If log_filename is given logs will be written there, in append mode. Otherwise stdout is used (default).
// Log file will be re-opened on receiving the HUP signal
module.exports.log_filename = 'logs/cartodb-sql-api.log';
// Regular expression pattern to extract username
// from hostname. Must have a single grabbing block.
module.exports.user_from_host = '^([^\\.]+)\\.';
module.exports.node_port = 8080;
module.exports.node_host = '0.0.0.0';
// idle socket timeout, in miliseconds
module.exports.node_socket_timeout = 600000;
module.exports.environment = 'development';
module.exports.db_base_name = 'cartodb_dev_user_<%= user_id %>_db';
// Supported labels: 'user_id' (read from redis)
module.exports.db_user = 'development_cartodb_user_<%= user_id %>';
// Supported labels: 'user_id', 'user_password' (both read from redis)
module.exports.db_user_pass = '<%= user_password %>'
// Name of the anonymous PostgreSQL user
module.exports.db_pubuser = 'publicuser';
// Password for the anonymous PostgreSQL user
module.exports.db_pubuser_pass = 'public';
module.exports.db_host = 'localhost';
module.exports.db_port = '5432';
module.exports.db_batch_port = '5432';
module.exports.finished_jobs_ttl_in_seconds = 2 * 3600; // 2 hours
module.exports.batch_query_timeout = 12 * 3600 * 1000; // 12 hours in milliseconds
module.exports.batch_log_filename = 'logs/batch-queries.log';
// Max number of queued jobs a user can have at a given time
module.exports.batch_max_queued_jobs = 64;
// Capacity strategy to use.
// It allows to tune how many queries run at a db host at the same time.
// Options: 'fixed', 'http-simple', 'http-load'
module.exports.batch_capacity_strategy = 'fixed';
// Applies when strategy='fixed'.
// Number of simultaneous users running queries in the same host.
// It will use 1 as min.
// Default 4.
module.exports.batch_capacity_fixed_amount = 4;
// Applies when strategy='http-simple' or strategy='http-load'.
// HTTP endpoint to check db host load.
// Helps to decide the number of simultaneous users running queries in that host.
// 'http-simple' will use 'available_cores' to decide the number.
// 'http-load' will use 'cores' and 'relative_load' to decide the number.
// It will use 1 as min.
// If no template is provided it will default to 'fixed' strategy.
module.exports.batch_capacity_http_url_template = 'http://<%= dbhost %>:9999/load';
// Max database connections in the pool
// Subsequent connections will wait for a free slot.
// NOTE: not used by OGR-mediated accesses
module.exports.db_pool_size = 500;
// Milliseconds before a connection is removed from pool
module.exports.db_pool_idleTimeout = 30000;
// Milliseconds between idle client checking
module.exports.db_pool_reapInterval = 1000;
// max number of bytes for a row, when exceeded the query will throw an error
//module.exports.db_max_row_size = 10 * 1024 * 1024;
// allows to use an object to connect with node-postgres instead of a connection string
//module.exports.db_use_config_object = true;
// requires enabling db_use_config_object=true
// allows to enable/disable keep alive for database connections
// by default is not enabled
//module.exports.db_keep_alive = {
// enabled: true,
// initialDelay: 5000
//};
module.exports.redis_host = '127.0.0.1';
module.exports.redis_port = 6379;
module.exports.redisPool = 50;
module.exports.redisIdleTimeoutMillis = 100;
module.exports.redisReapIntervalMillis = 10;
module.exports.redisLog = false;
// Max number of entries in the query tables cache
module.exports.tableCacheMax = 8192;
// Max age of query table cache items, in milliseconds
module.exports.tableCacheMaxAge = 1000*60*10;
// Temporary directory, make sure it is writable by server user
module.exports.tmpDir = '/tmp';
// change ogr2ogr command or path
module.exports.ogr2ogrCommand = 'ogr2ogr';
// Optional statsd support
module.exports.statsd = {
host: 'localhost',
port: 8125,
prefix: 'dev.:host.',
cacheDns: true
// support all allowed node-statsd options
};
module.exports.health = {
enabled: true,
username: 'development',
query: 'select 1'
};
module.exports.disabled_file = 'pids/disabled';
module.exports.ratelimits = {
// whether it should rate limit endpoints (global configuration)
rateLimitsEnabled: false,
// whether it should rate limit one or more endpoints (only if rateLimitsEnabled = true)
endpoints: {
query: false,
query_format: false,
job_create: false,
job_get: false,
job_delete: false
}
}

329
config/WS-dev.js Normal file
View File

@ -0,0 +1,329 @@
var config = {
environment: 'development'
,port: 8181
,host: '0.0.0.0'
// Size of the threadpool which can be used to run user code and get notified in the loop thread
// Its default size is 4, but it can be changed at startup time (the absolute maximum is 128).
// See http://docs.libuv.org/en/latest/threadpool.html
,uv_threadpool_size: undefined
// Regular expression pattern to extract username
// from hostname. Must have a single grabbing block.
,user_from_host: '^([^\\.]+)\\.'
// Base URLs for the APIs
//
// See http://github.com/CartoDB/Windshaft-cartodb/wiki/Unified-Map-API
//
// Base url for the Templated Maps API
// "/api/v1/map/named" is the new API,
// "/tiles/template" is for compatibility with versions up to 1.6.x
,base_url_templated: '(?:/api/v1/map/named|/user/:user/api/v1/map/named|/tiles/template)'
// Base url for the Detached Maps API
// "maps" is the the new API,
// "tiles/layergroup" is for compatibility with versions up to 1.6.x
,base_url_detached: '(?:/api/v1/map|/user/:user/api/v1/map|/tiles/layergroup)'
// Resource URLs expose endpoints to request/retrieve metadata associated to Maps: dataviews, analysis node status.
//
// This URLs depend on how `base_url_detached` and `user_from_host` are configured: the application can be
// configured to accept request with the {user} in the header host or in the request path.
// It also might depend on the configured cdn_url via `serverMetadata.cdn_url`.
//
// This template allows to make the endpoints generation more flexible, the template exposes the following params:
// 1. {{=it.cdn_url}}: will be used when `serverMetadata.cdn_url` exists.
// 2. {{=it.user}}: will use the username as extraced from `user_from_host` or `base_url_detached`.
// 3. {{=it.port}}: will use the `port` from this very same configuration file.
,resources_url_templates: {
http: 'http://cartodb.localhost/user/{{=it.user}}/api/v1/map',
https: 'http://cartodb.localhost/user/{{=it.user}}/api/v1/map'
}
// Maximum number of connections for one process
// 128 is a good value with a limit of 1024 open file descriptors
,maxConnections:128
// Maximum number of templates per user. Unlimited by default.
,maxUserTemplates:1024
// Seconds since "last creation" before a detached
// or template instance map expires. Or: how long do you want
// to be able to navigate the map without a reload ?
// Defaults to 7200 (2 hours)
,mapConfigTTL: 7200
// idle socket timeout, in milliseconds
,socket_timeout: 600000
,enable_cors: true
,cache_enabled: false
,log_format: ':req[X-Real-IP] :method :req[Host]:url :status :response-time ms -> :res[Content-Type] (:res[X-Tiler-Profiler])'
// If log_filename is given logs will be written
// there, in append mode. Otherwise stdout is used (default).
// Log file will be re-opened on receiving the HUP signal
,log_filename: undefined
// Templated database username for authorized user
// Supported labels: 'user_id' (read from redis)
,postgres_auth_user: 'development_cartodb_user_<%= user_id %>'
// Templated database password for authorized user
// Supported labels: 'user_id', 'user_password' (both read from redis)
,postgres_auth_pass: '<%= user_password %>'
,postgres: {
// Parameters to pass to datasource plugin of mapnik
// See http://github.com/mapnik/mapnik/wiki/PostGIS
type: "postgis",
user: "publicuser",
password: "public",
host: '127.0.0.1',
port: 5432,
extent: "-20037508.3,-20037508.3,20037508.3,20037508.3",
/* experimental
geometry_field: "the_geom",
extent: "-180,-90,180,90",
srid: 4326,
*/
// max number of rows to return when querying data, 0 means no limit
row_limit: 65535,
simplify_geometries: true,
use_overviews: true, // use overviews to retrieve raster
/*
* Set persist_connection to false if you want
* database connections to be closed on renderer
* expiration (1 minute after last use).
* Setting to true (the default) would never
* close any connection for the server's lifetime
*/
persist_connection: false,
max_size: 500
}
,mapnik_version: undefined
,mapnik_tile_format: 'png8:m=h'
,statsd: {
host: 'localhost',
port: 8125,
prefix: 'dev.',
cacheDns: true
// support all allowed node-statsd options
}
,renderer: {
// Milliseconds since last access before renderer cache item expires
cache_ttl: 60000,
statsInterval: 5000, // milliseconds between each report to statsd about number of renderers and mapnik pool status
mapnik: {
// The size of the pool of internal mapnik backend
// This pool size is per mapnik renderer created in Windshaft's RendererFactory
// See https://github.com/CartoDB/Windshaft/blob/master/lib/windshaft/renderers/renderer_factory.js
// Important: check the configuration of uv_threadpool_size to use suitable value
poolSize: 8,
// Whether grainstore will use a child process or not to transform CartoCSS into Mapnik XML.
// This will prevent blocking the main thread.
useCartocssWorkers: false,
// Metatile is the number of tiles-per-side that are going
// to be rendered at once. If all of them will be requested
// we'd have saved time. If only one will be used, we'd have
// wasted time.
metatile: 2,
// tilelive-mapnik uses an internal cache to store tiles/grids
// generated when using metatile. This options allow to tune
// the behaviour for that internal cache.
metatileCache: {
// Time an object must stay in the cache until is removed
ttl: 0,
// Whether an object must be removed after the first hit
// Usually you want to use `true` here when ttl>0.
deleteOnHit: false
},
// Override metatile behaviour depending on the format
formatMetatile: {
png: 2,
'grid.json': 1
},
// Buffer size is the tickness in pixel of a buffer
// around the rendered (meta?)tile.
//
// This is important for labels and other marker that overlap tile boundaries.
// Setting to 128 ensures no render artifacts.
// 64 may have artifacts but is faster.
// Less important if we can turn metatiling on.
bufferSize: 64,
// SQL queries will be wrapped with ST_SnapToGrid
// Snapping all points of the geometry to a regular grid
snapToGrid: false,
// SQL queries will be wrapped with ST_ClipByBox2D
// Returning the portion of a geometry falling within a rectangle
// It will only work if snapToGrid is enabled
clipByBox2d: false, // this requires postgis >=2.2 and geos >=3.5
limits: {
// Time in milliseconds a render request can take before it fails, some notes:
// - 0 means no render limit
// - it considers metatiling, naive implementation: (render timeout) * (number of tiles in metatile)
render: 0,
// As the render request will finish even if timed out, whether it should be placed in the internal
// cache or it should be fully discarded. When placed in the internal cache another attempt to retrieve
// the same tile will result in an immediate response, however that will use a lot of more application
// memory. If we want to enforce this behaviour we have to implement a cache eviction policy for the
// internal cache.
cacheOnTimeout: true
},
geojson: {
dbPoolParams: {
// maximum number of resources to create at any given time
size: 16,
// max milliseconds a resource can go unused before it should be destroyed
idleTimeout: 3000,
// frequency to check for idle resources
reapInterval: 1000
},
// SQL queries will be wrapped with ST_ClipByBox2D
// Returning the portion of a geometry falling within a rectangle
// It will only work if snapToGrid is enabled
clipByBox2d: false, // this requires postgis >=2.2 and geos >=3.5
// geometries will be simplified using ST_RemoveRepeatedPoints
// which cost is no more expensive than snapping and results are
// much closer to the original geometry
removeRepeatedPoints: false // this requires postgis >=2.2
}
},
http: {
timeout: 2000, // the timeout in ms for a http tile request
proxy: undefined, // the url for a proxy server
whitelist: [ // the whitelist of urlTemplates that can be used
'.*', // will enable any URL
'http://{s}.example.com/{z}/{x}/{y}.png'
],
// image to use as placeholder when urlTemplate is not in the whitelist
// if provided the http renderer will use it instead of throw an error
fallbackImage: {
type: 'fs', // 'fs' and 'url' supported
src: __dirname + '/../../assets/default-placeholder.png'
}
},
torque: {
dbPoolParams: {
// maximum number of resources to create at any given time
size: 16,
// max milliseconds a resource can go unused before it should be destroyed
idleTimeout: 3000,
// frequency to check for idle resources
reapInterval: 1000
}
}
}
// anything analyses related
,analysis: {
// batch configuration
batch: {
// Inline execution avoid the use of SQL API as batch endpoint
// When set to true it will run all analysis queries in series, with a direct connection to the DB
// This might be useful for:
// - testing
// - running an standalone server without any dependency on external services
inlineExecution: false,
// where the SQL API is running, it will use a custom Host header to specify the username.
endpoint: 'http://127.0.0.1:8080/api/v2/sql/job',
// the template to use for adding the host header in the batch api requests
hostHeaderTemplate: '{{=it.username}}.localhost.lan'
},
logger: {
// If filename is given logs comming from analysis client will be written
// there, in append mode. Otherwise 'log_filename' is used. Otherwise stdout is used (default).
// Log file will be re-opened on receiving the HUP signal
filename: '/tmp/analysis.log'
},
// Define max execution time in ms for analyses or tags
// If analysis or tag are not found in redis this values will be used as default.
limits: {
moran: { timeout: 120000, maxNumberOfRows: 1e5 },
cpu2x: { timeout: 60000 }
}
}
,millstone: {
// Needs to be writable by server user
cache_basedir: '/tmp/cdb-tiler-dev/millstone-dev'
}
,redis: {
host: '127.0.0.1',
port: 6379,
// Max number of connections in each pool.
// Users will be put on a queue when the limit is hit.
// Set to maxConnection to have no possible queues.
// There are currently 2 pools involved in serving
// windshaft-cartodb requests so multiply this number
// by 2 to know how many possible connections will be
// kept open by the server. The default is 50.
max: 50,
returnToHead: true, // defines the behaviour of the pool: false => queue, true => stack
idleTimeoutMillis: 1, // idle time before dropping connection
reapIntervalMillis: 1, // time between cleanups
slowQueries: {
log: true,
elapsedThreshold: 200
},
slowPool: {
log: true, // whether a slow acquire must be logged or not
elapsedThreshold: 25 // the threshold to determine an slow acquire must be reported or not
},
emitter: {
statusInterval: 5000 // time, in ms, between each status report is emitted from the pool, status is sent to statsd
},
unwatchOnRelease: false, // Send unwatch on release, see http://github.com/CartoDB/Windshaft-cartodb/issues/161
noReadyCheck: true // Check `no_ready_check` at https://github.com/mranney/node_redis/tree/v0.12.1#overloading
}
// For more details about this options check https://nodejs.org/api/http.html#http_new_agent_options
,httpAgent: {
keepAlive: true,
keepAliveMsecs: 1000,
maxSockets: 25,
maxFreeSockets: 256
}
,varnish: {
host: 'localhost',
port: 6082, // the por for the telnet interface where varnish is listening to
http_port: 6081, // the port for the HTTP interface where varnish is listening to
purge_enabled: false, // whether the purge/invalidation mechanism is enabled in varnish or not
secret: 'xxx',
ttl: 86400,
layergroupTtl: 86400 // the max-age for cache-control header in layergroup responses
}
// this [OPTIONAL] configuration enables invalidating by surrogate key in fastly
,fastly: {
// whether the invalidation is enabled or not
enabled: false,
// the fastly api key
apiKey: 'wadus_api_key',
// the service that will get surrogate key invalidation
serviceId: 'wadus_service_id'
}
// If useProfiler is true every response will be served with an
// X-Tiler-Profile header containing elapsed timing for various
// steps taken for producing the response.
,useProfiler:true
// Settings for the health check available at /health
,health: {
enabled: false,
username: 'localhost',
z: 0,
x: 0,
y: 0
}
,disabled_file: 'pids/disabled'
// Use this as a feature flags enabling/disabling mechanism
,enabledFeatures: {
// whether it should intercept tile render errors an act based on them, enabled by default.
onTileErrorStrategy: true,
// whether the affected tables for a given SQL must query directly postgresql or use the SQL API
cdbQueryTablesFromPostgres: true,
// whether in mapconfig is available stats & metadata for each layer
layerMetadata: true
}
};
module.exports = config;

View File

@ -0,0 +1,80 @@
user www-data;
worker_processes auto;
pid /run/nginx.pid;
include /etc/nginx/modules-enabled/*.conf;
events {
worker_connections 768;
}
http {
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 2048;
include /etc/nginx/mime.types;
default_type application/octet-stream;
ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE
ssl_prefer_server_ciphers on;
access_log /var/log/nginx/access.log;
error_log /var/log/nginx/error.log;
log_format main '[$time_local] $status REQUEST: "$request" REFERER: "$http_referer" FWD_FOR "$http_x_forwarded_for" PROXY_HOST: "$proxy_host" UPSTREAM_ADDR: "$upstream_addr"';
gzip on;
server {
server_name cartodb.localhost *.cartodb.localhost;
client_max_body_size 0;
location ~* /(user/.*/)?api/v1/maps {
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_pass http://127.0.0.1:3000;
}
location ~* /(user/.*/)?api/v1/map {
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Carto-Service windshaft; # tell varnish what backend
proxy_no_cache true; # Make sure nginx doesn't cache
proxy_cache_bypass true; # Make sure nginx doesn't cache
proxy_pass http://127.0.0.1:6081; # hand off to Varnish
}
location ~* /(user/.*/)?api/v2/sql {
# RedHog: Hack to work around bug in cartodb local hosting but using cdn for js libs
rewrite /(user/.*)?/api/v2/sql(.*) /$1/api/v2/sql$2 break;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Carto-Service sqlapi; # tell varnish what backend
proxy_no_cache true; # make sure nginx doesn't cache
proxy_cache_bypass true; # make sure nginx doesn't cache
proxy_pass http://127.0.0.1:6081; # hand off to Varnish
}
location ^~ /assets {
root /cartodb/public;
}
location / {
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_pass http://127.0.0.1:3000;
}
error_log /var/log/nginx/cartodb_error.log;
access_log /var/log/nginx/cartodb_access.log main;
}
}

50
config/database.yml Normal file
View File

@ -0,0 +1,50 @@
production:
adapter: postgresql
encoding: unicode
host: localhost
port: 5432
direct_port: 5432
database: carto_db_production
username: postgres
password:
conn_validator_timeout: 900
prepared_statements: false
staging:
adapter: postgresql
encoding: unicode
host: localhost
port: 5432
direct_port: 5432
database: carto_db_staging
username: postgres
password:
conn_validator_timeout: 900
prepared_statements: false
development:
adapter: postgresql
encoding: unicode
host: localhost
port: 5432
direct_port: 5432
database: carto_db_development
username: postgres
password:
conn_validator_timeout: 900
pool: 50
prepared_statements: false
test:
adapter: postgresql
encoding: unicode
database: carto_db_test
host: localhost
port: 5432
direct_port: 5432
username: postgres
password:
conn_validator_timeout: -1
prepared_statements: false
Footer

49
config/varnish.vcl Normal file
View File

@ -0,0 +1,49 @@
acl purge {
"localhost";
"127.0.0.1";
}
backend sqlapi {
.host = "127.0.0.1";
.port = "8080";
}
backend windshaft {
.host = "127.0.0.1";
.port = "8181";
}
sub vcl_recv {
# Allowing PURGE from localhost
if (req.request == "PURGE") {
if (!client.ip ~ purge) {
error 405 "Not allowed.";
}
return (lookup);
}
# Routing request to backend based on X-Carto-Service header from nginx
if (req.http.X-Carto-Service == "sqlapi") {
set req.backend = sqlapi;
remove req.http.X-Carto-Service;
}
if (req.http.X-Carto-Service == "windshaft") {
set req.backend = windshaft;
remove req.http.X-Carto-Service;
}
}
sub vcl_hit {
if (req.request == "PURGE") {
purge;
error 200 "Purged.";
}
}
sub vcl_miss {
if (req.request == "PURGE") {
purge;
error 200 "Purged.";
}
}
Footer