From 5f0eb86bed6302751e4a7b5f76507d063da43e74 Mon Sep 17 00:00:00 2001
From: Nick Ballenger
Date: Thu, 18 Jul 2019 16:08:53 -0700
Subject: [PATCH 1/4] Changing the Nginx config to a single file

I've combined the core nginx.conf with the proxy config, which all goes
into /etc/nginx/nginx.conf. I've made a number of changes:

* Nginx now proxies both SQL API and Windshaft requests through Varnish.
* Nginx adds a custom HTTP header, X-Carto-Service, so that Varnish can
  differentiate between backends (since it can't do so based on the
  incoming port).
* I've modified the primary Nginx log format to include more information
  on how requests are being proxied--you can now see the upstream
  address for proxied requests.
* I've added the `proxy_no_cache` and `proxy_cache_bypass` directives to
  the Windshaft and SQL API proxy sections. Without those directives,
  Nginx may attempt to act as a cache, returning 304 Not Modified for
  resources that should instead be cached by Varnish (whose cache is
  invalidated via a Postgres trigger when metadata is updated).
---
 Dockerfile                      |   2 +-
 config/cartodb.nginx.proxy.conf | 124 ++++++++++++++++++++------------
 2 files changed, 80 insertions(+), 46 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 59c5804..894cd46 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -221,7 +221,7 @@ ADD ./config/app_config.yml /cartodb/config/app_config.yml
 ADD ./config/database.yml /cartodb/config/database.yml
 ADD ./create_dev_user /cartodb/script/create_dev_user
 ADD ./setup_organization.sh /cartodb/script/setup_organization.sh
-ADD ./config/cartodb.nginx.proxy.conf /etc/nginx/sites-enabled/default
+ADD ./config/cartodb.nginx.proxy.conf /etc/nginx/nginx.conf
 ADD ./config/varnish.vcl /etc/varnish.vcl
 ADD ./geocoder.sh /cartodb/script/geocoder.sh
 ADD ./geocoder_server.sql /cartodb/script/geocoder_server.sql
diff --git a/config/cartodb.nginx.proxy.conf b/config/cartodb.nginx.proxy.conf
index 7878a98..d726e42 100644
--- a/config/cartodb.nginx.proxy.conf
+++ b/config/cartodb.nginx.proxy.conf
@@ -1,46 +1,80 @@
-server {
-    server_name cartodb.localhost *.cartodb.localhost;
-
-    client_max_body_size 0;
-
-    location ~* /(user/.*/)?api/v1/maps {
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_pass http://127.0.0.1:3000;
-    }
-
-    location ~* /(user/.*/)?api/v1/map {
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_pass http://127.0.0.1:8181;
-    }
-
-    location ~* /(user/.*)?/api/v2/sql {
-        # RedHog: Hack to work around bug in cartodb local hosting but using cdn for js libs
-        rewrite /(user/.*)?/api/v2/sql(.*) /$1/api/v2/sql$2 break;
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_pass http://127.0.0.1:8080;
-    }
-
-    location ^~ /assets {
-        root /cartodb/public;
-    }
-
-    location / {
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_pass http://127.0.0.1:3000;
-    }
-
-    error_log /var/log/nginx/cartodb_error.log;
-    access_log /var/log/nginx/cartodb_access.log;
+user www-data;
+worker_processes auto;
+pid /run/nginx.pid;
+include /etc/nginx/modules-enabled/*.conf;
+
+events {
+    worker_connections 768;
+}
+
+http {
+    sendfile on;
+    tcp_nopush on;
+    tcp_nodelay on;
+    keepalive_timeout 65;
+    types_hash_max_size 2048;
+    include /etc/nginx/mime.types;
+    default_type application/octet-stream;
+
+    ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE
+    ssl_prefer_server_ciphers on;
+
+    access_log /var/log/nginx/access.log;
+    error_log /var/log/nginx/error.log;
+
+    log_format main '[$time_local] $status REQUEST: "$request" REFERER: "$http_referer" FWD_FOR "$http_x_forwarded_for" PROXY_HOST: "$proxy_host" UPSTREAM_ADDR: "$upstream_addr"';
+
+    gzip on;
+
+    server {
+        server_name cartodb.localhost *.cartodb.localhost;
+        client_max_body_size 0;
+
+        location ~* /(user/.*/)?api/v1/maps {
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_pass http://127.0.0.1:3000;
+        }
+
+        location ~* /(user/.*/)?api/v1/map {
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_set_header X-Carto-Service windshaft; # tell Varnish which backend
+            proxy_no_cache true;     # make sure Nginx doesn't cache
+            proxy_cache_bypass true; # make sure Nginx doesn't cache
+            proxy_pass http://127.0.0.1:6081; # hand off to Varnish
+        }
+
+        location ~* /(user/.*/)?api/v2/sql {
+            # RedHog: Hack to work around bug in cartodb local hosting but using cdn for js libs
+            rewrite /(user/.*)?/api/v2/sql(.*) /$1/api/v2/sql$2 break;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_set_header X-Carto-Service sqlapi; # tell Varnish which backend
+            proxy_no_cache true;     # make sure Nginx doesn't cache
+            proxy_cache_bypass true; # make sure Nginx doesn't cache
+            proxy_pass http://127.0.0.1:6081; # hand off to Varnish
+        }
+
+        location ^~ /assets {
+            root /cartodb/public;
+        }
+
+        location / {
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_pass http://127.0.0.1:3000;
+        }
+
+        error_log /var/log/nginx/cartodb_error.log;
+        access_log /var/log/nginx/cartodb_access.log main;
+    }
 }

From de29a46c57d2a4e1faff22fd2461ed67e45a5d23 Mon Sep 17 00:00:00 2001
From: Nick Ballenger
Date: Thu, 18 Jul 2019 16:16:45 -0700
Subject: [PATCH 2/4] Changing path to nginx proxy conf file

---
 startup.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/startup.sh b/startup.sh
index 3210fbe..c40ac39 100755
--- a/startup.sh
+++ b/startup.sh
@@ -2,7 +2,7 @@
 
 export CARTO_HOSTNAME=${CARTO_HOSTNAME:=$HOSTNAME}
 
-perl -pi -e 's/cartodb\.localhost/$ENV{"CARTO_HOSTNAME"}/g' /etc/nginx/sites-enabled/default /cartodb/config/app_config.yml /Windshaft-cartodb/config/environments/development.js
+perl -pi -e 's/cartodb\.localhost/$ENV{"CARTO_HOSTNAME"}/g' /etc/nginx/nginx.conf /cartodb/config/app_config.yml /Windshaft-cartodb/config/environments/development.js
 
 PGDATA=/var/lib/postgresql
 if [ "$(stat -c %U $PGDATA)" != "postgres" ]; then

From d02a917f3898bc3feee0617612d1ffce08188d56 Mon Sep 17 00:00:00 2001
From: Nick Ballenger
Date: Thu, 18 Jul 2019 16:17:28 -0700
Subject: [PATCH 3/4] Commented out 'invalidation_service' section

I commented out
the entire 'invalidation_service' section from app_config.yml. It
_should_ be sufficient to set 'enabled' to false in that block in order
to prevent the Redis/Resque-based invalidation service from being used
inside the Postgres trigger for invalidating cache items, but it's
actually easier to just comment out the whole block. See this portion
of the Carto code for reference:

https://github.com/CartoDB/cartodb/blob/05a05fd6959bf4cc42480daec08d28449532cd8e/app/models/user/db_service.rb#L1062-L1070

The branch we want to go down in that code is the middle one--we want
to end up with `create_function_invalidate_varnish_http` running. That
will create a Postgres trigger that invalidates cache entries by
hitting the Varnish server's HTTP listener, which runs on 6081. (You
could have it hit the telnet port by taking the third branch of that
code, but given that telnet support isn't included in later Varnish
versions, best not to.)

You want to avoid the first branch of that code,
`create_function_invalidate_varnish_invalidation_service`, because it
includes this line:

https://github.com/CartoDB/cartodb/blob/05a05fd6959bf4cc42480daec08d28449532cd8e/app/models/user/db_service.rb#L1601

That's calling a custom Redis command, `TCH`, which is defined in a
repo that Carto has not open sourced--meaning the 'invalidation
service' (as a Redis job queue for the Resque job runner) can't be used
in open source Carto (unless you reverse engineer the Redis commands it
uses).
---
 config/app_config.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/config/app_config.yml b/config/app_config.yml
index 42460a5..025116e 100644
--- a/config/app_config.yml
+++ b/config/app_config.yml
@@ -163,14 +163,14 @@ defaults: &defaults
     timeout: 5
     # 'warning' or 'error'
     trigger_verbose: true
-  invalidation_service:
-    enabled: false
-    host: '127.0.0.1'
-    port: 3142
-    retries: 5 # number of retries before considering failure
-    critical: false # either the failure is considered an error or a warning
-    timeout: 5 # socket timeout
-    trigger_verbose: true
+#  invalidation_service:
+#    enabled: false
+#    host: '127.0.0.1'
+#    port: 3142
+#    retries: 5 # number of retries before considering failure
+#    critical: false # either the failure is considered an error or a warning
+#    timeout: 5 # socket timeout
+#    trigger_verbose: true
   redis:
     host: '127.0.0.1'
     port: 6379

From f0b5b7e79f0b3f08651490f2cdc3cf58576f8abd Mon Sep 17 00:00:00 2001
From: Nick Ballenger
Date: Thu, 18 Jul 2019 16:27:25 -0700
Subject: [PATCH 4/4] Updated Varnish to cache SQL API / Windshaft

The previous version of this file was set up to cache requests for the
SQL API, but unfortunately no traffic was ever reaching Varnish to be
cached: Nginx was proxying directly to the SQL API port, while Varnish
was listening on 6081, so it never had a chance to intercept those
requests. I updated the Nginx proxy config to aim at 6081 for requests
to both the SQL API and Windshaft, so Varnish now receives that
traffic. However, for Varnish to know which backend to send traffic to,
I had to add a custom HTTP header in the Nginx proxy pass. That header
is picked up in the `vcl_recv` Varnish subroutine and used to switch
between backends.

Additionally, I've added logic controlling which hosts can issue an
HTTP PURGE request--in this case just localhost, since everything runs
in a single image. The purges will typically come from a Postgres
trigger.
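For illustration only (this command isn't part of the commit, and the
URL path, 'dev' username, and query are hypothetical--the real purge
URLs are generated by Carto's trigger code), a purge issued from inside
the container might look like:

    curl -X PURGE -H "Host: cartodb.localhost" \
        "http://127.0.0.1:6081/user/dev/api/v2/sql?q=SELECT%201"

A client whose IP doesn't match the `purge` ACL defined below gets back
a 405 "Not allowed." response instead.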
For an overview of HTTP purging in Varnish, which the purge-related
changes here follow, see the Varnish docs:

https://varnish-cache.org/docs/3.0/tutorial/purging.html#http-purges
---
 config/varnish.vcl | 50 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/config/varnish.vcl b/config/varnish.vcl
index 0e3fed4..45bd7fb 100644
--- a/config/varnish.vcl
+++ b/config/varnish.vcl
@@ -1,4 +1,48 @@
-backend default {
-    .host = "127.0.0.1";
-    .port = "8080";
+acl purge {
+    "localhost";
+    "127.0.0.1";
+}
+
+backend sqlapi {
+    .host = "127.0.0.1";
+    .port = "8080";
+}
+
+backend windshaft {
+    .host = "127.0.0.1";
+    .port = "8181";
+}
+
+sub vcl_recv {
+    # Allowing PURGE from localhost
+    if (req.request == "PURGE") {
+        if (!client.ip ~ purge) {
+            error 405 "Not allowed.";
+        }
+        return (lookup);
+    }
+
+    # Routing request to backend based on X-Carto-Service header from nginx
+    if (req.http.X-Carto-Service == "sqlapi") {
+        set req.backend = sqlapi;
+        remove req.http.X-Carto-Service;
+    }
+    if (req.http.X-Carto-Service == "windshaft") {
+        set req.backend = windshaft;
+        remove req.http.X-Carto-Service;
+    }
+}
+
+sub vcl_hit {
+    if (req.request == "PURGE") {
+        purge;
+        error 200 "Purged.";
+    }
+}
+
+sub vcl_miss {
+    if (req.request == "PURGE") {
+        purge;
+        error 200 "Purged.";
+    }
 }
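
As a quick end-to-end check of the caching behavior (illustrative only,
not part of the patches above; the 'dev' username is an assumption
based on this image's default user, and the counter names are the
Varnish 3.x ones--on 4.x and later they carry a MAIN. prefix):

    # Request the same SQL API URL twice, then compare hit/miss counters
    curl -s "http://cartodb.localhost/user/dev/api/v2/sql?q=SELECT%201" > /dev/null
    curl -s "http://cartodb.localhost/user/dev/api/v2/sql?q=SELECT%201" > /dev/null
    varnishstat -1 -f cache_hit,cache_miss

If the second request registers as a cache hit, Nginx is handing SQL
API traffic to Varnish rather than caching or serving it itself.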