cartodb/services/user-mover/import_user.rb

781 lines
29 KiB
Ruby
Raw Normal View History

2020-06-15 10:58:47 +08:00
require 'pg'
require 'redis'
require 'yaml'
require 'json'
require 'logger'
require 'optparse'
require 'digest'
require 'securerandom'
require_relative 'config'
require_relative 'utils'
require_relative 'legacy_functions'
module CartoDB
module DataMover
class ImportJob
include CartoDB::DataMover::Utils
include CartoDB::DataMover::LegacyFunctions
attr_reader :logger
def initialize(options)
default_options = { data: true, metadata: true, set_banner: true, update_metadata: true }
@options = default_options.merge(options)
@config = CartoDB::DataMover::Config.config
@logger = @options[:logger] || default_logger
@@importjob_logger = @options[:import_job_logger]
@start = Time.now
@logger.debug "Starting import job with options: #{@options}"
@target_dbport = ENV['USER_DB_PORT'] || @config[:dbport]
@target_dbhost = @options[:host] || @config[:dbhost]
raise "File #{@options[:file]} does not exist!" unless File.exists?(@options[:file])
# User(s) metadata json
@pack_config = JSON::parse File.read(@options[:file])
@path = File.expand_path(File.dirname(@options[:file])) + "/"
job_uuid = @options[:job_uuid] || SecureRandom.uuid
@import_log = { job_uuid: job_uuid,
id: nil,
type: 'import',
path: @path,
start: @start,
end: nil,
elapsed_time: nil,
server: `hostname`.strip,
pid: Process.pid,
db_target: @target_dbhost,
status: nil,
trace: nil
}
@target_dbname = target_dbname
end
def run!
if @pack_config['organization']
process_org
else
process_user
end
end
def rollback!
close_all_database_connections
if @pack_config['organization']
rollback_org
else
rollback_user
end
end
def terminate_connections
@user_conn && @user_conn.close
@user_conn = nil
@superuser_user_conn && @superuser_user_conn.close
@superuser_user_conn = nil
@superuser_conn && @superuser_conn.close
@superuser_conn = nil
end
def db_exists?
superuser_pg_conn.query("select 1 from pg_database where datname = '#{@target_dbname}'").count > 0
end
private
def for_each_oauth_app_user(user_id)
Carto::User.find(user_id).oauth_app_users.each do |oau|
yield superuser_user_pg_conn, oau
end
rescue PG::Error => e
# Ignore role already exists errors
if e.message =~ /already exists/
@logger.warn "Warning: Oauth app user role already exists"
else
throw e
end
end
def process_user
@target_username = @pack_config['user']['username']
@target_userid = @pack_config['user']['id']
@import_log[:id] = @pack_config['user']['username']
@target_port = ENV['USER_DB_PORT'] || @config[:dbport]
if org_import?
@target_dbuser = database_username(@target_userid)
@target_schema = @pack_config['user']['database_schema']
@target_org_id = nil
else
organization_data = get_org_info(@options[:target_org])
@target_dbuser = database_username(@target_userid)
@target_schema = @pack_config['user']['database_schema']
@target_org_id = organization_data['id']
if owner?(organization_data)
# If the user being imported into an org is the owner of the org, we make the import as it were a single-user
@target_is_owner = true
else
# We fill the missing configuration data for the owner
organization_owner_data = get_user_info(organization_data['owner_id'])
@target_dbhost = @options[:host] || organization_owner_data['database_host']
@target_is_owner = false
end
end
if @options[:mode] == :import
import_user
elsif @options[:mode] == :rollback
rollback_user
end
end
def process_org
@organization_id = @pack_config['organization']['id']
@owner_id = @pack_config['organization']['owner_id']
@import_log[:id] = @organization_id
if @options[:mode] == :import
import_org
elsif @options[:mode] == :rollback
rollback_org
end
end
def rollback_user
if @options[:metadata]
rollback_metadata("user_#{@target_userid}_metadata_undo.sql")
rollback_redis("user_#{@target_userid}_metadata_undo.redis")
end
if @options[:data]
drop_database(@target_dbname) if @options[:drop_database] && !@options[:schema_mode]
drop_role(@target_dbuser) if @options[:drop_roles]
end
end
def import_user
begin
if @options[:metadata]
check_user_exists_redis
check_user_exists_postgres
end
rescue => e
@logger.error "Error in sanity checks: #{e}"
log_error(e)
remove_user_mover_banner(@pack_config['user']['id']) if @options[:set_banner]
throw e
end
if @options[:data]
# Password should be passed here too
create_user(@target_dbuser)
create_org_role(@target_dbname) # Create org role for the original org
create_org_owner_role(@target_dbname)
if org_import?
grant_user_org_role(@target_dbuser, @target_dbname)
end
if @target_schema != 'public'
set_user_search_path(@target_dbuser, @pack_config['user']['database_schema'])
create_public_db_user(@target_userid, @pack_config['user']['database_schema'])
end
@pack_config['roles'].each do |user, roles|
roles.each { |role| grant_user_role(user, role) }
end
begin
if @target_org_id && @target_is_owner && File.exists?("#{@path}org_#{@target_org_id}.dump")
create_db(@target_dbname, true)
create_org_oauth_app_user_roles(@target_org_id)
create_org_api_key_roles(@target_org_id)
import_pgdump("org_#{@target_org_id}.dump")
grant_org_oauth_app_user_roles(@target_org_id)
grant_org_api_key_roles(@target_org_id)
elsif File.exists? "#{@path}user_#{@target_userid}.dump"
create_db(@target_dbname, true)
create_user_oauth_app_user_roles(@target_userid)
create_user_api_key_roles(@target_userid)
import_pgdump("user_#{@target_userid}.dump")
grant_user_oauth_app_user_roles(@target_userid)
grant_user_api_key_roles(@target_userid)
elsif File.exists? "#{@path}#{@target_username}.schema.sql"
create_db(@target_dbname, false)
run_file_restore_schema("#{@target_username}.schema.sql")
end
rescue => e
begin
remove_user_mover_banner(@pack_config['user']['id'])
ensure
log_error(e)
throw e
end
end
end
if @options[:metadata]
begin
import_redis("user_#{@target_userid}_metadata.redis")
import_metadata("user_#{@target_userid}_metadata.sql")
rescue => e
rollback_metadata("user_#{@target_userid}_metadata_undo.sql")
rollback_redis("user_#{@target_userid}_metadata_undo.redis")
log_error(e)
remove_user_mover_banner(@pack_config['user']['id']) if @options[:set_banner]
throw e
end
end
if @options[:data]
configure_database(@target_dbhost)
end
if @options[:update_metadata]
update_metadata_user(@target_dbhost)
end
log_success
remove_user_mover_banner(@pack_config['user']['id']) if @options[:set_banner]
end
def import_org
import_metadata("org_#{@organization_id}_metadata.sql") if @options[:metadata]
create_org_role(@pack_config['users'][0]['database_name']) # Create org role for the original org
@pack_config['groups'].each do |group|
create_role(group['database_role'])
end
@pack_config['users'].each do |user|
# Password should be passed here too
create_user(database_username(user['id']))
create_public_db_user(user['id'], user['database_schema'])
grant_user_org_role(database_username(user['id']), user['database_name'])
end
org_user_ids = @pack_config['users'].map{|u| u['id']}
# We set the owner to be imported first (if schemas are not split, this will also import the whole org database)
org_user_ids = org_user_ids.insert(0, org_user_ids.delete(@owner_id))
org_user_ids.each do |user|
@logger.info("Importing org user #{user}..")
i = ImportJob.new(file: @path + "user_#{user}.json",
mode: @options[:mode],
host: @target_dbhost,
target_org: @pack_config['organization']['name'],
logger: @logger, data: @options[:data], metadata: @options[:metadata],
update_metadata: @options[:update_metadata])
i.run!
end
rescue => e
rollback_metadata("org_#{@organization_id}_metadata_undo.sql") if @options[:metadata]
log_error(e)
raise e
else
log_success
ensure
@pack_config['users'].each do |user|
remove_user_mover_banner(user['id']) if @options[:set_banner]
end
end
def rollback_org
db = @pack_config['users'][0]['database_name']
@pack_config['users'].each do |user|
@logger.info("Rolling back metadata for org user #{user['id']}..")
ImportJob.new(file: @path + "user_#{user['id']}.json",
mode: :rollback,
host: @target_dbhost,
target_org: @pack_config['organization']['name'],
logger: @logger,
metadata: @options[:metadata],
data: false).run!
end
rollback_metadata("org_#{@organization_id}_metadata_undo.sql") if @options[:metadata]
if @options[:data]
drop_database(db) if @options[:drop_database]
if @options[:drop_roles]
drop_role(org_role_name(db))
@pack_config['users'].each { |u| drop_role(database_username(u['id'])) }
@pack_config['groups'].each { |g| drop_role(g['database_role']) }
end
end
end
def drop_role(role)
superuser_pg_conn.query("DROP ROLE IF EXISTS \"#{role}\"")
end
def get_org_info(orgname)
result = metadata_pg_conn.query('SELECT * FROM organizations WHERE name = $1', [orgname])
raise "Organization #{orgname} not found" if result.cmd_tuples == 0
result[0]
end
def get_user_info(user_id)
result = metadata_pg_conn.query('SELECT * FROM users WHERE id = $1', [user_id])
raise "User with ID #{user_id} not found" if result.cmd_tuples == 0
result[0]
end
def set_user_statement_timeout(user, timeout)
superuser_pg_conn.query("ALTER USER #{superuser_pg_conn.quote_ident(user)} SET statement_timeout = #{timeout}")
end
def set_db_statement_timeout(db, timeout)
superuser_pg_conn.query("ALTER DATABASE #{superuser_pg_conn.quote_ident(db)} SET statement_timeout = #{timeout}")
end
def close_all_database_connections(database_name = @target_dbname)
superuser_pg_conn.query("SELECT pg_terminate_backend(pg_stat_activity.pid)
FROM pg_stat_activity
WHERE pg_stat_activity.datname = '#{database_name}'
AND pid <> pg_backend_pid();")
terminate_connections
end
def user_pg_conn
@user_conn ||= PG.connect(host: @target_dbhost,
user: @target_dbuser,
dbname: @target_dbname,
port: @config[:dbport],
connect_timeout: CartoDB::DataMover::Config.config[:connect_timeout])
end
def superuser_user_pg_conn
@superuser_user_conn ||= PG.connect(host: @target_dbhost,
user: @config[:dbuser],
dbname: @target_dbname,
port: @target_dbport,
connect_timeout: CartoDB::DataMover::Config.config[:connect_timeout])
end
def superuser_pg_conn
@superuser_conn ||= PG.connect(host: @target_dbhost,
user: @config[:dbuser],
dbname: 'postgres',
port: @target_dbport,
connect_timeout: CartoDB::DataMover::Config.config[:connect_timeout])
end
def drop_database(db_name)
close_all_database_connections(db_name)
superuser_pg_conn.query("DROP DATABASE IF EXISTS \"#{db_name}\"")
end
def clean_oids(user_id, user_schema)
tables = superuser_user_pg_conn.query("SELECT pg_class.oid, pg_class.relname
FROM pg_class inner join pg_namespace on pg_namespace.oid=pg_class.relnamespace where relkind='r' and nspname=$1;", [user_schema])
tables.each do |row|
metadata_pg_conn.query('UPDATE user_tables SET table_id=$1 where user_id = $2 and name=$3', [row['oid'], user_id, row['relname']])
end
end
def check_user_exists_postgres
@logger.debug 'Checking if user does not exist on Postgres metadata...'
result = metadata_pg_conn.query('SELECT * FROM USERS WHERE id = $1', [@target_userid])
raise "User already exists in PostgreSQL metadata" if result.cmd_tuples != 0
end
def check_user_exists_redis
@logger.debug 'Checking if user does not exist on Redis metadata...'
result = metadata_redis_conn.hgetall("rails:users:#{@target_dbname}")
raise "User already exists in Redis metadata" if result != {}
end
def conn_string(user, host, port, name)
%{#{!user ? '' : '-U ' + user} -h #{host} -p #{port} -d #{name} }
end
def run_redis_command(config)
"redis-cli -p #{config[:redis_port]} -h #{config[:redis_host]} --pipe"
end
def run_file_redis(file)
run_command("cat #{@path}#{file} | #{run_redis_command(@config)}")
end
def run_file_metadata_postgres(file)
run_command("cat #{@path}#{file} | psql -v ON_ERROR_STOP=1 #{conn_string(@config[:dbuser], @config[:dbhost], @config[:dbport], @config[:dbname])}")
end
def run_file_restore_postgres(file, sections = nil)
file_path = "#{@path}#{file}"
command = "#{pg_restore_bin_path(file_path)} -e --verbose -j4 --disable-triggers -Fc #{file_path} #{conn_string(
@config[:dbuser],
@target_dbhost,
@config[:user_dbport],
@target_dbname)}"
command += " --section=#{sections}" if sections
command += " --use-list=\"#{@toc_file}\""
run_command(command)
end
def run_file_restore_schema(file)
run_command("cat #{@path}#{file} | psql -v ON_ERROR_STOP=1 #{conn_string(
@config[:dbuser],
@target_dbhost,
@config[:user_dbport],
@target_dbname)}")
end
def import_redis(file)
@logger.info("Importing Redis keys from #{file}..")
run_file_redis(file)
end
def import_metadata(file)
@logger.info("Importing PostgreSQL metadata from #{file}..")
run_file_metadata_postgres(file)
end
def rollback_redis(file)
@logger.info("Rolling back Redis keys from #{file}..")
run_file_redis(file)
end
def rollback_metadata(file)
@logger.info("Rolling back PostgreSQL metadata from #{file}..")
run_file_metadata_postgres(file)
end
def clean_toc_file(file)
tmp = Tempfile.new("extract_#{@target_username}.txt")
File.open(file, 'r').each do |l|
tmp << l unless remove_line?(l)
end
tmp.close
FileUtils.mv(tmp.path, file)
ensure
tmp.delete
end
def toc_file(file)
toc_file = "#{@path}user_#{@target_username}.list"
command = "#{pg_restore_bin_path(file)} -l #{file} --file='#{toc_file}'"
run_command(command)
clean_toc_file(toc_file)
toc_file
end
# It would be a good idea to "disable" the invalidation trigger during the load
# This way, the restore will be much faster and won't also cause a big overhead
# in the old database while the process is ongoing
# Disabling it may be hard. Maybe it's easier to just exclude it in the export.
def import_pgdump(dump)
@logger.info("Importing dump from #{dump} using pg_restore..")
@toc_file = toc_file("#{@path}#{dump}")
run_file_restore_postgres(dump, 'pre-data')
run_file_restore_postgres(dump, 'data')
run_file_restore_postgres(dump, 'post-data')
end
def create_user(username, password = nil)
@logger.info "Creating user #{username} on target db.."
begin
superuser_pg_conn.query("CREATE ROLE \"#{username}\";
ALTER ROLE \"#{username}\" WITH NOSUPERUSER INHERIT NOCREATEROLE NOCREATEDB LOGIN NOREPLICATION;
GRANT publicuser TO \"#{username}\";")
superuser_pg_conn.query("ALTER ROLE #{username} WITH PASSWORD '#{password}'") unless password.nil?
rescue PG::Error => e
@logger.info "Target Postgres role already exists: #{e.inspect}"
end
end
def create_org_api_key_roles(org_id)
Carto::Organization.find(org_id).users.each { |u| create_user_api_key_roles(u.id) }
end
def create_user_api_key_roles(user_id)
Carto::User.find(user_id).api_keys.select(&:needs_setup?).each do |k|
begin
k.role_creation_queries.each { |q| superuser_user_pg_conn.query(q) }
rescue PG::Error => e
# Ignore role already exists errors
throw e unless e.message =~ /already exists/
end
end
end
def grant_org_api_key_roles(org_id)
Carto::Organization.find(org_id).users.each { |u| grant_user_api_key_roles(u.id) }
end
def grant_user_api_key_roles(user_id)
Carto::User.find(user_id).api_keys.select(&:needs_setup?).each do |k|
k.role_permission_queries.each { |q| superuser_user_pg_conn.query(q) }
k.grant_ownership_role_privileges
end
end
def create_org_oauth_app_user_roles(org_id)
Carto::Organization.find(org_id).users.each { |u| create_user_oauth_app_user_roles(u.id) }
end
def create_user_oauth_app_user_roles(user_id)
for_each_oauth_app_user(user_id) do |conn, oau|
conn.query(oau.create_dataset_role_query)
end
# different loops to avoid failing to create ownership role due to an error in the dataset role
for_each_oauth_app_user(user_id) do |conn, oau|
conn.query(oau.create_ownership_role_query)
end
end
def grant_org_oauth_app_user_roles(org_id)
Carto::Organization.find(org_id).users.each { |u| grant_user_oauth_app_user_roles(u.id) }
end
def grant_user_oauth_app_user_roles(user_id)
Carto::User.find(user_id).oauth_app_users.each do |oau|
begin
oau.grant_dataset_role_privileges
rescue Carto::OauthProvider::Errors::InvalidScope => e
# Ignore managed oauth_app_user errors
@logger.error "Error granting permissions to dataset role: #{e}"
end
begin
oau.grant_ownership_role_privileges
rescue Carto::OauthProvider::Errors::InvalidScope => e
# Ignore managed oauth_app_user errors
@logger.error "Error granting permissions to ownership role: #{e}"
end
end
end
def org_role_name(database_name)
"cdb_org_member_#{Digest::MD5.hexdigest(database_name)}"
end
def org_owner_role_name(database_name)
"#{database_name}_a"
end
def grant_user_role(user, role)
superuser_pg_conn.query("GRANT \"#{role}\" TO \"#{user}\"")
end
def grant_user_org_role(user, database_name)
grant_user_role(user, org_role_name(database_name))
end
def create_role(role, createrole = false)
@logger.info "Creating role #{role} on target db.."
begin
superuser_pg_conn.query("CREATE ROLE \"#{role}\" #{createrole ? 'CREATEROLE' : ''} NOLOGIN;")
rescue PG::Error => e
@logger.info "Target org role already exists: #{e.inspect}"
end
end
def create_org_role(database_name)
create_role(org_role_name(database_name))
end
def create_org_owner_role(database_name)
create_role(org_owner_role_name(database_name), true)
end
def create_public_db_user(user_id, schema)
user = "cartodb_publicuser_#{user_id}"
create_user(user)
superuser_pg_conn.query("GRANT publicuser TO \"#{user}\"")
set_user_search_path(user, schema)
end
def set_user_search_path(user, search_path_prefix)
search_path = CartoDB::UserModule::DBService.build_search_path(search_path_prefix)
superuser_pg_conn.query("ALTER USER \"#{user}\" SET search_path= #{search_path}")
end
def create_db(dbname, blank)
# Blank is when the database should be created empty (will receive a pg_dump).
# blank = false: it should have postgis, cartodb/cdb_importer/cdb schemas
# connect as superuser (postgres)
@logger.info "Creating user DB #{dbname}..."
begin
if blank
superuser_pg_conn.query("CREATE DATABASE \"#{dbname}\"")
else
superuser_pg_conn.query("CREATE DATABASE \"#{dbname}\" WITH TEMPLATE template_postgis")
end
# This rescue can be improved a little bit. The way it is it assumes that the error
# will always be that the db already exists
rescue PG::Error => e
if blank
@logger.error "Error: Database already exists"
raise e
else
@logger.warn "Warning: Database already exists"
end
end
setup_db(dbname) unless blank
end
def setup_db(_dbname)
['plpythonu', 'postgis'].each do |extension|
superuser_user_pg_conn.query("CREATE EXTENSION IF NOT EXISTS #{extension}")
end
cartodb_schema = superuser_user_pg_conn.query("SELECT nspname FROM pg_catalog.pg_namespace where nspname = 'cartodb'")
superuser_user_pg_conn.query("CREATE SCHEMA cartodb") if cartodb_schema.count == 0
cdb_importer_schema = superuser_user_pg_conn.query("SELECT nspname FROM pg_catalog.pg_namespace where nspname = 'cdb_importer'")
superuser_user_pg_conn.query("CREATE SCHEMA cdb_importer") if cdb_importer_schema.count == 0
cdb_schema = superuser_user_pg_conn.query("SELECT nspname FROM pg_catalog.pg_namespace where nspname = 'cdb'")
superuser_user_pg_conn.query("CREATE SCHEMA cdb") if cdb_schema.count == 0
superuser_user_pg_conn.query("CREATE EXTENSION IF NOT EXISTS cartodb WITH SCHEMA cartodb")
superuser_user_pg_conn.query("SELECT CDB_DisableGhostTablesTrigger()")
rescue PG::Error => e
@logger.error "Error: Cannot setup DB"
raise e
end
def update_database_retries(userid, username, db_host, db_name, retries = 1)
update_database(userid, username, db_host, db_name)
rescue => e
@logger.error "Error updating database"
if retries > 0
@logger.info "Retrying..."
update_database_retries(userid, username, db_host, db_name, retries - 1)
else
@logger.info "No more retries"
throw e
end
end
def update_database(userid, username, db_host, db_name)
update_postgres_database_host(userid, db_host)
update_redis_database_host(username, db_host)
update_postgres_database_name(userid, db_name)
update_redis_database_name(username, db_name)
end
def update_postgres_database_host(userid, db)
@logger.info "Updating PostgreSQL database_host..."
metadata_pg_conn.exec("UPDATE users SET database_host = $1 WHERE id = $2", [db, userid])
end
def update_postgres_organization(userid, org_id)
@logger.info "Updating PostgreSQL organization..."
metadata_pg_conn.exec("UPDATE users SET organization_id = $1 WHERE id = $2", [org_id, userid])
end
def update_redis_database_host(user, db)
@logger.info "Updating Redis database_host..."
metadata_redis_conn.hset("rails:users:#{user}", 'database_host', db)
end
def update_postgres_database_name(userid, db)
@logger.info "Updating PostgreSQL database_name..."
metadata_pg_conn.exec("UPDATE users SET database_name = $1 WHERE id = $2", [db, userid])
end
def update_redis_database_name(user, db)
@logger.info "Updating Redis database_name..."
metadata_redis_conn.hset("rails:users:#{user}", 'database_name', db)
end
def configure_database(target_dbhost)
# Note: this will change database_host on the user model to perform configuration but will not actually store
# the change
user_model = ::User.find(username: @target_username)
user_model.database_host = target_dbhost
user_model.database_name = @target_dbname
user_model.organization_id = @target_org_id if !@target_org_id.nil?
user_model.db_service.setup_organization_owner if @target_is_owner
user_model.db_service.monitor_user_notification # Used to inform the database_server
user_model.db_service.configure_database
user_model.db_service.monitor_user_notification
end
def update_metadata_user(target_dbhost)
user_model = ::User.find(username: @target_username)
orig_dbhost = user_model.database_host
changed_metadata = false
begin
clean_oids(@target_userid, @target_schema)
if @target_org_id
update_postgres_organization(@target_userid, @target_org_id)
else
update_postgres_organization(@target_userid, nil)
end
begin
update_database_retries(@target_userid, @target_username, target_dbhost, @target_dbname, 1)
changed_metadata = true
user_model.reload
end
rescue => e
if changed_metadata
update_database_retries(@target_userid, @target_username, orig_dbhost, @target_dbname, 1)
end
log_error(e)
remove_user_mover_banner(@pack_config['user']['id']) if @options[:set_banner]
throw e
end
end
def importjob_logger
@@importjob_logger ||= CartoDB.unformatted_logger("#{Rails.root}/log/datamover.log")
end
def log_error(e)
@logger.error e
@import_log[:end] = Time.now
@import_log[:elapsed_time] = (@import_log[:end] - @import_log[:start]).ceil
@import_log[:status] = 'failure'
@import_log[:trace] = e.to_s
importjob_logger.info(@import_log.to_json)
end
def log_success
@import_log[:end] = Time.now
@import_log[:elapsed_time] = (@import_log[:end] - @import_log[:start]).ceil
@import_log[:status] = 'success'
importjob_logger.info(@import_log.to_json)
end
def pg_restore_bin_path(dump)
get_pg_restore_bin_path(superuser_pg_conn, dump)
end
def target_dbname
return @pack_config['users'][0]['database_name'] if @pack_config['organization']
@target_userid = @pack_config['user']['id']
if org_import?
user_database(@target_userid)
else
organization_data = get_org_info(@options[:target_org])
if owner?(organization_data)
user_database(@target_userid)
else
organization_owner_data = get_user_info(organization_data['owner_id'])
organization_owner_data['database_name']
end
end
end
def owner?(organization_data)
@pack_config['user']['id'] == organization_data['owner_id']
end
def org_import?
@options[:target_org] == nil
end
def organization_import?
@pack_config['organization'] != nil
end
end
end
end