cartodb-4.42/lib/carto/db/sanitize.rb
2024-04-06 05:25:13 +00:00

90 lines
4.3 KiB
Ruby

module Carto
module DB
module Sanitize
PREFIX_REPLACEMENT = 'table_'.freeze
SUFFIX_REPLACEMENT = '_t'.freeze
CHARACTER_REPLACEMENT = '_'.freeze
# See https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
MAX_IDENTIFIER_LENGTH = 63
DISALLOWED_STARTING_CHARACTERS_REGEX = /^[^a-z]+/
DISALLOWED_CHARACTERS_REGEX = /[^a-z|_|0-9]/
REPEATED_UNDERSCORES_REGEX = /_{2,}/
# PG12_DEPRECATED raster, raster_overviews and raster_columns not supported in postgis 3+
SYSTEM_TABLE_NAMES = %w(spatial_ref_sys geography_columns geometry_columns raster_columns raster_overviews
cdb_tablemetadata geometry raster).freeze
RESERVED_TABLE_NAMES = %w(layergroup all public).freeze
# See https://www.postgresql.org/docs/current/ddl-system-columns.html
SYSTEM_COLUMN_NAMES = %w(tableoid xmin cmin xmax cmax ctid).freeze
# See https://www.postgresql.org/docs/current/sql-keywords-appendix.html
RESERVED_WORDS = %w(all analyse analyze and any array as asc asymmetric authorization binary both
case cast check collate collation column concurrently constraint create cross
current_catalog current_date current_role current_schema current_time
current_timestamp current_user default deferrable desc distinct do else end
except false fetch for foreign freeze from full grant group having ilike in
initially inner intersect into is isnull join lateral leading left like limit
localtime localtimestamp natural not notnull null offset on only or order outer
overlaps placing primary references returning right select session_user similar
some symmetric table tablesample then to trailing true union unique user using
variadic verbose when where window with).freeze
ADDITIONAL_RESERVED_COLUMNS = %w(oid ogc_fid).freeze
REJECTED_COLUMN_NAMES = (SYSTEM_COLUMN_NAMES + ADDITIONAL_RESERVED_COLUMNS).freeze
# FIXME we have been reserving these name in columns but I don't know the reason ¯\_(ツ)_/¯
ADDITIONAL_WORDS = %w(between new off old format controller action).freeze
RESERVED_COLUMN_NAMES = SYSTEM_COLUMN_NAMES + RESERVED_WORDS + ADDITIONAL_WORDS
def self.append_with_truncate_and_sanitize(identifier, suffix)
suffix_length = suffix.length
unless suffix_length < MAX_IDENTIFIER_LENGTH
raise "'#{suffix}' is too long (#{suffix_length} >= #{MAX_IDENTIFIER_LENGTH}) for append"
end
truncated_identifier = if identifier.length + suffix_length > MAX_IDENTIFIER_LENGTH
identifier[0..(MAX_IDENTIFIER_LENGTH - suffix_length - 1)]
else
identifier
end
sanitize_identifier("#{truncated_identifier}#{suffix}")
end
def self.sanitize_identifier(identifier)
# Make lowercase
sanitized_identifier = identifier.downcase
# Strip of starting or ending white spaces
sanitized_identifier = sanitized_identifier.strip
# Remove disallowed starting characters
if sanitized_identifier =~ DISALLOWED_STARTING_CHARACTERS_REGEX
sanitized_identifier = PREFIX_REPLACEMENT + sanitized_identifier
end
# Replace disallowed characters with '_'
sanitized_identifier = sanitized_identifier.gsub(DISALLOWED_CHARACTERS_REGEX, CHARACTER_REPLACEMENT)
# Remove repated '_'
sanitized_identifier = sanitized_identifier.gsub(REPEATED_UNDERSCORES_REGEX, '_')
# Make sure it's not too long
sanitized_identifier = sanitized_identifier[0..(MAX_IDENTIFIER_LENGTH - 1)]
# Append _t if is a reserved word or reserved table name
# NOTE: strictly, we don't need to avoid ADDITIONAL_WORDS for table names
if (RESERVED_WORDS + RESERVED_TABLE_NAMES + SYSTEM_TABLE_NAMES + ADDITIONAL_WORDS).map(&:downcase).include?(sanitized_identifier)
sanitized_identifier += SUFFIX_REPLACEMENT
end
sanitized_identifier
end
end
end
end