cartodb-4.42/services/importer/spec/unit/column_spec.rb
2024-04-06 05:25:13 +00:00

497 lines
17 KiB
Ruby
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

require_relative '../../lib/importer/column'
require_relative '../factories/pg_connection'
require_relative '../doubles/log'
require_relative '../../../../spec/rspec_configuration.rb'
require_relative '../../../../spec/spec_helper'
include CartoDB::Importer2
describe Column do
before(:all) do
@user = create_user
@user.save
@db = @user.in_database
@db.execute('CREATE SCHEMA IF NOT EXISTS cdb_importer')
@db.execute('CREATE EXTENSION IF NOT EXISTS postgis WITH SCHEMA public')
@db.execute('CREATE EXTENSION IF NOT EXISTS postgis_topology')
@db.execute('SET search_path TO cdb_importer,public')
end
before(:each) do
@table_name = create_table(@db)
@column_name = 'the_geom'
@column = Column.new(@db, @table_name, @column_name, @user, Column::DEFAULT_SCHEMA, nil, CartoDB::Importer2::Doubles::Log.new(@user), capture_exceptions = false)
@dataset = @db[@table_name.to_sym]
end
after(:each) do
@db.drop_table?(@table_name.to_sym)
end
after(:all) do
@db.execute('DROP SCHEMA cdb_importer CASCADE')
@db.disconnect
@user.destroy
end
describe '#type' do
it 'returns the type of the column as returned by the database' do
5.times { @dataset.insert(random_hexewkb_record) }
@column.type.should eq 'text'
@column.geometrify
@column.type.should match 'geometry'
end
end #type
describe '#geometrify' do
it "parses and updates the passed geometry column if it's in WKT" do
5.times { @dataset.insert(random_wkt_record) }
@column.geometrify
end
it 'raises if empty geometry column' do
5.times { @dataset.insert(name: '', description:'', the_geom: '') }
lambda { @column.geometrify }.should raise_error RuntimeError
end
it "guarantees the geometry column ends up with a geometry type" do
5.times { @dataset.insert(random_hexewkb_record) }
@column.geometrify
@column.type.should match /geometry/
end
end #parse
describe '#convert_from_wkt' do
it 'populates an existing geometry column parsing its values in WKT' do
@dataset.insert(random_wkt_record)
@column.convert_from_wkt
@column.sample.should match /^0101/
end
it "raises if column contents aren't in WKT" do
@dataset.insert(bogus_record)
lambda { @column.convert_from_wkt }.should raise_error Sequel::DatabaseError
end
end #convert_from_wkt
describe '#convert_from_geojson' do
it 'populates an existing geometry column parsing its values in GeoJSON' do
@dataset.insert(random_geojson_record)
@column.convert_from_geojson
@column.sample.should match /^0101/
end
it "raises if column contents aren't in GeoJSON" do
@dataset.insert(bogus_record)
lambda { @column.convert_from_geojson }.should raise_error Sequel::DatabaseError
end
end
describe '#convert_from_kml_point' do
it 'populates an existing geometry column parsing its values in KML Point' do
@dataset.insert(random_kml_point_record)
@column.convert_from_kml_point
@column.sample.should match /^0101/
end
it "raises if column contents aren't in KML Point" do
@dataset.insert(bogus_record)
lambda { @column.convert_from_kml_point }.should raise_error Sequel::DatabaseError
end
end
describe '#convert_from_kml_multi' do
it 'populates an existing geometry column parsing its values in KML Multi' do
@dataset.insert(random_kml_multi_record)
@column.convert_from_kml_multi
@column.sample.should match /^0105/
end
it "raises if column contents aren't in KML Point" do
@dataset.insert(bogus_record)
expect { @column.convert_from_kml_multi }.to raise_error Sequel::DatabaseError
end
end
describe '#wkb?' do
it 'returns true if the passed column contains geometries in WKB' do
5.times { @dataset.insert(random_hexewkb_record) }
@column.wkb?.should eq true
end
it 'returns false otherwise' do
@dataset.insert(bogus_record)
@column.wkb?.should eq false
end
end #wkb?
describe '#geojson?' do
it 'returns true if the passed column contains geometries in WKB' do
5.times { @dataset.insert(random_hexewkb_record) }
@column.wkb?.should eq true
end
it 'returns false otherwise' do
@dataset.insert(bogus_record)
@column.wkb?.should eq false
end
end
describe '#kml_point?' do
it 'returns true if the passed column contains geometries in KML Point' do
5.times { @dataset.insert(random_kml_point_record) }
@column.kml_point?.should eq true
end
it 'returns false otherwise' do
@dataset.insert(bogus_record)
@column.kml_point?.should eq false
end
end
describe '#kml_multi?' do
it 'returns true if the passed column contains geometries in KML Multi' do
5.times { @dataset.insert(random_kml_multi_record) }
@column.kml_multi?.should eq true
end
it 'returns false otherwise' do
@dataset.insert(bogus_record)
@column.kml_multi?.should eq false
end
end
describe '#cast_to' do
it 'casts the passed column to a geometry type' do
5.times { @dataset.insert(random_hexewkb_record) }
@column.type.should eq 'text'
@column.cast_to('geometry')
@column.type.should match 'geometry'
end
it "raises if column contents aren't geometries" do
@dataset.insert(
name: 'bogus', description: 'bogus', the_geom: 'bogus'
)
expect { @column.cast_to('geometry') }.to raise_error Sequel::DatabaseError
end
end #cast_to
describe '#sample' do
it "retrieves the passed column from the first record
where it isn't null" do
5.times { @dataset.insert(random_hexewkb_record) }
@column.sample.should match /0101/
end
it 'returns nil if no records with data in the column' do
@column.sample.should_not be
end
end # sample
describe '#records_with_data' do
it 'returns a dataset with those records with data in this column' do
@column.records_with_data.should be_empty
@column.records_with_data.is_a?(Sequel::Postgres::Dataset).should be_true
@dataset.insert(random_wkt_record)
@column.records_with_data.should_not be_empty
end
end #records_with_data
describe '#empty?' do
it 'returns true if no records with data in this column' do
@column.empty?.should eq true
@dataset.insert(random_wkt_record)
@column.empty?.should eq false
end
end #empty?
describe '#rename_to' do
it 'renames the column' do
@dataset.insert(random_hexewkb_record)
@column.rename_to('bogus_name')
@dataset.first.keys.should include :bogus_name
end
it 'does nothing if the new name is the same as the current one' do
@dataset.insert(random_hexewkb_record)
@column.rename_to('the_geom')
@dataset.first.keys.should include @column_name.to_sym
end
end #rename_to
describe '#sanitized_name' do
it 'returns a sanitized version of the column name' do
Column.new(@db, @table_name, '+++sanitized+++', Column::DEFAULT_SCHEMA, nil, CartoDB::Importer2::Doubles::Log.new(@user)).sanitized_name
.should eq 'sanitized'
end
it 'returns the same name if no sanitization needed' do
Column.new(@db, @table_name, 'sanitized', Column::DEFAULT_SCHEMA, nil, CartoDB::Importer2::Doubles::Log.new(@user)).sanitized_name
.should eq 'sanitized'
end
end #sanitized_name
describe '#reserved?' do
it 'returns true if name is a reserved keyword' do
Column.reserved?('select').should eq true
Column.reserved?('bogus').should eq false
end
end #reserved?
describe '#unsupported?' do
it 'returns true if name is not supported by Postgres' do
Column.unsupported?('9name').should eq true
Column.unsupported?('name9').should eq false
end
end #unsupported?
def create_table(db, options={})
table_name = options.fetch(:table_name, "importer_#{rand(99999)}")
db.drop_table?(table_name)
db.create_table?(table_name) do
String :name
String :description
String :the_geom
String :ogc_fid
end
table_name
rescue StandardError
db.run(%Q{DROP TABLE "cdb_importer"."#{table_name}"})
table_name
end #create_table
def bogus_record
{
name: 'bogus',
description: 'bogus',
the_geom: 'bogus',
ogc_fid: 1
}
end #bogus_record
def random_hexewkb_record
random = rand(999)
{
name: "bogus #{rand(999)}",
description: "bogus #{rand(999)}",
the_geom: "0101000020E61000004486E281C5C257C068B89DDA998F4640",
ogc_fid: 1
}
end #random_hexewkb_record
def random_wkt_record
random = rand(999)
{
name: "bogus #{rand(999)}",
description: "bogus #{rand(999)}",
the_geom: 'POINT(-71.060316 48.432044)',
ogc_fid: 1
}
end #random_wkt_record
def random_geojson_record
random = rand(999)
{
name: "bogus #{rand(999)}",
description: "bogus #{rand(999)}",
the_geom: { type: "Point", coordinates: [102.0, 0.5] }.to_json,
ogc_fid: 1
}
end #random_geojson_record
def random_kml_point_record
random = rand(999)
{
name: "bogus #{rand(999)}",
description: "bogus #{rand(999)}",
the_geom: "<Point><coordinates>137.625,36.975</coordinates></Point>",
ogc_fid: 1
}
end #random_kml_point_record
def random_kml_multi_record
random = rand(999)
{
name: "bogus #{rand(999)}",
description: "bogus #{rand(999)}",
the_geom: %Q{
<LineString>
<coordinates>
-112.2550785337791,36.07954952145647,2357
-112.2549277039738,36.08117083492122,2357
-112.2552505069063,36.08260761307279,2357
-112.2564540158376,36.08395660588506,2357
-112.2580238976449,36.08511401044813,2357
-112.2595218489022,36.08584355239394,2357
-112.2608216347552,36.08612634548589,2357
-112.262073428656,36.08626019085147,2357
-112.2633204928495,36.08621519860091,2357
-112.2644963846444,36.08627897945274,2357
-112.2656969554589,36.08649599090644,2357
</coordinates>
</LineString>
},
ogc_fid: 1
}
end #random_kml_multi_record
LEGACY_SANITIZATION_EXAMPLES = {
"abc" => "abc",
"abc xyz" => "abc_xyz",
"2abc" => "column_2abc",
"Abc" => "_bc",
"\u0432\u044b\u0445\u043b\u043e\u043f\u044b \u0430\u0432\u0442\u043e\u0442\u0440\u0430\u043d\u0441\u043f\u043e\u0440\u0442\u04302" => "_2",
"\u043d\u0435\u0443\u0441\u0442\u0430\u043d\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0439 \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a2" => "_2",
"\u0432\u044b\u0431\u0440\u043e\u0441\u044b \u043f\u0440\u0435\u0434\u043f\u0440\u0438\u044f\u0442\u0438\u04392" => "_2",
"\u013ar" => "_r",
"CONVERT(BlueNumber USING utf8)" => '_lue_umber_utf8_',
"is growing site fenced?" => "is_growing_site_fenced_",
"if it\u2019s a community garden, is it collective or allotment?" => "if_it_s_a_community_garden_is_it_collective_or_allotment_",
"Paddock" => "_addock",
"Date Due" => "_ate_ue",
"__5" => "_5",
"__1" => "_1",
"tel\u00e9fono" => "tel_fono",
":@computed_region_wvic_k925" => "_computed_region_wvic_k925",
"\u0420\u0435\u0433\u0438\u043e\u043d" => "_",
"\u043d\u0435\u0443\u0441\u0442\u0430\u043d\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0439 \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a" => "_",
"> min" => "_min",
"12_ schedule of visits.0" => "column_12_schedule_of_visits_0",
"previous rent (\u00a3 per sq ft)" => "previous_rent_per_sq_ft_",
"description/\u540d\u7a31" => "description_",
"description/\u5730\u5740" => "description_",
"@relations" => "_relations",
"EntityName" => "_ntity_ame",
"_ injured" => "_injured",
"trips 11 _ 15 miles" => "trips_11_15_miles",
"as" => "as",
"any" => "any",
"xmin" => "xmin",
"action" => "action",
}
LEGACY_SANITIZATION_COLS = {
['выбросы предприятий2', 'выхлопы автотранспорта2', 'неустановленный источник2'] => ['_2', '_2_1', '_2_2'],
["description/\u540d\u7a31", "description/\u5730\u5740"] => ["description_", "description__1"]
}
VERSION_2_SANITIZATION_EXAMPLES = {
"abc" => "abc",
"abc xyz" => "abc_xyz",
"2abc" => "_2abc",
"Abc" => "abc",
"выхлопы автотранспорта2" => "vyxlopy_vtotr_nsport_2",
"неустановленный источник2" => "neust_novlennyj_istochnik2",
"выбросы предприятий2" => "vybrosy_predpriyatij2",
"ĺr" => "lr",
"CONVERT(BlueNumber USING utf8)" => "convert_bluenumber_using_utf8",
"is growing site fenced?" => "is_growing_site_fenced",
"if its a community garden, is it collective or allotment?" => "if_it_s_a_community_garden_is_it_collective_or_allotment",
"Paddock" => "paddock",
"Date Due" => "date_due",
"__5" => "_5",
"__1" => "_1",
"teléfono" => "telefono",
":@computed_region_wvic_k925" => "computed_region_wvic_k925",
"Регион" => "region",
"неустановленный источник" => "neust_novlennyj_istochnik",
"> min" => "min",
"12_ schedule of visits.0" => "_12_schedule_of_visits_0",
"previous rent (£ per sq ft)" => "previous_rent_per_sq_ft",
"description/名稱" => "description",
"description/地址" => "description",
"@relations" => "relations",
"EntityName" => "entityname",
"_ injured" => "_injured",
"trips 11 _ 15 miles" => "trips_11_15_miles",
"as" => "_as",
"any" => "_any",
"xmin" => "_xmin",
"action" => "_action",
}
VERSION_2_SANITIZATION_COLS = {
['выбросы предприятий2', 'выхлопы автотранспорта2', 'неустановленный источник2'] => ['vybrosy_predpriyatij2', 'vyxlopy_vtotr_nsport_2', 'neust_novlennyj_istochnik2'],
["description/\u540d\u7a31", "description/\u5730\u5740"] => ["description", "description_1"],
["abc", "Abc", "aBc", "ABC"] => ["abc", "abc_1", "abc_2", "abc_3"]
}
describe '.get_valid_column_name' do
it 'can apply legacy sanitization to single columns' do
LEGACY_SANITIZATION_EXAMPLES.each do |input_name, output_name|
name = Column::get_valid_column_name(input_name, Column::INITIAL_COLUMN_SANITIZATION_VERSION, [])
name.should eq output_name
end
end
it 'can apply legacy sanitization to multiple columns' do
LEGACY_SANITIZATION_COLS.each do |input_columns, output_columns|
columns = []
input_columns.zip(output_columns).each do |input_column, output_column|
column = Column::get_valid_column_name(input_column, Column::INITIAL_COLUMN_SANITIZATION_VERSION, columns)
columns << column
column.should eq output_column
end
end
end
it 'can apply sanitization v2 to single columns' do
VERSION_2_SANITIZATION_EXAMPLES.each do |input_name, output_name|
name = Column::get_valid_column_name(input_name, 2, [])
name.should eq output_name
end
end
it 'v2 sanitization is idempotent' do
VERSION_2_SANITIZATION_EXAMPLES.each_key do |input_name|
first_name = Column::get_valid_column_name(input_name, 2, [])
second_name = Column::get_valid_column_name(first_name, 2, [])
second_name.should eq first_name
end
end
it 'can apply v2 sanitization to multiple columns' do
VERSION_2_SANITIZATION_COLS.each do |input_columns, output_columns|
columns = []
input_columns.zip(output_columns).each do |input_column, output_column|
column = Column::get_valid_column_name(input_column, 2, columns)
puts "--- ADDING COL #{column}"
columns << column
puts " >> #{columns.inspect}"
column.should eq output_column
end
end
end
it 'multiple column sanitization is idempotent' do
VERSION_2_SANITIZATION_COLS.each_key do |input_columns|
columns1 = []
input_columns.each do |input_column|
column1 = Column::get_valid_column_name(input_column, 2, columns1)
columns1 << column1
end
columns2 = []
columns1.each do |input_column|
column2 = Column::get_valid_column_name(input_column, 2, columns2)
columns2 << column2
column2.should eq input_column
end
end
end
end # .get_valid_column_name
end # Column