require_relative '../../lib/importer/column'
require_relative '../factories/pg_connection'
require_relative '../doubles/log'
require_relative '../../../../spec/rspec_configuration.rb'
require_relative '../../../../spec/spec_helper'
include CartoDB::Importer2
describe Column do
# Suite-wide setup: creates and saves a user, keeps its database handle and
# runs the schema / extension / search_path statements the examples rely on.
before(:all) do
  @user = create_user
  @user.save
  @db = @user.in_database
  [
    'CREATE SCHEMA IF NOT EXISTS cdb_importer',
    'CREATE EXTENSION IF NOT EXISTS postgis WITH SCHEMA public',
    'CREATE EXTENSION IF NOT EXISTS postgis_topology',
    'SET search_path TO cdb_importer,public'
  ].each { |statement| @db.execute(statement) }
end
# Per-example setup: a fresh table, a Column pointing at its 'the_geom'
# column, and a dataset handle used to insert fixture rows.
before(:each) do
  @table_name = create_table(@db)
  @column_name = 'the_geom'
  @column = Column.new(
    @db,
    @table_name,
    @column_name,
    @user,
    Column::DEFAULT_SCHEMA,
    nil,
    CartoDB::Importer2::Doubles::Log.new(@user),
    false # capture_exceptions: positional argument, not a keyword
  )
  @dataset = @db[@table_name.to_sym]
end
# Per-example cleanup: remove the table created in before(:each).
after(:each) { @db.drop_table?(@table_name.to_sym) }
# Suite-wide teardown: drop the importer schema, close the connection and
# finally destroy the user created in before(:all).
after(:all) do
  @db.execute "DROP SCHEMA cdb_importer CASCADE"
  @db.disconnect
  @user.destroy
end
describe '#type' do
  it 'returns the type of the column as returned by the database' do
    5.times do
      @dataset.insert(random_hexewkb_record)
    end

    # The fixture table declares the column as a String, so it starts as text.
    @column.type.should eq('text')

    @column.geometrify
    @column.type.should match('geometry')
  end
end #type
describe '#geometrify' do
  it "parses and updates the passed geometry column if it's in WKT" do
    5.times { @dataset.insert(random_wkt_record) }
    # Make the implicit expectation explicit: geometrify must not raise.
    expect { @column.geometrify }.to_not raise_error
  end

  it 'raises if empty geometry column' do
    5.times { @dataset.insert(name: '', description: '', the_geom: '') }
    expect { @column.geometrify }.to raise_error RuntimeError
  end

  it "guarantees the geometry column ends up with a geometry type" do
    5.times { @dataset.insert(random_hexewkb_record) }
    @column.geometrify
    @column.type.should match(/geometry/)
  end
end #geometrify
describe '#convert_from_wkt' do
  it 'populates an existing geometry column parsing its values in WKT' do
    @dataset.insert(random_wkt_record)
    @column.convert_from_wkt
    # After conversion the stored sample is expected to start with "0101".
    @column.sample.should match(/^0101/)
  end

  it "raises if column contents aren't in WKT" do
    @dataset.insert(bogus_record)
    expect { @column.convert_from_wkt }.to raise_error(Sequel::DatabaseError)
  end
end #convert_from_wkt
describe '#convert_from_geojson' do
  it 'populates an existing geometry column parsing its values in GeoJSON' do
    @dataset.insert(random_geojson_record)
    @column.convert_from_geojson
    # After conversion the stored sample is expected to start with "0101".
    @column.sample.should match(/^0101/)
  end

  it "raises if column contents aren't in GeoJSON" do
    @dataset.insert(bogus_record)
    expect { @column.convert_from_geojson }.to raise_error(Sequel::DatabaseError)
  end
end #convert_from_geojson
describe '#convert_from_kml_point' do
  it 'populates an existing geometry column parsing its values in KML Point' do
    @dataset.insert(random_kml_point_record)
    @column.convert_from_kml_point
    # After conversion the stored sample is expected to start with "0101".
    @column.sample.should match(/^0101/)
  end

  it "raises if column contents aren't in KML Point" do
    @dataset.insert(bogus_record)
    expect { @column.convert_from_kml_point }.to raise_error(Sequel::DatabaseError)
  end
end #convert_from_kml_point
describe '#convert_from_kml_multi' do
  it 'populates an existing geometry column parsing its values in KML Multi' do
    @dataset.insert(random_kml_multi_record)
    @column.convert_from_kml_multi
    # Multi-coordinate input is expected to yield a sample starting with
    # "0105" (as opposed to "0101" for the single-point conversions).
    @column.sample.should match(/^0105/)
  end

  # Description fixed: this group covers KML Multi, not KML Point.
  it "raises if column contents aren't in KML Multi" do
    @dataset.insert(bogus_record)
    expect { @column.convert_from_kml_multi }.to raise_error Sequel::DatabaseError
  end
end #convert_from_kml_multi
describe '#wkb?' do
  it 'returns true if the passed column contains geometries in WKB' do
    5.times do
      @dataset.insert(random_hexewkb_record)
    end
    @column.wkb?.should eq(true)
  end

  it 'returns false otherwise' do
    @dataset.insert(bogus_record)
    @column.wkb?.should eq(false)
  end
end #wkb?
describe '#geojson?' do
  # These examples previously inserted hex EWKB records and asserted on
  # #wkb?, so #geojson? itself was never exercised.
  it 'returns true if the passed column contains geometries in GeoJSON' do
    5.times { @dataset.insert(random_geojson_record) }
    @column.geojson?.should eq true
  end

  it 'returns false otherwise' do
    @dataset.insert(bogus_record)
    @column.geojson?.should eq false
  end
end #geojson?
describe '#kml_point?' do
  it 'returns true if the passed column contains geometries in KML Point' do
    5.times do
      @dataset.insert(random_kml_point_record)
    end
    @column.kml_point?.should eq(true)
  end

  it 'returns false otherwise' do
    @dataset.insert(bogus_record)
    @column.kml_point?.should eq(false)
  end
end #kml_point?
describe '#kml_multi?' do
  it 'returns true if the passed column contains geometries in KML Multi' do
    5.times do
      @dataset.insert(random_kml_multi_record)
    end
    @column.kml_multi?.should eq(true)
  end

  it 'returns false otherwise' do
    @dataset.insert(bogus_record)
    @column.kml_multi?.should eq(false)
  end
end #kml_multi?
describe '#cast_to' do
  it 'casts the passed column to a geometry type' do
    5.times do
      @dataset.insert(random_hexewkb_record)
    end

    @column.type.should eq('text')
    @column.cast_to('geometry')
    @column.type.should match('geometry')
  end

  it "raises if column contents aren't geometries" do
    # Deliberately inserted without an ogc_fid value.
    @dataset.insert(name: 'bogus', description: 'bogus', the_geom: 'bogus')
    expect { @column.cast_to('geometry') }.to raise_error(Sequel::DatabaseError)
  end
end #cast_to
describe '#sample' do
  # The description is kept on one line so the example name contains no
  # embedded line break.
  it "retrieves the passed column from the first record where it isn't null" do
    5.times { @dataset.insert(random_hexewkb_record) }
    @column.sample.should match(/0101/)
  end

  it 'returns nil if no records with data in the column' do
    # Assert nil explicitly, as the description states, rather than mere
    # falsiness.
    @column.sample.should be_nil
  end
end # sample
describe '#records_with_data' do
  it 'returns a dataset with those records with data in this column' do
    # Nothing inserted yet: the result is an empty Sequel dataset.
    @column.records_with_data.should be_empty
    @column.records_with_data.should be_a_kind_of(Sequel::Postgres::Dataset)

    @dataset.insert(random_wkt_record)
    @column.records_with_data.should_not be_empty
  end
end #records_with_data
describe '#empty?' do
  it 'returns true if no records with data in this column' do
    # Empty before any insert, non-empty right after one.
    @column.empty?.should eq(true)

    @dataset.insert(random_wkt_record)
    @column.empty?.should eq(false)
  end
end #empty?
describe '#rename_to' do
  it 'renames the column' do
    @dataset.insert(random_hexewkb_record)
    @column.rename_to('bogus_name')

    renamed_keys = @dataset.first.keys
    renamed_keys.should include(:bogus_name)
  end

  it 'does nothing if the new name is the same as the current one' do
    @dataset.insert(random_hexewkb_record)
    @column.rename_to('the_geom')

    current_keys = @dataset.first.keys
    current_keys.should include(@column_name.to_sym)
  end
end #rename_to
describe '#sanitized_name' do
  # Column.new takes the user as its fourth positional argument (see the
  # before(:each) hook). It was missing here, which shifted the schema, job
  # and logger arguments one position to the left.
  it 'returns a sanitized version of the column name' do
    column = Column.new(
      @db,
      @table_name,
      '+++sanitized+++',
      @user,
      Column::DEFAULT_SCHEMA,
      nil,
      CartoDB::Importer2::Doubles::Log.new(@user)
    )
    column.sanitized_name.should eq 'sanitized'
  end

  it 'returns the same name if no sanitization needed' do
    column = Column.new(
      @db,
      @table_name,
      'sanitized',
      @user,
      Column::DEFAULT_SCHEMA,
      nil,
      CartoDB::Importer2::Doubles::Log.new(@user)
    )
    column.sanitized_name.should eq 'sanitized'
  end
end #sanitized_name
describe '#reserved?' do
  it 'returns true if name is a reserved keyword' do
    # name => expected answer from Column.reserved?
    { 'select' => true, 'bogus' => false }.each do |name, expected|
      Column.reserved?(name).should eq(expected)
    end
  end
end #reserved?
describe '#unsupported?' do
  it 'returns true if name is not supported by Postgres' do
    # name => expected answer from Column.unsupported?
    { '9name' => true, 'name9' => false }.each do |name, expected|
      Column.unsupported?(name).should eq(expected)
    end
  end
end #unsupported?
# Creates (after dropping any previous one) a table with four String columns:
# name, description, the_geom and ogc_fid.
#
# db      - database handle responding to drop_table?, create_table? and run.
# options - Hash; :table_name overrides the generated "importer_<n>" name.
#
# Returns the table name. If anything raises a StandardError, the table is
# dropped from the cdb_importer schema via raw SQL and the name is still
# returned.
def create_table(db, options={})
  name = options.fetch(:table_name, "importer_#{rand(99999)}")

  db.drop_table?(name)
  db.create_table?(name) do
    String(:name)
    String(:description)
    String(:the_geom)
    String(:ogc_fid)
  end

  name
rescue StandardError
  db.run("DROP TABLE \"cdb_importer\".\"#{name}\"")
  name
end #create_table
# Returns a row whose text fields all hold the non-geometry string 'bogus',
# used by the examples that expect conversions/predicates to fail.
def bogus_record
  text_fields = [:name, :description, :the_geom]
  record = text_fields.each_with_object({}) { |field, row| row[field] = 'bogus' }
  record.merge(ogc_fid: 1)
end #bogus_record
# Returns a row whose the_geom is a fixed hex-encoded (E)WKB string.
# Only name and description vary between calls; the geometry is constant.
def random_hexewkb_record
  {
    name: "bogus #{rand(999)}",
    description: "bogus #{rand(999)}",
    the_geom: "0101000020E61000004486E281C5C257C068B89DDA998F4640",
    ogc_fid: 1
  }
end #random_hexewkb_record
# Returns a row whose the_geom is a fixed WKT point.
# Only name and description vary between calls; the geometry is constant.
def random_wkt_record
  {
    name: "bogus #{rand(999)}",
    description: "bogus #{rand(999)}",
    the_geom: 'POINT(-71.060316 48.432044)',
    ogc_fid: 1
  }
end #random_wkt_record
# Returns a row whose the_geom is a fixed GeoJSON point serialized to a
# JSON string. Only name and description vary between calls.
def random_geojson_record
  {
    name: "bogus #{rand(999)}",
    description: "bogus #{rand(999)}",
    the_geom: { type: "Point", coordinates: [102.0, 0.5] }.to_json,
    ogc_fid: 1
  }
end #random_geojson_record
# Returns a row whose the_geom is a fixed "x,y" coordinate pair in the form
# used by the KML Point examples. Only name and description vary between calls.
def random_kml_point_record
  {
    name: "bogus #{rand(999)}",
    description: "bogus #{rand(999)}",
    the_geom: "137.625,36.975",
    ogc_fid: 1
  }
end #random_kml_point_record
# Returns a row whose the_geom is a fixed multi-line list of comma-separated
# coordinate triples (presumably lon,lat,alt as in KML - confirm against the
# importer). Only name and description vary between calls.
def random_kml_multi_record
  triples = %w[
    -112.2550785337791,36.07954952145647,2357
    -112.2549277039738,36.08117083492122,2357
    -112.2552505069063,36.08260761307279,2357
    -112.2564540158376,36.08395660588506,2357
    -112.2580238976449,36.08511401044813,2357
    -112.2595218489022,36.08584355239394,2357
    -112.2608216347552,36.08612634548589,2357
    -112.262073428656,36.08626019085147,2357
    -112.2633204928495,36.08621519860091,2357
    -112.2644963846444,36.08627897945274,2357
    -112.2656969554589,36.08649599090644,2357
  ]
  {
    name: "bogus #{rand(999)}",
    description: "bogus #{rand(999)}",
    # One triple per line, wrapped in a leading and a trailing newline.
    the_geom: "\n#{triples.join("\n")}\n",
    ogc_fid: 1
  }
end #random_kml_multi_record
# Raw column name => name expected from Column.get_valid_column_name when
# called with Column::INITIAL_COLUMN_SANITIZATION_VERSION and no previously
# taken names. Frozen so examples cannot mutate the shared fixture.
LEGACY_SANITIZATION_EXAMPLES = {
  "abc" => "abc",
  "abc xyz" => "abc_xyz",
  "2abc" => "column_2abc",
  "Abc" => "_bc",
  "\u0432\u044b\u0445\u043b\u043e\u043f\u044b \u0430\u0432\u0442\u043e\u0442\u0440\u0430\u043d\u0441\u043f\u043e\u0440\u0442\u04302" => "_2",
  "\u043d\u0435\u0443\u0441\u0442\u0430\u043d\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0439 \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a2" => "_2",
  "\u0432\u044b\u0431\u0440\u043e\u0441\u044b \u043f\u0440\u0435\u0434\u043f\u0440\u0438\u044f\u0442\u0438\u04392" => "_2",
  "\u013ar" => "_r",
  "CONVERT(BlueNumber USING utf8)" => '_lue_umber_utf8_',
  "is growing site fenced?" => "is_growing_site_fenced_",
  "if it\u2019s a community garden, is it collective or allotment?" => "if_it_s_a_community_garden_is_it_collective_or_allotment_",
  "Paddock" => "_addock",
  "Date Due" => "_ate_ue",
  "__5" => "_5",
  "__1" => "_1",
  "tel\u00e9fono" => "tel_fono",
  ":@computed_region_wvic_k925" => "_computed_region_wvic_k925",
  "\u0420\u0435\u0433\u0438\u043e\u043d" => "_",
  "\u043d\u0435\u0443\u0441\u0442\u0430\u043d\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0439 \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a" => "_",
  "> min" => "_min",
  "12_ schedule of visits.0" => "column_12_schedule_of_visits_0",
  "previous rent (\u00a3 per sq ft)" => "previous_rent_per_sq_ft_",
  "description/\u540d\u7a31" => "description_",
  "description/\u5730\u5740" => "description_",
  "@relations" => "_relations",
  "EntityName" => "_ntity_ame",
  "_ injured" => "_injured",
  "trips 11 _ 15 miles" => "trips_11_15_miles",
  "as" => "as",
  "any" => "any",
  "xmin" => "xmin",
  "action" => "action",
}.freeze
# List of raw column names => names expected when they are sanitized one
# after another with Column::INITIAL_COLUMN_SANITIZATION_VERSION, each call
# receiving the names produced so far. Frozen so examples cannot mutate it.
LEGACY_SANITIZATION_COLS = {
  ['выбросы предприятий2', 'выхлопы автотранспорта2', 'неустановленный источник2'] => ['_2', '_2_1', '_2_2'],
  ["description/\u540d\u7a31", "description/\u5730\u5740"] => ["description_", "description__1"]
}.freeze
# Raw column name => name expected from Column.get_valid_column_name when
# called with sanitization version 2 and no previously taken names.
# Frozen so examples cannot mutate the shared fixture.
VERSION_2_SANITIZATION_EXAMPLES = {
  "abc" => "abc",
  "abc xyz" => "abc_xyz",
  "2abc" => "_2abc",
  "Abc" => "abc",
  "выхлопы автотранспорта2" => "vyxlopy_vtotr_nsport_2",
  "неустановленный источник2" => "neust_novlennyj_istochnik2",
  "выбросы предприятий2" => "vybrosy_predpriyatij2",
  "ĺr" => "lr",
  "CONVERT(BlueNumber USING utf8)" => "convert_bluenumber_using_utf8",
  "is growing site fenced?" => "is_growing_site_fenced",
  "if it’s a community garden, is it collective or allotment?" => "if_it_s_a_community_garden_is_it_collective_or_allotment",
  "Paddock" => "paddock",
  "Date Due" => "date_due",
  "__5" => "_5",
  "__1" => "_1",
  "teléfono" => "telefono",
  ":@computed_region_wvic_k925" => "computed_region_wvic_k925",
  "Регион" => "region",
  "неустановленный источник" => "neust_novlennyj_istochnik",
  "> min" => "min",
  "12_ schedule of visits.0" => "_12_schedule_of_visits_0",
  "previous rent (£ per sq ft)" => "previous_rent_per_sq_ft",
  "description/名稱" => "description",
  "description/地址" => "description",
  "@relations" => "relations",
  "EntityName" => "entityname",
  "_ injured" => "_injured",
  "trips 11 _ 15 miles" => "trips_11_15_miles",
  "as" => "_as",
  "any" => "_any",
  "xmin" => "_xmin",
  "action" => "_action",
}.freeze
# List of raw column names => names expected when they are sanitized one
# after another with sanitization version 2, each call receiving the names
# produced so far. Frozen so examples cannot mutate it.
VERSION_2_SANITIZATION_COLS = {
  ['выбросы предприятий2', 'выхлопы автотранспорта2', 'неустановленный источник2'] => ['vybrosy_predpriyatij2', 'vyxlopy_vtotr_nsport_2', 'neust_novlennyj_istochnik2'],
  ["description/\u540d\u7a31", "description/\u5730\u5740"] => ["description", "description_1"],
  ["abc", "Abc", "aBc", "ABC"] => ["abc", "abc_1", "abc_2", "abc_3"]
}.freeze
describe '.get_valid_column_name' do
  it 'can apply legacy sanitization to single columns' do
    LEGACY_SANITIZATION_EXAMPLES.each do |input_name, output_name|
      name = Column.get_valid_column_name(input_name, Column::INITIAL_COLUMN_SANITIZATION_VERSION, [])
      name.should eq output_name
    end
  end

  it 'can apply legacy sanitization to multiple columns' do
    LEGACY_SANITIZATION_COLS.each do |input_columns, output_columns|
      # Names produced so far are fed back in as the third argument.
      columns = []
      input_columns.zip(output_columns).each do |input_column, output_column|
        column = Column.get_valid_column_name(input_column, Column::INITIAL_COLUMN_SANITIZATION_VERSION, columns)
        columns << column
        column.should eq output_column
      end
    end
  end

  it 'can apply sanitization v2 to single columns' do
    VERSION_2_SANITIZATION_EXAMPLES.each do |input_name, output_name|
      name = Column.get_valid_column_name(input_name, 2, [])
      name.should eq output_name
    end
  end

  it 'v2 sanitization is idempotent' do
    VERSION_2_SANITIZATION_EXAMPLES.each_key do |input_name|
      # Sanitizing an already sanitized name must leave it unchanged.
      first_name = Column.get_valid_column_name(input_name, 2, [])
      second_name = Column.get_valid_column_name(first_name, 2, [])
      second_name.should eq first_name
    end
  end

  it 'can apply v2 sanitization to multiple columns' do
    VERSION_2_SANITIZATION_COLS.each do |input_columns, output_columns|
      # Names produced so far are fed back in as the third argument.
      # (Debug output that used to be printed here has been removed.)
      columns = []
      input_columns.zip(output_columns).each do |input_column, output_column|
        column = Column.get_valid_column_name(input_column, 2, columns)
        columns << column
        column.should eq output_column
      end
    end
  end

  it 'multiple column sanitization is idempotent' do
    VERSION_2_SANITIZATION_COLS.each_key do |input_columns|
      # First pass: sanitize the raw names, accumulating the results.
      columns1 = []
      input_columns.each do |input_column|
        column1 = Column.get_valid_column_name(input_column, 2, columns1)
        columns1 << column1
      end

      # Second pass: the already sanitized names must come back unchanged.
      columns2 = []
      columns1.each do |input_column|
        column2 = Column.get_valid_column_name(input_column, 2, columns2)
        columns2 << column2
        column2.should eq input_column
      end
    end
  end
end # .get_valid_column_name
end # Column