require_relative '../../lib/importer/column'
require_relative '../factories/pg_connection'
require_relative '../doubles/log'
require_relative '../../../../spec/rspec_configuration.rb'
require_relative '../../../../spec/spec_helper'

include CartoDB::Importer2

# Specs for CartoDB::Importer2::Column.
#
# Most examples run against a real database obtained from a freshly created
# user (@user.in_database). Each example gets its own scratch table with a
# text `the_geom` column, created by #create_table below and dropped again
# in the after(:each) hook.
describe Column do
  before(:all) do
    @user = create_user
    @user.save
    @db = @user.in_database
    @db.execute('CREATE SCHEMA IF NOT EXISTS cdb_importer')
    @db.execute('CREATE EXTENSION IF NOT EXISTS postgis WITH SCHEMA public')
    @db.execute('CREATE EXTENSION IF NOT EXISTS postgis_topology')
    @db.execute('SET search_path TO cdb_importer,public')
  end

  before(:each) do
    @table_name  = create_table(@db)
    @column_name = 'the_geom'
    @column      = Column.new(
      @db,
      @table_name,
      @column_name,
      @user,
      Column::DEFAULT_SCHEMA,
      nil,
      CartoDB::Importer2::Doubles::Log.new(@user),
      false # capture_exceptions; presumably lets errors reach raise_error expectations -- confirm
    )
    @dataset = @db[@table_name.to_sym]
  end

  after(:each) do
    @db.drop_table?(@table_name.to_sym)
  end

  after(:all) do
    @db.execute('DROP SCHEMA cdb_importer CASCADE')
    @db.disconnect
    @user.destroy
  end

  describe '#type' do
    it 'returns the type of the column as returned by the database' do
      5.times { @dataset.insert(random_hexewkb_record) }
      @column.type.should eq 'text'
      @column.geometrify
      @column.type.should match 'geometry'
    end
  end # type

  describe '#geometrify' do
    it "parses and updates the passed geometry column if it's in WKT" do
      5.times { @dataset.insert(random_wkt_record) }
      @column.geometrify
    end

    it 'raises if empty geometry column' do
      5.times { @dataset.insert(name: '', description: '', the_geom: '') }
      expect { @column.geometrify }.to raise_error RuntimeError
    end

    it 'guarantees the geometry column ends up with a geometry type' do
      5.times { @dataset.insert(random_hexewkb_record) }
      @column.geometrify
      @column.type.should match(/geometry/)
    end
  end # geometrify

  describe '#convert_from_wkt' do
    it 'populates an existing geometry column parsing its values in WKT' do
      @dataset.insert(random_wkt_record)
      @column.convert_from_wkt
      @column.sample.should match(/^0101/)
    end

    it "raises if column contents aren't in WKT" do
      @dataset.insert(bogus_record)
      expect { @column.convert_from_wkt }.to raise_error Sequel::DatabaseError
    end
  end # convert_from_wkt

  describe '#convert_from_geojson' do
    it 'populates an existing geometry column parsing its values in GeoJSON' do
      @dataset.insert(random_geojson_record)
      @column.convert_from_geojson
      @column.sample.should match(/^0101/)
    end

    it "raises if column contents aren't in GeoJSON" do
      @dataset.insert(bogus_record)
      expect { @column.convert_from_geojson }.to raise_error Sequel::DatabaseError
    end
  end # convert_from_geojson

  describe '#convert_from_kml_point' do
    it 'populates an existing geometry column parsing its values in KML Point' do
      @dataset.insert(random_kml_point_record)
      @column.convert_from_kml_point
      @column.sample.should match(/^0101/)
    end

    it "raises if column contents aren't in KML Point" do
      @dataset.insert(bogus_record)
      expect { @column.convert_from_kml_point }.to raise_error Sequel::DatabaseError
    end
  end # convert_from_kml_point

  describe '#convert_from_kml_multi' do
    it 'populates an existing geometry column parsing its values in KML Multi' do
      @dataset.insert(random_kml_multi_record)
      @column.convert_from_kml_multi
      @column.sample.should match(/^0105/)
    end

    it "raises if column contents aren't in KML Multi" do
      @dataset.insert(bogus_record)
      expect { @column.convert_from_kml_multi }.to raise_error Sequel::DatabaseError
    end
  end # convert_from_kml_multi

  describe '#wkb?' do
    it 'returns true if the passed column contains geometries in WKB' do
      5.times { @dataset.insert(random_hexewkb_record) }
      @column.wkb?.should eq true
    end

    it 'returns false otherwise' do
      @dataset.insert(bogus_record)
      @column.wkb?.should eq false
    end
  end # wkb?

  # These examples exercise #geojson? with GeoJSON fixtures; they must not
  # fall back to the WKB fixtures/predicate used by the '#wkb?' group above.
  describe '#geojson?' do
    it 'returns true if the passed column contains geometries in GeoJSON' do
      5.times { @dataset.insert(random_geojson_record) }
      @column.geojson?.should eq true
    end

    it 'returns false otherwise' do
      @dataset.insert(bogus_record)
      @column.geojson?.should eq false
    end
  end # geojson?

  describe '#kml_point?' do
    it 'returns true if the passed column contains geometries in KML Point' do
      5.times { @dataset.insert(random_kml_point_record) }
      @column.kml_point?.should eq true
    end

    it 'returns false otherwise' do
      @dataset.insert(bogus_record)
      @column.kml_point?.should eq false
    end
  end # kml_point?

  describe '#kml_multi?' do
    it 'returns true if the passed column contains geometries in KML Multi' do
      5.times { @dataset.insert(random_kml_multi_record) }
      @column.kml_multi?.should eq true
    end

    it 'returns false otherwise' do
      @dataset.insert(bogus_record)
      @column.kml_multi?.should eq false
    end
  end # kml_multi?

  describe '#cast_to' do
    it 'casts the passed column to a geometry type' do
      5.times { @dataset.insert(random_hexewkb_record) }
      @column.type.should eq 'text'
      @column.cast_to('geometry')
      @column.type.should match 'geometry'
    end

    it "raises if column contents aren't geometries" do
      @dataset.insert(name: 'bogus', description: 'bogus', the_geom: 'bogus')
      expect { @column.cast_to('geometry') }.to raise_error Sequel::DatabaseError
    end
  end # cast_to

  describe '#sample' do
    it "retrieves the passed column from the first record where it isn't null" do
      5.times { @dataset.insert(random_hexewkb_record) }
      @column.sample.should match(/0101/)
    end

    it 'returns nil if no records with data in the column' do
      @column.sample.should_not be
    end
  end # sample

  describe '#records_with_data' do
    it 'returns a dataset with those records with data in this column' do
      @column.records_with_data.should be_empty
      @column.records_with_data.is_a?(Sequel::Postgres::Dataset).should be_true

      @dataset.insert(random_wkt_record)
      @column.records_with_data.should_not be_empty
    end
  end # records_with_data

  describe '#empty?' do
    it 'returns true if no records with data in this column' do
      @column.empty?.should eq true
      @dataset.insert(random_wkt_record)
      @column.empty?.should eq false
    end
  end # empty?

  describe '#rename_to' do
    it 'renames the column' do
      @dataset.insert(random_hexewkb_record)
      @column.rename_to('bogus_name')
      @dataset.first.keys.should include :bogus_name
    end

    it 'does nothing if the new name is the same as the current one' do
      @dataset.insert(random_hexewkb_record)
      @column.rename_to('the_geom')
      @dataset.first.keys.should include @column_name.to_sym
    end
  end # rename_to

  describe '#sanitized_name' do
    # Both constructors pass @user in the fourth position, matching the
    # argument order used in the before(:each) hook, so that the schema, job
    # and logger arguments land in their intended slots.
    it 'returns a sanitized version of the column name' do
      column = Column.new(
        @db,
        @table_name,
        '+++sanitized+++',
        @user,
        Column::DEFAULT_SCHEMA,
        nil,
        CartoDB::Importer2::Doubles::Log.new(@user)
      )
      column.sanitized_name.should eq 'sanitized'
    end

    it 'returns the same name if no sanitization needed' do
      column = Column.new(
        @db,
        @table_name,
        'sanitized',
        @user,
        Column::DEFAULT_SCHEMA,
        nil,
        CartoDB::Importer2::Doubles::Log.new(@user)
      )
      column.sanitized_name.should eq 'sanitized'
    end
  end # sanitized_name

  describe '#reserved?' do
    it 'returns true if name is a reserved keyword' do
      Column.reserved?('select').should eq true
      Column.reserved?('bogus').should eq false
    end
  end # reserved?

  describe '#unsupported?' do
    it 'returns true if name is not supported by Postgres' do
      Column.unsupported?('9name').should eq true
      Column.unsupported?('name9').should eq false
    end
  end # unsupported?

  # Creates a scratch table with four string columns (name, description,
  # the_geom, ogc_fid) and returns its name.
  #
  # @param db the database handle the table is created through
  # @param options [Hash] accepts :table_name; defaults to "importer_<random>"
  # @return [String] the name of the table
  #
  # If anything raises along the way, the table is dropped from the
  # cdb_importer schema and the name is still returned.
  def create_table(db, options={})
    table_name = options.fetch(:table_name, "importer_#{rand(99999)}")
    db.drop_table?(table_name)
    db.create_table?(table_name) do
      String :name
      String :description
      String :the_geom
      String :ogc_fid
    end

    table_name
  rescue StandardError
    db.run(%Q{DROP TABLE "cdb_importer"."#{table_name}"})
    table_name
  end # create_table

  # A record whose the_geom value is not a geometry in any format.
  def bogus_record
    {
      name:         'bogus',
      description:  'bogus',
      the_geom:     'bogus',
      ogc_fid:      1
    }
  end # bogus_record

  # A record whose the_geom value is a fixed hex string; the specs above
  # expect #wkb? to accept it and #sample to match /0101/.
  def random_hexewkb_record
    {
      name:         "bogus #{rand(999)}",
      description:  "bogus #{rand(999)}",
      the_geom:     "0101000020E61000004486E281C5C257C068B89DDA998F4640",
      ogc_fid:      1
    }
  end # random_hexewkb_record

  # A record whose the_geom value is a WKT point.
  def random_wkt_record
    {
      name:         "bogus #{rand(999)}",
      description:  "bogus #{rand(999)}",
      the_geom:     'POINT(-71.060316 48.432044)',
      ogc_fid:      1
    }
  end # random_wkt_record

  # A record whose the_geom value is a GeoJSON point serialized to a string.
  def random_geojson_record
    {
      name:         "bogus #{rand(999)}",
      description:  "bogus #{rand(999)}",
      the_geom:     { type: "Point", coordinates: [102.0, 0.5] }.to_json,
      ogc_fid:      1
    }
  end # random_geojson_record

  # A record whose the_geom value is a single "x,y" coordinate pair, used by
  # the KML Point examples.
  def random_kml_point_record
    {
      name:         "bogus #{rand(999)}",
      description:  "bogus #{rand(999)}",
      the_geom:     "137.625,36.975",
      ogc_fid:      1
    }
  end # random_kml_point_record

  # A record whose the_geom value is a whitespace-separated list of
  # "x,y,z" triples, used by the KML Multi examples. The literal is kept on
  # one line so the stored string is exactly what the fixtures contain.
  def random_kml_multi_record
    {
      name:         "bogus #{rand(999)}",
      description:  "bogus #{rand(999)}",
      the_geom:     %Q{ -112.2550785337791,36.07954952145647,2357 -112.2549277039738,36.08117083492122,2357 -112.2552505069063,36.08260761307279,2357 -112.2564540158376,36.08395660588506,2357 -112.2580238976449,36.08511401044813,2357 -112.2595218489022,36.08584355239394,2357 -112.2608216347552,36.08612634548589,2357 -112.262073428656,36.08626019085147,2357 -112.2633204928495,36.08621519860091,2357 -112.2644963846444,36.08627897945274,2357 -112.2656969554589,36.08649599090644,2357 },
      ogc_fid:      1
    }
  end # random_kml_multi_record

  # Input column name => expected output when sanitizing with
  # Column::INITIAL_COLUMN_SANITIZATION_VERSION and no other columns present.
  LEGACY_SANITIZATION_EXAMPLES = {
    "abc" => "abc",
    "abc xyz" => "abc_xyz",
    "2abc" => "column_2abc",
    "Abc" => "_bc",
    "\u0432\u044b\u0445\u043b\u043e\u043f\u044b \u0430\u0432\u0442\u043e\u0442\u0440\u0430\u043d\u0441\u043f\u043e\u0440\u0442\u04302" => "_2",
    "\u043d\u0435\u0443\u0441\u0442\u0430\u043d\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0439 \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a2" => "_2",
    "\u0432\u044b\u0431\u0440\u043e\u0441\u044b \u043f\u0440\u0435\u0434\u043f\u0440\u0438\u044f\u0442\u0438\u04392" => "_2",
    "\u013ar" => "_r",
    "CONVERT(BlueNumber USING utf8)" => '_lue_umber_utf8_',
    "is growing site fenced?" => "is_growing_site_fenced_",
    "if it\u2019s a community garden, is it collective or allotment?" => "if_it_s_a_community_garden_is_it_collective_or_allotment_",
    "Paddock" => "_addock",
    "Date Due" => "_ate_ue",
    "__5" => "_5",
    "__1" => "_1",
    "tel\u00e9fono" => "tel_fono",
    ":@computed_region_wvic_k925" => "_computed_region_wvic_k925",
    "\u0420\u0435\u0433\u0438\u043e\u043d" => "_",
    "\u043d\u0435\u0443\u0441\u0442\u0430\u043d\u043e\u0432\u043b\u0435\u043d\u043d\u044b\u0439 \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a" => "_",
    "> min" => "_min",
    "12_ schedule of visits.0" => "column_12_schedule_of_visits_0",
    "previous rent (\u00a3 per sq ft)" => "previous_rent_per_sq_ft_",
    "description/\u540d\u7a31" => "description_",
    "description/\u5730\u5740" => "description_",
    "@relations" => "_relations",
    "EntityName" => "_ntity_ame",
    "_ injured" => "_injured",
    "trips 11 _ 15 miles" => "trips_11_15_miles",
    "as" => "as",
    "any" => "any",
    "xmin" => "xmin",
    "action" => "action",
  }.freeze

  # Ordered input column names => expected outputs when each name is
  # sanitized (legacy version) against the names already produced.
  LEGACY_SANITIZATION_COLS = {
    ['выбросы предприятий2', 'выхлопы автотранспорта2', 'неустановленный источник2'] => ['_2', '_2_1', '_2_2'],
    ["description/\u540d\u7a31", "description/\u5730\u5740"] => ["description_", "description__1"]
  }.freeze

  # Input column name => expected output when sanitizing with version 2 and
  # no other columns present.
  VERSION_2_SANITIZATION_EXAMPLES = {
    "abc" => "abc",
    "abc xyz" => "abc_xyz",
    "2abc" => "_2abc",
    "Abc" => "abc",
    "выхлопы автотранспорта2" => "vyxlopy_vtotr_nsport_2",
    "неустановленный источник2" => "neust_novlennyj_istochnik2",
    "выбросы предприятий2" => "vybrosy_predpriyatij2",
    "ĺr" => "lr",
    "CONVERT(BlueNumber USING utf8)" => "convert_bluenumber_using_utf8",
    "is growing site fenced?" => "is_growing_site_fenced",
    "if it’s a community garden, is it collective or allotment?" => "if_it_s_a_community_garden_is_it_collective_or_allotment",
    "Paddock" => "paddock",
    "Date Due" => "date_due",
    "__5" => "_5",
    "__1" => "_1",
    "teléfono" => "telefono",
    ":@computed_region_wvic_k925" => "computed_region_wvic_k925",
    "Регион" => "region",
    "неустановленный источник" => "neust_novlennyj_istochnik",
    "> min" => "min",
    "12_ schedule of visits.0" => "_12_schedule_of_visits_0",
    "previous rent (£ per sq ft)" => "previous_rent_per_sq_ft",
    "description/名稱" => "description",
    "description/地址" => "description",
    "@relations" => "relations",
    "EntityName" => "entityname",
    "_ injured" => "_injured",
    "trips 11 _ 15 miles" => "trips_11_15_miles",
    "as" => "_as",
    "any" => "_any",
    "xmin" => "_xmin",
    "action" => "_action",
  }.freeze

  # Ordered input column names => expected outputs when each name is
  # sanitized (version 2) against the names already produced.
  VERSION_2_SANITIZATION_COLS = {
    ['выбросы предприятий2', 'выхлопы автотранспорта2', 'неустановленный источник2'] => ['vybrosy_predpriyatij2', 'vyxlopy_vtotr_nsport_2', 'neust_novlennyj_istochnik2'],
    ["description/\u540d\u7a31", "description/\u5730\u5740"] => ["description", "description_1"],
    ["abc", "Abc", "aBc", "ABC"] => ["abc", "abc_1", "abc_2", "abc_3"]
  }.freeze

  describe '.get_valid_column_name' do
    it 'can apply legacy sanitization to single columns' do
      LEGACY_SANITIZATION_EXAMPLES.each do |input_name, output_name|
        name = Column::get_valid_column_name(input_name, Column::INITIAL_COLUMN_SANITIZATION_VERSION, [])
        name.should eq output_name
      end
    end

    it 'can apply legacy sanitization to multiple columns' do
      LEGACY_SANITIZATION_COLS.each do |input_columns, output_columns|
        # Names accepted so far; each new name is sanitized against them.
        columns = []
        input_columns.zip(output_columns).each do |input_column, output_column|
          column = Column::get_valid_column_name(input_column, Column::INITIAL_COLUMN_SANITIZATION_VERSION, columns)
          columns << column
          column.should eq output_column
        end
      end
    end

    it 'can apply sanitization v2 to single columns' do
      VERSION_2_SANITIZATION_EXAMPLES.each do |input_name, output_name|
        name = Column::get_valid_column_name(input_name, 2, [])
        name.should eq output_name
      end
    end

    it 'v2 sanitization is idempotent' do
      VERSION_2_SANITIZATION_EXAMPLES.each_key do |input_name|
        first_name = Column::get_valid_column_name(input_name, 2, [])
        second_name = Column::get_valid_column_name(first_name, 2, [])
        second_name.should eq first_name
      end
    end

    it 'can apply v2 sanitization to multiple columns' do
      VERSION_2_SANITIZATION_COLS.each do |input_columns, output_columns|
        # Names accepted so far; each new name is sanitized against them.
        columns = []
        input_columns.zip(output_columns).each do |input_column, output_column|
          column = Column::get_valid_column_name(input_column, 2, columns)
          columns << column
          column.should eq output_column
        end
      end
    end

    it 'multiple column sanitization is idempotent' do
      VERSION_2_SANITIZATION_COLS.each_key do |input_columns|
        # First pass: sanitize the raw names, accumulating the results.
        columns1 = []
        input_columns.each do |input_column|
          column1 = Column::get_valid_column_name(input_column, 2, columns1)
          columns1 << column1
        end

        # Second pass: already-sanitized names must come back unchanged.
        columns2 = []
        columns1.each do |input_column|
          column2 = Column::get_valid_column_name(input_column, 2, columns2)
          columns2 << column2
          column2.should eq input_column
        end
      end
    end
  end # .get_valid_column_name
end # Column