create obs_meta out of dump band

This commit is contained in:
John Krauss 2016-07-11 13:40:42 -04:00
parent 6b41994a87
commit 66e2c6be54
3 changed files with 12182 additions and 8523 deletions

View File

@ -34,7 +34,7 @@ def select_star(tablename):
cdb = Dumpr('observatory.cartodb.com','') cdb = Dumpr('observatory.cartodb.com','')
metadata = ['obs_table', 'obs_column_table', 'obs_column', 'obs_column_tag', metadata = ['obs_table', 'obs_column_table', 'obs_column', 'obs_column_tag',
'obs_tag', 'obs_column_to_column', 'obs_dump_version'] 'obs_tag', 'obs_column_to_column', 'obs_dump_version', ]
fixtures = [ fixtures = [
('us.census.tiger.census_tract', 'us.census.tiger.census_tract', '2014'), ('us.census.tiger.census_tract', 'us.census.tiger.census_tract', '2014'),
@ -90,3 +90,98 @@ with open('src/pg/test/fixtures/load_fixtures.sql', 'w') as outfile:
cdb.dump(' '.join([select_star(tablename), "WHERE {}::text {} {}".format(colname, compare, where)]), cdb.dump(' '.join([select_star(tablename), "WHERE {}::text {} {}".format(colname, compare, where)]),
tablename, outfile, schema='observatory') tablename, outfile, schema='observatory')
dropfiles.write('DROP TABLE IF EXISTS observatory.{};\n'.format(tablename)) dropfiles.write('DROP TABLE IF EXISTS observatory.{};\n'.format(tablename))
outfile.write('''
CREATE TABLE observatory.obs_meta AS
SELECT numer_c.id numer_id,
denom_c.id denom_id,
geom_c.id geom_id,
MAX(numer_c.name) numer_name,
MAX(denom_c.name) denom_name,
MAX(geom_c.name) geom_name,
MAX(numer_c.description) numer_description,
MAX(denom_c.description) denom_description,
MAX(geom_c.description) geom_description,
MAX(numer_c.aggregate) numer_aggregate,
MAX(denom_c.aggregate) denom_aggregate,
MAX(geom_c.aggregate) geom_aggregate,
MAX(numer_c.type) numer_type,
MAX(denom_c.type) denom_type,
MAX(geom_c.type) geom_type,
MAX(numer_data_ct.colname) numer_colname,
MAX(denom_data_ct.colname) denom_colname,
MAX(geom_geom_ct.colname) geom_colname,
MAX(numer_geomref_ct.colname) numer_geomref_colname,
MAX(denom_geomref_ct.colname) denom_geomref_colname,
MAX(geom_geomref_ct.colname) geom_geomref_colname,
MAX(numer_t.tablename) numer_tablename,
MAX(denom_t.tablename) denom_tablename,
MAX(geom_t.tablename) geom_tablename,
MAX(numer_t.timespan) numer_timespan,
MAX(denom_t.timespan) denom_timespan,
MAX(numer_c.weight) numer_weight,
MAX(denom_c.weight) denom_weight,
MAX(geom_c.weight) geom_weight,
MAX(geom_t.timespan) geom_timespan,
MAX(geom_t.the_geom_webmercator)::geometry AS the_geom_webmercator,
ARRAY_AGG(DISTINCT s_tag.id) section_tags,
ARRAY_AGG(DISTINCT ss_tag.id) subsection_tags,
ARRAY_AGG(DISTINCT unit_tag.id) unit_tags
FROM observatory.obs_column_table numer_data_ct,
observatory.obs_table numer_t,
observatory.obs_column_table numer_geomref_ct,
observatory.obs_column geomref_c,
observatory.obs_column_to_column geomref_c2c,
observatory.obs_column geom_c,
observatory.obs_column_table geom_geom_ct,
observatory.obs_column_table geom_geomref_ct,
observatory.obs_table geom_t,
observatory.obs_column_tag ss_ctag,
observatory.obs_tag ss_tag,
observatory.obs_column_tag s_ctag,
observatory.obs_tag s_tag,
observatory.obs_column_tag unit_ctag,
observatory.obs_tag unit_tag,
observatory.obs_column numer_c
LEFT JOIN (
observatory.obs_column_to_column denom_c2c
JOIN observatory.obs_column denom_c ON denom_c2c.target_id = denom_c.id
JOIN observatory.obs_column_table denom_data_ct ON denom_data_ct.column_id = denom_c.id
JOIN observatory.obs_table denom_t ON denom_data_ct.table_id = denom_t.id
JOIN observatory.obs_column_table denom_geomref_ct ON denom_geomref_ct.table_id = denom_t.id
) ON denom_c2c.source_id = numer_c.id
WHERE numer_c.id = numer_data_ct.column_id
AND numer_data_ct.table_id = numer_t.id
AND numer_t.id = numer_geomref_ct.table_id
AND numer_geomref_ct.column_id = geomref_c.id
AND geomref_c2c.reltype = 'geom_ref'
AND geomref_c.id = geomref_c2c.source_id
AND geom_c.id = geomref_c2c.target_id
AND geom_geomref_ct.column_id = geomref_c.id
AND geom_geomref_ct.table_id = geom_t.id
AND geom_geom_ct.column_id = geom_c.id
AND geom_geom_ct.table_id = geom_t.id
AND geom_c.type ILIKE 'geometry'
AND numer_c.type NOT ILIKE 'geometry'
AND numer_t.id != geom_t.id
AND numer_c.id != geomref_c.id
AND unit_tag.type = 'unit'
AND ss_tag.type = 'subsection'
AND s_tag.type = 'section'
AND unit_ctag.column_id = numer_c.id
AND unit_ctag.tag_id = unit_tag.id
AND ss_ctag.column_id = numer_c.id
AND ss_ctag.tag_id = ss_tag.id
AND s_ctag.column_id = numer_c.id
AND s_ctag.tag_id = s_tag.id
AND (denom_c2c.reltype = 'denominator' OR denom_c2c.reltype IS NULL)
AND (denom_geomref_ct.column_id = geomref_c.id OR denom_geomref_ct.column_id IS NULL)
AND (denom_t.timespan = numer_t.timespan OR denom_t.timespan IS NULL)
GROUP BY numer_c.id, denom_c.id, geom_c.id,
numer_t.id, denom_t.id, geom_t.id;
''')
dropfiles.write('''
DROP TABLE IF EXISTS observatory.obs_meta
''')

View File

@ -20,3 +20,6 @@ DROP TABLE IF EXISTS observatory.obs_6c1309a64d8f3e6986061f4d1ca7b57743e75e74;
DROP TABLE IF EXISTS observatory.obs_d39f7fe5959891c8296490d83c22ded31c54af13; DROP TABLE IF EXISTS observatory.obs_d39f7fe5959891c8296490d83c22ded31c54af13;
DROP TABLE IF EXISTS observatory.obs_144e8b4f906885b2e057ac4842644a553ae49c6e; DROP TABLE IF EXISTS observatory.obs_144e8b4f906885b2e057ac4842644a553ae49c6e;
DROP TABLE IF EXISTS observatory.obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308; DROP TABLE IF EXISTS observatory.obs_c6fb99c47d61289fbb8e561ff7773799d3fcc308;
DROP TABLE IF EXISTS observatory.obs_meta

File diff suppressed because one or more lines are too long