diff --git a/carto-package.json b/carto-package.json new file mode 100644 index 0000000..8b48ec0 --- /dev/null +++ b/carto-package.json @@ -0,0 +1,9 @@ +{ + "name": "observatory-server-extension", + "current_version": { + "requires": { + "postgresql": "^10.0.0", + "postgis": "^2.4.0.0" + } + } +} diff --git a/docs/examples/01-measures-functions.md b/docs/examples/01-measures-functions.md new file mode 100644 index 0000000..7221eac --- /dev/null +++ b/docs/examples/01-measures-functions.md @@ -0,0 +1,185 @@ + +## Measures functions examples + +- Add a measure to an empty numeric column based on point locations in your table. + +```SQL +UPDATE tablename +SET total_population = OBS_GetUSCensusMeasure(the_geom, 'Total Population') +``` + +- Add a measure to an empty numeric column based on polygons in your table + +```SQL +UPDATE tablename +SET local_male_population = OBS_GetUSCensusMeasure(the_geom, 'Male Population') +``` + +- Add a measure to an empty numeric column based on point locations in your table + +```SQL +UPDATE tablename +SET median_home_value_sqft = OBS_GetMeasure(the_geom, 'us.zillow.AllHomes_MedianValuePerSqft') +``` + + +- Add a measure to an empty column based on polygons in your table + +```SQL +UPDATE tablename +SET household_count = OBS_GetMeasure(the_geom, 'us.census.acs.B11001001') +``` + + +- Add the Category to an empty text column based on point locations in your table + +```SQL +UPDATE tablename +SET segmentation = OBS_GetCategory(the_geom, 'us.census.spielman_singleton_segments.X55') +``` + + +- Obtain metadata that can augment with one additional column of US population +data, using a boundary relevant for the geometry provided and latest timespan. +Limit to only the most recent column most relevant to the extent & density of +input geometries in `tablename`. 
+ +```SQL +SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001"}]', + 1, 1, + COUNT(*) +) FROM tablename +``` + +- Obtain metadata that can augment with one additional column of US population +data, using census tract boundaries. + +```SQL +SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.census_tract"}]', + 1, 1, + COUNT(*) +) FROM tablename +``` + +- Obtain metadata that can augment with two additional columns, one for total +population and one for male population. + +```SQL +SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001"}, {"numer_id": "us.census.acs.B01001002"}]', + 1, 1, + COUNT(*) +) FROM tablename +``` + + +- Validate metadata with two additional columns of US census data; using a boundary relevant for the geometry provided and the latest timespan. Limited to the most recent column, and the most relevant, based on the extent and density of input geometries in `tablename`. 
+ +```SQL +SELECT OBS_MetadataValidation( + ST_SetSRID(ST_Extent(the_geom), 4326), + ST_GeometryType(the_geom), + '[{"numer_id": "us.census.acs.B01003001"}, {"numer_id": "us.census.acs.B01001002"}]', + COUNT(*)::INTEGER +) FROM tablename +GROUP BY ST_GeometryType(the_geom) +``` + + +- Obtain population densities for every geometry in a table, keyed by cartodb_id: + +```SQL +WITH meta AS ( + SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001"}]', + 1, 1, COUNT(*) +) meta FROM tablename) +SELECT id AS cartodb_id, (data->0->>'value')::Numeric AS pop_density +FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename), + (SELECT meta FROM meta)) +``` + +- Update a table with a blank numeric column called `pop_density` with population +densities: + +```SQL +WITH meta AS ( + SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001"}]', + 1, 1, COUNT(*) +) meta FROM tablename), +data AS ( + SELECT id AS cartodb_id, (data->0->>'value')::Numeric AS pop_density + FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename), + (SELECT meta FROM meta))) +UPDATE tablename +SET pop_density = data.pop_density +FROM data +WHERE tablename.cartodb_id = data.cartodb_id +``` + +- Update a table with two measurements at once, population density and household +density. The table should already have a Numeric column `pop_density` and +`household_density`. 
+ +```SQL +WITH meta AS ( + SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom),4326), + '[{"numer_id": "us.census.acs.B01003001"},{"numer_id": "us.census.acs.B11001001"}]', + 1, 1, COUNT(*) +) meta from tablename), +data AS ( + SELECT id, + (data->0->>'value')::Numeric AS pop_density, + (data->1->>'value')::Numeric AS household_density + FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename), + (SELECT meta FROM meta))) +UPDATE tablename +SET pop_density = data.pop_density, + household_density = data.household_density +FROM data +WHERE cartodb_id = data.id +``` + + +- Obtain population densities for every row of a table with FIPS code county IDs +(USA). + +```SQL +WITH meta AS ( + SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]' +) meta FROM tablename) +SELECT id AS fips, (data->0->>'value')::Numeric AS pop_density +FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename), + (SELECT meta FROM meta)) +``` + +- Update a table with population densities for every FIPS code county ID (USA). +This table has a blank column called `pop_density` and fips codes stored in a +column `fips`. 
+ +```SQL +WITH meta AS ( + SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]' +) meta FROM tablename), +data as ( + SELECT id AS fips, (data->0->>'value')::Numeric AS pop_density + FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename), + (SELECT meta FROM meta))) +UPDATE tablename +SET pop_density = data.pop_density +FROM data +WHERE tablename.fips = data.fips +``` diff --git a/docs/examples/02-boundary-functions.md b/docs/examples/02-boundary-functions.md new file mode 100644 index 0000000..8339e38 --- /dev/null +++ b/docs/examples/02-boundary-functions.md @@ -0,0 +1,76 @@ +- Insert all Census Tracts from Lower Manhattan and nearby areas within the supplied bounding box to a table named `manhattan_census_tracts` which has columns `the_geom` (geometry) and `geom_refs` (text). + +```sql +INSERT INTO manhattan_census_tracts(the_geom, geom_refs) +SELECT * +FROM OBS_GetBoundariesByGeometry( + ST_MakeEnvelope(-74.0251922607,40.6945658517, + -73.9651107788,40.7377626342, + 4326), + 'us.census.tiger.census_tract') +``` + +- Insert points that lie on Census Tracts from Lower Manhattan and nearby areas within the supplied bounding box to a table named `manhattan_tract_points` which has columns `the_geom` (geometry) and `geom_refs` (text). + +```sql +INSERT INTO manhattan_tract_points (the_geom, geom_refs) +SELECT * +FROM OBS_GetPointsByGeometry( + ST_MakeEnvelope(-74.0251922607,40.6945658517, + -73.9651107788,40.7377626342, + 4326), + 'us.census.tiger.census_tract') +``` + + +- Overwrite a point geometry with a boundary geometry that contains it in your table + +```SQL +UPDATE tablename +SET the_geom = OBS_GetBoundary(the_geom, 'us.census.tiger.block_group') +``` + + +- Write the US Census block group geoid that contains the point geometry for every row as a new column in your table. 
+ +```SQL +UPDATE tablename +SET geometry_id = OBS_GetBoundaryId(the_geom, 'us.census.tiger.block_group') +``` + + +- Use a table of `geometry_id`s (e.g., geoid from the U.S. Census) to select the unique boundaries that they correspond to and insert into a table called, `overlapping_polygons`. This is a useful method for creating new choropleths of aggregate data. + +```SQL +INSERT INTO overlapping_polygons (the_geom, geometry_id, point_count) +SELECT + OBS_GetBoundaryById(geometry_id, 'us.census.tiger.county') As the_geom, + geometry_id, + count(*) +FROM tablename +GROUP BY geometry_id +``` + + +- Insert into table `denver_census_tracts` the census tract boundaries and geom_refs of census tracts which intersect within 10 miles of downtown Denver, Colorado. + +```sql +INSERT INTO denver_census_tracts(the_geom, geom_refs) +SELECT * +FROM OBS_GetBoundariesByPointAndRadius( + CDB_LatLng(39.7392, -104.9903), -- Denver, Colorado + 10000 * 1.609, -- 10 miles (10km * conversion to miles) + 'us.census.tiger.census_tract') +``` + + +- Insert into table `denver_tract_points` points on US census tracts and their corresponding geoids for census tracts which intersect within 10 miles of downtown Denver, Colorado, USA. + +```sql +INSERT INTO denver_tract_points(the_geom, geom_refs) +SELECT * +FROM OBS_GetPointsByPointAndRadius( + CDB_LatLng(39.7392, -104.9903), -- Denver, Colorado + 10000 * 1.609, -- 10 miles (10km * conversion to miles) + 'us.census.tiger.census_tract') +``` \ No newline at end of file diff --git a/docs/examples/03-discovery-functions.md b/docs/examples/03-discovery-functions.md new file mode 100644 index 0000000..679192f --- /dev/null +++ b/docs/examples/03-discovery-functions.md @@ -0,0 +1,160 @@ + +```SQL +SELECT * FROM OBS_Search('home value') +``` + +```SQL +SELECT * FROM OBS_GetAvailableBoundaries(CDB_LatLng(40.7, -73.9)) +``` + +- Obtain all numerators that are available within a small rectangle. 
+ +```SQL +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326)) +``` + +- Obtain all numerators that are available within a small rectangle and are for +the United States only. + +```SQL +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}'); +``` + +- Obtain all numerators that are available within a small rectangle and are +employment related for the United States only. + +```SQL +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment}'); +``` + +- Obtain all numerators that are available within a small rectangle and are +related to both employment and age & gender for the United States only. + +```SQL +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment, subsection/tags.age_gender}'); +``` + +- Obtain all numerators that work with US population (`us.census.acs.B01003001`) +as a denominator. + +```SQL +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001') +WHERE valid_denom IS True; +``` + +- Obtain all numerators that work with US states (`us.census.tiger.state`) +as a geometry. + +```SQL +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state') +WHERE valid_geom IS True; +``` + +- Obtain all numerators available in the timespan `2011 - 2015`. + +```SQL +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015') +WHERE valid_timespan IS True; +``` + +- Obtain all denominators that are available within a small rectangle. 
+ +```SQL +SELECT * FROM OBS_GetAvailableDenominators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326)); +``` + +- Obtain all denominators that are available within a small rectangle and are for +the United States only. + +```SQL +SELECT * FROM OBS_GetAvailableDenominators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}'); +``` + +- Obtain all denominators for male population (`us.census.acs.B01001002`). + +```SQL +SELECT * FROM OBS_GetAvailableDenominators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01001002') +WHERE valid_numer IS True; +``` + +- Obtain all denominators that work with US states (`us.census.tiger.state`) +as a geometry. + +```SQL +SELECT * FROM OBS_GetAvailableDenominators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state') +WHERE valid_geom IS True; +``` + +- Obtain all denominators available in the timespan `2011 - 2015`. + +```SQL +SELECT * FROM OBS_GetAvailableDenominators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015') +WHERE valid_timespan IS True; +``` + +- Obtain all geometries that are available within a small rectangle. + +```SQL +SELECT * FROM OBS_GetAvailableGeometries( + ST_MakeEnvelope(-74, 41, -73, 40, 4326)); +``` + +- Obtain all geometries that are available within a small rectangle and are for +the United States only. + +```SQL +SELECT * FROM OBS_GetAvailableGeometries( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}'); +``` + +- Obtain all geometries that work with total population (`us.census.acs.B01003001`). + +```SQL +SELECT * FROM OBS_GetAvailableGeometries( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001') +WHERE valid_numer IS True; +``` + +- Obtain all geometries with timespan `2015`. 
+ +```SQL +SELECT * FROM OBS_GetAvailableGeometries( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2015') +WHERE valid_timespan IS True; +``` + +- Obtain all timespans that are available within a small rectangle. + +```SQL +SELECT * FROM OBS_GetAvailableTimespans( + ST_MakeEnvelope(-74, 41, -73, 40, 4326)); +``` + +- Obtain all timespans for total population (`us.census.acs.B01003001`). + +```SQL +SELECT * FROM OBS_GetAvailableTimespans( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001') +WHERE valid_numer IS True; +``` + +- Obtain all timespans that work with US states (`us.census.tiger.state`) +as a geometry. + +```SQL +SELECT * FROM OBS_GetAvailableTimespans( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, 'us.census.tiger.state') +WHERE valid_geom IS True; +``` \ No newline at end of file diff --git a/docs/examples/examples.json b/docs/examples/examples.json new file mode 100644 index 0000000..1da51f8 --- /dev/null +++ b/docs/examples/examples.json @@ -0,0 +1,107 @@ +{ + "main": { + "file": "import/import-from-database.md" + }, + "categories": [ + { + "title": "Import", + "samples": [ + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + }, + { + "title": "Import standard table", + "desc": "Import standard table into your CARTO account.", + "file": "import/import-standard-table.md" + }, + { + "title": "Import sync table", + "desc": "Import sync table into your CARTO account from database.", + "file": "import/import-sync-table.md" + }, + { + "title": "Import sync table as dataset", + "desc": "Import sync table as dataset into your CARTO account.", + "file": "import/import-from-database.md" + } + ] + }, + { + "title": "Export", + "samples": [ + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + }, + { + "title": "Import from 
database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + }, + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + }, + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + } + ] + }, + { + "title": "Tables", + "samples": [ + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + }, + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + }, + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + }, + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + } + ] + }, + { + "title": "Misc", + "samples": [ + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + }, + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + }, + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + }, + { + "title": "Import from database", + "desc": "Import data into your CARTO account from database.", + "file": "import/import-from-database.md" + } + ] + } + ] +} diff --git a/docs/guides/01-overview.md b/docs/guides/01-overview.md new file mode 100644 index 0000000..a30d871 --- /dev/null +++ b/docs/guides/01-overview.md @@ -0,0 +1,88 @@ +## Overview + +For Enterprise account plans, the [Data 
Observatory](https://carto.com/data) provides access to a searchable catalog of advanced location data, such as census block, population segments, boundaries and so on. A set of SQL functions allow you to augment your own data and broaden your analysis by discovering boundaries and measures of data from this catalog. + +This section describes the Data Observatory functions and the type of data that it returns. + +### Functions Overview + +There are several functions for accessing different categories of data into your visualizations. You can discover and retrieve data by requesting OBS functions from the Data Observatory. These Data Observatory functions are designed for specific, targeted methods of data analysis. The response for these functions are classified into two primary types of data results; measures and boundaries. + +- Boundaries are the geospatial boundaries you need to map or aggregate your data. Examples include Country Borders, Zip Code Tabulation Areas, and Counties + +- Measures are the various dimensions of information that CARTO can tell you about a place. Examples include, Population, Household Income, and Median Age + +Depending on the OBS function, you will get one, or both, types of data in your result. See [Measures and Boundary Data](#measures-and-boundary-results) for details about available data. + +#### Measures Functions + +Use location-based measures to analyze your data by accessing population and industry measurements at point locations, or within a region or polygon. These include variables for demographic, economic, and other types of information. + +- See [Measures Functions]({{ site.dataobservatory_docs }}/reference/#measures-functions) for specific OBS functions +- Returns Measures data results + +#### Boundary Functions + +Use global boundaries to analyze your data by accessing multi-scaled geometries for visualizations. Examples include US Block Groups and Census Tracts. 
These enable you to aggregate your data into geometric polygons. You can also use your own data to query specific boundaries. + +- See [Boundary Functions]({{ site.dataobservatory_docs }}/reference/#boundary-functions) for specific OBS functions +- Returns Boundary data results + +#### Discovery Functions + +Discovery Functions provide easier ways for you to find Measures and Boundaries of interest in the Data Observatory. The Discovery functions allow you to perform targeted searches for Measures, or use your own data to discover what is available at a given location. As this is a **retrieval tool** of the Data Observatory, the query results do not change your table. The response back displays one or more identifiers as matches to your search criteria. Each unique identifier can _then_ be used as part of other OBS functions to access any of the other Data Observatory functions. + +- See [Discovery Functions]({{ site.dataobservatory_docs }}/reference/#discovery-functions) for specific OBS functions +- Returns Boundary or Measures matches for your data + +### Measures and Boundary Results + +The response from the Data Observatory functions are classified as either Measures or Boundary. Depending on your OBS function, you will get one, or both, types of data in your result. + +#### Measures Data + +Measures provide details about local populations, markets, industries and other dimensions. You can search for available Measures using the Discovery functions, or by viewing the Data Catalog. Measures can be requested for Point locations, or can be summarized for Polygons (regions). In general, Point location requests will return raw aggregate values (e.g. Median Rent), or will provide amounts per square kilometer (e.g. Population). The total square kilometers of the area searched will be returned, allowing you to get raw counts, if needed. Alternatively, if you search over a polygon, raw counts will be returned. 
+ +The following table indicates where Measures data results are available. Measures can include raw measures and, when indicated, can provide geometries. + +Data Category | Examples | Type of Data Response | Availability +--- | --- | --- | --- +Housing | Vacant Housing Units, Median Rent, Units for Sale, Mortgage Count | Point measurement, Area measurement, With Geo Border | United States +Income | Median Household Income, Gini Index | Point measurement, Area measurement, With Geo Border | United States +Education | Students Enrolled in School, Population Completed H.S | Point measurement, Area measurement, With Geo Border | United States +Languages | Speaks Spanish at Home, Speaks only English at Home | Point measurement, Area measurement, With Geo Border | United States +Employment | Workers over the Age of 16 | Point measurement, Area measurement, With Geo Border | United States +Jobs and Workforce | Origin-Destination of Workforce, Job Wages by job type | Point measurement, Area measurement, With Geo Border | United States +Transportation | Commuters by Public Transportation, Work at Home | Point measurement, Area measurement, With Geo Border | United States +Race, Age and Gender | Asian Population, Median Age, Job wages by race | Point measurement, Area measurement, With Geo Border | United States, Spain +Population | Population per Square Kilometer | Point measurement, Area measurement | United States, Spain + +#### Boundary Data + +The following table indicates where Boundary data results are available. 
+ +Boundary Name | Availability +--- | --- +Countries | Global +First-level administrative subdivisions | Global +Second-level administrative subdivisions | United States +Zip Code Tabulation Areas (ZCTA) | United States +Congressional Districts | United States +Digital Marketing Areas | United States +Census Public Use Microdata Areas | United States +Census Tracts | United States +Census Block Groups | United States +US Census Blocks | United States +Disputed Areas | Global +Marine Area | Global +Oceans | Global +Continents | Global +Timezones | Global + +##### Water Clipping Levels + +Many geometries come with various degrees of water accuracy (how closely they follow features such as coastlines). Water clipping refers to how the level of accuracy is returned by the Data Observatory. Data results can either include no clip (no water areas are clipped in the geometry), or high clip (coastlines and inland waterways are clipped out of the final geometry). For example, US Census data might only show coastlines as a straight border line, and not as an inland water area. To find out which levels of water clipping are available for Boundary layers, refer to the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html). + +**Note:** While high clip water levels may be better for some kinds of maps and analysis, this type of data consumes more account storage space and may be subject to quota limitations. + +For details about how to access any of this data, see [Accessing the Data Observatory]({{ site.dataobservatory_docs }}/guides/accessing-the-data-observatory/). 
diff --git a/docs/guides/02-accessing-the-data-observatory.md b/docs/guides/02-accessing-the-data-observatory.md new file mode 100644 index 0000000..96825de --- /dev/null +++ b/docs/guides/02-accessing-the-data-observatory.md @@ -0,0 +1,121 @@ +## Accessing the Data Observatory + +The workflow for accessing the Data Observatory includes using a SQL query to apply a specific method of data enrichment or analysis to your data. You can access the Data Observatory by applying a custom query in CARTO Builder, or directly through the SQL API. + +#### Prerequisites + +You must have an Enterprise account and be familiar with using SQL requests. + +- The Data Observatory catalog includes data that is managed by CARTO, on a SaaS cloud platform. For Enterprise users, the Data Observatory can be enabled by contacting CARTO. + +- A set of Data Observatory functions (prefaced with "OBS" for Observatory), allow you to retrieve boundaries and measures data through a SQL request. These functions should be used with UPDATE and INSERT statements, not SELECT statements, as we are currently not supporting dynamic use of the Data Observatory + +**Tip:** See the recommended [Best Practices](#best-practices) for using the Data Observatory. + +### Enrich from Data Observatory + +As an alternative to using SQL queries, you can apply the _Enrich from Data Observatory_ ANALYSIS to a selected map layer in CARTO Builder. This enables you add a new column with contextual demographic and economic measures, without having to apply the code yourself. For details, see the [Enrich from Data Observatory Guide](https://carto.com/learn/guides/analysis/enrich-from-data-observatory) in our Learn hub. + +### Apply OBS Functions to a Dataset + +This procedure describes how to access the Data Observatory functions by applying SQL queries in a selected dataset. 
+ +1) Review the [prerequisites](#prerequisites) section before attempting to access any of the Data Observatory functions + +2) [View the Data Observatory Catalog](https://cartodb.github.io/bigmetadata/index.html) + +An overview for each of the analyzed functions of data appears, and indicates the unique function signature needed to access the catalog item. You can copy the OBS function from the Data Observatory catalog and modify the placeholder parameters shown in curly brackets (e.g. "{table_name}"). + +3) From _Your datasets_ dashboard in CARTO, click _NEW DATASET_ and _CREATE EMPTY DATASET_. + +This creates an untitled table. You can get population measurements from the Data Observatory to build your dataset and create a map. + +4) The SQL view is available when you are viewing your dataset in table view (Data View). Click the slider to switch between viewing your data by METADATA (table) to _SQL_ (opens the SQL view). + +5) Apply the OBS function to modify your table. + +For example, the following image displays a SQL query using the Boundary function, [`OBS_GetBoundariesByGeometry(geom geometry, geometry_id text)`](https://carto.com/docs/carto-engine/data/boundary-functions/#obsgetboundariesbygeometrygeom-geometry-geometryid-text) function. The SQL query inserts the boundary data as a single polygon geometry for each row of data. + +![Query OBS Function in empty dataset](../img/obs_getboundary.jpg) + + +**Tip:** Want to insert population data to create a dataset? Replace `{my table name}` with your dataset name, and apply the SQL query: + +```sql +INSERT INTO {my table name} (the_geom, name) +SELECT * +FROM OBS_GetBoundariesByGeometry( + st_makeenvelope(-73.97257804870605,40.671134192879286,-73.89052391052246,40.722868115036974, 4326), + 'us.census.tiger.census_tract' +) As m(the_geom, geoid); +``` + +Another example shows how to get the local male population into your dataset. 
Before applying the SQL query, click _ADD COLUMN_ to create and name a column to store the [`OBS_GetMeasure`]({{ site.dataobservatory_docs}}/reference/#obsgetmeasurepolygon-geometry-measureid-text) data. + +![Query local male population and apply to data](../img/local_male_pop.jpg) + +**Tip:** Want to update your dataset to include the local male population from the Data Observatory? Replace `{my table name}` with your dataset name, and apply the SQL query: + +```sql +UPDATE {my table name} +SET local_male_population = OBS_GetMeasure(the_geom, 'us.census.acs.B01001002') +``` +6) Click _CREATE MAP_ from your dataset, to visualize the Data Observatory results. You can add custom styling, and add widgets to better visualize your data + +![Visualize Data Observatory results](../img/visualize_obs_data.jpg) + + +### SQL API and OBS Functions + +This procedure describes how to access the Data Observatory functions directly through the SQL API. + +1. In order to use the SQL API, you must be [authenticated]({{ site.dataobservatory_docs }}/guides/authentication/#authentication) using API keys + + **Note:** Review the [prerequisites](#prerequisites) section before attempting to access any of the Data Observatory functions and [view the Data Observatory Catalog](https://cartodb.github.io/bigmetadata/index.html) to identify the OBS function you are looking for. + +2. 
Query the Data Observatory directly with a specified `OBS` function to apply the results (Measures/Boundaries data) to your table, with the INSERT or UPDATE function + +```sql +https://{username}.carto.com/api/v2/sql?q=UPDATE {tablename} +SET local_male_population = OBS_GetMeasure(the_geom, 'us.census.acs.B01001002')&api_key={api_key} +``` +### Tips + +Other useful tips about OBS functions: + +- Some Data Observatory functions return geometries, enabling you to apply an UPDATE statement with an OBS function, to update `the_geom` column +- To include [water clipping levels]({{ site.dataobservatory_docs }}/guides/overview/#water-clipping-levels) as part of your results, append `_clipped` as part of the OBS function. For example: + +```sql +UPDATE {tablename} +SET local_male_population = OBS_GetMeasure(the_geom, 'us.census.acs.B01001002','area','us.census.tiger.census_tract_clipped') +``` + +### Best Practices + +The following usage notes are recommended when using the Data Observatory functions in SQL queries: + +- It is discouraged to use the SELECT operation with the Data Observatory functions in your map layers. The results may be visible, but CARTO may not support dynamic rendering of the Data Observatory in the future, so your visualizations may break + + The Data Observatory is **recommended** to be used with INSERT or UPDATE operations, for applying analyzed measures and boundaries data to your tables. While SELECT (retrieve) is standard for SQL API requests, be mindful of quota consumption and use INSERT (to insert a new record) or UPDATE (to update an existing record), for best practices. + + **Exception:** [Discovery Functions]({{ site.dataobservatory_docs }}/guides/overview/#discovery-functions) are the exception. You can use SELECT as these functions are not actually retrieving data, they are retrieving ids that you can use for other functions. 
+ +- You can reduce storage space for unneeded geometries and optimize query performance by applying the PostGIS [`ST_Simplify`](http://www.postgis.org/docs/ST_Simplify.html) function. For example, you can simplify the `the_geom` for a large table of polygons and reduce the size of them for quicker rendering. For other tips, see the [most commonly used PostGIS functions](https://carto.com/docs/faqs/postgresql-and-postgis/#what-are-the-most-common-postgis-functions) that you can apply with CARTO + +- Only point or polygon geometries are supported for OBS functions. If you attempt to apply Measures or Boundary results to line geometries, an error appears + +- The Data Observatory is optimal for modifying existing tables with analytical results, not for building new tables of data + + **Exception:** Exceptions apply for the following boundary functions, since they were designed to return multiple responses of geographical identifiers, as opposed to a single geometry. Create an empty dataset and build a new dataset from a SQL query, using any one of these boundary functions. + + - [`OBS_GetBoundariesByGeometry(geom geometry, geometry_id text)`]({{ site.dataobservatory_docs }}/reference/#boundary-functions#obsgetboundariesbygeometrygeom-geometry-geometryid-text) + - [`OBS_GetPointsByGeometry(polygon geometry, geometry_id text)`]({{ site.dataobservatory_docs }}/reference/#boundary-functions#obsgetpointsbygeometrypolygon-geometry-geometryid-text) + - [`OBS_GetBoundariesByPointAndRadius(point geometry, radius numeric, boundary_id text)`]({{ site.dataobservatory_docs }}/reference/#boundary-functions#obsgetboundariesbypointandradiuspoint-geometry-radius-numeric-boundaryid-text) + - [`OBS_GetPointsByPointAndRadius(point geometry, radius numeric, boundary_id text)`]({{ site.dataobservatory_docs }}/reference/#boundary-functions#obsgetpointsbypointandradiuspoint-geometry-radius-numeric-boundaryid-text) + +- For optimal performance, each SQL request should not exceed 100 rows. 
As an alternative, you can use a [SQL Batch Query](/docs/carto-engine/sql-api/batch-queries) for queries with long-running CPU processing times + +### Examples + +View our [CARTO Blogs](https://carto.com/blog/categories/product/) for examples that highlight the benefits of using the Data Observatory. diff --git a/docs/guides/03-glossary.md b/docs/guides/03-glossary.md new file mode 100644 index 0000000..92cef15 --- /dev/null +++ b/docs/guides/03-glossary.md @@ -0,0 +1,126 @@ +## Glossary + +A list of boundary ids and measure_names for Data Observatory functions. For US based boundaries, the Shoreline Clipped version provides a high-quality shoreline clipping for mapping uses. + +### Boundary IDs + +Boundary Name | Boundary ID | Shoreline Clipped Boundary ID +--------------------- | --------------------- | --- +US States | us.census.tiger.state | us.census.tiger.state_clipped +US County | us.census.tiger.county | us.census.tiger.county_clipped +US Census Zip Code Tabulation Areas | us.census.tiger.zcta5 | us.census.tiger.zcta5_clipped +US Census Tracts | us.census.tiger.census_tract | us.census.tiger.census_tract_clipped +US Elementary School District | us.census.tiger.school_district_elementary | us.census.tiger.school_district_elementary_clipped +US Secondary School District | us.census.tiger.school_district_secondary | us.census.tiger.school_district_secondary_clipped +US Unified School District | us.census.tiger.school_district_unified | us.census.tiger.school_district_unified_clipped +US Congressional Districts | us.census.tiger.congressional_district | us.census.tiger.congressional_district_clipped +US Census Blocks | us.census.tiger.block | us.census.tiger.block_clipped +US Census Block Groups | us.census.tiger.block_group | us.census.tiger.block_group_clipped +US Census PUMAs | us.census.tiger.puma | us.census.tiger.puma_clipped +US Incorporated Places | us.census.tiger.place | us.census.tiger.place_clipped +ES Sección Censal | es.ine.geom | none +Regions 
(First-level Administrative) | whosonfirst.wof_region_geom | none +Continents | whosonfirst.wof_continent_geom | none +Countries | whosonfirst.wof_country_geom | none +Marine Areas | whosonfirst.wof_marinearea_geom | none +Disputed Areas | whosonfirst.wof_disputed_geom | none + + + +### OBS_GetUSCensusMeasure Names Table + +This list contains human readable names accepted in the ```OBS_GetUSCensusMeasure``` function. For the more comprehensive list of columns available to the ```OBS_GetMeasure``` function, see the [Data Observatory Catalog](https://cartodb.github.io/bigmetadata/index.html). + +Measure ID | Measure Name | Measure Description +--------------------- | --------------------- | --- +us.census.acs.B01002001 | Median Age | The median age of all people in a given geographic area. +us.census.acs.B15003021 | Population Completed Associate’s Degree | The number of people in a geographic area over the age of 25 who obtained an associate’s degree, and did not complete a more advanced degree. +us.census.acs.B15003022 | Population Completed Bachelor’s Degree | The number of people in a geographic area over the age of 25 who obtained a bachelor’s degree, and did not complete a more advanced degree. +us.census.acs.B15003023 | Population Completed Master’s Degree | The number of people in a geographic area over the age of 25 who obtained a master’s degree, but did not complete a more advanced degree. +us.census.acs.B14001007 | Students Enrolled in Grades 9 to 12 | The total number of people in each geography currently enrolled in grades 9 through 12 inclusive. This corresponds roughly to high school. +us.census.acs.B05001006 | Not a U.S. Citizen Population | The number of people within each geography who indicated that they are not U.S. citizens. +us.census.acs.B19001012 | Households with income of $60,000 To $74,999 | The number of households in a geographic area whose annual income was between $60,000 and $74,999. 
+us.census.acs.B01003001 | Total Population | The total number of all people living in a given geographic area. This is a very useful catch-all denominator when calculating rates. +us.census.acs.B01001002 | Male Population | The number of people within each geography who are male. +us.census.acs.B01001026 | Female Population | The number of people within each geography who are female. +us.census.acs.B03002003 | White Population | The number of people identifying as white, non-Hispanic in each geography. +us.census.acs.B03002004 | Black or African American Population | The number of people identifying as black or African American, non-Hispanic in each geography. +us.census.acs.B03002006 | Asian Population | The number of people identifying as Asian, non-Hispanic in each geography. +us.census.acs.B03002012 | Hispanic Population | The number of people identifying as Hispanic or Latino in each geography. +us.census.acs.B03002005 | American Indian and Alaska Native Population | The number of people identifying as American Indian or Alaska native in each geography. +us.census.acs.B03002008 | Other Race population | The number of people identifying as another race in each geography. +us.census.acs.B03002009 | Two or more races population | The number of people identifying as two or more races in each geography. +us.census.acs.B03002002 | Population not Hispanic | The number of people not identifying as Hispanic or Latino in each geography. +us.census.acs.B23025001 | Population age 16 and over | The number of people in each geography who are age 16 or over. +us.census.acs.B08006001 | Workers over the Age of 16 | The number of people in each geography who work. Workers include those employed at private for-profit companies, the self-employed, government workers and non-profit employees. +us.census.acs.B08006002 | Commuters by Car, Truck, or Van | The number of workers age 16 years and over within a geographic area who primarily traveled to work by car, truck or van. 
This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work. +us.census.acs.B08006003 | Commuters who drove alone | The number of workers age 16 years and over within a geographic area who primarily traveled by car driving alone. This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work. +us.census.acs.B11001001 | Households | A count of the number of households in each geography. A household consists of one or more people who live in the same dwelling and also share at meals or living accommodation, and may consist of a single family or some other grouping of people. +us.census.acs.B08006004 | Commuters by Carpool | The number of workers age 16 years and over within a geographic area who primarily traveled to work by carpool. This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work. +us.census.acs.B08301010 | Commuters by Public Transportation | The number of workers age 16 years and over within a geographic area who primarily traveled to work by public transportation. This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work. +us.census.acs.B08006009 | Commuters by Bus | The number of workers age 16 years and over within a geographic area who primarily traveled to work by bus. This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work. This is a subset of workers who commuted by public transport. +us.census.acs.B08006011 | Commuters by Subway or Elevated | The number of workers age 16 years and over within a geographic area who primarily traveled to work by subway or elevated train. 
This is the principal mode of travel or type of conveyance, by distance rather than time, that the worker usually used to get from home to work. This is a subset of workers who commuted by public transport. +us.census.acs.B08006015 | Walked to Work | The number of workers age 16 years and over within a geographic area who primarily walked to work. This would mean that of any way of getting to work, they travelled the most distance walking. +us.census.acs.B08006017 | Worked at Home | The count within a geographical area of workers over the age of 16 who worked at home. +us.census.acs.B09001001 | Children under 18 Years of Age | The number of people within each geography who are under 18 years of age. +us.census.acs.B14001001 | Population 3 Years and Over | The total number of people in each geography age 3 years and over. This denominator is mostly used to calculate rates of school enrollment. +us.census.acs.B14001002 | Students Enrolled in School | The total number of people in each geography currently enrolled at any level of school, from nursery or pre-school to advanced post-graduate education. Only includes those over the age of 3. +us.census.acs.B14001005 | Students Enrolled in Grades 1 to 4 | The total number of people in each geography currently enrolled in grades 1 through 4 inclusive. This corresponds roughly to elementary school. +us.census.acs.B14001006 | Students Enrolled in Grades 5 to 8 | The total number of people in each geography currently enrolled in grades 5 through 8 inclusive. This corresponds roughly to middle school. +us.census.acs.B14001008 | Students Enrolled as Undergraduate in College | The number of people in a geographic area who are enrolled in college at the undergraduate level. Enrollment refers to being registered or listed as a student in an educational program leading to a college degree. This may be a public school or college, a private school or college. 
+us.census.acs.B15003001 | Population 25 Years and Over | The number of people in a geographic area who are over the age of 25. This is used mostly as a denominator of educational attainment. +us.census.acs.B15003017 | Population Completed High School | The number of people in a geographic area over the age of 25 who completed high school, and did not complete a more advanced degree. +us.census.acs.B15003019 | Population completed less than one year of college, no degree | The number of people in a geographic area over the age of 25 who attended college for less than one year and no further. +us.census.acs.B15003020 | Population completed more than one year of college, no degree | The number of people in a geographic area over the age of 25 who attended college for more than one year but did not obtain a degree. +us.census.acs.B16001001 | Population 5 Years and Over | The number of people in a geographic area who are over the age of 5. This is primarily used as a denominator of measures of language spoken at home. +us.census.acs.B16001002 | Speaks only English at Home | The number of people in a geographic area over age 5 who speak only English at home. +us.census.acs.B16001003 | Speaks Spanish at Home | The number of people in a geographic area over age 5 who speak Spanish at home, possibly in addition to other languages. +us.census.acs.B17001001 | Population for Whom Poverty Status Determined | The number of people in each geography who could be identified as either living in poverty or not. This should be used as the denominator when calculating poverty rates, as it excludes people for whom it was not possible to determine poverty. +us.census.acs.B17001002 | Income In The Past 12 Months Below Poverty Level | The number of people in a geographic area who are part of a family (which could be just them as an individual) determined to be in poverty following the Office of Management and Budget’s Directive 14. 
(https://www.census.gov/hhes/povmeas/methodology/ombdir14.html) +us.census.acs.B08134010 | Number of workers with a commute of over 60 minutes | The number of workers over the age of 16 who do not work from home and commute in over 60 minutes in a geographic area. +us.census.acs.B12005002 | Never Married | The number of people in a geographic area who have never been married. +us.census.acs.B12005005 | Currently married | The number of people in a geographic area who are currently married. +us.census.acs.B12005008 | Married but separated | The number of people in a geographic area who are married but separated. +us.census.acs.B12005012 | Widowed | The number of people in a geographic area who are widowed. +us.census.acs.B12005015 | Divorced | The number of people in a geographic area who are divorced. +us.census.acs.B19013001 | Median Household Income in the past 12 Months | Within a geographic area, the median income received by every household on a regular basis before payments for personal income taxes, social security, union dues, medicare deductions, etc. It includes income received from wages, salary, commissions, bonuses, and tips; self-employment income from own nonfarm or farm businesses, including proprietorships and partnerships; interest, dividends, net rental income, royalty income, or income from estates and trusts; Social Security or Railroad Retirement income; Supplemental Security Income (SSI); any cash public assistance or welfare payments from the state or local welfare office; retirement, survivor, or disability benefits; and any other sources of income received regularly such as Veterans’ (VA) payments, unemployment and/or worker’s compensation, child support, and alimony. +us.census.acs.B25001001 | Housing Units | A count of housing units in each geography. 
A housing unit is a house, an apartment, a mobile home or trailer, a group of rooms, or a single room occupied as separate living quarters, or if vacant, intended for occupancy as separate living quarters. +us.census.acs.B25002003 | Vacant Housing Units | The count of vacant housing units in a geographic area. A housing unit is vacant if no one is living in it at the time of enumeration, unless its occupants are only temporarily absent. Units temporarily occupied at the time of enumeration entirely by people who have a usual residence elsewhere are also classified as vacant. +us.census.acs.B25004002 | Vacant Housing Units for Rent | The count of vacant housing units in a geographic area that are for rent. A housing unit is vacant if no one is living in it at the time of enumeration, unless its occupants are only temporarily absent. Units temporarily occupied at the time of enumeration entirely by people who have a usual residence elsewhere are also classified as vacant. +us.census.acs.B19001013 | Households with income of $75,000 To $99,999 | The number of households in a geographic area whose annual income was between $75,000 and $99,999. +us.census.acs.B19001014 | Households with income of $100,000 To $124,999 | The number of households in a geographic area whose annual income was between $100,000 and $124,999. +us.census.acs.B25004004 | Vacant Housing Units for Sale | The count of vacant housing units in a geographic area that are for sale. A housing unit is vacant if no one is living in it at the time of enumeration, unless its occupants are only temporarily absent. Units temporarily occupied at the time of enumeration entirely by people who have a usual residence elsewhere are also classified as vacant. +us.census.acs.B25058001 | Median Rent | The median contract rent within a geographic area. The contract rent is the monthly rent agreed to or contracted for, regardless of any furnishings, utilities, fees, meals, or services that may be included. 
For vacant units, it is the monthly rent asked for the rental unit at the time of interview. +us.census.acs.B25071001 | Percent of Household Income Spent on Rent | Within a geographic area, the median percentage of household income which was spent on gross rent. Gross rent is the amount of the contract rent plus the estimated average monthly cost of utilities (electricity, gas, water, sewer etc.) and fuels (oil, coal, wood, etc.) if these are paid by the renter. Household income is the sum of the income of all people 15 years and older living in the household. +us.census.acs.B25075025 | Owner-occupied Housing Units valued at $1,000,000 or more. | The count of owner occupied housing units in a geographic area that are valued at $1,000,000 or more. Value is the respondent’s estimate of how much the property (house and lot, mobile home and lot, or condominium unit) would sell for if it were for sale. +us.census.acs.B25081002 | Owner-occupied Housing Units with a Mortgage | The count of housing units within a geographic area that are mortgaged. Mortgage refers to all forms of debt where the property is pledged as security for repayment of the debt, including deeds of trust, trust deed, contracts to purchase, land contracts, junior mortgages, and home equity loans. +us.census.acs.B23025002 | Population in Labor Force | The number of people in each geography who are either in the civilian labor force or are members of the U.S. Armed Forces (people on active duty with the United States Army, Air Force, Navy, Marine Corps, or Coast Guard). +us.census.acs.B23025003 | Population in Civilian Labor Force | The number of civilians 16 years and over in each geography who can be classified as either employed or unemployed below. +us.census.acs.B08135001 | Aggregate travel time to work | The total number of minutes every worker over the age of 16 who did not work from home spent commuting to work in one day in a geographic area. 
+us.census.acs.B19001002 | Households with income less than $10,000 | The number of households in a geographic area whose annual income was less than $10,000. +us.census.acs.B19001003 | Households with income of $10,000 to $14,999 | The number of households in a geographic area whose annual income was between $10,000 and $14,999. +us.census.acs.B19001004 | Households with income of $15,000 to $19,999 | The number of households in a geographic area whose annual income was between $15,000 and $19,999. +us.census.acs.B23025004 | Employed Population | The number of civilians 16 years old and over in each geography who either (1) were at work, that is, those who did any work at all during the reference week as paid employees, worked in their own business or profession, worked on their own farm, or worked 15 hours or more as unpaid workers on a family farm or in a family business; or (2) were with a job but not at work, that is, those who did not work during the reference week but had jobs or businesses from which they were temporarily absent due to illness, bad weather, industrial dispute, vacation, or other personal reasons. Excluded from the employed are people whose only activity consisted of work around the house or unpaid volunteer work for religious, charitable, and similar organizations; also excluded are all institutionalized people and people on active duty in the United States Armed Forces. +us.census.acs.B23025005 | Unemployed Population | The number of civilians in each geography who are 16 years old and over and are classified as unemployed. +us.census.acs.B23025006 | Population in Armed Forces | The number of people in each geography who are members of the U.S. Armed Forces (people on active duty with the United States Army, Air Force, Navy, Marine Corps, or Coast Guard). +us.census.acs.B23025007 | Population Not in Labor Force | The number of people in each geography who are 16 years old and over who are not classified as members of the labor force. 
This category consists mainly of students, homemakers, retired workers, seasonal workers interviewed in an off season who were not looking for work, institutionalized people, and people doing only incidental unpaid family work. +us.census.acs.B12005001 | Population 15 Years and Over | The number of people in a geographic area who are over the age of 15. This is used mostly as a denominator of marital status. +us.census.acs.B08134001 | Workers age 16 and over who do not work from home | The number of workers over the age of 16 who do not work from home in a geographic area. +us.census.acs.B08134002 | Number of workers with less than 10 minute commute | The number of workers over the age of 16 who do not work from home and commute in less than 10 minutes in a geographic area. +us.census.acs.B08303004 | Number of workers with a commute between 10 and 14 minutes | The number of workers over the age of 16 who do not work from home and commute in between 10 and 14 minutes in a geographic area. +us.census.acs.B08303005 | Number of workers with a commute between 15 and 19 minutes | The number of workers over the age of 16 who do not work from home and commute in between 15 and 19 minutes in a geographic area. +us.census.acs.B08303006 | Number of workers with a commute between 20 and 24 minutes | The number of workers over the age of 16 who do not work from home and commute in between 20 and 24 minutes in a geographic area. +us.census.acs.B08303007 | Number of workers with a commute between 25 and 29 minutes | The number of workers over the age of 16 who do not work from home and commute in between 25 and 29 minutes in a geographic area. +us.census.acs.B08303008 | Number of workers with a commute between 30 and 34 minutes | The number of workers over the age of 16 who do not work from home and commute in between 30 and 34 minutes in a geographic area. 
+us.census.acs.B08134008 | Number of workers with a commute between 35 and 44 minutes | The number of workers over the age of 16 who do not work from home and commute in between 35 and 44 minutes in a geographic area. +us.census.acs.B08303011 | Number of workers with a commute between 45 and 59 minutes | The number of workers over the age of 16 who do not work from home and commute in between 45 and 59 minutes in a geographic area. +us.census.acs.B19001005 | Households with income of $20,000 To $24,999 | The number of households in a geographic area whose annual income was between $20,000 and $24,999. +us.census.acs.B19001006 | Households with income of $25,000 To $29,999 | The number of households in a geographic area whose annual income was between $25,000 and $29,999. +us.census.acs.B19001007 | Households with income of $30,000 To $34,999 | The number of households in a geographic area whose annual income was between $30,000 and $34,999. +us.census.acs.B19001008 | Households with income of $35,000 To $39,999 | The number of households in a geographic area whose annual income was between $35,000 and $39,999. +us.census.acs.B19001009 | Households with income of $40,000 To $44,999 | The number of households in a geographic area whose annual income was between $40,000 and $44,999. +us.census.acs.B19001010 | Households with income of $45,000 To $49,999 | The number of households in a geographic area whose annual income was between $45,000 and $49,999. +us.census.acs.B19001011 | Households with income of $50,000 To $59,999 | The number of households in a geographic area whose annual income was between $50,000 and $59,999. +us.census.acs.B19001015 | Households with income of $125,000 To $149,999 | The number of households in a geographic area whose annual income was between $125,000 and $149,999. 
+us.census.acs.B19001016 | Households with income of $150,000 To $199,999 | The number of households in a geographic area whose annual income was between $150,000 and $199,999. +us.census.acs.B19001017 | Households with income of $200,000 Or More | The number of households in a geographic area whose annual income was more than $200,000. \ No newline at end of file diff --git a/docs/img/avatar_do.gif b/docs/img/avatar_do.gif new file mode 100644 index 0000000..586a34d Binary files /dev/null and b/docs/img/avatar_do.gif differ diff --git a/docs/img/local_male_pop.jpg b/docs/img/local_male_pop.jpg new file mode 100644 index 0000000..8bdc0ed Binary files /dev/null and b/docs/img/local_male_pop.jpg differ diff --git a/docs/img/obs_getboundary.jpg b/docs/img/obs_getboundary.jpg new file mode 100644 index 0000000..dc0182d Binary files /dev/null and b/docs/img/obs_getboundary.jpg differ diff --git a/docs/img/visualize_obs_data.jpg b/docs/img/visualize_obs_data.jpg new file mode 100644 index 0000000..fe4c1af Binary files /dev/null and b/docs/img/visualize_obs_data.jpg differ diff --git a/docs/reference/01-introduction.md b/docs/reference/01-introduction.md new file mode 100644 index 0000000..f807234 --- /dev/null +++ b/docs/reference/01-introduction.md @@ -0,0 +1,5 @@ +## Introduction + +The Data Observatory, available for Enterprise accounts, provides access to a catalog of analyzed data methods, and enables you to apply the results to your own datasets. + +The contents described in this document are subject to CARTO's [Terms of Service](https://carto.com/legal/) diff --git a/docs/reference/02-authentication.md b/docs/reference/02-authentication.md new file mode 100644 index 0000000..ceb5716 --- /dev/null +++ b/docs/reference/02-authentication.md @@ -0,0 +1,9 @@ +## Authentication + +Data Observatory, like any other [CARTO platform's component]({{site.fundamental_docs}}/components/), requires using an API Key. 
From your CARTO dashboard, click _[Your API keys](https://carto.com/login)_ from the avatar drop-down menu to view your uniquely generated API Key for managing data with CARTO Engine. + +![Your API Keys](../img/avatar_do.gif) + +Learn more about the [basics of authorization]({{site.fundamental_docs}}/authorization/), or dig into the details of [Auth API]({{site.authapi_docs}}/), if you want to know more about this part of CARTO platform. + +The examples in this documentation may include a placeholder for the API Key. Ensure that you modify any placeholder parameters with your own credentials. diff --git a/docs/reference/03-versioning.md b/docs/reference/03-versioning.md new file mode 100644 index 0000000..f4fb1dd --- /dev/null +++ b/docs/reference/03-versioning.md @@ -0,0 +1,3 @@ +## Versioning + +Data Observatory uses [Semantic Versioning](http://semver.org/). View our GitHub repository to find tags for each [release](https://github.com/CartoDB/observatory-extension/releases). diff --git a/docs/reference/04-measures-functions.md b/docs/reference/04-measures-functions.md new file mode 100644 index 0000000..1694003 --- /dev/null +++ b/docs/reference/04-measures-functions.md @@ -0,0 +1,539 @@ +## Measures Functions + +[Data Observatory Measures]({{site.dataobservatory_docs}}/guides/overview/#methods-overview) are the numerical location data you can access. The measure functions allow you to access individual measures to augment your own data or integrate in your analysis workflows. Measures are used by sending an identifier or a geometry (point or polygon) and receiving back a measure (an absolute value) for that location. + +There are hundreds of measures and the list is growing with each release. You can currently discover and learn about measures contained in the Data Observatory by downloading our [Data Catalog](https://cartodb.github.io/bigmetadata/index.html). 
+ +You can [access]({{site.dataobservatory_docs}}/guides/overview/accessing-the-data-observatory/) measures through CARTO Builder. The same methods will work if you are using the CARTO Engine to develop your application. We [encourage you]({{site.dataobservatory_docs}}/guides/overview/accessing-the-data-observatory/) to use table modifying methods (UPDATE and INSERT) over dynamic methods (SELECT). + +### OBS_GetUSCensusMeasure(point geometry, measure_name text) + +The ```OBS_GetUSCensusMeasure(point, measure_name)``` function returns a measure based on a subset of the US Census variables at a point location. The ```OBS_GetUSCensusMeasure``` function is limited to only a subset of all measures that are available in the Data Observatory. To access the full list, use measure IDs with the ```OBS_GetMeasure``` function below. + +#### Arguments + +Name |Description +--- | --- +point | a WGS84 point geometry (the_geom) +measure_name | a human-readable name of a US Census variable. The list of measure_names is [available in the Glossary](https://carto.com/docs/carto-engine/data/glossary/#obsgetuscensusmeasure-names-table). +normalize | for measures that are **sums** (e.g. population) the default normalization is 'area' and response comes back as a rate per square kilometer. Other options are 'denominator', which will use the denominator specified in the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html) (optional) +boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract') +time_span | time span of interest (e.g., 2010 - 2014) + +#### Returns + +A NUMERIC value + +Key | Description +--- | --- +value | the raw or normalized measure + +#### Example + +Add a measure to an empty numeric column based on point locations in your table. 
+ +```sql +UPDATE tablename +SET total_population = OBS_GetUSCensusMeasure(the_geom, 'Total Population') +``` + +### OBS_GetUSCensusMeasure(polygon geometry, measure_name text) + +The ```OBS_GetUSCensusMeasure(polygon, measure_name)``` function returns a measure based on a subset of the US Census variables within a given polygon. The ```OBS_GetUSCensusMeasure``` function is limited to only a subset of all measures that are available in the Data Observatory. To access the full list, use the ```OBS_GetMeasure``` function below. + +#### Arguments + +Name |Description +--- | --- +polygon | a WGS84 polygon geometry (the_geom) +measure_name | a human readable string name of a US Census variable. The list of measure_names is [available in the Glossary](https://carto.com/docs/carto-engine/data/glossary/#obsgetuscensusmeasure-names-table). +normalize | for measures that are **sums** (e.g. population) the default normalization is 'none' and response comes back as a raw value. Other options are 'denominator', which will use the denominator specified in the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html) (optional) +boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract') +time_span | time span of interest (e.g., 2010 - 2014) + +#### Returns + +A NUMERIC value + +Key | Description +--- | --- +value | the raw or normalized measure + +#### Example + +Add a measure to an empty numeric column based on polygons in your table + +```sql +UPDATE tablename +SET local_male_population = OBS_GetUSCensusMeasure(the_geom, 'Male Population') +``` + +### OBS_GetMeasure(point geometry, measure_id text) + +The ```OBS_GetMeasure(point, measure_id)``` function returns any Data Observatory measure at a point location. You can browse all available Measures in the [Catalog](https://cartodb.github.io/bigmetadata/index.html). 
+ +#### Arguments + +Name |Description +--- | --- +point | a WGS84 point geometry (the_geom) +measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf)). It is important to note that these are different than 'measure_name' used in the Census based functions above. +normalize | for measures that are **sums** (e.g. population) the default normalization is 'area' and response comes back as a rate per square kilometer. The other option is 'denominator', which will use the denominator specified in the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html). (optional) +boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract') +time_span | time span of interest (e.g., 2010 - 2014) + +#### Returns + +A NUMERIC value + +Key | Description +--- | --- +value | the raw or normalized measure + +#### Example + +Add a measure to an empty numeric column based on point locations in your table + +```sql +UPDATE tablename +SET median_home_value_sqft = OBS_GetMeasure(the_geom, 'us.zillow.AllHomes_MedianValuePerSqft') +``` + +### OBS_GetMeasure(polygon geometry, measure_id text) + +The ```OBS_GetMeasure(polygon, measure_id)``` function returns any Data Observatory measure calculated within a polygon. + +#### Arguments + +Name |Description +--- | --- +polygon_geometry | a WGS84 polygon geometry (the_geom) +measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf)) +normalize | for measures that are **sums** (e.g. population) the default normalization is 'none' and response comes back as a raw value. 
Other options are 'denominator', which will use the denominator specified in the [Data Catalog](https://cartodb.github.io/bigmetadata/index.html) (optional) +boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract') +time_span | time span of interest (e.g., 2010 - 2014) + +#### Returns + +A NUMERIC value + +Key | Description +--- | --- +value | the raw or normalized measure + +#### Example + +Add a measure to an empty column based on polygons in your table + +```sql +UPDATE tablename +SET household_count = OBS_GetMeasure(the_geom, 'us.census.acs.B11001001') +``` + +#### Errors + +* If an unrecognized normalization type is input, raises error: `'Only valid inputs for "normalize" are "area" (default) and "denominator".` + +### OBS_GetMeasureById(geom_ref text, measure_id text, boundary_id text) + +The ```OBS_GetMeasureById(geom_ref, measure_id, boundary_id)``` function returns any Data Observatory measure that corresponds to the boundary in ```boundary_id``` that has a geometry reference of ```geom_ref```. + +#### Arguments + +Name |Description +--- | --- +geom_ref | a geometry reference (e.g., a US Census geoid) +measure_id | a measure identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf)) +boundary_id | source of geometries to pull measure from (e.g., 'us.census.tiger.census_tract') +time_span (optional) | time span of interest (e.g., 2010 - 2014). If `NULL` is passed, the measure from the most recent data will be used. 
+ +#### Returns + +A NUMERIC value + +Key | Description +--- | --- +value | the raw measure associated with `geom_ref` + +#### Example + +Add a measure to an empty column based on county geoids in your table + +```sql +UPDATE tablename +SET household_count = OBS_GetMeasureById(geoid_column, 'us.census.acs.B11001001', 'us.census.tiger.county') +``` + +#### Errors + +* Returns `NULL` if there is a mismatch between the geometry reference and the boundary id such as using the geoid of a county with the boundary of block groups + +### OBS_GetCategory(point geometry, category_id text) + +The ```OBS_GetCategory(point, category_id)``` function returns any Data Observatory Category value at a point location. The Categories available are currently limited to Segmentation categories. See the Segmentation section of the [Catalog](https://cartodb.github.io/bigmetadata/index.html) for more detail. + +#### Arguments + +Name |Description +--- | --- +point | a WGS84 point geometry (the_geom) +category_id | a category identifier from the Data Observatory ([see available measures](https://cartodb.github.io/bigmetadata/observatory.pdf)). + +#### Returns + +A TEXT value + +Key | Description +--- | --- +value | a text based category found at the supplied point + +#### Example + +Add the Category to an empty text column based on point locations in your table + +```sql +UPDATE tablename +SET segmentation = OBS_GetCategory(the_geom, 'us.census.spielman_singleton_segments.X55') +``` + +### OBS_GetMeta(extent geometry, metadata json, max_timespan_rank, max_score_rank, target_geoms) + +The ```OBS_GetMeta(extent, metadata)``` function returns a completed Data +Observatory metadata JSON Object for use in ```OBS_GetData(geomvals, +metadata)``` or ```OBS_GetData(ids, metadata)```. It is not possible to pass +metadata to those functions if it is not processed by ```OBS_GetMeta(extent, +metadata)``` first.
+ +`OBS_GetMeta` makes it possible to automatically select appropriate timespans +and boundaries for the measurement you want. + +#### Arguments + +Name | Description +---- | ----------- +extent | A geometry of the extent of the input geometries +metadata | A JSON array composed of metadata input objects. Each indicates one desired measure for an output column, and optionally additional parameters about that column +num_timespan_options | How many historical time periods to include. Defaults to 1 +num_score_options | How many alternative boundary levels to include. Defaults to 1 +target_geoms | Target number of geometries. Boundaries with close to this many objects within `extent` will be ranked highest. + +The schema of the metadata input objects are as follows: + +Metadata Input Key | Description +--- | ----------- +numer_id | The identifier for the desired measurement. If left blank, but a `geom_id` is specified, the column will return a geometry instead of a measurement. +geom_id | Identifier for a desired geographic boundary level to use when calculating measures. Will be automatically assigned if undefined. If defined but `numer_id` is blank, then the column will return a geometry instead of a measurement. +normalization | The desired normalization. One of 'area', 'prenormalized', or 'denominated'. 'Area' will normalize the measure per square kilometer, 'prenormalized' will return the original value, and 'denominated' will normalize by a denominator. Ignored if this metadata object specifies a geometry. +denom_id | Identifier for a desired normalization column in case `normalization` is 'denominated'. Will be automatically assigned if necessary. Ignored if this metadata object specifies a geometry. +numer_timespan | The desired timespan for the measurement. Defaults to most recent timespan available if left unspecified. +geom_timespan | The desired timespan for the geometry. Defaults to timespan matching numer_timespan if left unspecified. 
+target_area | Instead of aiming to have `target_geoms` in the area of the geometry passed as `extent`, fill this area. Unit is square degrees WGS84. Set this to `0` if you want to use the smallest source geometry for this element of metadata, for example if you're passing in points. +target_geoms | Override global `target_geoms` for this element of metadata +max_timespan_rank | Only include timespans of this recency (for example, `1` is only the most recent timespan). No limit by default +max_score_rank | Only include boundaries of this relevance (for example, `1` is the most relevant boundary). Is `1` by default + +#### Returns + +A JSON array composed of metadata output objects. + +Key | Description +--- | ----------- +meta | A JSON array with completed metadata for the requested data, including all keys below + +The schema of the metadata output objects are as follows. You should pass this +array as-is to ```OBS_GetData```. If you modify any values the function will +fail. + +Metadata Output Key | Description +--- | ----------- +suggested_name | A suggested column name for adding this to an existing table +numer_id | Identifier for desired measurement +numer_timespan | Timespan that will be used of the desired measurement +numer_name | Human-readable name of desired measure +numer_description | Long human-readable description of the desired measure +numer_t_description | Further information about the source table +numer_type | PostgreSQL/PostGIS type of desired measure +numer_colname | Internal identifier for column name +numer_tablename | Internal identifier for table +numer_geomref_colname | Internal identifier for geomref column name +denom_id | Identifier for desired normalization +denom_timespan | Timespan that will be used of the desired normalization +denom_name | Human-readable name of desired measure's normalization +denom_description | Long human-readable description of the desired measure's normalization +denom_t_description | Further information 
about the source table +denom_type | PostgreSQL/PostGIS type of desired measure's normalization +denom_colname | Internal identifier for normalization column name +denom_tablename | Internal identifier for normalization table +denom_geomref_colname | Internal identifier for normalization geomref column name +geom_id | Identifier for desired boundary geometry +geom_timespan | Timespan that will be used of the desired boundary geometry +geom_name | Human-readable name of desired boundary geometry +geom_description | Long human-readable description of the desired boundary geometry +geom_t_description | Further information about the source table +geom_type | PostgreSQL/PostGIS type of desired boundary geometry +geom_colname | Internal identifier for boundary geometry column name +geom_tablename | Internal identifier for boundary geometry table +geom_geomref_colname | Internal identifier for boundary geometry ref column name +timespan_rank | Ranking of this measurement by time, most recent is 1, second most recent 2, etc. +score | The score of this measurement's boundary compared to the `extent` and `target_geoms` passed in. Between 0 and 100. +score_rank | The ranking of this measurement's boundary, highest ranked is 1, second is 2, etc. +numer_aggregate | The aggregate type of the numerator, either `sum`, `average`, `median`, or blank +denom_aggregate | The aggregate type of the denominator, either `sum`, `average`, `median`, or blank +normalization | The sort of normalization that will be used for this measure, either `area`, `predenominated`, or `denominated` + +#### Examples + +Obtain metadata that can augment with one additional column of US population +data, using a boundary relevant for the geometry provided and latest timespan. +Limit to only the most recent column most relevant to the extent & density of +input geometries in `tablename`. 
+ +```sql +SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001"}]', + 1, 1, + COUNT(*) +) FROM tablename +``` + +Obtain metadata that can augment with one additional column of US population +data, using census tract boundaries. + +```sql +SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.census_tract"}]', + 1, 1, + COUNT(*) +) FROM tablename +``` + +Obtain metadata that can augment with two additional columns, one for total +population and one for male population. + +```sql +SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001"}, {"numer_id": "us.census.acs.B01001002"}]', + 1, 1, + COUNT(*) +) FROM tablename +``` + +### OBS_MetadataValidation(extent geometry, geometry_type text, metadata json, target_geoms) + +The ```OBS_MetadataValidation``` function performs a validation check over the known issues using the extent, type of geometry, and metadata that is being used in the ```OBS_GetMeta``` function. + +#### Arguments + +Name | Description +---- | ----------- +extent | A geometry of the extent of the input geometries +geometry_type | The geometry type of the source data +metadata | A JSON array composed of metadata input objects. Each indicates one desired measure for an output column, and optional additional parameters about that column +target_geoms | Target number of geometries. Boundaries with close to this many objects within `extent` will be ranked highest + +The schema of the metadata input objects are as follows: + +Metadata Input Key | Description +--- | ----------- +numer_id | The identifier for the desired measurement. If left blank, a `geom_id` is specified and the column returns a geometry, instead of a measurement +geom_id | Identifier for a desired geographic boundary level used to calculate measures. If undefined, this is automatically assigned. 
If defined, `numer_id` is blank and the column returns a geometry, instead of a measurement +normalization | The desired normalization. One of 'area', 'prenormalized', or 'denominated'. 'Area' will normalize the measure per square kilometer, 'prenormalized' will return the original value, and 'denominated' will normalize by a denominator. If the metadata object specifies a geometry, this is ignored +denom_id | When `normalization` is 'denominated', this is the identifier for a desired normalization column. This is automatically assigned. If the metadata object specifies a geometry, this is ignored +numer_timespan | The desired timespan for the measurement. If left unspecified, it defaults to the most recent timespan available +geom_timespan | The desired timespan for the geometry. If left unspecified, it defaults to the timespan matching `numer_timespan` +target_area | Instead of aiming to have `target_geoms` in the area of the geometry passed as `extent`, fill this area. Unit is square degrees WGS84. Set this to `0` if you want to use the smallest source geometry for this element of metadata. For example, if you are passing in points +target_geoms | Override global `target_geoms` for this element of metadata +max_timespan_rank | Only include timespans of this recency (For example, `1` is only the most recent timespan). There is no limit by default +max_score_rank | Only include boundaries of this relevance (for example, `1` is the most relevant boundary). The default is `1` + +#### Returns + +Key | Description +--- | ----------- +valid | A boolean field that represents if the validation was successful or not +errors | A text array with all possible errors + +#### Examples + +Validate metadata with two additional columns of US census data; using a boundary relevant for the geometry provided and the latest timespan. Limited to the most recent column, and the most relevant, based on the extent and density of input geometries in `tablename`. 
+ +```sql +SELECT OBS_MetadataValidation( + ST_SetSRID(ST_Extent(the_geom), 4326), + ST_GeometryType(the_geom), + '[{"numer_id": "us.census.acs.B01003001"}, {"numer_id": "us.census.acs.B01001002"}]', + COUNT(*)::INTEGER +) FROM tablename +GROUP BY ST_GeometryType(the_geom) +``` + +### OBS_GetData(geomvals array[geomval], metadata json) + +The ```OBS_GetData(geomvals, metadata)``` function returns a measure and/or +geometry corresponding to the `metadata` JSON array for every Geometry of +the `geomval` element in the `geomvals` array. The metadata argument must be +obtained from ```OBS_GetMeta(extent, metadata)```. + +#### Arguments + +Name | Description +---- | ----------- +geomvals | An array of `geomval` elements, which are obtained by casting together a `Geometry` and a `Numeric`. This should be obtained by using `ARRAY_AGG((the_geom, cartodb_id)::geomval)` from the CARTO table one wishes to obtain data for. +metadata | A JSON array composed of metadata output objects from ```OBS_GetMeta(extent, metadata)```. The schema of the elements of the `metadata` JSON array corresponds to that of the output of ```OBS_GetMeta(extent, metadata)```, and this argument must be obtained from that function in order for the call to be valid.
+ +#### Returns + +A TABLE with the following schema, where each element of the input `geomvals` +array corresponds to one row: + +Column | Type | Description +------ | ---- | ----------- +id | Numeric | ID corresponding to the `val` component of an element of the input `geomvals` array +data | JSON | A JSON array with elements corresponding to the input `metadata` JSON array + +Each `data` object has the following keys: + +Key | Description +--- | ----------- +value | The value of the measurement or geometry for the geometry corresponding to this row and measurement corresponding to this position in the `metadata` JSON array + +To determine the appropriate cast for `value`, one can use the `numer_type` +or `geom_type` key corresponding to that value in the input `metadata` JSON +array. + +#### Examples + +Obtain population densities for every geometry in a table, keyed by cartodb_id: + +```sql +WITH meta AS ( + SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001"}]', + 1, 1, COUNT(*) +) meta FROM tablename) +SELECT id AS cartodb_id, (data->0->>'value')::Numeric AS pop_density +FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename), + (SELECT meta FROM meta)) +``` + +Update a table with a blank numeric column called `pop_density` with population +densities: + +```sql +WITH meta AS ( + SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001"}]', + 1, 1, COUNT(*) +) meta FROM tablename), +data AS ( + SELECT id AS cartodb_id, (data->0->>'value')::Numeric AS pop_density + FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename), + (SELECT meta FROM meta))) +UPDATE tablename +SET pop_density = data.pop_density +FROM data +WHERE cartodb_id = data.id +``` + +Update a table with two measurements at once, population density and household +density. 
The table should already have Numeric columns `pop_density` and +`household_density`. + +```sql +WITH meta AS ( + SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom),4326), + '[{"numer_id": "us.census.acs.B01003001"},{"numer_id": "us.census.acs.B11001001"}]', + 1, 1, COUNT(*) +) meta from tablename), +data AS ( + SELECT id, + data->0->>'value' AS pop_density, + data->1->>'value' AS household_density + FROM OBS_GetData((SELECT ARRAY_AGG((the_geom, cartodb_id)::geomval) FROM tablename), + (SELECT meta FROM meta))) +UPDATE tablename +SET pop_density = data.pop_density, + household_density = data.household_density +FROM data +WHERE cartodb_id = data.id +``` + +### OBS_GetData(ids array[text], metadata json) + +The ```OBS_GetData(ids, metadata)``` function returns a measure and/or +geometry corresponding to the `metadata` JSON array for every id of +the `ids` array. The metadata argument must be obtained from +`OBS_GetMeta(extent, metadata)`. When obtaining metadata, one must include +the `geom_id` corresponding to the boundary that the `ids` refer to. + +#### Arguments + +Name | Description +---- | ----------- +ids | An array of `TEXT` elements. This should be obtained by using `ARRAY_AGG(col_of_geom_refs)` from the CARTO table one wishes to obtain data for. +metadata | A JSON array composed of metadata output objects from ```OBS_GetMeta(extent, metadata)```. The schema of the elements of the `metadata` JSON array corresponds to that of the output of ```OBS_GetMeta(extent, metadata)```, and this argument must be obtained from that function in order for the call to be valid. + +For this function to work, the `metadata` argument must include a `geom_id` +that corresponds to the ids found in `col_of_geom_refs`.
+ +#### Returns + +A TABLE with the following schema, where each element of the input `ids` array +corresponds to one row: + +Column | Type | Description +------ | ---- | ----------- +id | Text | ID corresponding to an element of the input `ids` array +data | JSON | A JSON array with elements corresponding to the input `metadata` JSON array + +Each `data` object has the following keys: + +Key | Description +--- | ----------- +value | The value of the measurement or geometry for the geometry corresponding to this row and measurement corresponding to this position in the `metadata` JSON array + +To determine the appropriate cast for `value`, one can use the `numer_type` +or `geom_type` key corresponding to that value in the input `metadata` JSON +array. + +#### Examples + +Obtain population densities for every row of a table with FIPS code county IDs +(USA). + +```sql +WITH meta AS ( + SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]' +) meta FROM tablename) +SELECT id AS fips, (data->0->>'value')::Numeric AS pop_density +FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename), + (SELECT meta FROM meta)) +``` + +Update a table with population densities for every FIPS code county ID (USA). +This table has a blank column called `pop_density` and fips codes stored in a +column `fips`. 
+ +```sql +WITH meta AS ( + SELECT OBS_GetMeta( + ST_SetSRID(ST_Extent(the_geom), 4326), + '[{"numer_id": "us.census.acs.B01003001", "geom_id": "us.census.tiger.county"}]' +) meta FROM tablename), +data as ( + SELECT id AS fips, (data->0->>'value') AS pop_density + FROM OBS_GetData((SELECT ARRAY_AGG(fips) FROM tablename), + (SELECT meta FROM meta))) +UPDATE tablename +SET pop_density = data.pop_density +FROM data +WHERE fips = data.id +``` diff --git a/docs/reference/05-boundary-functions.md b/docs/reference/05-boundary-functions.md new file mode 100644 index 0000000..36164c1 --- /dev/null +++ b/docs/reference/05-boundary-functions.md @@ -0,0 +1,273 @@ +## Boundary Functions + +Use the following functions to retrieve [Boundary](https://carto.com/docs/carto-engine/data/overview/#boundary-data) data. Data ranges from small areas (e.g. US Census Block Groups) to large areas (e.g. Countries). You can access boundaries by point location lookup, bounding box lookup, direct ID access and several other methods described below. + +You can [access](https://carto.com/docs/carto-engine/data/accessing) boundaries through CARTO Builder. The same methods will work if you are using the CARTO Engine to develop your application. We [encourage you](https://carto.com/docs/carto-engine/data/accessing/#best-practices) to use table modifying methods (UPDATE and INSERT) over dynamic methods (SELECT). + +### OBS_GetBoundariesByGeometry(geom geometry, geometry_id text) + +The ```OBS_GetBoundariesByGeometry(geometry, geometry_id)``` method returns a set of boundary geometries that intersect a supplied geometry. This can be used to find all boundaries that are within or overlap a bounding box. You have the ability to choose whether to retrieve all boundaries that intersect your supplied bounding box or only those that fall entirely inside of your bounding box. 
+ +#### Arguments + +Name |Description +--- | --- +geom | a WGS84 geometry +geometry_id | a string identifier for a boundary geometry +timespan (optional) | year(s) to request from (`NULL` (default) gives most recent) +overlap_type (optional) | one of '[intersects](http://postgis.net/docs/manual-2.2/ST_Intersects.html)' (default), '[contains](http://postgis.net/docs/manual-2.2/ST_Contains.html)', or '[within](http://postgis.net/docs/manual-2.2/ST_Within.html)'. + +#### Returns + +A table with the following columns: + +Column Name | Description +--- | --- +the_geom | a boundary geometry (e.g., US Census tract boundaries) +geom_refs | a string identifier for the geometry (e.g., geoids of US Census tracts) + +If geometries are not found for the requested `geom`, `geometry_id`, `timespan`, or `overlap_type`, then null values are returned. + +#### Example + +Insert all Census Tracts from Lower Manhattan and nearby areas within the supplied bounding box to a table named `manhattan_census_tracts` which has columns `the_geom` (geometry) and `geom_refs` (text). + +```sql +INSERT INTO manhattan_census_tracts(the_geom, geom_refs) +SELECT * +FROM OBS_GetBoundariesByGeometry( + ST_MakeEnvelope(-74.0251922607,40.6945658517, + -73.9651107788,40.7377626342, + 4326), + 'us.census.tiger.census_tract') +``` + +#### Errors + +* If an `overlap_type` other than the valid ones listed above is entered, then an error is thrown + +### OBS_GetPointsByGeometry(polygon geometry, geometry_id text) + +The ```OBS_GetPointsByGeometry(polygon, geometry_id)``` method returns point geometries and their geographical identifiers that intersect (or are contained by) a bounding box polygon and lie on the surface of a boundary corresponding to the boundary with the same geographical identifiers (e.g., a point that is on a census tract with the same geoid). This is a useful alternative to ```OBS_GetBoundariesByGeometry``` listed above because it returns much less data for each location.
+ +#### Arguments + +Name |Description +--- | --- +polygon | a bounding box or other geometry +geometry_id | a string identifier for a boundary geometry +timespan (optional) | year(s) to request from (`NULL` (default) gives most recent) +overlap_type (optional) | one of '[intersects](http://postgis.net/docs/manual-2.2/ST_Intersects.html)' (default), '[contains](http://postgis.net/docs/manual-2.2/ST_Contains.html)', or '[within](http://postgis.net/docs/manual-2.2/ST_Within.html)'. + +#### Returns + +A table with the following columns: + +Column Name | Description +--- | --- +the_geom | a point geometry on a boundary (e.g., a point that lies on a US Census tract) +geom_refs| a string identifier for the geometry (e.g., the geoid of a US Census tract) + +If geometries are not found for the requested geometry, `geometry_id`, `timespan`, or `overlap_type`, then NULL values are returned. + +#### Example + +Insert points that lie on Census Tracts from Lower Manhattan and nearby areas within the supplied bounding box to a table named `manhattan_tract_points` which has columns `the_geom` (geometry) and `geom_refs` (text). + +```sql +INSERT INTO manhattan_tract_points (the_geom, geom_refs) +SELECT * +FROM OBS_GetPointsByGeometry( + ST_MakeEnvelope(-74.0251922607,40.6945658517, + -73.9651107788,40.7377626342, + 4326), + 'us.census.tiger.census_tract') +``` + +#### Errors + +* If a geometry other than a polygon or multipolygon (for example, a point) is passed as the first argument, an error is thrown: `Invalid geometry type (ST_Point), expecting 'ST_MultiPolygon' or 'ST_Polygon'` + +### OBS_GetBoundary(point_geometry, boundary_id) + +The ```OBS_GetBoundary(point_geometry, boundary_id)``` method returns a boundary geometry defined as overlapping the point geometry and from the desired boundary set (e.g. Census Tracts). See the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids). This is a useful method for performing aggregations of points.
+ +#### Arguments + +Name | Description +--- | --- +point_geometry | a WGS84 point geometry (the_geom) +boundary_id | a boundary identifier from the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids) +timespan (optional) | year(s) to request from (`NULL` (default) gives most recent) + +#### Returns + +A boundary geometry. If no value is found at the requested `boundary_id` or `timespan`, a null value is returned. + +Value | Description +--- | --- +geom | WKB geometry + +#### Example + +Overwrite a point geometry with a boundary geometry that contains it in your table + +```sql +UPDATE tablename +SET the_geom = OBS_GetBoundary(the_geom, 'us.census.tiger.block_group') +``` + +#### Errors + +* If a geometry other than a point is passed, an error is thrown: `Invalid geometry type (ST_Line), expecting 'ST_Point'` + +### OBS_GetBoundaryId(point_geometry, boundary_id) + +The ```OBS_GetBoundaryId(point_geometry, boundary_id)``` returns a unique geometry_id for the boundary geometry that contains a given point geometry. See the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids). The method can be combined with ```OBS_GetBoundaryById(geometry_id, boundary_id)``` to create a point aggregation workflow. + +#### Arguments + +Name |Description +--- | --- +point_geometry | a WGS84 point geometry (the_geom) +boundary_id | a boundary identifier from the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids) +timespan (optional) | year(s) to request from (`NULL` (default) gives most recent) + +#### Returns + +A TEXT boundary geometry id. If no value is found at the requested `boundary_id` or `timespan`, a null value is returned. + +Value | Description +--- | --- +geometry_id | a string identifier of a geometry in the Boundaries + +#### Example + +Write the US Census block group geoid that contains the point geometry for every row as a new column in your table.
+ +```sql +UPDATE tablename +SET geometry_id = OBS_GetBoundaryId(the_geom, 'us.census.tiger.block_group') +``` + +#### Errors + +* If a geometry other than a point is passed, an error is thrown: `Invalid geometry type (ST_Line), expecting 'ST_Point'` + +### OBS_GetBoundaryById(geometry_id, boundary_id) + +The ```OBS_GetBoundaryById(geometry_id, boundary_id)``` returns the boundary geometry for a unique geometry_id. A geometry_id can be found using the ```OBS_GetBoundaryId(point_geometry, boundary_id)``` method described above. + +#### Arguments + +Name | Description +--- | --- +geometry_id | a string identifier for a Boundary geometry +boundary_id | a boundary identifier from the [Boundary ID Glossary](https://carto.com/docs/carto-engine/data/glossary/#boundary-ids) +timespan (optional) | year(s) to request from (`NULL` (default) gives most recent) + +#### Returns + +A boundary geometry. If a geometry is not found for the requested `geometry_id`, `boundary_id`, or `timespan`, then a null value is returned. + +Key | Description +--- | --- +geom | a WGS84 polygon geometry + +#### Example + +Use a table of `geometry_id`s (e.g., geoid from the U.S. Census) to select the unique boundaries that they correspond to and insert into a table called, `overlapping_polygons`. This is a useful method for creating new choropleths of aggregate data. + +```sql +INSERT INTO overlapping_polygons (the_geom, geometry_id, point_count) +SELECT + OBS_GetBoundaryById(geometry_id, 'us.census.tiger.county') As the_geom, + geometry_id, + count(*) +FROM tablename +GROUP BY geometry_id +``` + +### OBS_GetBoundariesByPointAndRadius(point geometry, radius numeric, boundary_id text) + +The ```OBS_GetBoundariesByPointAndRadius(point, radius, boundary_id)``` method returns boundary geometries and their geographical identifiers that intersect (or are contained by) a circle centered on a point with a radius. 
+ +#### Arguments + +Name |Description +--- | --- +point | a WGS84 point geometry +radius | a radius (in meters) from the center point +geometry_id | a string identifier for a boundary geometry +timespan (optional) | year(s) to request from (`NULL` (default) gives most recent) +overlap_type (optional) | one of '[intersects](http://postgis.net/docs/manual-2.2/ST_Intersects.html)' (default), '[contains](http://postgis.net/docs/manual-2.2/ST_Contains.html)', or '[within](http://postgis.net/docs/manual-2.2/ST_Within.html)'. + +#### Returns + +A table with the following columns: + +Column Name | Description +--- | --- +the_geom | a boundary geometry (e.g., a US Census tract) +geom_refs| a string identifier for the geometry (e.g., the geoid of a US Census tract) + +If geometries are not found for the requested point and radius, `geometry_id`, `timespan`, or `overlap_type`, then null values are returned. + +#### Example + +Insert into table `denver_census_tracts` the census tract boundaries and geom_refs of census tracts which intersect within 10 miles of downtown Denver, Colorado. + +```sql +INSERT INTO denver_census_tracts(the_geom, geom_refs) +SELECT * +FROM OBS_GetBoundariesByPointAndRadius( + CDB_LatLng(39.7392, -104.9903), -- Denver, Colorado + 10000 * 1.609, -- 10 miles (10km * conversion to miles) + 'us.census.tiger.census_tract') +``` + +#### Errors + +* If a geometry other than a point is passed, an error is thrown. E.g., `Invalid geometry type (ST_Line), expecting 'ST_Point'` + +### OBS_GetPointsByPointAndRadius(point geometry, radius numeric, boundary_id text) + +The ```OBS_GetPointsByPointAndRadius(point, radius, boundary_id)``` method returns point geometries on boundaries (e.g., a point that lies on a Census tract) and their geographical identifiers that intersect (or are contained by) a circle centered on a point with a radius. 
+ +#### Arguments + +Name |Description +--- | --- +point | a WGS84 point geometry +radius | radius (in meters) +geometry_id | a string identifier for a boundary geometry +timespan (optional) | year(s) to request from (`NULL` (default) gives most recent) +overlap_type (optional) | one of '[intersects](http://postgis.net/docs/manual-2.2/ST_Intersects.html)' (default), '[contains](http://postgis.net/docs/manual-2.2/ST_Contains.html)', or '[within](http://postgis.net/docs/manual-2.2/ST_Within.html)'. + +#### Returns + +A table with the following columns: + +Column Name | Description +--- | --- +the_geom | a point geometry (e.g., a point on a US Census tract) +geom_refs | a string identifier for the geometry (e.g., the geoid of a US Census tract) + +If geometries are not found for the requested point and radius, `geometry_id`, `timespan`, or `overlap_type`, then null values are returned. + +#### Example + +Insert into table `denver_tract_points` points on US census tracts and their corresponding geoids for census tracts which intersect within 10 miles of downtown Denver, Colorado, USA. + +```sql +INSERT INTO denver_tract_points(the_geom, geom_refs) +SELECT * +FROM OBS_GetPointsByPointAndRadius( + CDB_LatLng(39.7392, -104.9903), -- Denver, Colorado + 10000 * 1.609, -- 10 miles (10km * conversion to miles) + 'us.census.tiger.census_tract') +``` + +#### Errors + +* If a geometry other than a point is passed, an error is thrown. 
E.g., `Invalid geometry type (ST_Line), expecting 'ST_Point'` diff --git a/docs/reference/06-discovery-functions.md b/docs/reference/06-discovery-functions.md new file mode 100644 index 0000000..0e22b80 --- /dev/null +++ b/docs/reference/06-discovery-functions.md @@ -0,0 +1,365 @@ +## Discovery Functions + +If you are using the [discovery methods]({{ site.dataobservatory_docs}}/guides/overview/#discovery-methods) from the Data Observatory, use the following functions to retrieve [boundary]({{ site.dataobservatory_docs}}/guides/overview/#boundary-data) and [measures]({{ site.dataobservatory_docs}}/guides/overview/#measures-data) data. + +### OBS_Search(search_term) + +Use arbitrary text to search all available measures + +#### Arguments + +Name | Description +--- | --- +search_term | a string to search for available measures +boundary_id | a string identifier for a boundary geometry (optional) + +#### Returns + +A TABLE containing the following properties + +Key | Description +--- | --- +id | the unique id of the measure for use with the ```OBS_GetMeasure``` function +name | the human readable name of the measure +description | a brief description of the measure +aggregate | **sum** are raw count values, **median** are statistical medians, **average** are statistical averages, **undefined** other (e.g. an index value) +source | where the data came from (e.g. US Census Bureau) + +#### Example + +```sql +SELECT * FROM OBS_Search('home value') +``` + +### OBS_GetAvailableBoundaries(point_geometry) + +Returns available `boundary_id`s at a given point geometry. + +#### Arguments + +Name | Description +--- | --- +point_geometry | a WGS84 point geometry (e.g. 
the_geom) + +#### Returns + +A TABLE containing the following properties + +Key | Description +--- | --- +boundary_id | a boundary identifier from the [Boundary ID Glossary]({{ site.dataobservatory_docs}}/guides/glossary/#boundary-ids) +description | a brief description of the boundary dataset +time_span | the timespan attached to the boundary. This does not mean that the boundary is invalid outside of the timespan, but is the explicit timespan published with the geometry. + +#### Example + +```sql +SELECT * FROM OBS_GetAvailableBoundaries(CDB_LatLng(40.7, -73.9)) +``` + +### OBS_GetAvailableNumerators(bounds, filter_tags, denom_id, geom_id, timespan) + +Return available numerators within a boundary and with the specified +`filter_tags`. + +#### Arguments + +Name | Type | Description +--- | --- | --- +bounds | Geometry(Geometry, 4326) | a geometry which some of the numerator's data must intersect with +filter_tags | Text[] | a list of filters. Only numerators for which all of these apply are returned. `NULL` to ignore (optional) +denom_id | Text | the ID of a denominator to check whether the numerator is valid against. Will not reduce length of returned table, but will change values for `valid_denom` (optional) +geom_id | Text | the ID of a geometry to check whether the numerator is valid against. Will not reduce length of returned table, but will change values for `valid_geom` (optional) +timespan | Text | the ID of a timespan to check whether the numerator is valid against. Will not reduce length of returned table, but will change values for `valid_timespan` (optional) + +#### Returns + +A TABLE containing the following properties + +Key | Type | Description +--- | ---- | ----------- +numer_id | Text | The ID of the numerator +numer_name | Text | A human readable name for the numerator +numer_description | Text | Description of the numerator. Is sometimes NULL +numer_weight | Numeric | Numeric "weight" of the numerator. Ignored. 
+numer_license | Text | ID of the license for the numerator +numer_source | Text | ID of the source for the numerator +numer_type | Text | Postgres type of the numerator +numer_aggregate | Text | Aggregate type of the numerator. If `'SUM'`, this can be normalized by area +numer_extra | JSONB | Extra information about the numerator column. Ignored. +numer_tags | Text[] | Array of all tags applying to this numerator +valid_denom | Boolean | True if the `denom_id` argument is a valid denominator for this numerator, False otherwise +valid_geom | Boolean | True if the `geom_id` argument is a valid geometry for this numerator, False otherwise +valid_timespan | Boolean | True if the `timespan` argument is a valid timespan for this numerator, False otherwise + +#### Examples + +Obtain all numerators that are available within a small rectangle. + +```sql +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326)) +``` + +Obtain all numerators that are available within a small rectangle and are for +the United States only. + +```sql +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}'); +``` + +Obtain all numerators that are available within a small rectangle and are +employment related for the United States only. + +```sql +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment}'); +``` + +Obtain all numerators that are available within a small rectangle and are +related to both employment and age & gender for the United States only. + +```sql +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states, subsection/tags.employment, subsection/tags.age_gender}'); +``` + +Obtain all numerators that work with US population (`us.census.acs.B01003001`) +as a denominator. 
+ +```sql +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001') +WHERE valid_denom IS True; +``` + +Obtain all numerators that work with US states (`us.census.tiger.state`) +as a geometry. + +```sql +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state') +WHERE valid_geom IS True; +``` + +Obtain all numerators available in the timespan `2011 - 2015`. + +```sql +SELECT * FROM OBS_GetAvailableNumerators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015') +WHERE valid_timespan IS True; +``` + +### OBS_GetAvailableDenominators(bounds, filter_tags, numer_id, geom_id, timespan) + +Return available denominators within a boundary and with the specified +`filter_tags`. + +#### Arguments + +Name | Type | Description +--- | --- | --- +bounds | Geometry(Geometry, 4326) | a geometry which some of the denominator's data must intersect with +filter_tags | Text[] | a list of filters. Only denominators for which all of these apply are returned `NULL` to ignore (optional) +numer_id | Text | the ID of a numerator to check whether the denominator is valid against. Will not reduce length of returned table, but will change values for `valid_numer` (optional) +geom_id | Text | the ID of a geometry to check whether the denominator is valid against. Will not reduce length of returned table, but will change values for `valid_geom` (optional) +timespan | Text | the ID of a timespan to check whether the denominator is valid against. Will not reduce length of returned table, but will change values for `valid_timespan` (optional) + +#### Returns + +A TABLE containing the following properties + +Key | Type | Description +--- | ---- | ----------- +denom_id | Text | The ID of the denominator +denom_name | Text | A human readable name for the denominator +denom_description | Text | Description of the denominator. 
Is sometimes NULL +denom_weight | Numeric | Numeric "weight" of the denominator. Ignored. +denom_license | Text | ID of the license for the denominator +denom_source | Text | ID of the source for the denominator +denom_type | Text | Postgres type of the denominator +denom_aggregate | Text | Aggregate type of the denominator. If `'SUM'`, this can be normalized by area +denom_extra | JSONB | Extra information about the denominator column. Ignored. +denom_tags | Text[] | Array of all tags applying to this denominator +valid_numer | Boolean | True if the `numer_id` argument is a valid numerator for this denominator, False otherwise +valid_geom | Boolean | True if the `geom_id` argument is a valid geometry for this denominator, False otherwise +valid_timespan | Boolean | True if the `timespan` argument is a valid timespan for this denominator, False otherwise + +#### Examples + +Obtain all denominators that are available within a small rectangle. + +```sql +SELECT * FROM OBS_GetAvailableDenominators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326)); +``` + +Obtain all denominators that are available within a small rectangle and are for +the United States only. + +```sql +SELECT * FROM OBS_GetAvailableDenominators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}'); +``` + +Obtain all denominators for male population (`us.census.acs.B01001002`). + +```sql +SELECT * FROM OBS_GetAvailableDenominators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01001002') +WHERE valid_numer IS True; +``` + +Obtain all denominators that work with US states (`us.census.tiger.state`) +as a geometry. + +```sql +SELECT * FROM OBS_GetAvailableDenominators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, 'us.census.tiger.state') +WHERE valid_geom IS True; +``` + +Obtain all denominators available in the timespan `2011 - 2015`. 
+ +```sql +SELECT * FROM OBS_GetAvailableDenominators( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2011 - 2015') +WHERE valid_timespan IS True; +``` + +### OBS_GetAvailableGeometries(bounds, filter_tags, numer_id, denom_id, timespan, number_geometries) + +Return available geometries within a boundary and with the specified +`filter_tags`. + +#### Arguments + +Name | Type | Description +--- | --- | --- +bounds | Geometry(Geometry, 4326) | a geometry which must intersect the geometry +filter_tags | Text[] | a list of filters. Only geometries for which all of these apply are returned `NULL` to ignore (optional) +numer_id | Text | the ID of a numerator to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_numer` (optional) +denom_id | Text | the ID of a denominator to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_denom` (optional) +timespan | Text | the ID of a timespan to check whether the geometry is valid against. Will not reduce length of returned table, but will change values for `valid_timespan` (optional) +number_geometries | Integer | an additional variable that is used to adjust the calculation of the [score]({{ site.dataobservatory_docs}}/guides/discovery-functions/#returns-4) (optional) + +#### Returns + +A TABLE containing the following properties + +Key | Type | Description +--- | ---- | ----------- +geom_id | Text | The ID of the geometry +geom_name | Text | A human readable name for the geometry +geom_description | Text | Description of the geometry. Is sometimes NULL +geom_weight | Numeric | Numeric "weight" of the geometry. Ignored. +geom_aggregate | Text | Aggregate type of the geometry. Ignored. 
+geom_license | Text | ID of the license for the geometry +geom_source | Text | ID of the source for the geometry +geom_type | Text | Postgres type of the geometry +geom_extra | JSONB | Extra information about the geometry column. Ignored. +geom_tags | Text[] | Array of all tags applying to this geometry +valid_numer | Boolean | True if the `numer_id` argument is a valid numerator for this geometry, False otherwise +valid_denom | Boolean | True if the `denom_id` argument is a valid denominator for this geometry, False otherwise +valid_timespan | Boolean | True if the `timespan` argument is a valid timespan for this geometry, False otherwise +score | Numeric | Score between 0 and 100 for this geometry, higher numbers mean that this geometry is a better choice for the passed extent +numtiles | Numeric | How many raster tiles were read for score, numgeoms, and percentfill estimates +numgeoms | Numeric | About how many of these geometries fit inside the passed extent +percentfill | Numeric | About what percentage of the passed extent is filled with these geometries +estnumgeoms | Numeric | Ignored +meanmediansize | Numeric | Ignored + +#### Examples + +Obtain all geometries that are available within a small rectangle. + +```sql +SELECT * FROM OBS_GetAvailableGeometries( + ST_MakeEnvelope(-74, 41, -73, 40, 4326)); +``` + +Obtain all geometries that are available within a small rectangle and are for +the United States only. + +```sql +SELECT * FROM OBS_GetAvailableGeometries( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), '{section/tags.united_states}'); +``` + +Obtain all geometries that work with total population (`us.census.acs.B01003001`). + +```sql +SELECT * FROM OBS_GetAvailableGeometries( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001') +WHERE valid_numer IS True; +``` + +Obtain all geometries with timespan `2015`. 
+ +```sql +SELECT * FROM OBS_GetAvailableGeometries( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, '2015') +WHERE valid_timespan IS True; +``` + +### OBS_GetAvailableTimespans(bounds, filter_tags, numer_id, denom_id, geom_id) + +Return available timespans within a boundary and with the specified +`filter_tags`. + +#### Arguments + +Name | Type | Description +--- | --- | --- +bounds | Geometry(Geometry, 4326) | a geometry which some of the timespan's data must intersect with +filter_tags | Text[] | a list of filters. Ignored +numer_id | Text | the ID of a numerator to check whether the timespan is valid against. Will not reduce length of returned table, but will change values for `valid_numer` (optional) +denom_id | Text | the ID of a denominator to check whether the timespan is valid against. Will not reduce length of returned table, but will change values for `valid_denom` (optional) +geom_id | Text | the ID of a geometry to check whether the timespan is valid against. Will not reduce length of returned table, but will change values for `valid_geom` (optional) + +#### Returns + +A TABLE containing the following properties + +Key | Type | Description +--- | ---- | ----------- +timespan_id | Text | The ID of the timespan +timespan_name | Text | A human readable name for the timespan +timespan_description | Text | Ignored +timespan_weight | Numeric | Ignored +timespan_aggregate | Text | Ignored +timespan_license | Text | Ignored +timespan_source | Text | Ignored +timespan_type | Text | Ignored +timespan_extra | JSONB | Ignored +timespan_tags | JSONB | Ignored +valid_numer | Boolean | True if the `numer_id` argument is a valid numerator for this timespan, False otherwise +valid_denom | Boolean | True if the `denom_id` argument is a valid denominator for this timespan, False otherwise +valid_geom | Boolean | True if the `geom_id` argument is a valid geometry for this timespan, False otherwise + +#### Examples + +Obtain all timespans that are available 
within a small rectangle. + +```sql +SELECT * FROM OBS_GetAvailableTimespans( + ST_MakeEnvelope(-74, 41, -73, 40, 4326)); +``` + +Obtain all timespans for total population (`us.census.acs.B01003001`). + +```sql +SELECT * FROM OBS_GetAvailableTimespans( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, 'us.census.acs.B01003001') +WHERE valid_numer IS True; +``` + +Obtain all timespans that work with US states (`us.census.tiger.state`) +as a geometry. + +```sql +SELECT * FROM OBS_GetAvailableTimespans( + ST_MakeEnvelope(-74, 41, -73, 40, 4326), NULL, NULL, NULL, 'us.census.tiger.state') +WHERE valid_geom IS True; +``` diff --git a/docs/support/01-support-options.md b/docs/support/01-support-options.md new file mode 100644 index 0000000..682cfe4 --- /dev/null +++ b/docs/support/01-support-options.md @@ -0,0 +1,35 @@ +## Support Options + +Feeling stuck? There are many ways to find help. + +* Ask a question on [GIS StackExchange](https://gis.stackexchange.com/questions/tagged/carto) using the `CARTO` tag. +* [Report an issue](https://github.com/CartoDB/cartodb.js/issues) in Github. +* Engine Plan customers have additional access to enterprise-level support through CARTO's support representatives. + +If you just want to describe an issue or share an idea, just send your feedback. + +### Issues on Github + +If you think you may have found a bug, or if you have a feature request that you would like to share with the CARTO.js team, please [open an issue](https://github.com/cartodb/cartodb.js/issues/new). + +Before opening an issue, review the [contributing guidelines](https://github.com/CartoDB/cartodb.js/blob/develop/CONTRIBUTING.md#filling-a-ticket). + + +### Community support on GIS Stack Exchange + +GIS Stack Exchange is the most popular community in the geospatial industry. This is a collaboratively-edited question and answer site for geospatial programmers and technicians. 
It is a fantastic resource for asking technical questions about developing and maintaining your application. + +When posting a new question, please consider the following: + +* Read the GIS Stack Exchange [help](https://gis.stackexchange.com/help) and [how to ask](https://gis.stackexchange.com/help/how-to-ask) pages for guidelines and tips about posting questions. +* Be very clear about your question in the subject. A clear explanation helps those trying to answer your question, as well as those who may be looking for information in the future. +* Be informative in your post. Details, code snippets, logs, screenshots, etc. help others to understand your problem. +* Use code that demonstrates the problem. It is very hard to debug errors without sample code to reproduce the problem. + +### Engine Plan Customers + +Engine Plan customers have additional support options beyond general community support. As per your account Terms of Service, you have access to enterprise-level support through CARTO's support representatives available at [enterprise-support@carto.com](mailto:enterprise-support@carto.com). + +In order to speed up the resolution of your issue, provide as much information as possible (even if it is a link from community support). This allows our engineers to investigate your problem as soon as possible. + +If you are not yet a CARTO customer, browse our [plans & pricing](https://carto.com/pricing/) and find the right plan for you. \ No newline at end of file diff --git a/docs/support/02-contribute.md b/docs/support/02-contribute.md new file mode 100644 index 0000000..3a01392 --- /dev/null +++ b/docs/support/02-contribute.md @@ -0,0 +1,36 @@ +## Contribute + +CARTO platform is an open-source ecosystem. You can read about the [fundamentals]({{site.fundamental_docs}}/components/) of CARTO architecture and its components. +We are more than happy to receive your contributions to the code and the documentation as well. 
+ +## Filling a ticket + +If you want to open a new issue in our repository, please follow these instructions: + +1. Descriptive title. +2. Write a good description, it always helps. +3. Specify the steps to reproduce the problem. +4. Try to add an example showing the problem. + +## Contributing code + +The best part of open source: collaborate on the Data Observatory code! We like hearing from you, so if you have fixed a bug or have a new feature ready to be merged, these are the steps you should follow: + +1. Fork the repository. +2. Create a new branch in your forked repository. +3. Commit your changes. Add new tests if it is necessary. +4. Open a pull request. +5. Any of the maintainers will take a look. +6. If everything works, it will be merged and released \o/. + +If you want more detailed information, this [GitHub guide](https://guides.github.com/activities/contributing-to-open-source/) is a must. + +## Completing documentation + +Data Observatory documentation is located in ```docs/```. That folder is the content that appears in the [Developer Center](http://carto.com/developers/data-observatory/). Just follow the instructions described in [contributing code](#contributing-code) and after accepting your pull request, we will make it appear online :). + +**Tip:** A convenient, easy way of proposing changes in documentation is by using the GitHub editor directly on the web. You can easily create a branch with your changes and make a PR from there. + +## Submitting contributions + +You will need to sign a Contributor License Agreement (CLA) before making a submission. [Learn more here](https://carto.com/contributions). diff --git a/docs/support/03-license.md b/docs/support/03-license.md new file mode 100644 index 0000000..1038fd8 --- /dev/null +++ b/docs/support/03-license.md @@ -0,0 +1,32 @@ +## License + +The Data Observatory is a collection of data sources with varying licenses and terms of use. 
We have endeavored to find you data that will work for the broadest set of use-cases. The following third-party data sources are used in the Data Observatory, and we have included the links to the terms governing their use. + +_**Legal Note**: The Data Observatory makes use of a variety of third party data and databases (collectively, the “Data”). You acknowledge that the included Data, and the licenses and terms of use, may be amended from time to time. Whenever you use the Data, you agree to the current relevant terms or license. Some Data will require that you provide attribution to the data source. Other Data may be protected by US or international copyright laws, treaties, or conventions. The Data and associated metadata are provided 'as-is', without express or implied warranty of any kind, including, but not limited to, infringement, merchantability and fitness for a particular purpose. CartoDB is not responsible for the accuracy, completeness, timeliness or quality of the Data._ + +Name | Terms link +-------|--------- +ACS | [https://www.usa.gov/government-works](https://www.usa.gov/government-works) +Australian Bureau of Statistics DataPacks | [https://creativecommons.org/licenses/by/2.5/au/](https://creativecommons.org/licenses/by/2.5/au/) +Bureau of Labor Statistics Quarterly Census of Employment and Wages (QCEW) | [https://www.usa.gov/government-works](https://www.usa.gov/government-works) +Censo Demográfico of the Instituto Brasileiro de Geografia e Estatística (IBGE) | Statistics are provided by the federal Institute of Applied Economic Research (IPEA), many of which are reproduced from another source. Some series are regularly updated, others are not. Licensing information is similar to CC-BY, allowing copying and reuse, but requiring attribution.

[http://www.ipeadata.gov.br/iframe_direitouso.aspx](http://www.ipeadata.gov.br/iframe_direitouso.aspx?width=1009&height=767) +Consumer Data Research Centre | [http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/](http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/) +El Instituto Nacional de Estadística (INE) | The National Statistics Institute (INE) of Spain includes data from multiple sources. If you are re-using their data, they explicitly require that you reference them accordingly

[http://www.ine.es/ss/Satellite?L=0&c=Page&cid=1254735849170&p=1254735849170&pagename=Ayuda%2FINELayout](http://www.ine.es/ss/Satellite?L=0&c=Page&cid=1254735849170&p=1254735849170&pagename=Ayuda%2FINELayout) +EuroGeographics EuroGlobalMap | [http://www.eurogeographics.org/content/eurogeographics-euroglobalmap-opendata](http://www.eurogeographics.org/content/eurogeographics-euroglobalmap-opendata)

This product includes Intellectual Property from European National Mapping and Cadastral Authorities and is licensed on behalf of these by EuroGeographics. Original product is available for free at [www.eurogeographics.org](http://www.eurogeographics.org/). Terms of the license available at [http://www.eurogeographics.org/form/topographic-data-eurogeographics](http://www.eurogeographics.org/form/topographic-data-eurogeographics) +GeoNames | [http://www.geonames.org/](http://www.geonames.org/) +GeoPlanet | [https://developer.yahoo.com/geo/geoplanet/](https://developer.yahoo.com/geo/geoplanet/) +Instituto Nacional de Estadística y Geografía | The National Statistics and Geography Institute (INEGI) of Mexico requires credit be given to INEGI as an author

[http://www.inegi.org.mx/terminos/terminos_info.aspx](http://www.inegi.org.mx/terminos/terminos_info.aspx) +National Center for Geographic Information (CNIG) | [https://www.cnig.es/propiedadIntelectual.do](https://www.cnig.es/propiedadIntelectual.do) +National Institute of Statistics and Economic Studies (INSEE) | [http://www.insee.fr/en/service/default.asp?page=rediffusion/copyright.htm](http://www.insee.fr/en/service/default.asp?page=rediffusion/copyright.htm) +Natural Earth | [http://www.naturalearthdata.com/about/terms-of-use/](http://www.naturalearthdata.com/about/terms-of-use/) +Northern Ireland Statistics and Research Agency | [https://www.nisra.gov.uk/statistics/terms-and-conditions](https://www.nisra.gov.uk/statistics/terms-and-conditions) +Office for National Statistics (ONS) | [http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/](http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/) +Quattroshapes | [https://github.com/foursquare/quattroshapes/blob/master/LICENSE.md](https://github.com/foursquare/quattroshapes/blob/master/LICENSE.md) +Scotland's Census Data Warehouse by National Records of Scotland | [https://www.nrscotland.gov.uk/copyright-and-disclaimer](https://www.nrscotland.gov.uk/copyright-and-disclaimer) +Spielman & Singleton | [https://www.openicpsr.org/openicpsr/project/100235/version/V5/view](https://www.openicpsr.org/openicpsr/project/100235/version/V5/view) +Statistics Canada Census of Population 2011 | [http://www.statcan.gc.ca/eng/reference/licence](http://www.statcan.gc.ca/eng/reference/licence) +Statistics Canada National Household Survey 2011 | [http://www.statcan.gc.ca/eng/reference/licence](http://www.statcan.gc.ca/eng/reference/licence) +TIGER | [https://www.usa.gov/government-works](https://www.usa.gov/government-works) +Who's on First | [http://whosonfirst.mapzen.com#License](http://whosonfirst.mapzen.com#License) +Zetashapes | [http://zetashapes.com/license](http://zetashapes.com/license) 
+Zillow Home Value Index | This data is "Aggregate Data", per the Zillow Terms of Use

[http://www.zillow.com/corp/Terms.htm](http://www.zillow.com/corp/Terms.htm) diff --git a/src/python/test/autotest.py b/src/python/test/autotest.py index b779488..e138400 100644 --- a/src/python/test/autotest.py +++ b/src/python/test/autotest.py @@ -1,7 +1,7 @@ from nose.tools import assert_equal, assert_is_not_none from nose_parameterized import parameterized -from itertools import izip_longest +from itertools import zip_longest from util import query from collections import OrderedDict import json @@ -11,78 +11,78 @@ def grouper(iterable, n, fillvalue=None): "Collect data into fixed-length chunks or blocks" # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx args = [iter(iterable)] * n - return izip_longest(fillvalue=fillvalue, *args) + return zip_longest(fillvalue=fillvalue, *args) USE_SCHEMA = True SKIP_COLUMNS = set([ - u'mx.inegi_columns.INDI18', - u'mx.inegi_columns.ECO40', - u'mx.inegi_columns.POB34', - u'mx.inegi_columns.POB63', - u'mx.inegi_columns.INDI7', - u'mx.inegi_columns.EDU28', - u'mx.inegi_columns.SCONY10', - u'mx.inegi_columns.EDU31', - u'mx.inegi_columns.POB7', - u'mx.inegi_columns.VIV30', - u'mx.inegi_columns.INDI12', - u'mx.inegi_columns.EDU13', - u'mx.inegi_columns.ECO43', - u'mx.inegi_columns.VIV9', - u'mx.inegi_columns.HOGAR25', - u'mx.inegi_columns.POB32', - u'mx.inegi_columns.ECO7', - u'mx.inegi_columns.INDI19', - u'mx.inegi_columns.INDI16', - u'mx.inegi_columns.POB65', - u'mx.inegi_columns.INDI3', - u'mx.inegi_columns.INDI9', - u'mx.inegi_columns.POB36', - u'mx.inegi_columns.POB33', - u'mx.inegi_columns.POB58', - u'mx.inegi_columns.DISC4', - u'mx.inegi_columns.VIV41', - u'mx.inegi_columns.VIV40', - u'mx.inegi_columns.VIV17', - u'mx.inegi_columns.VIV25', - u'mx.inegi_columns.EDU10', - u'whosonfirst.wof_disputed_name', - u'us.census.tiger.fullname', - u'whosonfirst.wof_marinearea_name', - u'us.census.tiger.mtfcc', - u'whosonfirst.wof_county_name', - u'whosonfirst.wof_region_name', - u'fr.insee.P12_RP_CHOS', - u'fr.insee.P12_RP_HABFOR', - 
u'fr.insee.P12_RP_EAUCH', - u'fr.insee.P12_RP_BDWC', - u'fr.insee.P12_RP_MIDUR', - u'fr.insee.P12_RP_CLIM', - u'fr.insee.P12_RP_MIBOIS', - u'fr.insee.P12_RP_CASE', - u'fr.insee.P12_RP_TTEGOU', - u'fr.insee.P12_RP_ELEC', - u'fr.insee.P12_ACTOCC15P_ILT45D', - u'fr.insee.P12_RP_CHOS', - u'fr.insee.P12_RP_HABFOR', - u'fr.insee.P12_RP_EAUCH', - u'fr.insee.P12_RP_BDWC', - u'fr.insee.P12_RP_MIDUR', - u'fr.insee.P12_RP_CLIM', - u'fr.insee.P12_RP_MIBOIS', - u'fr.insee.P12_RP_CASE', - u'fr.insee.P12_RP_TTEGOU', - u'fr.insee.P12_RP_ELEC', - u'fr.insee.P12_ACTOCC15P_ILT45D', - u'uk.ons.LC3202WA0007', - u'uk.ons.LC3202WA0010', - u'uk.ons.LC3202WA0004', - u'uk.ons.LC3204WA0004', - u'uk.ons.LC3204WA0007', - u'uk.ons.LC3204WA0010', - u'br.geo.subdistritos_name' + 'mx.inegi_columns.INDI18', + 'mx.inegi_columns.ECO40', + 'mx.inegi_columns.POB34', + 'mx.inegi_columns.POB63', + 'mx.inegi_columns.INDI7', + 'mx.inegi_columns.EDU28', + 'mx.inegi_columns.SCONY10', + 'mx.inegi_columns.EDU31', + 'mx.inegi_columns.POB7', + 'mx.inegi_columns.VIV30', + 'mx.inegi_columns.INDI12', + 'mx.inegi_columns.EDU13', + 'mx.inegi_columns.ECO43', + 'mx.inegi_columns.VIV9', + 'mx.inegi_columns.HOGAR25', + 'mx.inegi_columns.POB32', + 'mx.inegi_columns.ECO7', + 'mx.inegi_columns.INDI19', + 'mx.inegi_columns.INDI16', + 'mx.inegi_columns.POB65', + 'mx.inegi_columns.INDI3', + 'mx.inegi_columns.INDI9', + 'mx.inegi_columns.POB36', + 'mx.inegi_columns.POB33', + 'mx.inegi_columns.POB58', + 'mx.inegi_columns.DISC4', + 'mx.inegi_columns.VIV41', + 'mx.inegi_columns.VIV40', + 'mx.inegi_columns.VIV17', + 'mx.inegi_columns.VIV25', + 'mx.inegi_columns.EDU10', + 'whosonfirst.wof_disputed_name', + 'us.census.tiger.fullname', + 'whosonfirst.wof_marinearea_name', + 'us.census.tiger.mtfcc', + 'whosonfirst.wof_county_name', + 'whosonfirst.wof_region_name', + 'fr.insee.P12_RP_CHOS', + 'fr.insee.P12_RP_HABFOR', + 'fr.insee.P12_RP_EAUCH', + 'fr.insee.P12_RP_BDWC', + 'fr.insee.P12_RP_MIDUR', + 'fr.insee.P12_RP_CLIM', + 
'fr.insee.P12_RP_MIBOIS', + 'fr.insee.P12_RP_CASE', + 'fr.insee.P12_RP_TTEGOU', + 'fr.insee.P12_RP_ELEC', + 'fr.insee.P12_ACTOCC15P_ILT45D', + 'fr.insee.P12_RP_CHOS', + 'fr.insee.P12_RP_HABFOR', + 'fr.insee.P12_RP_EAUCH', + 'fr.insee.P12_RP_BDWC', + 'fr.insee.P12_RP_MIDUR', + 'fr.insee.P12_RP_CLIM', + 'fr.insee.P12_RP_MIBOIS', + 'fr.insee.P12_RP_CASE', + 'fr.insee.P12_RP_TTEGOU', + 'fr.insee.P12_RP_ELEC', + 'fr.insee.P12_ACTOCC15P_ILT45D', + 'uk.ons.LC3202WA0007', + 'uk.ons.LC3202WA0010', + 'uk.ons.LC3202WA0004', + 'uk.ons.LC3204WA0004', + 'uk.ons.LC3204WA0007', + 'uk.ons.LC3204WA0010', + 'br.geo.subdistritos_name' ]) MEASURE_COLUMNS = query(''' @@ -192,7 +192,7 @@ def grouped_measure_columns(filtered_columns): else: groupbypoint[point] = [numer_ids] - for point, numer_ids in groupbypoint.iteritems(): + for point, numer_ids in groupbypoint.items(): for colgroup in grouper(numer_ids, 50): yield point, [c for c in colgroup if c] @@ -215,7 +215,7 @@ def _test_measures(numer_ids, geom): 'normalization': 'predenominated' }) - params = query(u''' + params = query(''' SELECT {schema}OBS_GetMeta({geom}, '{in_params}') '''.format(schema='cdb_observatory.' if USE_SCHEMA else '', geom=geom, @@ -223,15 +223,15 @@ def _test_measures(numer_ids, geom): # We can get duplicate IDs from multi-denominators, so for now we # compress those measures into a single - params = OrderedDict([(p['id'], p) for p in params]).values() + params = list(OrderedDict([(p['id'], p) for p in params]).values()) assert_equal(len(params), len(in_params), 'Inconsistent out and in params for {}'.format(in_params)) - q = u''' + q = ''' SELECT * FROM {schema}OBS_GetData(ARRAY[({geom}, 1)::geomval], '{params}') '''.format(schema='cdb_observatory.' if USE_SCHEMA else '', geom=geom, - params=json.dumps(params).replace(u"'", "''")) + params=json.dumps(params).replace("'", "''")) resp = query(q).fetchone() assert_is_not_none(resp, 'NULL returned for {}'.format(in_params)) rawvals = resp[1]