a4a:Convenções/Dados/SQL: mudanças entre as edições
(Criou página com 'Tabelas de todos os projetos') |
m (→Ver também) |
||
(11 revisões intermediárias pelo mesmo usuário não estão sendo mostradas) | |||
Linha 1: | Linha 1: | ||
Tabelas de todos os | Tabelas da base corrente <code>DL05s_main</code>. Documentação gerada por recursos do "psql" e [https://tableconvert.com/markdown-to-mediawiki tableconvert.com/markdown-to-mediawiki] (ou [https://www.vertopal.com/en/convert/markdown-to-wiki-mediawiki vertopal.com/en/convert/markdown-to-wiki-mediawiki]). | ||
Ver também [[Usuário:Peter/Modelos endereço]] | |||
== Table <code>optim.jurisdiction</code> == | |||
Jurisdições nacionais e locais, sem geometrias (ver tabela complementar _geom). | |||
{| class="wikitable" | |||
! Column !! Type !! Nullable !! Description | |||
|- | |||
| osm_id || bigint || not null || Relation identifier in OpenStreetMap. | |||
|- | |||
| jurisd_base_id || integer || not null || ISO3166-1-numeric COUNTRY ID (e.g. Brazil is 76) or negative for non-iso (ex. oceans). | |||
|- | |||
| jurisd_local_id || integer || not null || Numeric official ID like IBGE_ID of BR jurisdiction. For example ACRE is 12 and its cities are {1200013, 1200054,etc}. | |||
|- | |||
| parent_id || bigint || || osm_id of top admin_level. | |||
|- | |||
| admin_level || smallint || not null || OSM convention for admin_level tag in country. | |||
|- | |||
| name || text || not null || Name of jurisdiction | |||
|- | |||
| parent_abbrev || text || not null || Abbreviation of parent name. | |||
|- | |||
| abbrev || text || || Name abbreviation. | |||
|- | |||
| wikidata_id || bigint || || wikidata identifier without Q prefix. | |||
|- | |||
| lexlabel || text || not null || Cache from name; e.g. sao.paulo. | |||
|- | |||
| isolabel_ext || text || not null || Cache from parent_abbrev (ISO) and name (camel case); e.g. BR-SP-SaoPaulo. | |||
|- | |||
| ddd || integer || || Direct distance dialing. | |||
|- | |||
| housenumber_system_type || text || || Housenumber system. | |||
|- | |||
| lex_urn || text || || Housenumber system law. | |||
|- | |||
| info || jsonb || || Others information. | |||
|- | |||
| name_en || text || || City name in english. | |||
|- | |||
| isolevel || integer || || 1=country, 2=state, 3=mun | |||
|- | |||
| ne_country_id || integer || || NaturalEarthData country gid. | |||
|- | |||
| int_country_id || integer || || One byte identifier, adopted for internal use. Shorter than ISO and NaturalEarthData. | |||
|} | |||
<small><pre> | |||
Indexes: | |||
"jurisdiction_pkey" PRIMARY KEY, btree (osm_id) | |||
"jurisdiction_isolabel_ext_idx1" btree (isolabel_ext) | |||
"jurisdiction_isolabel_ext_key" UNIQUE CONSTRAINT, btree (isolabel_ext) | |||
"jurisdiction_jurisd_base_id_jurisd_local_id_key" UNIQUE CONSTRAINT, btree (jurisd_base_id, jurisd_local_id) | |||
"jurisdiction_jurisd_base_id_parent_abbrev_abbrev_key" UNIQUE CONSTRAINT, btree (jurisd_base_id, parent_abbrev, abbrev) | |||
"jurisdiction_jurisd_base_id_parent_abbrev_lexlabel_key" UNIQUE CONSTRAINT, btree (jurisd_base_id, parent_abbrev, lexlabel) | |||
"jurisdiction_jurisd_base_id_parent_abbrev_name_key" UNIQUE CONSTRAINT, btree (jurisd_base_id, parent_abbrev, name) | |||
"jurisdiction_wikidata_id_key" UNIQUE CONSTRAINT, btree (wikidata_id) | |||
Check constraints: | |||
"jurisdiction_admin_level_check" CHECK (admin_level > 0 AND admin_level < 100) | |||
"jurisdiction_name_check" CHECK (length(name) < 60) | |||
Foreign-key constraints: | |||
"jurisdiction_parent_id_fkey" FOREIGN KEY (parent_id) REFERENCES optim.jurisdiction(osm_id) | |||
Referenced by: | |||
TABLE "optim.donor" CONSTRAINT "donor_scope_osm_id_fkey" FOREIGN KEY (scope_osm_id) REFERENCES optim.jurisdiction(osm_id) | |||
TABLE "optim.jurisdiction" CONSTRAINT "jurisdiction_parent_id_fkey" FOREIGN KEY (parent_id) REFERENCES optim.jurisdiction(osm_id) | |||
Access method: heap | |||
</pre></small> | |||
=== Queries === | |||
Queries que geram reports e sugestão de análise das views próprias. | |||
<syntaxhighlight lang="sql"> | |||
-- R0a | |||
select '* '||vw_name from optim.admin_views_dependency_summary | |||
where ref_tab_name='optim.jurisdiction' order by 1 | |||
; | |||
-- R0b | |||
select schema_name, name, arguments_simplified , return_type | |||
from doc_UDF_using_a_table('optim.jurisdiction') t order by 1,2,3 | |||
; | |||
-- R1a. Intervalos de CEP das cidades do Brasil | |||
create view optim.vw_jurisdiction76_postal_ranges AS | |||
select isolabel_ext, jurisd_local_id, osm_id, admin_level, name, | |||
('{'|| translate(info->>'postalCode_ranges', '[] ','{},') ||'}')::text[] as postal_ranges, | |||
('{'|| translate(regexp_replace(info->>'postalCode_ranges','\-\d+','','g'), '[] ','{},') ||'}')::int[] as cep5, | |||
(info->'area_km2')::int area_km2 | |||
from optim.jurisdiction | |||
where jurisd_base_id=76 | |||
order by 1 | |||
; | |||
-- R1b | |||
select * from optim.vw_jurisdiction76_postal_ranges where cardinality(postal_ranges)>2; | |||
-- apenas BR-SP-SaoPaulo, BR-DF-Brasilia e BR-RJ-NovaIguacu | |||
-- R1c | |||
select *, round(area_km2::float/cep5s::float,1) as area_per_cep | |||
from ( | |||
select isolabel_ext, jurisd_local_id, osm_id, admin_level, name, cardinality(postal_ranges) as num_ceps, cep5, | |||
1 + (cep5[1][2] - cep5[1][1]) + case when cardinality(cep5)>2 then (cep5[2][2] - cep5[2][1]) else 0 end as cep5s, | |||
area_km2 | |||
from optim.vw_jurisdiction76_postal_ranges | |||
) t order by 10 desc, 1 | |||
; -- MEDIANA 71 km2, select PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY area_per_cep) FROM (...) t2. | |||
-- R2 | |||
SELECT substring(isolabel_ext,1,2) as pais, isolevel, lengtbit_MSB(jurisd_local_id) as bits, | |||
count(*) n | |||
FROM optim.jurisdiction where jurisd_local_id>0 | |||
GROUP by 1,2,3 order by 1, 4 desc | |||
; -- Chile (CL) cabe em 27 bits, Brasil (BR) não cabe. Poucos casos valem adaptar exceções. | |||
-- R3 | |||
SELECT '['||isolabel_ext||'](https://afa.codes/'||isolabel_ext||')' as AFAcode_map, | |||
name, | |||
jurisd_local_id as local_id, | |||
'['||osm_id||'](https://osm.org/relation/'||osm_id::text||')' as osm_id, | |||
'['||wikidata_id::text||'](https://wikidata.org/entity/Q'||wikidata_id::text||')' as wikidata_id | |||
FROM optim.jurisdiction | |||
ORDER BY jurisd_local_id, isolabel_ext; | |||
</syntaxhighlight> | |||
=== Reports === | |||
'''R0a'''. Views que fazem uso da tabela: | |||
* api.jurisdiction: reproduz a tabela na API. | |||
* api.stats_donated_packcomponent: fornece estatísticas de pacotes doados, organizada por jurisdição. | |||
* optim.jurisdiction_lexlabel: reformata isolabel_ext como URN LEX. | |||
* optim.vw01donorevidencecmd: | |||
* optim.vw01full_donated_packtpl: pacotes doados em join completo. | |||
* optim.vw01full_jurisdiction_geom: jurisdição completa, com geometria. | |||
* optim.vw01int_country_id: (lixo) lista o ID interno de país. | |||
* optim.vw02generate_list: gera JSON com árvore de países, com optim.vw01generate_list. | |||
* optim.vw02publication: metadados para ato de publicação de um pacote. | |||
* optim.vwjurisdiction_synonym: lista de-para de todos os sinônimos válidos de uma jurisdição (CEP, abreviação, URN LEX etc.). | |||
* osmc.jurisdictions_select: JSON para interface de seletor de jurisdição. | |||
* public.qwerty: (resgatar para optim) gerador de comando bash "curl" para criar CSVs de todas as jurisdições. | |||
'''R0a-packs''': select isolabel_ext, legalname as donor_name,shortname, housenumber_system_type as hn_sys, pack_number,license_data->'family' as lic_fam, jsonb_object_keys_asarray(make_conf_tpl->'layers') as layers from optim.vw01full_donated_packtpl where shortname!='OSM' order by packtpl_id | |||
'''R0b'''. Funções que fazem uso da tabela: | |||
{| class="wikitable" | |||
! schema_name !! name !! arguments_simplified !! return_type | |||
|- | |||
| api || jurisdiction_autocomplete || {text} || jsonb | |||
|- | |||
| api || osmcode_decode_postal || {text,text} || jsonb | |||
|- | |||
| api || osmcode_encode || {text,integer} || jsonb | |||
|- | |||
| api || osmcode_encode_sci || {text,integer} || jsonb | |||
|- | |||
| optim || generate_synonym_csv || {text,text} || text | |||
|- | |||
| optim || generate_synonym_ref_csv || {text} || text | |||
|- | |||
| optim || insert_donor_pack || {text} || text | |||
|- | |||
| optim || insert_jurisdpoint || {NULL} || text | |||
|- | |||
| osmc || encode_short_code || {text,"bit varying",text,USER-DEFINED,text,integer,text} || record | |||
|- | |||
| osmc || l0cover_upsert_co || {NULL} || text | |||
|} | |||
'''R1c'''. Avalia área por cep, para inferir quantos dígitos mais o CEP5 precisaria para chegar a 1m2 com AFAcode | |||
{| class="wikitable" | |||
! isolabel_ext !! ibge_id !! name !! cep5 !! cep5s !! area_km2 !! area_per_cep | |||
|- | |||
| BR-PA-SaoFelixXingu || 1507300 || São Félix do Xingu || {68380,68382} || 3 || 84193 || 28064.3 | |||
|- | |||
| BR-PA-NovoProgresso || 1505031 || Novo Progresso || {68193,68194} || 2 || 38166 || 19083.0 | |||
|- | |||
| BR-PA-Jacareacanga || 1503754 || Jacareacanga || {68195,68197} || 3 || 53312 || 17770.7 | |||
|- | |||
| BR-AM-Tapaua || 1304104 || Tapauá || {69480,69484} || 5 || 85499 || 17099.8 | |||
|- | |||
| BR-RR-Rorainopolis || 1400472 || Rorainópolis || {69373,69374} || 2 || 33548 || 16774.0 | |||
|- | |||
| BR-PA-Altamira || 1500602 || Altamira || {68370,68379} || 10 || 159525 || 15952.5 | |||
|- | |||
| BR-RR-Amajari || 1400027 || Amajari || {69343,69344} || 2 || 28401 || 14200.5 | |||
|- | |||
| BR-AM-Japura || 1302108 || Japurá || {69495,69499} || 5 || 55776 || 11155.2 | |||
|- | |||
| BR-PA-Oriximina || 1505304 || Oriximiná || {68270,68279} || 10 || 107680 || 10768.0 | |||
|- | |||
| BR-MT-Colniza || 5103254 || Colniza || {78335,78337} || 3 || 27929 || 9309.7 | |||
|- | |||
| BR-MT-Querencia || 5107065 || Querência || {78643,78644} || 2 || 17808 || 8904.0 | |||
|- | |||
| ... || || || || || || | |||
|- | |||
| BR-PR-Anahy || 4101051 || Anahy || {85425,85429} || 5 || 103 || 20.6 | |||
|- | |||
| BR-PR-ParaisoNorte || 4118006 || Paraíso do Norte || {87780,87789} || 10 || 206 || 20.6 | |||
|- | |||
| BR-RS-NovaBrescia || 4313003 || Nova Bréscia || {95950,95954} || 5 || 103 || 20.6 | |||
|- | |||
| BR-RS-NovaPadua || 4313086 || Nova Pádua || {95275,95279} || 5 || 103 || 20.6 | |||
|- | |||
| BR-SP-Moncoes || 3531001 || Monções || {15275,15279} || 5 || 103 || 20.6 | |||
|- | |||
| ... || || || || || || | |||
|- | |||
| BR-PA-Marituba || 1504422 || Marituba || {67200,67999} || 800 || 102 || 0.13 | |||
|- | |||
| BR-PE-Olinda || 2609600 || Olinda || {53000,53399} || 400 || 41 || 0.10 | |||
|- | |||
| BR-PE-Recife || 2611606 || Recife || {50000,52999} || 3000 || 219 || 0.07 | |||
|} | |||
'''R2'''. Número de bits necessários para representar jurisd_local_id. Critério para avaliar se cria artificial ou reusa no novo id. Amostrando apenas Brasil (IBGE). Reparar que somando 8 bits do país não caberia em 27. No caso do Chile caberia (15+8<27). Em função desta análise decidimos criar o ID com ROW | |||
{| class="wikitable" | |||
! país !! isolevel !! bits !! n_count | |||
|- | |||
| BR || 3 || 22 || 3861 | |||
|- | |||
| BR || 3 || 23 || 1259 | |||
|- | |||
| BR || 3 || 21 || 450 | |||
|- | |||
| BR || 2 || 5 || 12 | |||
|- | |||
| BR || 2 || 6 || 10 | |||
|- | |||
| BR || 2 || 4 || 5 | |||
|- | |||
| BR || 1 || 0 || 1 | |||
|- | |||
| ... | |||
|- | |||
| CL || 3 || 15 || 164 | |||
|- | |||
| CL || 3 || 14 || 82 | |||
|- | |||
| CL || 3 || 13 || 41 | |||
|- | |||
| ... | |||
|- | |||
| CL || 2 || 1 || 1 | |||
|- | |||
| CL || 1 || 0 || 1 | |||
|} | |||
<span id="optim.jurisdiction.R3"></span>'''R3'''. Listagem simples Markdown para relatórios internos e conferir se IDs batem com links. | |||
{| class="wikitable" | |||
!width="38%"| afacode_map | |||
!width="14%"| name | |||
!width="4%"| local_id | |||
!width="19%"| osm_id | |||
!width="22%"| wikidata_id | |||
|- | |||
| [https://afa.codes/BR BR] | |||
| Brasil | |||
| 0 | |||
| [https://osm.org/relation/59470 59470] | |||
| [https://wikidata.org/entity/Q155 155] | |||
|- | |||
| [https://afa.codes/BR-RO BR-RO] | |||
| Rondônia | |||
| 11 | |||
| [https://osm.org/relation/325866 325866] | |||
| [https://wikidata.org/entity/Q43235 43235] | |||
|- | |||
| [https://afa.codes/BR-RO-AltaFlorestaOeste BR-RO-AltaFlorestaOeste] | |||
| Alta Floresta D’Oeste | |||
| 1100015 | |||
| [https://osm.org/relation/325842 325842] | |||
| [https://wikidata.org/entity/Q1761456 1761456] | |||
|- | |||
| [https://afa.codes/BR-RO-Ariquemes BR-RO-Ariquemes] | |||
| Ariquemes | |||
| 1100023 | |||
| [https://osm.org/relation/325888 325888] | |||
| [https://wikidata.org/entity/Q1646975 1646975] | |||
|- | |||
| [https://afa.codes/BR-RO-Cabixi BR-RO-Cabixi] | |||
| Cabixi | |||
| 1100031 | |||
| [https://osm.org/relation/325882 325882] | |||
| [https://wikidata.org/entity/Q940433 940433] | |||
|- | |||
| [https://afa.codes/BR-RO-Cacoal BR-RO-Cacoal] | |||
| Cacoal | |||
| 1100049 | |||
| [https://osm.org/relation/325872 325872] | |||
| [https://wikidata.org/entity/Q1761637 1761637] | |||
|- | |||
|...||...||...||...||... | |||
|- | |||
| [https://afa.codes/BR-AC BR-AC] | |||
| Acre | |||
| 12 | |||
| [https://osm.org/relation/326266 326266] | |||
| [https://wikidata.org/entity/Q40780 40780] | |||
|- | |||
| [https://afa.codes/BR-AC-Acrelandia BR-AC-Acrelandia] | |||
| Acrelândia | |||
| 1200013 | |||
| [https://osm.org/relation/326273 326273] | |||
| [https://wikidata.org/entity/Q953086 953086] | |||
|- | |||
| [https://afa.codes/BR-AC-AssisBrasil BR-AC-AssisBrasil] | |||
| Assis Brasil | |||
| 1200054 | |||
| [https://osm.org/relation/326269 326269] | |||
| [https://wikidata.org/entity/Q1754403 1754403] | |||
|- | |||
| [https://afa.codes/BR-AC-Brasileia BR-AC-Brasileia] | |||
| Brasiléia | |||
| 1200104 | |||
| [https://osm.org/relation/326270 326270] | |||
| [https://wikidata.org/entity/Q899406 899406] | |||
|- | |||
| [https://afa.codes/BR-AC-Bujari BR-AC-Bujari] | |||
| Bujari | |||
| 1200138 | |||
| [https://osm.org/relation/326259 326259] | |||
| [https://wikidata.org/entity/Q1754408 1754408] | |||
|- | |||
| [https://afa.codes/BR-AC-Capixaba BR-AC-Capixaba] | |||
| Capixaba | |||
| 1200179 | |||
| [https://osm.org/relation/326268 326268] | |||
| [https://wikidata.org/entity/Q1800737 1800737] | |||
|- | |||
| [https://afa.codes/BR-AC-CruzeiroSul BR-AC-CruzeiroSul] | |||
| Cruzeiro do Sul | |||
| 1200203 | |||
| [https://osm.org/relation/326274 326274] | |||
| [https://wikidata.org/entity/Q941136 941136] | |||
|} | |||
== Table <code>optim.jurisdiction_geom</code> == | |||
Geometrias de [[#optim.jurisdiction]]. Geometrias das jurisdições nacionais e locais. | |||
{| class="wikitable" | |||
! Column !! Type !! Nullable !! Description | |||
|- | |||
| osm_id || bigint || not null || Relation identifier in OpenStreetMap. | |||
|- | |||
| isolabel_ext || text || not null || ISO 3166-1 alpha-2 code and name (camel case); e.g. BR-SP-SaoPaulo. | |||
|- | |||
| geom || geometry(Geometry,4326) || || Geometry for osm_id identifier | |||
|- | |||
| geom_svg || geometry(Geometry,4326) || || Simplified geometry version to use in svg interface. | |||
|- | |||
| kx_ghs1_intersects || text[] || || | |||
|- | |||
| kx_ghs2_intersects || text[] || || | |||
|} | |||
<small><pre> | |||
Indexes: | |||
"jurisdiction_geom_pkey" PRIMARY KEY, btree (osm_id) | |||
"jurisdiction_geom_isolabel_ext_key" UNIQUE CONSTRAINT, btree (isolabel_ext) | |||
"optim_jurisdiction_geom_idx1" gist (geom) | |||
"optim_jurisdiction_geom_isolabel_ext_idx1" btree (isolabel_ext) | |||
"optim_jurisdiction_geom_svg_idx1" gist (geom_svg) | |||
Access method: heap | |||
</pre></small> | |||
== Table <code>osmc.coverage</code> == | |||
Coberturas nacionais e de suas jurisdições. As geometrias nacionais são expressas em LatLong, as locais conforme sua projeção oficial. | |||
Ingestão: arquivos "coverage.csv" do git. Função ou make para construção?? <br/> Exemplos: [https://git.osm.codes/BR_new/blob/main/data/coverage.csv BR_new/data/coverage.csv], [https://git.osm.codes/CO_new/blob/main/data/coverage.csv CO_new/data/coverage.csv]. As colunas "status" e "cover" são copiadas em "status" e "kx_prefix". | |||
{| class="wikitable" | |||
! Column !! Type !! Default !! Description | |||
|- | |||
| cbits || bit varying || || Coverage cell (hierarchical) identifier. | |||
|- | |||
| isolabel_ext || text || || Jurisdiction identifier. ISO 3166-2 code extended for subjurisdiction name (camel case); e.g. BR-SP-SaoPaulo. | |||
|- | |||
| cindex || text || || First digit of the coverage cell, a prefix in base 32nvu. Used only case is_country=false. | |||
|- | |||
| bbox || integer[] || || Coverage cell BBOX. | |||
|- | |||
| status || smallint || 0 || Coverage status. Convention: 0: generated, 1: revised, 2: homologated. | |||
|- | |||
| is_country || boolean || false || True if it is a cell of national coverage. | |||
|- | |||
| is_contained || boolean || false || True if it is a cell contained in the jurisdiction. Must validate. | |||
|- | |||
| is_overlay || boolean || false || True if it is an overlay cell. | |||
|- | |||
| kx_prefix || text || || Coverage cell prefix in base 32nvu. | |||
|- | |||
| geom || geometry || || Coverage cell geometry on country's projection (local SRID). | |||
|- | |||
| geom_srid4326 || geometry || || Coverage cell geometry on WGS84 LatLong (4326 srid). Used only case is_country=true. | |||
|} | |||
<small><pre> | |||
Indexes: | |||
"osm_coverage_cbits10true_idx" btree ((cbits::bit(10))) WHERE is_country IS TRUE | |||
"osm_coverage_cbits15false_idx" btree ((cbits::bit(14)), isolabel_ext) WHERE is_country IS FALSE | |||
"osm_coverage_geom4326_idx1" gist (geom_srid4326) | |||
"osm_coverage_geom_idx1" gist (geom) | |||
"osm_coverage_isolabel_ext_false_idx" btree (isolabel_ext) WHERE is_country IS FALSE | |||
"osm_coverage_isolabel_ext_idx1" btree (isolabel_ext) | |||
"osm_coverage_isolabel_ext_true_idx" btree (isolabel_ext) WHERE is_country IS TRUE | |||
Check constraints: | |||
"coverage_status_check" CHECK (status = ANY (ARRAY[0, 1, 2])) | |||
</pre></small> | |||
=== Queries === | |||
Queries que geram reports e sugestão de análise das views próprias. | |||
<syntaxhighlight lang="sql"> | |||
-- R0a | |||
select * from optim.admin_views_dependency_summary where ref_tab_name='osmc.coverage' | |||
; | |||
-- R0b | |||
select schema_name, name, arguments_simplified , return_type | |||
from doc_UDF_using_a_table('osmc.cover') t order by 1,2,3 | |||
; | |||
-- R1 | |||
select distinct | |||
substring(isolabel_ext,1,2) as country, | |||
natcod.vbit_to_baseh(substring(cbits,1,8),16,true) as cbits_b16h, | |||
is_country, status, | |||
count(*) n, count(distinct isolabel_ext) n_jurisds | |||
from osmc.coverage | |||
group by 1,2,3,4 | |||
order by 1,2,3 desc,4; | |||
-- R2a | |||
select substring(isolabel_ext,1,2) as country, is_country, is_overlay, | |||
count(*) n, count(distinct isolabel_ext) n_jurisds, count(distinct cbits) n_cells, | |||
min(l) l_min, max(l) l_max, round(avg(l),2) l_avg, | |||
PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY l) l_mdn | |||
from (select *, round(length(cbits)/4)::int as l from osmc.coverage) t | |||
group by 1,2,3 order by 1,2 desc,3; | |||
-- R2b | |||
select substring(isolabel_ext,1,2) as country, is_country, is_overlay, | |||
count(*) n, count(distinct isolabel_ext) n_jurisds, | |||
count(distinct kx_prefix) n_cells, count(distinct kx_prefix||'.'||cindex::text) n_cells_idx, | |||
count(distinct kx_prefix||'.'||isolabel_ext) n_cells_jur, | |||
min(l) l_min, max(l) l_max, round(avg(l),2) l_avg, | |||
PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY l) l_mdn | |||
from (select *, length(kx_prefix) as l from osmc.coverage) t | |||
group by 1,2,3 order by 1,2 desc,3; | |||
-- R3 | |||
select isolabel_ext, is_overlay, | |||
array_agg( natcod.vbit_to_baseh(substring(cbits,11),16,true) order by cbits) cover | |||
from osmc.coverage | |||
where isolabel_ext like 'CO-ARA-%' | |||
group by 1,2 order by 1,2; | |||
-- R4. Testar contra qwerty2, que tem comjunto consistente. Aqui apenas CA e CO apresentam parte consistente. | |||
select substr(isolabel_ext,1,2) as iso, len, round(avg(a),1) area_avg, max(a) area_max, count(*) n | |||
from ( | |||
select isolabel_ext, length(cbits) len, round(st_area(geom)/10000.0) a, is_contained, is_overlay | |||
from osmc.coverage where is_contained order by 2 desc,1 | |||
) t group by 1,2 order by 1,2 | |||
; | |||
</syntaxhighlight> | |||
=== Reports === | |||
'''R0a'''. Views que fazem uso desta tabela: | |||
* osmc.jurisdictions_select | |||
* osmc.tmpvw10: temporária | |||
* osmc.tmpvwpoeira: indica "sobras" dos polígonos que não foram aceitas como cobertura válida, por serem menores que 100 metros quadrados. | |||
'''R0b'''. Funções que fazem uso desta tabela. (removidas as variantes "EC" e "UY") | |||
{| class="wikitable" | |||
! schema_name !! name !! arguments_simplified !! return_type | |||
|- | |||
| api || jurisdiction_coverage || {text,integer} || jsonb | |||
|- | |||
| api || jurisdiction_geojson_from_isolabel || {text} || jsonb | |||
|- | |||
| api || osmcode_decode_postal || {text,text} || jsonb | |||
|- | |||
| api || osmcode_decode_postal_absolute || {text,text} || jsonb | |||
|- | |||
| api || osmcode_decode_scientific_absolute || {text,text,integer} || jsonb | |||
|- | |||
| api || osmcode_encode || {text,integer} || jsonb | |||
|- | |||
| osmc || afa_log_to_hbig || {text,text} || int8 | |||
|- | |||
| osmc || cellcontainsproperly || {text,text,integer,text,integer} || text | |||
|- | |||
| osmc || check_coverage || {text,ARRAY,text,ARRAY,ARRAY,ARRAY,ARRAY,boolean,ARRAY} || record | |||
|- | |||
| osmc || cover_child_geometries || {text,text,integer,text,text,USER-DEFINED} || record | |||
|- | |||
| osmc || encode_point_brazil || {USER-DEFINED} || text | |||
|- | |||
| osmc || encode_point_colombia || {USER-DEFINED} || text | |||
|- | |||
| osmc || encode_postal_br || {USER-DEFINED,"double precision",integer,text} || jsonb | |||
|- | |||
| osmc || encode_postal_cm || {USER-DEFINED,"double precision",integer,text} || jsonb | |||
|- | |||
| osmc || encode_postal_co || {USER-DEFINED,"double precision",integer,text} || jsonb | |||
|- | |||
| osmc || encode_scientific_br || {USER-DEFINED,"double precision",integer} || jsonb | |||
|- | |||
| osmc || encode_scientific_cm || {USER-DEFINED,"double precision",integer} || jsonb | |||
|- | |||
| osmc || encode_scientific_co || {USER-DEFINED,"double precision",integer} || jsonb | |||
|- | |||
| osmc || encode_short_code || {text,"bit varying",text,USER-DEFINED,text,integer,text} || record | |||
|- | |||
| osmc || generate_cover_csv || {text,text} || text | |||
|- | |||
| osmc || l0_upsert || {text,smallint,integer,integer,integer,integer,integer,ARRAY,ARRAY} || text | |||
|- | |||
| osmc || l0cover_upsert_br || {NULL} || text | |||
|- | |||
| osmc || l0cover_upsert_co || {NULL} || text | |||
|- | |||
| osmc || update_coverage_isolevel3 || {text,smallint,ARRAY,ARRAY} || text | |||
|} | |||
'''R1'''. Sumário do conteúdo corrente (final de 2023). Diferencia coberturas nacionais, expressa contagem de células e contagem de jurisdições. | |||
{| class="wikitable" | |||
! country !! cbits_b16h !! is_country !! status !! n !! n_jurisds | |||
|- | |||
| BR || 13 || t || 1 || 18 || 1 | |||
|- | |||
| BR || 13 || f || 0 || 72726 || 5570 | |||
|- | |||
| CM || 1e || t || 1 || 14 || 1 | |||
|- | |||
| CM || 1e || f || 0 || 2738 || 360 | |||
|- | |||
| CO || 2a || t || 1 || 16 || 1 | |||
|- | |||
| CO || 2a || f || 0 || 15244 || 1115 | |||
|- | |||
| CO || 2a || f || 2 || 22 || 1 | |||
|- | |||
| EC || 36 || t || 1 || 22 || 1 | |||
|- | |||
| UY || d6 || t || 1 || 19 || 1 | |||
|- | |||
| UY || d6 || f || 0 || 1 || 1 | |||
|} | |||
'''R2a'''. Tamanho mais frequente (mediana "mdn") da cobertura. Comprimento ''l'' dado em quantidade de dígitos hexadecimais. <br/>PS: ignorando EC e UY, que não estão configurados. | |||
{| class="wikitable" | |||
! country !! is_country !! is_overlay !! n !! n_jurisds !! l_min !! l_max !! l_avg !! l_mdn | |||
|- | |||
| BR || t || f || 18 || 1 || 4 || 4 || 4.00 || 4 | |||
|- | |||
| BR || f || f || 72565 || 5570 || 5 || 9 || 7.77 || 8 | |||
|- | |||
| BR || f || t || 161 || 11 || 7 || 9 || 8.11 || 8 | |||
|- | |||
| CM || t || f || 14 || 1 || 3 || 3 || 3.00 || 3 | |||
|- | |||
| CM || f || f || 2730 || 360 || 3 || 7 || 5.48 || 5 | |||
|- | |||
| CM || f || t || 8 || 1 || 7 || 7 || 7.00 || 7 | |||
|- | |||
| CO || t || f || 16 || 1 || 3 || 3 || 3.00 || 3 | |||
|- | |||
| CO || f || f || 14165 || 1116 || 4 || 8 || 5.82 || 6 | |||
|- | |||
| CO || f || t || 1101 || 1091 || 6 || 9 || 7.39 || 8 | |||
|} | |||
'''R2b'''. Mesmo que R2a porém usando "kx_prefix" direto. Linhas is_country removidas pois não usam "kx_prefix". | |||
{| class="wikitable" | |||
! country !! is_ctry !! is_ovly !! n !! n_jurisds !! n_cells !! n_cel_idx !! n_cel_jur !! l_min !! l_max !! l_avg !! l_mdn | |||
|- | |||
| BR || f || f || 72565 || 5570 || 40933 || 69557 || 72565 || 2 || 5 || 3.77 || 4 | |||
|- | |||
| BR || f || t || 161 || 11 || 161 || 161 || 161 || 3 || 5 || 4.11 || 4 | |||
|- | |||
| CM || f || f || 2730 || 360 || 1411 || 2302 || 2626 || 2 || 5 || 3.50 || 3 | |||
|- | |||
| CM || f || t || 8 || 1 || 4 || 4 || 4 || 5 || 5 || 5.00 || 5 | |||
|- | |||
| CO || f || f || 14165 || 1116 || 7552 || 13530 || 14165 || 2 || 5 || 3.82 || 4 | |||
|- | |||
| CO || f || t || 1101 || 1091 || 1100 || 1100 || 1101 || 4 || 6 || 4.70 || 5 | |||
|} | |||
'''R3'''. Para reproduzir a planilha CSV precisamos agrupar cbits e converte-los em base16h. Exemplo, municípios de "CO-ARA". As coberturas-overlay ainda não aparecem no git, e aqui apenas a célula de um ponto é utilizada. | |||
{| class="wikitable" | |||
! isolabel_ext !! is_overlay !! n !! cover | |||
|- | |||
| CO-ARA-Arauca || f || 14 || {cd4,cd5,cd6,cd7,e2a,e2e,e2f,e80,e81,e82,e83,e84,e85,e86} | |||
|- | |||
| CO-ARA-Arauca || t || 1 || {cd7fG} | |||
|- | |||
| CO-ARA-Arauquita || f || 9 || {c7e,c7f,cd0,cd1,cd2,cd3,cd4,cd5,cd6} | |||
|- | |||
| CO-ARA-Arauquita || t || 1 || {cd3cQ} | |||
|- | |||
| CO-ARA-CravoNorte || f || 13 || {e28,e2a,e2b,e2c,e2d,e2e,e2f,e38,e3a,e80,e81,e84,e85} | |||
|- | |||
| CO-ARA-CravoNorte || t || 1 || {e2b5Q} | |||
|- | |||
| CO-ARA-Fortul || f || 5 || {c6f,cc5,cd0,cd1,cd2} | |||
|- | |||
| CO-ARA-Fortul || t || 1 || {cd0bG} | |||
|- | |||
| CO-ARA-PuertoRondon || f || 9 || {c7d,c7e,c7f,cd1,cd4,cd5,e28,e2a,e80} | |||
|- | |||
| CO-ARA-PuertoRondon || t || 1 || {c7e4Q} | |||
|- | |||
| CO-ARA-Saravena || f || 4 || {cc5,cc7,cd0,cd2} | |||
|- | |||
| CO-ARA-Saravena || t || 1 || {cd22Q} | |||
|- | |||
| CO-ARA-Tame || f || 13 || {c6c,c6d,c6e,c6f,c78,c79,c7a,c7b,c7e,cc5,cd0,cd1,cd4} | |||
|- | |||
| CO-ARA-Tame || t || 1 || {c7acQ} | |||
|} | |||
== Outros esquemas e tabelas == | |||
=== download.redirects === | |||
{| class="wikitable" | |||
! Column !! Type !! Nullable !! Description | |||
|- | |||
| donor_id || int || || Donor identifier in optim.donor | |||
|- | |||
| filename_original || text || || (not used) | |||
|- | |||
| package_path || text || || (not used) | |||
|- | |||
| hashedfname || text || not null || The filenaname, based on SHA256.extension | |||
|- | |||
| hashedfnameuri || text || || The real URL for file download of the file with the expected SHA256. | |||
|} | |||
<small><pre> | |||
Indexes: | |||
"redirects_pkey" PRIMARY KEY, btree (hashedfname) | |||
-- correto seria unique substring(hashedfname,1,64) para excluir a extension da unicidade | |||
"redirects_hashedfname_hashedfnameuri_key" UNIQUE CONSTRAINT, btree (hashedfname, hashedfnameuri) | |||
Check constraints: | |||
"redirects_hashedfname_check" CHECK (hashedfname ~ '^[0-9a-f]{64,64}\.[a-z0-9]+$'::text) | |||
</pre></small> | |||
Tem relação com a API do ''endpoint'' <code>http://DL.digital-guard.org</code>. | |||
onde faz UNION com a tabela <code>optim.donated_packcomponent_cloudcontrol</code>. No servidor, localmente faz <code>curl http://127.0.0.1:3105/redirects</code>. Na prática é chamado por PHP, ver <code>/var/www/dl.digital-guard.org/dl.php</code>. | |||
<syntaxhighlight lang="sql"> | |||
-- R0. Testa como a API fornece | |||
select * from api.redirects where fhash like 'e5b%'; | |||
CREATE VIEW optim.vw_download_redirects AS | |||
select substring(hashedfname,1,7) as short_id, donor_id, hashedfname, hashedfnameuri from download.redirects | |||
; | |||
-- R1. Gera testes bash dos downloads. Demora, já são mais de 700 | |||
COPY ( | |||
SELECT 'url_'||short_id ||'="' || hashedfnameuri || '"' FROM optim.vw_download_redirects | |||
UNION ALL | |||
SELECT format( | |||
'wget --spider "$url_%s" 2>/dev/null && echo "url_%1$s exists" || echo "!url_%1$s does not exist: $url1"', | |||
short_id | |||
) | |||
FROM optim.vw_download_redirects | |||
) to '/tmp/dl_urls_test.sh'; -- sh /tmp/dl_urls_test.sh | |||
</syntaxhighlight> | |||
== Outros lembretes == | |||
<pre> | |||
CREATE VIEW optim.admin_views_dependency AS | |||
-- https://stackoverflow.com/a/69473861/287948 | |||
select | |||
u.view_schema schema_name, | |||
u.view_name, | |||
u.table_schema referenced_table_schema, | |||
u.table_name referenced_table_name, | |||
v.view_definition | |||
from information_schema.view_table_usage u | |||
join information_schema.views v on u.view_schema = v.table_schema | |||
and u.view_name = v.table_name | |||
where u.table_schema not in ('information_schema', 'pg_catalog') | |||
order by u.view_schema, u.view_name | |||
; | |||
CREATE VIEW optim.admin_views_dependency_summary AS | |||
select schema_name||'.'||view_name as vw_name, | |||
referenced_table_schema||'.'||referenced_table_name as ref_tab_name | |||
from optim.admin_views_dependency | |||
; | |||
-- DROP VIEW optim.admin_udf_summary; | |||
CREATE VIEW optim.admin_udf_summary AS | |||
SELECT n.oid, quote_ident(n.nspname) as schema , quote_ident(p.proname) as function, | |||
pg_get_function_result(n.oid) as fres | |||
FROM pg_catalog.pg_proc p | |||
JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace | |||
WHERE n.nspname not like 'pg%' | |||
; | |||
CREATE or replace FUNCTION doc_UDF_using_a_table( | |||
p_table text, | |||
p_schema_name text DEFAULT NULL, | |||
p_name_like text DEFAULT '', | |||
p_name_notlike text DEFAULT '', | |||
p_oid oid DEFAULT NULL | |||
) RETURNS TABLE ( | |||
id text, | |||
oid oid, | |||
schema_name text, | |||
name text, | |||
language text, --new | |||
definition_md5 text, -- version control | |||
arguments_simplified text[], | |||
arguments text, | |||
return_type text, | |||
prokind text, | |||
comment text | |||
) AS $f$ | |||
SELECT doc_UDF_transparent_id(u.schema_name, u.name::text, s.arguments_simplified::text[]) AS id, | |||
u.oid, u.schema_name, u.name::text, | |||
u.language, md5(u.definition), | |||
s.arguments_simplified::text[] as arguments_simplified, | |||
u.arguments::text AS arguments, | |||
u.return_type, u.prokind, u.comment | |||
FROM doc_UDF_show_simplified_signature($2,$3,$4) s | |||
INNER JOIN doc_UDF_show($2,$3,$4,$5) u ON s.oid=u.oid::text | |||
WHERE u.definition ilike '%'|| p_table ||'%' | |||
$f$ LANGUAGE SQL IMMUTABLE; | |||
</pre> | |||
------ | |||
== Ver também == | |||
* Criação ou Reconstrução de qualquer DL (teste ou produção) https://github.com/digital-guard/preserv/wiki/Datalake |
Edição atual tal como às 19h59min de 22 de abril de 2024
Tabelas da base corrente DL05s_main
. Documentação gerada por recursos do "psql" e tableconvert.com/markdown-to-mediawiki (ou vertopal.com/en/convert/markdown-to-wiki-mediawiki).
Ver também Usuário:Peter/Modelos endereço
Table optim.jurisdiction
Jurisdições nacionais e locais, sem geometrias (ver tabela complementar _geom).
Column | Type | Nullable | Description |
---|---|---|---|
osm_id | bigint | not null | Relation identifier in OpenStreetMap. |
jurisd_base_id | integer | not null | ISO3166-1-numeric COUNTRY ID (e.g. Brazil is 76) or negative for non-iso (ex. oceans). |
jurisd_local_id | integer | not null | Numeric official ID like IBGE_ID of BR jurisdiction. For example ACRE is 12 and its cities are {1200013, 1200054,etc}. |
parent_id | bigint | osm_id of top admin_level. | |
admin_level | smallint | not null | OSM convention for admin_level tag in country. |
name | text | not null | Name of jurisdiction |
parent_abbrev | text | not null | Abbreviation of parent name. |
abbrev | text | Name abbreviation. | |
wikidata_id | bigint | wikidata identifier without Q prefix. | |
lexlabel | text | not null | Cache from name; e.g. sao.paulo. |
isolabel_ext | text | not null | Cache from parent_abbrev (ISO) and name (camel case); e.g. BR-SP-SaoPaulo. |
ddd | integer | Direct distance dialing. | |
housenumber_system_type | text | Housenumber system. | |
lex_urn | text | Housenumber system law. | |
info | jsonb | Others information. | |
name_en | text | City name in english. | |
isolevel | integer | 1=country, 2=state, 3=mun | |
ne_country_id | integer | NaturalEarthData country gid. | |
int_country_id | integer | One byte identifier, adopted for internal use. Shorter than ISO and NaturalEarthData. |
Indexes: "jurisdiction_pkey" PRIMARY KEY, btree (osm_id) "jurisdiction_isolabel_ext_idx1" btree (isolabel_ext) "jurisdiction_isolabel_ext_key" UNIQUE CONSTRAINT, btree (isolabel_ext) "jurisdiction_jurisd_base_id_jurisd_local_id_key" UNIQUE CONSTRAINT, btree (jurisd_base_id, jurisd_local_id) "jurisdiction_jurisd_base_id_parent_abbrev_abbrev_key" UNIQUE CONSTRAINT, btree (jurisd_base_id, parent_abbrev, abbrev) "jurisdiction_jurisd_base_id_parent_abbrev_lexlabel_key" UNIQUE CONSTRAINT, btree (jurisd_base_id, parent_abbrev, lexlabel) "jurisdiction_jurisd_base_id_parent_abbrev_name_key" UNIQUE CONSTRAINT, btree (jurisd_base_id, parent_abbrev, name) "jurisdiction_wikidata_id_key" UNIQUE CONSTRAINT, btree (wikidata_id) Check constraints: "jurisdiction_admin_level_check" CHECK (admin_level > 0 AND admin_level < 100) "jurisdiction_name_check" CHECK (length(name) < 60) Foreign-key constraints: "jurisdiction_parent_id_fkey" FOREIGN KEY (parent_id) REFERENCES optim.jurisdiction(osm_id) Referenced by: TABLE "optim.donor" CONSTRAINT "donor_scope_osm_id_fkey" FOREIGN KEY (scope_osm_id) REFERENCES optim.jurisdiction(osm_id) TABLE "optim.jurisdiction" CONSTRAINT "jurisdiction_parent_id_fkey" FOREIGN KEY (parent_id) REFERENCES optim.jurisdiction(osm_id) Access method: heap
Queries
Queries que geram reports e sugestão de análise das views próprias.
-- R0a
select '* '||vw_name from optim.admin_views_dependency_summary
where ref_tab_name='optim.jurisdiction' order by 1
;
-- R0b
select schema_name, name, arguments_simplified , return_type
from doc_UDF_using_a_table('optim.jurisdiction') t order by 1,2,3
;
-- R1a. Intervalos de CEP das cidades do Brasil
create view optim.vw_jurisdiction76_postal_ranges AS
select isolabel_ext, jurisd_local_id, osm_id, admin_level, name,
('{'|| translate(info->>'postalCode_ranges', '[] ','{},') ||'}')::text[] as postal_ranges,
('{'|| translate(regexp_replace(info->>'postalCode_ranges','\-\d+','','g'), '[] ','{},') ||'}')::int[] as cep5,
(info->'area_km2')::int area_km2
from optim.jurisdiction
where jurisd_base_id=76
order by 1
;
-- R1b
select * from optim.vw_jurisdiction76_postal_ranges where cardinality(postal_ranges)>2;
-- apenas BR-SP-SaoPaulo, BR-DF-Brasilia e BR-RJ-NovaIguacu
-- R1c
select *, round(area_km2::float/cep5s::float,1) as area_per_cep
from (
select isolabel_ext, jurisd_local_id, osm_id, admin_level, name, cardinality(postal_ranges) as num_ceps, cep5,
1 + (cep5[1][2] - cep5[1][1]) + case when cardinality(cep5)>2 then (cep5[2][2] - cep5[2][1]) else 0 end as cep5s,
area_km2
from optim.vw_jurisdiction76_postal_ranges
) t order by 10 desc, 1
; -- MEDIANA 71 km2, select PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY area_per_cep) FROM (...) t2.
-- R2
SELECT substring(isolabel_ext,1,2) as pais, isolevel, lengtbit_MSB(jurisd_local_id) as bits,
count(*) n
FROM optim.jurisdiction where jurisd_local_id>0
GROUP by 1,2,3 order by 1, 4 desc
; -- Chile (CL) cabe em 27 bits, Brasil (BR) não cabe. Poucos casos valem adaptar exceções.
-- R3
SELECT '['||isolabel_ext||'](https://afa.codes/'||isolabel_ext||')' as AFAcode_map,
name,
jurisd_local_id as local_id,
'['||osm_id||'](https://osm.org/relation/'||osm_id::text||')' as osm_id,
'['||wikidata_id::text||'](https://wikidata.org/entity/Q'||wikidata_id::text||')' as wikidata_id
FROM optim.jurisdiction
ORDER BY jurisd_local_id, isolabel_ext;
Reports
R0a. Views que fazem uso da tabela:
- api.jurisdiction: reproduz a tabela na API.
- api.stats_donated_packcomponent: fornece estatísticas de pacotes doados, organizada por jurisdição.
- optim.jurisdiction_lexlabel: reformata isolabel_ext como URN LEX.
- optim.vw01donorevidencecmd:
- optim.vw01full_donated_packtpl: pacotes doados em join completo.
- optim.vw01full_jurisdiction_geom: jurisdição completa, com geometria.
- optim.vw01int_country_id: (lixo) lista o ID interno de país.
- optim.vw02generate_list: gera JSON com árvore de países, com optim.vw01generate_list.
- optim.vw02publication: metadados para ato de publicação de um pacote.
- optim.vwjurisdiction_synonym: lista de-para de todos os sinônimos válidos de uma jurisdição (CEP, abreviação, URN LEX etc.).
- osmc.jurisdictions_select: JSON para interface de seletor de jurisdição.
- public.qwerty: (resgatar para optim) gerador de comando bash "curl" para criar CSVs de todas as jurisdições.
R0a-packs: select isolabel_ext, legalname as donor_name,shortname, housenumber_system_type as hn_sys, pack_number,license_data->'family' as lic_fam, jsonb_object_keys_asarray(make_conf_tpl->'layers') as layers from optim.vw01full_donated_packtpl where shortname!='OSM' order by packtpl_id
R0b. Funções que fazem uso da tabela:
schema_name | name | arguments_simplified | return_type |
---|---|---|---|
api | jurisdiction_autocomplete | {text} | jsonb |
api | osmcode_decode_postal | {text,text} | jsonb |
api | osmcode_encode | {text,integer} | jsonb |
api | osmcode_encode_sci | {text,integer} | jsonb |
optim | generate_synonym_csv | {text,text} | text |
optim | generate_synonym_ref_csv | {text} | text |
optim | insert_donor_pack | {text} | text |
optim | insert_jurisdpoint | {NULL} | text |
osmc | encode_short_code | {text,"bit varying",text,USER-DEFINED,text,integer,text} | record |
osmc | l0cover_upsert_co | {NULL} | text |
R1c. Avalia área por cep, para inferir quantos dígitos mais o CEP5 precisaria para chegar a 1m2 com AFAcode
isolabel_ext | ibge_id | name | cep5 | cep5s | area_km2 | area_per_cep |
---|---|---|---|---|---|---|
BR-PA-SaoFelixXingu | 1507300 | São Félix do Xingu | {68380,68382} | 3 | 84193 | 28064.3 |
BR-PA-NovoProgresso | 1505031 | Novo Progresso | {68193,68194} | 2 | 38166 | 19083.0 |
BR-PA-Jacareacanga | 1503754 | Jacareacanga | {68195,68197} | 3 | 53312 | 17770.7 |
BR-AM-Tapaua | 1304104 | Tapauá | {69480,69484} | 5 | 85499 | 17099.8 |
BR-RR-Rorainopolis | 1400472 | Rorainópolis | {69373,69374} | 2 | 33548 | 16774.0 |
BR-PA-Altamira | 1500602 | Altamira | {68370,68379} | 10 | 159525 | 15952.5 |
BR-RR-Amajari | 1400027 | Amajari | {69343,69344} | 2 | 28401 | 14200.5 |
BR-AM-Japura | 1302108 | Japurá | {69495,69499} | 5 | 55776 | 11155.2 |
BR-PA-Oriximina | 1505304 | Oriximiná | {68270,68279} | 10 | 107680 | 10768.0 |
BR-MT-Colniza | 5103254 | Colniza | {78335,78337} | 3 | 27929 | 9309.7 |
BR-MT-Querencia | 5107065 | Querência | {78643,78644} | 2 | 17808 | 8904.0 |
... | ||||||
BR-PR-Anahy | 4101051 | Anahy | {85425,85429} | 5 | 103 | 20.6 |
BR-PR-ParaisoNorte | 4118006 | Paraíso do Norte | {87780,87789} | 10 | 206 | 20.6 |
BR-RS-NovaBrescia | 4313003 | Nova Bréscia | {95950,95954} | 5 | 103 | 20.6 |
BR-RS-NovaPadua | 4313086 | Nova Pádua | {95275,95279} | 5 | 103 | 20.6 |
BR-SP-Moncoes | 3531001 | Monções | {15275,15279} | 5 | 103 | 20.6 |
... | ||||||
BR-PA-Marituba | 1504422 | Marituba | {67200,67999} | 800 | 102 | 0.13 |
BR-PE-Olinda | 2609600 | Olinda | {53000,53399} | 400 | 41 | 0.10 |
BR-PE-Recife | 2611606 | Recife | {50000,52999} | 3000 | 219 | 0.07 |
R2. Número de bits necessários para representar jurisd_local_id. Critério para avaliar se cria artificial ou reusa no novo id. Amostrando apenas Brasil (IBGE). Reparar que somando 8 bits do país não caberia em 27. No caso do Chile caberia (15+8<27). Em função desta análise decidimos criar o ID com ROW
país | isolevel | bits | n_count |
---|---|---|---|
BR | 3 | 22 | 3861 |
BR | 3 | 23 | 1259 |
BR | 3 | 21 | 450 |
BR | 2 | 5 | 12 |
BR | 2 | 6 | 10 |
BR | 2 | 4 | 5 |
BR | 1 | 0 | 1 |
... | |||
CL | 3 | 15 | 164 |
CL | 3 | 14 | 82 |
CL | 3 | 13 | 41 |
... | |||
CL | 2 | 1 | 1 |
CL | 1 | 0 | 1 |
R3. Listagem simples Markdown para relatórios internos e conferir se IDs batem com links.
afacode_map | name | local_id | osm_id | wikidata_id |
---|---|---|---|---|
BR | Brasil | 0 | 59470 | 155 |
BR-RO | Rondônia | 11 | 325866 | 43235 |
BR-RO-AltaFlorestaOeste | Alta Floresta D’Oeste | 1100015 | 325842 | 1761456 |
BR-RO-Ariquemes | Ariquemes | 1100023 | 325888 | 1646975 |
BR-RO-Cabixi | Cabixi | 1100031 | 325882 | 940433 |
BR-RO-Cacoal | Cacoal | 1100049 | 325872 | 1761637 |
... | ... | ... | ... | ... |
BR-AC | Acre | 12 | 326266 | 40780 |
BR-AC-Acrelandia | Acrelândia | 1200013 | 326273 | 953086 |
BR-AC-AssisBrasil | Assis Brasil | 1200054 | 326269 | 1754403 |
BR-AC-Brasileia | Brasiléia | 1200104 | 326270 | 899406 |
BR-AC-Bujari | Bujari | 1200138 | 326259 | 1754408 |
BR-AC-Capixaba | Capixaba | 1200179 | 326268 | 1800737 |
BR-AC-CruzeiroSul | Cruzeiro do Sul | 1200203 | 326274 | 941136 |
Table optim.jurisdiction_geom
Geometrias de #optim.jurisdiction. Geometrias das jurisdições nacionais e locais.
Column | Type | Nullable | Description |
---|---|---|---|
osm_id | bigint | not null | Relation identifier in OpenStreetMap. |
isolabel_ext | text | not null | ISO 3166-1 alpha-2 code and name (camel case); e.g. BR-SP-SaoPaulo. |
geom | geometry(Geometry,4326) | Geometry for osm_id identifier | |
geom_svg | geometry(Geometry,4326) | Simplified geometry version to use in svg interface. | |
kx_ghs1_intersects | text[] | ||
kx_ghs2_intersects | text[] |
Indexes: "jurisdiction_geom_pkey" PRIMARY KEY, btree (osm_id) "jurisdiction_geom_isolabel_ext_key" UNIQUE CONSTRAINT, btree (isolabel_ext) "optim_jurisdiction_geom_idx1" gist (geom) "optim_jurisdiction_geom_isolabel_ext_idx1" btree (isolabel_ext) "optim_jurisdiction_geom_svg_idx1" gist (geom_svg) Access method: heap
Table osmc.coverage
Coberturas nacionais e de suas jurisdições. As geometrias nacionais são expressas em LatLong, as locais conforme sua projeção oficial.
Ingestão: arquivos "coverage.csv" do git. Função ou make para construção??
Exemplos: BR_new/data/coverage.csv, CO_new/data/coverage.csv. As colunas "status" e "cover" são copiadas em "status" e "kx_prefix".
Column | Type | Default | Description |
---|---|---|---|
cbits | bit varying | Coverage cell (hierarchical) identifier. | |
isolabel_ext | text | Jurisdiction identifier. ISO 3166-2 code extended for subjurisdiction name (camel case); e.g. BR-SP-SaoPaulo. | |
cindex | text | First digit of the coverage cell, a prefix in base 32nvu. Used only case is_country=false. | |
bbox | integer[] | Coverage cell BBOX. | |
status | smallint | 0 | Coverage status. Convention: 0: generated, 1: revised, 2: homologated. |
is_country | boolean | false | True if it is a cell of national coverage. |
is_contained | boolean | false | True if it is a cell contained in the jurisdiction. Must validate. |
is_overlay | boolean | false | True if it is an overlay cell. |
kx_prefix | text | Coverage cell prefix in base 32nvu. | |
geom | geometry | Coverage cell geometry on country's projection (local SRID). | |
geom_srid4326 | geometry | Coverage cell geometry on WGS84 LatLong (4326 srid). Used only case is_country=true. |
Indexes: "osm_coverage_cbits10true_idx" btree ((cbits::bit(10))) WHERE is_country IS TRUE "osm_coverage_cbits15false_idx" btree ((cbits::bit(14)), isolabel_ext) WHERE is_country IS FALSE "osm_coverage_geom4326_idx1" gist (geom_srid4326) "osm_coverage_geom_idx1" gist (geom) "osm_coverage_isolabel_ext_false_idx" btree (isolabel_ext) WHERE is_country IS FALSE "osm_coverage_isolabel_ext_idx1" btree (isolabel_ext) "osm_coverage_isolabel_ext_true_idx" btree (isolabel_ext) WHERE is_country IS TRUE Check constraints: "coverage_status_check" CHECK (status = ANY (ARRAY[0, 1, 2]))
Queries
Queries que geram reports e sugestão de análise das views próprias.
-- R0a
select * from optim.admin_views_dependency_summary where ref_tab_name='osmc.coverage'
;
-- R0b
select schema_name, name, arguments_simplified , return_type
from doc_UDF_using_a_table('osmc.cover') t order by 1,2,3
;
-- R1
select distinct
substring(isolabel_ext,1,2) as country,
natcod.vbit_to_baseh(substring(cbits,1,8),16,true) as cbits_b16h,
is_country, status,
count(*) n, count(distinct isolabel_ext) n_jurisds
from osmc.coverage
group by 1,2,3,4
order by 1,2,3 desc,4;
-- R2a
select substring(isolabel_ext,1,2) as country, is_country, is_overlay,
count(*) n, count(distinct isolabel_ext) n_jurisds, count(distinct cbits) n_cells,
min(l) l_min, max(l) l_max, round(avg(l),2) l_avg,
PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY l) l_mdn
from (select *, round(length(cbits)/4)::int as l from osmc.coverage) t
group by 1,2,3 order by 1,2 desc,3;
-- R2b
select substring(isolabel_ext,1,2) as country, is_country, is_overlay,
count(*) n, count(distinct isolabel_ext) n_jurisds,
count(distinct kx_prefix) n_cells, count(distinct kx_prefix||'.'||cindex::text) n_cells_idx,
count(distinct kx_prefix||'.'||isolabel_ext) n_cells_jur,
min(l) l_min, max(l) l_max, round(avg(l),2) l_avg,
PERCENTILE_DISC(0.5) WITHIN GROUP(ORDER BY l) l_mdn
from (select *, length(kx_prefix) as l from osmc.coverage) t
group by 1,2,3 order by 1,2 desc,3;
-- R3
select isolabel_ext, is_overlay,
array_agg( natcod.vbit_to_baseh(substring(cbits,11),16,true) order by cbits) cover
from osmc.coverage
where isolabel_ext like 'CO-ARA-%'
group by 1,2 order by 1,2;
-- R4. Testar contra qwerty2, que tem comjunto consistente. Aqui apenas CA e CO apresentam parte consistente.
select substr(isolabel_ext,1,2) as iso, len, round(avg(a),1) area_avg, max(a) area_max, count(*) n
from (
select isolabel_ext, length(cbits) len, round(st_area(geom)/10000.0) a, is_contained, is_overlay
from osmc.coverage where is_contained order by 2 desc,1
) t group by 1,2 order by 1,2
;
Reports
R0a. Views que fazem uso desta tabela:
- osmc.jurisdictions_select
- osmc.tmpvw10: temporária
- osmc.tmpvwpoeira: indica "sobras" dos polígonos que não foram aceitas como cobertura válida, por serem menores que 100 metros quadrados.
R0b. Funções que fazem uso desta tabela. (removidas as variantes "EC" e "UY")
schema_name | name | arguments_simplified | return_type |
---|---|---|---|
api | jurisdiction_coverage | {text,integer} | jsonb |
api | jurisdiction_geojson_from_isolabel | {text} | jsonb |
api | osmcode_decode_postal | {text,text} | jsonb |
api | osmcode_decode_postal_absolute | {text,text} | jsonb |
api | osmcode_decode_scientific_absolute | {text,text,integer} | jsonb |
api | osmcode_encode | {text,integer} | jsonb |
osmc | afa_log_to_hbig | {text,text} | int8 |
osmc | cellcontainsproperly | {text,text,integer,text,integer} | text |
osmc | check_coverage | {text,ARRAY,text,ARRAY,ARRAY,ARRAY,ARRAY,boolean,ARRAY} | record |
osmc | cover_child_geometries | {text,text,integer,text,text,USER-DEFINED} | record |
osmc | encode_point_brazil | {USER-DEFINED} | text |
osmc | encode_point_colombia | {USER-DEFINED} | text |
osmc | encode_postal_br | {USER-DEFINED,"double precision",integer,text} | jsonb |
osmc | encode_postal_cm | {USER-DEFINED,"double precision",integer,text} | jsonb |
osmc | encode_postal_co | {USER-DEFINED,"double precision",integer,text} | jsonb |
osmc | encode_scientific_br | {USER-DEFINED,"double precision",integer} | jsonb |
osmc | encode_scientific_cm | {USER-DEFINED,"double precision",integer} | jsonb |
osmc | encode_scientific_co | {USER-DEFINED,"double precision",integer} | jsonb |
osmc | encode_short_code | {text,"bit varying",text,USER-DEFINED,text,integer,text} | record |
osmc | generate_cover_csv | {text,text} | text |
osmc | l0_upsert | {text,smallint,integer,integer,integer,integer,integer,ARRAY,ARRAY} | text |
osmc | l0cover_upsert_br | {NULL} | text |
osmc | l0cover_upsert_co | {NULL} | text |
osmc | update_coverage_isolevel3 | {text,smallint,ARRAY,ARRAY} | text |
R1. Sumário do conteúdo corrente (final de 2023). Diferencia coberturas nacionais, expressa contagem de células e contagem de jurisdições.
country | cbits_b16h | is_country | status | n | n_jurisds |
---|---|---|---|---|---|
BR | 13 | t | 1 | 18 | 1 |
BR | 13 | f | 0 | 72726 | 5570 |
CM | 1e | t | 1 | 14 | 1 |
CM | 1e | f | 0 | 2738 | 360 |
CO | 2a | t | 1 | 16 | 1 |
CO | 2a | f | 0 | 15244 | 1115 |
CO | 2a | f | 2 | 22 | 1 |
EC | 36 | t | 1 | 22 | 1 |
UY | d6 | t | 1 | 19 | 1 |
UY | d6 | f | 0 | 1 | 1 |
R2a. Tamanho mais frequente (mediana "mdn") da cobertura. Comprimento l dado em quantidade de dígitos hexadecimais.
PS: ignorando EC e UY, que não estão configurados.
country | is_country | is_overlay | n | n_jurisds | l_min | l_max | l_avg | l_mdn |
---|---|---|---|---|---|---|---|---|
BR | t | f | 18 | 1 | 4 | 4 | 4.00 | 4 |
BR | f | f | 72565 | 5570 | 5 | 9 | 7.77 | 8 |
BR | f | t | 161 | 11 | 7 | 9 | 8.11 | 8 |
CM | t | f | 14 | 1 | 3 | 3 | 3.00 | 3 |
CM | f | f | 2730 | 360 | 3 | 7 | 5.48 | 5 |
CM | f | t | 8 | 1 | 7 | 7 | 7.00 | 7 |
CO | t | f | 16 | 1 | 3 | 3 | 3.00 | 3 |
CO | f | f | 14165 | 1116 | 4 | 8 | 5.82 | 6 |
CO | f | t | 1101 | 1091 | 6 | 9 | 7.39 | 8 |
R2b. Mesmo que R2a porém usando "kx_prefix" direto. Linhas is_country removidas pois não usam "kx_prefix".
country | is_ctry | is_ovly | n | n_jurisds | n_cells | n_cel_idx | n_cel_jur | l_min | l_max | l_avg | l_mdn |
---|---|---|---|---|---|---|---|---|---|---|---|
BR | f | f | 72565 | 5570 | 40933 | 69557 | 72565 | 2 | 5 | 3.77 | 4 |
BR | f | t | 161 | 11 | 161 | 161 | 161 | 3 | 5 | 4.11 | 4 |
CM | f | f | 2730 | 360 | 1411 | 2302 | 2626 | 2 | 5 | 3.50 | 3 |
CM | f | t | 8 | 1 | 4 | 4 | 4 | 5 | 5 | 5.00 | 5 |
CO | f | f | 14165 | 1116 | 7552 | 13530 | 14165 | 2 | 5 | 3.82 | 4 |
CO | f | t | 1101 | 1091 | 1100 | 1100 | 1101 | 4 | 6 | 4.70 | 5 |
R3. Para reproduzir a planilha CSV precisamos agrupar cbits e converte-los em base16h. Exemplo, municípios de "CO-ARA". As coberturas-overlay ainda não aparecem no git, e aqui apenas a célula de um ponto é utilizada.
isolabel_ext | is_overlay | n | cover |
---|---|---|---|
CO-ARA-Arauca | f | 14 | {cd4,cd5,cd6,cd7,e2a,e2e,e2f,e80,e81,e82,e83,e84,e85,e86} |
CO-ARA-Arauca | t | 1 | {cd7fG} |
CO-ARA-Arauquita | f | 9 | {c7e,c7f,cd0,cd1,cd2,cd3,cd4,cd5,cd6} |
CO-ARA-Arauquita | t | 1 | {cd3cQ} |
CO-ARA-CravoNorte | f | 13 | {e28,e2a,e2b,e2c,e2d,e2e,e2f,e38,e3a,e80,e81,e84,e85} |
CO-ARA-CravoNorte | t | 1 | {e2b5Q} |
CO-ARA-Fortul | f | 5 | {c6f,cc5,cd0,cd1,cd2} |
CO-ARA-Fortul | t | 1 | {cd0bG} |
CO-ARA-PuertoRondon | f | 9 | {c7d,c7e,c7f,cd1,cd4,cd5,e28,e2a,e80} |
CO-ARA-PuertoRondon | t | 1 | {c7e4Q} |
CO-ARA-Saravena | f | 4 | {cc5,cc7,cd0,cd2} |
CO-ARA-Saravena | t | 1 | {cd22Q} |
CO-ARA-Tame | f | 13 | {c6c,c6d,c6e,c6f,c78,c79,c7a,c7b,c7e,cc5,cd0,cd1,cd4} |
CO-ARA-Tame | t | 1 | {c7acQ} |
Outros esquemas e tabelas
download.redirects
Column | Type | Nullable | Description |
---|---|---|---|
donor_id | int | Donor identifier in optim.donor | |
filename_original | text | (not used) | |
package_path | text | (not used) | |
hashedfname | text | not null | The filenaname, based on SHA256.extension |
hashedfnameuri | text | The real URL for file download of the file with the expected SHA256. |
Indexes: "redirects_pkey" PRIMARY KEY, btree (hashedfname) -- correto seria unique substring(hashedfname,1,64) para excluir a extension da unicidade "redirects_hashedfname_hashedfnameuri_key" UNIQUE CONSTRAINT, btree (hashedfname, hashedfnameuri) Check constraints: "redirects_hashedfname_check" CHECK (hashedfname ~ '^[0-9a-f]{64,64}\.[a-z0-9]+$'::text)
Tem relação com a API do endpoint http://DL.digital-guard.org
.
onde faz UNION com a tabela optim.donated_packcomponent_cloudcontrol
. No servidor, localmente faz curl http://127.0.0.1:3105/redirects
. Na prática é chamado por PHP, ver /var/www/dl.digital-guard.org/dl.php
.
-- R0. Testa como a API fornece
select * from api.redirects where fhash like 'e5b%';
CREATE VIEW optim.vw_download_redirects AS
select substring(hashedfname,1,7) as short_id, donor_id, hashedfname, hashedfnameuri from download.redirects
;
-- R1. Gera testes bash dos downloads. Demora, já são mais de 700
COPY (
SELECT 'url_'||short_id ||'="' || hashedfnameuri || '"' FROM optim.vw_download_redirects
UNION ALL
SELECT format(
'wget --spider "$url_%s" 2>/dev/null && echo "url_%1$s exists" || echo "!url_%1$s does not exist: $url1"',
short_id
)
FROM optim.vw_download_redirects
) to '/tmp/dl_urls_test.sh'; -- sh /tmp/dl_urls_test.sh
Outros lembretes
CREATE VIEW optim.admin_views_dependency AS -- https://stackoverflow.com/a/69473861/287948 select u.view_schema schema_name, u.view_name, u.table_schema referenced_table_schema, u.table_name referenced_table_name, v.view_definition from information_schema.view_table_usage u join information_schema.views v on u.view_schema = v.table_schema and u.view_name = v.table_name where u.table_schema not in ('information_schema', 'pg_catalog') order by u.view_schema, u.view_name ; CREATE VIEW optim.admin_views_dependency_summary AS select schema_name||'.'||view_name as vw_name, referenced_table_schema||'.'||referenced_table_name as ref_tab_name from optim.admin_views_dependency ; -- DROP VIEW optim.admin_udf_summary; CREATE VIEW optim.admin_udf_summary AS SELECT n.oid, quote_ident(n.nspname) as schema , quote_ident(p.proname) as function, pg_get_function_result(n.oid) as fres FROM pg_catalog.pg_proc p JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace WHERE n.nspname not like 'pg%' ; CREATE or replace FUNCTION doc_UDF_using_a_table( p_table text, p_schema_name text DEFAULT NULL, p_name_like text DEFAULT '', p_name_notlike text DEFAULT '', p_oid oid DEFAULT NULL ) RETURNS TABLE ( id text, oid oid, schema_name text, name text, language text, --new definition_md5 text, -- version control arguments_simplified text[], arguments text, return_type text, prokind text, comment text ) AS $f$ SELECT doc_UDF_transparent_id(u.schema_name, u.name::text, s.arguments_simplified::text[]) AS id, u.oid, u.schema_name, u.name::text, u.language, md5(u.definition), s.arguments_simplified::text[] as arguments_simplified, u.arguments::text AS arguments, u.return_type, u.prokind, u.comment FROM doc_UDF_show_simplified_signature($2,$3,$4) s INNER JOIN doc_UDF_show($2,$3,$4,$5) u ON s.oid=u.oid::text WHERE u.definition ilike '%'|| p_table ||'%' $f$ LANGUAGE SQL IMMUTABLE;
Ver também
- Criação ou Reconstrução de qualquer DL (teste ou produção) https://github.com/digital-guard/preserv/wiki/Datalake