CNEFE: mudanças entre as edições

movendo pontos para as faces
(movendo pontos para as faces)
Linha 131: Linha 131:
;
;
-- 325993 não-duplicados mais 2.16*24642 duplicados = 325993+2.16*24642 = 379220 ~ 379228.
-- 325993 não-duplicados mais 2.16*24642 duplicados = 325993+2.16*24642 = 379220 ~ 379228.
create table ibge_cnefe2022_uniq_lixo AS  -- erro
with upt as (
  SELECT ghs FROM vw02_ibge_cnefe2022 group by 1 having count(*)=1
), uend as(
  SELECT cod_setor, cep, via_name, num_endereco
  FROM vw02_ibge_cnefe2022
  group by 1,2,3,4 having count(*)=1
) select i.ghs, i.cod_setor, i.cep, i.via_name, i.num_endereco
  from upt inner join (vw02_ibge_cnefe2022 i inner join uend
    ON i.cod_setor=uend.cod_setor AND i.cep=uend.cep AND i.via_name=uend.via_name AND i.num_endereco=uend.num_endereco)
    ON upt.ghs=i.ghs
;
</pre>
</pre>


Linha 149: Linha 136:
Desses, com duplicidade posicional apenas ~7,6% (24642 de 325993), sendo também razoável o seu descarte de tmp_ibge_cnefe2022_uniq1.
Desses, com duplicidade posicional apenas ~7,6% (24642 de 325993), sendo também razoável o seu descarte de tmp_ibge_cnefe2022_uniq1.


<pre>
drop table if exists ibge_cnefe2022_uniq2;
drop table if exists ibge_cnefe2022_uniq2;
create table ibge_cnefe2022_uniq2 AS  -- remove endereços duplicados em locais diferentes  
create table ibge_cnefe2022_uniq2 AS  -- remove endereços duplicados em locais diferentes  
Linha 180: Linha 168:
from vw02_ibge_cnefe2022           
from vw02_ibge_cnefe2022           
) t; -- 288685 | 277160
) t; -- 288685 | 277160
</pre>
== Tratamentos finais ==
<pre>
drop table ibge_cnefe2022_uniq3;
CREATE TABLE  ibge_cnefe2022_uniq3 AS
SELECT ghss[1] as ghs, cod_setor, num_quadra::int as num_quadra, num_face::int num_face,
        cep, via_name, num_endereco,
        false AS is_centroid
FROM ibge_cnefe2022_uniq2 WHERE cardinality(ghss)=1
UNION ALL
SELECT st_geohash(geoms_centroid(geoms),9) as ghs,
        cod_setor, num_quadra::int, num_face::int, cep, via_name, num_endereco,
        true AS is_centroid
FROM ibge_cnefe2022_uniq2
WHERE cardinality(ghss)>1
      AND (hcodes_sameparent(ghss) OR (geoms_maxdist(geoms,true))[2]<=20)
ORDER BY 1
;  -- 249834
----
copy ibge_cnefe2022_uniq3 to '/tmp/BR-RS-POA2024_CNEFE-ptsEnderecoUnico.csv' CSV HEADER;
copy (select gid, cd_setor, cd_quadra::int as cd_quadra, cd_face::int as cd_face, via_name, tot_res, geom FROM vw01_log) TO '/tmp/BR-RS-POA-faces.csv' CSV HEADER;
-- COPY 56645
--- CONTAGENS DE JOIN
With f AS(
SELECT gid, cd_setor, cd_quadra::int as cd_quadra, cd_face::int as cd_face,
        via_name, tot_res, st_geohash(geom,7) as ghs7
FROM vw01_log
)
SELECT count(*)
FROM ibge_cnefe2022_uniq3 u INNER JOIN f
  ON substring(u.cod_setor,1,15)!=f.cd_setor AND substring(u.ghs,1,7)=f.ghs7 AND f.cd_quadra=u.num_quadra AND f.cd_face=u.num_face AND f.via_name=u.via_name
; -- 5893
--  testar se são consistentes ON substring(u.ghs,1,7)=f.ghs7 AND f.cd_quadra=u.num_quadra AND f.cd_face=u.num_face AND f.via_name=u.via_name
------- final só confiaveis:
drop table ibge_cnefe2022_uniq_onface;
create table ibge_cnefe2022_uniq_onface AS
WITH f AS (
  SELECT gid, cd_setor, cd_quadra::int as cd_quadra, cd_face::int as cd_face,
        via_name, tot_res, st_geohash(geom,7) as ghs7, geom
  FROM vw01_log
)
SELECT u.ghs, u.cep, u.via_name, u.num_endereco, u.is_centroid, substring(u.ghs,1,7)=f.ghs7 as is_nearface,
        ST_ClosestPoint(f.geom, ST_PointFromGeoHash(u.ghs)) as geom
FROM ibge_cnefe2022_uniq3 u INNER JOIN f
  ON substring(u.cod_setor,1,15)=f.cd_setor AND f.cd_quadra=u.num_quadra AND f.cd_face=u.num_face AND f.via_name=u.via_name
  order by 1
; -- 198797
</pre>
2 402

edições