Skip to content

Instantly share code, notes, and snippets.

@cpjeanpaul
Last active October 20, 2019 23:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cpjeanpaul/1387c1a28d78f15ef070e3e0f90590a8 to your computer and use it in GitHub Desktop.
Save cpjeanpaul/1387c1a28d78f15ef070e3e0f90590a8 to your computer and use it in GitHub Desktop.
Procesar padrón reducido de SUNAT para importarlo a una tabla de postgres usando COPY
<?php
// iconv -f iso-8859-1 -t utf-8 padron_reducido_ruc.txt > utf8_padron_reducido_ruc.txt
echo "Limpiar caracteres especiales\n";
$data = file_get_contents('utf8_padron_reducido_ruc.txt');
$data = str_replace(array('\'', '"', '-'), '', $data);
file_put_contents('utf8_padron_reducido_ruc.txt', $data);
echo "Limpiar caracteres especiales: LISTO\n";
$replace = array('Š' => 'S', 'š' => 's', 'Ž' => 'Z', 'ž' => 'z', 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'A', 'Ç' => 'C', 'È' => 'E', 'É' => 'E',
'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O', 'Ù' => 'U',
'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ý' => 'Y', 'Þ' => 'B', 'ß' => 'Ss', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'a', 'ç' => 'c',
'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'o', 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o',
'ö' => 'o', 'ø' => 'o', 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ý' => 'y', 'þ' => 'b', 'ÿ' => 'y');
echo "Generando output.csv\n";
$i = 0;
if (($handle1 = fopen("utf8_padron_reducido_ruc.txt", "r")) !== FALSE) {
if (($handle2 = fopen("output.csv", "w")) !== FALSE) {
while (($data = fgetcsv($handle1, 000, "|")) !== FALSE) {
$i++;
if ($i == 1) {
continue;
}
// mostrar mensaje cada 100K
if (($i % 100000) == 0) {
echo $i . "\n";
}
// Solo mantener RUCs activos
if (trim($data['2']) != 'ACTIVO') {
continue;
}
$data = array_pad($data, 15, '');
$data[0] = strtr($data[0], $replace);
$data[1] = strtr($data[1], $replace);
$data[2] = strtr($data[2], $replace);
$data[3] = strtr($data[3], $replace);
$data[4] = strtr($data[4], $replace);
$data[5] = strtr($data[5], $replace);
$data[6] = strtr($data[6], $replace);
$data[7] = strtr($data[7], $replace);
$data[8] = strtr($data[8], $replace);
$data[9] = strtr($data[9], $replace);
$data[10] = strtr($data[10], $replace);
$data[11] = strtr($data[11], $replace);
$data[12] = strtr($data[12], $replace);
$data[13] = strtr($data[13], $replace);
$data[14] = strtr($data[14], $replace);
$data = array_slice($data, 0, 15);
fputcsv($handle2, $data, '|');
}
fclose($handle2);
}
fclose($handle1);
}
echo "Generando output.csv: LISTO\n";
$data = file_get_contents('output.csv');
echo "Limpiando output.csv\n";
$data = str_replace(array('\'', '"'), '', $data);
file_put_contents('output.csv', $data);
echo "Limpiando output.csv: LISTO\n";
/**
* DROP TABLE padron;
* TRUNCATE TABLE padron;
*
* CREATE TABLE public.padron
* (
* ruc character varying(15) NOT NULL,
* razon character varying(255),
* estado character varying(25),
* domicilio character varying(25),
* ubigeo character varying(6),
* via character varying(150),
* nombre_via character varying(150),
* codigo_zona character varying(150),
* tipo_zona character varying(150),
* numero character varying(100),
* interior character varying(50),
* lote character varying(50),
* departamento character varying(50),
* manzana character varying(50),
* kilometro character varying(50)
* )
* WITH (
* OIDS=FALSE
* );
* ALTER TABLE public.padron
* OWNER TO postgres;
*
* COPY padron FROM '../output.csv' DELIMITER '|' CSV
*
* CREATE INDEX CONCURRENTLY padron_ruc ON padron(ruc);
* CREATE INDEX CONCURRENTLY padron_razon ON padron(razon);
*
* REINDEX INDEX padron_ruc;
* REINDEX INDEX padron_razon;
*
* select * from padron where ruc like '%%';
* select * from padron where (razon like '%%') or (ruc like '%%') ;
* -- via nombre_via 'km.' kilometro 'Nro' numero 'Mza.' manzana 'Lote.' lote 'Dpto' departamento codigo_zona tipo_zona
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment