Skip to content

Instantly share code, notes, and snippets.

@1cadumagalhaes
Created July 6, 2021 22:08
Show Gist options
  • Save 1cadumagalhaes/005af5f663749385f5f22fe94524b40f to your computer and use it in GitHub Desktop.
Save 1cadumagalhaes/005af5f663749385f5f22fe94524b40f to your computer and use it in GitHub Desktop.
Função para leitura e transformação de arquivos de texto em stream
const fs = require('fs');
const readline = require('readline');
/**
 * Streams a text file line by line, passes every line through `callback`
 * and writes the returned value followed by '\n' to `output`.
 *
 * Failures (stream errors or exceptions thrown by `callback`) are logged
 * together with the number of lines handled so far; the returned promise
 * still resolves, so existing fire-and-forget callers keep working.
 *
 * @param {string} input - Path of the file to read.
 * @param {string} [output='output.csv'] - Path of the file to (over)write.
 * @param {function(string): *} callback - Maps one input line to the value
 *   that is written to the output.
 * @returns {Promise<number>} Number of lines passed to `callback`. On success
 *   the output has been fully flushed when the promise resolves.
 */
async function processLineByLine(input, output = 'output.csv', callback) {
  let lineCount = 0;
  let fileStream;
  let outStream;
  let rl;
  // Set once the function is done so a still-pending copy loop stops working.
  let finished = false;

  try {
    fileStream = fs.createReadStream(input);
    outStream = fs.createWriteStream(output);
    // The write stream is intentionally NOT handed to readline: its `output`
    // option is meant for prompts, not for the transformed data.
    rl = readline.createInterface({
      input: fileStream,
      crlfDelay: Infinity,
    });

    // Stream failures arrive as 'error' events, which a try/catch around the
    // loop cannot be relied on to see. Turn them into a promise to race against.
    const streamFailure = new Promise((_, reject) => {
      fileStream.on('error', reject);
      outStream.on('error', reject);
    });
    // Prevent an "unhandled rejection" if the error fires while nobody is racing.
    streamFailure.catch(() => {});

    const copyLines = async () => {
      for await (const line of rl) {
        if (finished) break;
        const transformed = callback(line);
        lineCount++;
        // Respect backpressure: wait for 'drain' when the internal buffer is full.
        if (!outStream.write(`${transformed}\n`)) {
          await new Promise((resolve) => outStream.once('drain', resolve));
        }
      }
    };

    console.log('reading lines');
    await Promise.race([copyLines(), streamFailure]);

    // End the output and wait until everything has been flushed.
    const flushed = new Promise((resolve, reject) => {
      outStream.end((err) => (err ? reject(err) : resolve()));
    });
    await Promise.race([flushed, streamFailure]);

    console.log(lineCount);
  } catch (error) {
    console.log(error, lineCount);
  } finally {
    finished = true;
    // Release every resource regardless of how we got here.
    if (rl) rl.close();
    if (fileStream) fileStream.destroy();
    if (outStream) outStream.destroy();
  }
  return lineCount;
}
/**
 * Runs the example conversion: reads 'dados.csv', applies `transformLine`
 * to every line and writes the result to 'output_dados.csv'.
 *
 * @returns {Promise<*>} The promise produced by `processLineByLine`, returned
 *   so callers can await completion instead of leaving it floating.
 */
function main() {
  return processLineByLine('dados.csv', 'output_dados.csv', transformLine);
}
/**
 * Example line transformation.
 *
 * Steps, in order:
 *  1. Removes every occurrence of `-2`, every comma and every run of two or
 *     more whitespace characters (the original author describes `-2` and `,`
 *     as empty-field markers).
 *  2. Splits the line on `|`; the result is later re-joined with `,`.
 *  3. Logs an error when the line does not have exactly 13 fields
 *     (processing continues regardless).
 *  4. Drops the field at index 5 (described by the author as a duplicated column).
 *  5. Unless the first field starts with 'T' (presumably the header row —
 *     TODO confirm), swaps the first two 2-digit groups around a '/' and
 *     converts the field to an ISO-8601 timestamp for BigQuery.
 *
 * NOTE(review): the date is parsed with `new Date(string)` on a non-ISO
 * string, so the result depends on the runtime's parser and local time zone.
 *
 * @param {string} line - One raw `|`-separated record.
 * @returns {string} The cleaned, comma-separated record.
 * @throws {RangeError} When the first field is not a header and cannot be
 *   parsed as a date; the message includes the offending value and line.
 */
function transformLine(line) {
  const fields = line.replace(/(-2)|(,)|(\s{2,})/g, '').split('|');

  if (fields.length !== 13) {
    console.error("Numero inesperado campos", fields.length, fields[1], '\n', line);
  }

  fields.splice(5, 1);

  if (!fields[0].startsWith('T')) {
    // Diagnostic output kept from the original implementation.
    console.log(fields[0]);
    const parsed = new Date(fields[0].replace(/(\d{2})\/(\d{2})/, "$2/$1"));
    // toISOString() would throw a context-free "Invalid time value";
    // fail with the same error type but say which value/line is broken.
    if (Number.isNaN(parsed.getTime())) {
      throw new RangeError(`Invalid timestamp "${fields[0]}" in line: ${line}`);
    }
    fields[0] = parsed.toISOString();
  }

  return fields.join(',');
}
// Script entry point: start the example conversion when this file is executed.
main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment