|
const fs = require('fs'); |
|
const PDFParser = require("pdf2json"); |
|
|
|
/**
 * Print the command-line usage instructions to stdout.
 *
 * The text is emitted with a single console.log call: a leading blank line,
 * the "usage:" heading, the indented command line, and a trailing blank line.
 */
function showHelp() {
    const usageLines = [
        '',
        'usage:',
        '    node fnb-pdf-to-csv.js <FILE_NAME> [<FILE_NAME> [...]]',
        '',
    ];
    console.log(usageLines.join('\n'));
}
|
|
|
// validate arguments
//
// Every command-line argument after the script name is either a help flag
// ("--help" / "-h": print usage and exit with status 0) or is taken verbatim
// as the path of a PDF file to convert. `files` preserves the order in which
// the paths were given.
const args = process.argv.slice(2);
const files = [];

for (const arg of args) {
    if (arg === "--help" || arg === "-h") {
        showHelp();
        process.exit(0);
    }
    files.push(arg);
}

// at least one input file is required; otherwise report the problem on
// stderr, show the usage text and exit with a failure status
if (files.length === 0) {
    console.error(`missing required arguments`);
    showHelp();
    process.exit(1);
}
|
|
|
// Matches a URI-encoded transaction date such as "18%20Feb".
const ENCODED_DATE_PATTERN = /^\d{2}%20[A-Za-z]{3}$/;

// Matches a decoded amount such as "1 234.56" (digits, optional spaces, two decimals).
const AMOUNT_PATTERN = /^\d[\s\d]*\.\d\d$/;

/**
 * Return the raw (still URI-encoded) text of one text item on a page.
 *
 * @param {object} page - a page object exposing a `Texts` array
 * @param {number} index - position of the text item within `page.Texts`
 * @returns {string} the `T` value of the item's first run (`R[0]`)
 */
function getEncodedWord(page, index) {
    return page.Texts[index].R[0].T;
}

/**
 * Build one CSV transaction row starting at a date text item.
 *
 * Words following the date are appended as columns until the next date item
 * or the end of the page. A word that looks like an amount has its spaces
 * removed and is negated unless the very next text item is "Cr". The "Cr"
 * item itself is not skipped, so it is still emitted as its own column.
 *
 * NOTE(review): words are joined with "," without quoting, so a word that
 * itself contains a comma produces an extra column — confirm that the
 * consumer of the CSV tolerates this.
 *
 * @param {object} page - the page containing the row
 * @param {number} dateIndex - index in `page.Texts` of the date item
 * @returns {{line: string, lastIndex: number}} the CSV line and the index of
 *     the last text item consumed by this row
 */
function buildTransactionRow(page, dateIndex) {
    const wordCount = page.Texts.length;
    // csv format starts each row with 5
    const lineParts = [5, decodeURIComponent(getEncodedWord(page, dateIndex))];
    let lastIndex = dateIndex;

    for (let wordIndex = dateIndex + 1; wordIndex < wordCount; wordIndex++) {
        const encodedWord = getEncodedWord(page, wordIndex);
        if (ENCODED_DATE_PATTERN.test(encodedWord)) {
            // the next transaction starts here
            break;
        }

        let word = decodeURIComponent(encodedWord);
        if (AMOUNT_PATTERN.test(word)) {
            // if the word is an amount, remove spaces
            word = word.replace(/\s/g, '');
            // if an amount is not followed by "Cr" make it negative
            const isCredit = wordIndex + 1 < wordCount
                && getEncodedWord(page, wordIndex + 1) === "Cr";
            if (!isCredit) {
                word = `-${word}`;
            }
        }

        lineParts.push(word);
        lastIndex = wordIndex;
    }

    return { line: lineParts.join(','), lastIndex };
}

/**
 * Convert the parsed pages of one statement into CSV lines.
 *
 * Scanning starts at page index 1; page index 0 is never read (presumably a
 * cover page — TODO confirm). On page index 1, rows are only collected after
 * the "Card No." label has been seen; on later pages every date item starts
 * a row.
 *
 * @param {object[]} pages - page objects, each exposing a `Texts` array
 * @returns {string[]} the CSV lines, without line terminators
 */
function extractCsvLines(pages) {
    const output = [];
    let cardNumber;

    for (let pageIndex = 1; pageIndex < pages.length; pageIndex++) {
        const page = pages[pageIndex];
        const wordCount = page.Texts.length;

        for (let textIndex = 0; textIndex < wordCount; textIndex++) {
            const encodedWord = getEncodedWord(page, textIndex);
            // the label only counts when a following item (the number) exists
            const isCardLabel = pageIndex === 1
                && encodedWord === "Card%20No."
                && textIndex + 1 < wordCount;

            if (isCardLabel) {
                cardNumber = decodeURIComponent(
                    getEncodedWord(page, textIndex + 1).replace(/\s/g, '')
                );
                // skip the item holding the card number itself
                textIndex += 1;
                output.push(`Card No.,${cardNumber.replace(/\s/g, '')}`);
                // RUSHED HACK: inject year into expected csv location
                // 3,3,'18 February 2021','18 March 2021',...
                output.push(
                    '1', '2', '3',
                    `3,3,'18 YOU-SHOULD-PROBABLY-CHANGE-THIS 2021'`
                );
                // add header line
                output.push(`5,Date,Description,Location,Amount`);
            } else if (pageIndex > 1 || cardNumber) {
                if (ENCODED_DATE_PATTERN.test(encodedWord)) {
                    const row = buildTransactionRow(page, textIndex);
                    output.push(row.line);
                    // resume scanning after the last item the row consumed
                    textIndex = row.lastIndex;
                }
            }
        }
    }

    return output;
}

// Convert every requested PDF: parse it asynchronously and, once the data is
// ready, write the CSV next to the input as "<FILE_NAME>.csv".
for (const file of files) {
    const pdfParser = new PDFParser();

    pdfParser.on("pdfParser_dataError", errData => console.error(errData.parserError));

    pdfParser.on("pdfParser_dataReady", pdfData => {
        // use `formImage.Pages` when present, otherwise fall back to a
        // top-level `Pages` (believed to be the pdf2json 2.x layout — confirm)
        const pages = (pdfData.formImage || pdfData).Pages;
        fs.writeFileSync(`${file}.csv`, extractCsvLines(pages).join('\n'));
    });

    pdfParser.loadPDF(file);
}