Skip to content

Instantly share code, notes, and snippets.

@therightstuff
Created July 10, 2021 09:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save therightstuff/aa7723ac246e396a0dc86ce499e2e6b5 to your computer and use it in GitHub Desktop.
Save therightstuff/aa7723ac246e396a0dc86ce499e2e6b5 to your computer and use it in GitHub Desktop.
Node.js script to convert FNB Credit Card statements from PDF to CSV

Convert PDF FNB Credit Card statements to CSV compatible with cheque account CSV format

A quick-hack Node.js script that converts FNB (First National Bank of South Africa) PDF credit card statements to a CSV format that is similar enough to the format of their cheque account CSVs for the budgie-feeder parser to ingest it.

NOTE: results may vary, generated CSV files must be reviewed before use.

const fs = require('fs');
const PDFParser = require("pdf2json");
/**
 * Prints the command-line usage text to stdout.
 */
function showHelp() {
    const usageText = `
usage:
node fnb-pdf-to-csv.js <FILE_NAME> [<FILE_NAME> [...]]
`;
    console.log(usageText);
}
// Walk the command-line arguments: a help flag prints usage and exits
// successfully; every other argument is treated as an input file name.
var args = process.argv.slice(2);
let files = [];
while (args.length !== 0) {
    const current = args.shift();
    if (current === "--help" || current === "-h") {
        showHelp();
        process.exit(0);
    }
    files.push(current);
}

// At least one input file is required.
if (!files.length) {
    console.error(`missing required arguments`);
    showHelp();
    process.exit(1);
}
/**
 * Builds the CSV lines for one parsed statement.
 *
 * Each text fragment is read from `page.Texts[i].R[0].T` and is expected to be
 * URI-encoded (e.g. a space appears as `%20`). Page index 0 is skipped. On
 * page index 1 nothing is emitted until a "Card No." fragment is seen; that
 * fragment produces the card-number line, the filler/placeholder lines and the
 * header line. From then on (and on every later page) each fragment shaped
 * like `DD Mon` starts a transaction row that collects the fragments following
 * it, up to the next date-shaped fragment or the end of the page.
 *
 * @param {Array<{Texts: Array<{R: Array<{T: string}>}>}>} pages - pages of the parsed PDF.
 * @returns {string[]} CSV lines in output order (no trailing newline).
 */
function statementPagesToCsvLines(pages) {
    // matches the URI-encoded form of e.g. "18 Feb"
    const isDate = (encoded) => /^\d{2}%20[A-Za-z]{3}$/.test(encoded);
    // matches a decoded amount such as "1 234.56"
    const isAmount = (decoded) => /^\d[\s\d]*\.\d\d$/.test(decoded);

    const output = [];
    let cardNumber;

    for (let pageIndex = 1; pageIndex < pages.length; pageIndex++) {
        const texts = pages[pageIndex].Texts;
        const getWord = (i) => texts[i].R[0].T;

        for (let textIndex = 0; textIndex < texts.length; textIndex++) {
            const current = getWord(textIndex);

            if (pageIndex === 1 && current === "Card%20No.") {
                // the fragment after the label holds the card number; consume it
                cardNumber = decodeURIComponent(getWord(textIndex + 1).replace(/\s/g, ''));
                textIndex++;
                output.push(`Card No.,${cardNumber.replace(/\s/g, '')}`);
                // RUSHED HACK: inject year into expected csv location
                // 3,3,'18 February 2021','18 March 2021',...
                output.push(
                    '1', '2', '3',
                    `3,3,'18 YOU-SHOULD-PROBABLY-CHANGE-THIS 2021'`
                );
                // add header line
                output.push(`5,Date,Description,Location,Amount`);
                continue;
            }

            // on page index 1, ignore everything that precedes the card number
            const inTransactions = pageIndex > 1 || cardNumber;
            if (!inTransactions || !isDate(current)) {
                continue;
            }

            // csv format starts each row with 5
            const lineParts = [5, decodeURIComponent(current)];
            for (let wordIndex = textIndex + 1;
                wordIndex < texts.length && !isDate(getWord(wordIndex));
                wordIndex++
            ) {
                let word = decodeURIComponent(getWord(wordIndex));
                if (isAmount(word)) {
                    // amounts are written without embedded spaces
                    word = word.replace(/\s/g, '');
                    // an amount that is not followed by "Cr" is made negative
                    const isCredit = wordIndex + 1 < texts.length
                        && getWord(wordIndex + 1) === "Cr";
                    if (!isCredit) {
                        word = "-" + word;
                    }
                }
                lineParts.push(word);
                // the outer loop resumes after the last fragment consumed here
                textIndex = wordIndex;
            }
            output.push(lineParts.join(','));
        }
    }
    return output;
}

// Convert every input file; each result is written next to its source as
// `<FILE_NAME>.csv` once the parser reports that the PDF data is ready.
for (const file of files) {
    const pdfParser = new PDFParser();
    pdfParser.on("pdfParser_dataError", (errData) => {
        console.error(errData.parserError);
        // make a failed conversion visible to the calling shell
        process.exitCode = 1;
    });
    pdfParser.on("pdfParser_dataReady", (pdfData) => {
        // NOTE(review): this script reads pages from `formImage.Pages`; newer
        // pdf2json releases appear to expose `Pages` at the top level instead,
        // so accept either shape - confirm against the installed version.
        const pages = (pdfData.formImage || pdfData).Pages;
        fs.writeFileSync(`${file}.csv`, statementPagesToCsvLines(pages).join('\n'));
    });
    pdfParser.loadPDF(file);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment