Skip to content

Instantly share code, notes, and snippets.

@alshdavid
Last active November 17, 2023 21:25
Show Gist options
  • Save alshdavid/7626bac4716bdcd52cf0a233ea205b6b to your computer and use it in GitHub Desktop.
Save alshdavid/7626bac4716bdcd52cf0a233ea205b6b to your computer and use it in GitHub Desktop.
CBA PDF statement converter to PDF or CSV
"use strict";var t=this&&this.__importDefault||function(t){return t&&t.__esModule?t:{default:t}};Object.defineProperty(exports,"__esModule",{value:!0}),exports.parsePDF=void 0;const e=t(require("fs")),n=t(require("pdf-parse")),s=require("json2csv");async function r(t,r){const l=e.default.readFileSync(t),o=(await n.default(l)).text.split("TransactionDebitCreditBalance");o.shift();let a=[],c=0;for(let t of o){const{results:e,finalBalance:n}=i(t,c);c=n,a=[...a,...e]}if(r&&r.endsWith(".json"))e.default.writeFileSync(r,JSON.stringify(a,null,2));else if(r&&r.endsWith(".csv")){const t=s.parse(a);e.default.writeFileSync(r,t)}else console.log(a)}function i(t,e=0){const{openingBalance:n,firstPassResults:s}=function(t){const e=t.split("\n");let n;e.shift();const s=[];if(function(t){return t.includes("OPENING BALANCE")}(e[0])){n=l((e.shift()||"").split("$")[1])}let r=!1;for(const t of e)if(!0!==r){if(""===t)break;if(t.includes("CLOSING BALANCE"))break;t.includes("CREDIT INTEREST EARNED")?r=!0:s.push(t)}else r=!1;return{firstPassResults:s,openingBalance:n}}(t);return function(t,e){const n=[];let s=e;for(const e of t){const t=e.substring(0,6);let[r,...i]=e.substring(6,e.length).split("$");const c=o(i[0]),u=l(i[1]),f=u-s;let g=0;if(0===c){let e=a(f);e.startsWith("-")&&(e=e.substring(1,e.length));e!==r.substring(r.length-e.length,r.length)&&(console.log("WrongDebitCalc"),console.log({newBalance:u-Math.abs(f),diff:Math.abs(f),ds:e,date:t,label:r,debit:g,credit:c,balance:u})),g=Math.abs(f),r=r.substring(0,r.length-e.length)}n.push({date:t,label:r,debit:g,credit:c,balance:u}),s=u}return{results:n,finalBalance:s}}(function(t){const e=[];let n="";for(const s of t){1===s.split("$").length?n+=s:(e.push(n+s),n="")}return e}(s),n||e)}function l(t){if(t.includes("CR"))return o(t.split("CR")[0]);if(t.includes("DR"))return-Math.abs(o(t.split("DR")[0]));throw"No Balance"}function o(t){return t?parseInt(t.replace(/,/g,"").replace(/\./g,"")):0}function a(t){const e=t.toString().startsWith("-")?"-":"",n=t.toString().replace("-","");if(1===n.length)return e+"0.0"+n;if(2===n.length)return e+"0."+n;const s=n.substring(0,n.length-2),r=n.substring(n.length-2,n.length);return parseInt(s).toLocaleString()+"."+r}process.argv[2]&&r(process.argv[2],process.argv[3]),exports.parsePDF=r
Usage
Contact alshdavid@gmail.com for help
Download and install node from https://nodejs.org/en/download/
Copy the "index.js" script below to you computer.
Open up your terminal, cmd, or powershell.
run the following once:
npm install pdf-parse json2csv
Navigate to the directory then run the following for each statement:
node index.js ./statement.pdf ./output.csv
If you want to output a JSON file
node index.js ./statement.pdf ./output.json
import fs from 'fs'
import pdf from 'pdf-parse'
import { parse } from 'json2csv';
export type Cents = number
export type Record = {
date: string,
label: string,
debit: Cents,
credit: Cents,
balance: Cents
};
if (process.argv[2]) {
parsePDF(process.argv[2], process.argv[3])
}
export async function parsePDF(file: string, outputFile?: string){
const dataBuffer = fs.readFileSync(file);
const data = await pdf(dataBuffer)
const vs = data.text.split('TransactionDebitCreditBalance')
vs.shift()
let output: Record[] = []
let previousBalance = 0
for (let v of vs) {
const { results, finalBalance } = processTable(v, previousBalance)
previousBalance = finalBalance
output = [ ...output, ...results ]
}
if (outputFile && outputFile.endsWith('.json')) {
fs.writeFileSync(outputFile, JSON.stringify(output, null, 2))
} else if (outputFile && outputFile.endsWith('.csv')) {
const csv = parse(output);
fs.writeFileSync(outputFile, csv)
} else {
console.log(output)
}
}
function isOpeningBalance(target: string) {
return target.includes('OPENING BALANCE')
}
function getTabletLines(target: string): { firstPassResults: string[], openingBalance: number | undefined } {
const l = target.split('\n')
l.shift()
let openingBalance: number | undefined
const p = []
if (isOpeningBalance(l[0])) {
const openingLine = l.shift() || ''
openingBalance = parseBalance(openingLine.split('$')[1])
}
let skip = false
for (const line of l) {
if (skip === true) {
skip = false
continue
}
if (line === '') {
break
}
if (line.includes('CLOSING BALANCE')) {
break
}
if (line.includes('CREDIT INTEREST EARNED')) {
skip = true
continue
}
p.push(line)
}
return {
firstPassResults: p,
openingBalance
}
}
function squashLines(target: string[]): string[] {
const p2: string[] = []
let temp = ''
for (const line of target) {
const v = line.split('$')
if (v.length === 1) {
temp += line
} else {
p2.push(temp + line)
temp = ''
}
}
return p2
}
function generateRecords(target: string[], initialBalance: number): { results: Record[], finalBalance: Cents } {
const p3: Record[] = []
let previousBalance = initialBalance
for (const line of target) {
const date = line.substring(0, 6)
let [ label, ...data ] = line.substring(6, line.length).split('$')
const credit = moneyToCents(data[0])
const balance = parseBalance(data[1])
const diff = balance - previousBalance
let debit = 0
if (credit === 0) {
let ds = centsToMoney(diff)
if (ds.startsWith('-')) {
ds = ds.substring(1, ds.length)
}
const diffInLabel = label.substring(label.length - ds.length, label.length)
if (ds !== diffInLabel) {
console.log('WrongDebitCalc')
console.log({
newBalance: balance - Math.abs(diff),
diff: Math.abs(diff),
ds,
date,
label,
debit,
credit,
balance
})
}
debit = Math.abs(diff)
label = label.substring(0, label.length - ds.length)
}
p3.push({
date,
label,
debit,
credit,
balance
})
previousBalance = balance
}
return {
results: p3,
finalBalance: previousBalance
}
}
function processTable(t: string, initialBalance: Cents = 0) {
const { openingBalance, firstPassResults } = getTabletLines(t)
const squashedLines = squashLines(firstPassResults)
return generateRecords(squashedLines, openingBalance || initialBalance)
}
function parseBalance(balance: string): Cents {
if (balance.includes('CR')) {
return moneyToCents(balance.split('CR')[0])
} if (balance.includes('DR')) {
return -Math.abs(moneyToCents(balance.split('DR')[0]))
}
throw 'No Balance'
}
function moneyToCents(money: string): Cents {
return money ? parseInt(money.replace(/,/g, '').replace(/\./g, '')) : 0
}
function centsToMoney(input: Cents): string {
const negative = input.toString().startsWith('-') ? '-' : ''
const iS = input.toString().replace('-', '')
if (iS.length === 1) {
return negative + '0.0'+ iS
}
if (iS.length === 2) {
return negative + '0.'+ iS
}
const dollars = iS.substring(0, iS.length - 2)
const cents = iS.substring(iS.length - 2, iS.length)
const localDollars = parseInt(dollars).toLocaleString()
return localDollars + '.' + cents
}
@terrynier
Copy link

You are a legend mate!

@alshdavid
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment