- Download Tabula and import all the PDFs you want to convert.
- Select the table(s) you want to export; you don't need to worry about excluding the header and/or footer.
- Run the exported
csv
s through the following script:const parse = require('csv-parse') const read = require('fs').readFileSync; parse(read(process.argv[2]).toString(), {delimiter: ','}, (err, data) => { const output = data.map((row) => { return { date: row[0], description: row[1], moneyIn: row[2], moneyOut: (row.length === 4) ? '' : row[3] }; }) .reduce((acc, currentRow) => { 'use strict'; let lastRow = acc[acc.length - 1]; if (!lastRow) { acc.push(currentRow); return acc; } if (currentRow.date === '' && currentRow.moneyIn === '' && currentRow.moneyOut === '') { lastRow.description += currentRow.description; } else { if (currentRow.date === '') currentRow.date = lastRow.date; acc.push(currentRow); } return acc; }, []) .map((row) => { return `"${row.date}","${row.description}","${row.moneyIn}","${row.moneyOut}"`; }) .join('\n') console.log(output); })
- Import the resulting files into a Google Spreadsheet.
Created
October 3, 2015 10:24
-
-
Save danielepolencic/78d29fa60ea7b8bbfbd4 to your computer and use it in GitHub Desktop.
Barclays PDF extraction
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment