danielepolencic/barclays_pdf_extraction.md

## barclays_pdf_extraction.md

      
    Raw
  

              barclays_pdf_extraction.md
            
          
    Barclays PDF Extraction


Download Tabula and import all the PDFs you want to convert.
Select the table(s) you want to export without paying attention to header and/or footer.
Run each exported CSV through the following script:
const parse = require('csv-parse')
const read = require('fs').readFileSync;

parse(read(process.argv[2]).toString(), {delimiter: ','}, (err, data) => {
  const output = data.map((row) => {
    return {
      date: row[0],
      description: row[1],
      moneyIn: row[2],
      moneyOut: (row.length === 4) ? '' : row[3]
    };
  })
  .reduce((acc, currentRow) => {
    'use strict';
    let lastRow = acc[acc.length - 1];

    if (!lastRow) {
      acc.push(currentRow);
      return acc;
    }

    if (currentRow.date === '' && currentRow.moneyIn === '' && currentRow.moneyOut === '') {
      lastRow.description += currentRow.description;
    } else {
      if (currentRow.date === '') currentRow.date = lastRow.date;
      acc.push(currentRow);
    }

    return acc;
  }, [])
  .map((row) => {
    return `"${row.date}","${row.description}","${row.moneyIn}","${row.moneyOut}"`;
  })
  .join('\n')

  console.log(output);
})

Import the resulting files into Google Sheets.