Skip to content

Instantly share code, notes, and snippets.

@prashant1k99
Last active January 30, 2023 11:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save prashant1k99/db768c73a752428157f2de4e772e1c4a to your computer and use it in GitHub Desktop.
Save prashant1k99/db768c73a752428157f2de4e772e1c4a to your computer and use it in GitHub Desktop.
Reads all PDFs from a directory and generates a CSV for income tax
import fs from 'fs'
import PDFParser from "pdf2json";
import { parseAsync } from 'json2csv'
// Collects one { amount, bill, date } record per processed PDF.
const BillsTotal = []

// Positional CLI arguments after the node binary and the script file:
// the folder holding the PDFs and the portal name.
const [path, portal] = process.argv.slice(2)

const isZomato = portal === 'zomato'

// URI-encoded label texts to look for on the first page of an invoice; the
// value of interest is taken from the text run that follows each label.
// Any portal other than 'zomato' gets the "Invoice Total" / "Date of Invoice:" labels.
const config = {
  amount: isZomato ? "Total" : "Invoice%20Total",
  billNo: "Order%20ID%3A",
  date: isZomato ? "Order%20Time%3A" : "Date%20of%20Invoice%3A",
}
/**
 * Finds the text run whose raw (URI-encoded) text equals `label` and returns
 * the decoded text of the run immediately after it.
 *
 * @param {Array<{R: Array<{T: string}>}>} texts - text runs of one PDF page.
 * @param {string} label - URI-encoded label text to search for.
 * @returns {string|undefined} the decoded value, or `undefined` when the label
 *   is not on the page or nothing follows it.
 */
const readValueAfterLabel = (texts, label) => {
  const hasText = el => Boolean(el && el.R && el.R[0])
  const labelIndex = texts.findIndex(el => hasText(el) && el.R[0].T === label)
  // Without this guard a missing label (-1) would turn into index 0 and the
  // first text run on the page would be reported as the value.
  if (labelIndex === -1) return undefined
  const valueEl = texts[labelIndex + 1]
  return hasText(valueEl) ? decodeURIComponent(valueEl.R[0].T) : undefined
}

/**
 * Parses `<path>/<file>` and extracts amount, bill number and date from the
 * first page, using the label texts in `config`.
 *
 * @param {string} file - file name inside the folder given on the command line.
 * @returns {Promise<{amount: (string|undefined), bill: (string|undefined), date: (string|undefined)}>}
 *   A field is `undefined` when its label could not be found; the CSV field
 *   definitions in this file declare a 'NULL' default, presumably for such gaps.
 *   Rejects when the parser reports an error, the PDF has no pages, or a value
 *   cannot be decoded.
 */
const analyzePdf = (file) => new Promise((resolve, reject) => {
  const pdfParser = new PDFParser()
  pdfParser.on("pdfParser_dataError", errData => reject(errData.parserError))
  pdfParser.on("pdfParser_dataReady", pdfData => {
    // A throw inside this handler would otherwise bypass the promise and
    // leave it pending forever, so convert it into a rejection.
    try {
      const firstPage = pdfData.Pages[0]
      if (!firstPage) {
        throw new Error(`${file}: PDF has no pages`)
      }
      const texts = firstPage.Texts
      resolve({
        amount: readValueAfterLabel(texts, config.amount),
        bill: readValueAfterLabel(texts, config.billNo),
        date: readValueAfterLabel(texts, config.date),
      })
    } catch (err) {
      reject(err)
    }
  })
  pdfParser.loadPDF(`${path}/${file}`)
})
/**
 * Converts the collected bill records to CSV and writes the result to
 * `./<portal>.csv`.
 *
 * The returned promise settles only once the file has been written (or the
 * attempt has failed), so a caller that awaits it can rely on the output being
 * on disk. Failures are logged with console.error and not re-thrown.
 *
 * @param {Array<{date?: string, bill?: string, amount?: string, name?: string}>} data
 *   records to serialise.
 * @returns {Promise<void>}
 */
const convertJSON2CSV = async (data) => {
  // Column definitions handed to json2csv; `default` is the cell text used
  // when a record has no value for that field.
  const fields = [
    { label: 'Date', value: 'date', default: 'NULL' },
    { label: 'Bill No. & Date', value: 'bill', default: 'NULL' },
    { label: 'Entertainment Amount', value: 'amount', default: 'NULL' },
    // NOTE(review): the establishment defaults to 'Swiggy' even when the
    // portal is 'zomato' — confirm whether that is intended.
    { label: 'Name of Establishment', value: 'name', default: 'Swiggy' },
  ]
  try {
    const csv = await parseAsync(data, { fields })
    await fs.promises.writeFile(`./${portal}.csv`, csv)
  } catch (err) {
    console.error(err)
  }
}
/**
 * Script entry point: lists the folder given on the command line, extracts the
 * invoice fields from every PDF in it (one at a time) into `BillsTotal`, and
 * then writes the CSV.
 *
 * A file that cannot be analysed is reported on stderr and skipped, and the
 * process exit code is set to 1 so the incomplete run is detectable.
 *
 * @returns {Promise<void>}
 */
const run = async () => {
  let files
  try {
    files = await fs.promises.readdir(path)
  } catch (err) {
    console.error('Unable to scan directory: ' + err)
    process.exitCode = 1
    return
  }

  // Only PDFs can be handed to the parser; other directory entries are ignored.
  const pdfFiles = files.filter(name => name.toLowerCase().endsWith('.pdf'))

  for (const file of pdfFiles) {
    console.log(file)
    try {
      BillsTotal.push(await analyzePdf(file))
    } catch (err) {
      console.error(`Skipping ${file}: could not extract invoice data:`, err)
      process.exitCode = 1
    }
  }

  await convertJSON2CSV(BillsTotal)
  console.log("Done")
}

run().catch(err => {
  console.error(err)
  process.exitCode = 1
})
// Usage: node <file>.js <folderPath, e.g. ../Swiggy> <swiggy | zomato>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment