Last active
January 30, 2023 11:22
-
-
Save prashant1k99/db768c73a752428157f2de4e772e1c4a to your computer and use it in GitHub Desktop.
Reads all PDFs from a directory and generates a CSV for income tax
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fs from 'fs';
import { join } from 'path';

import { parseAsync } from 'json2csv';
import PDFParser from "pdf2json";
// Accumulates one { amount, bill, date } record per parsed PDF.
const BillsTotal = [];

// CLI arguments: argv[2] is the folder containing the bills, argv[3] is the
// portal name. The first two argv entries (node binary and script path) are
// skipped with elisions instead of being bound to throwaway names.
const [, , path, portal] = process.argv;

// Label texts searched for on the first page of each bill. They are kept in
// URI-encoded form because they are compared against the raw, still-encoded
// text runs of the parsed PDF. Any portal other than 'zomato' gets the
// invoice-style labels.
const config = {
  amount: portal === 'zomato' ? "Total" : "Invoice%20Total",
  billNo: "Order%20ID%3A",
  date: portal === 'zomato' ? "Order%20Time%3A" : "Date%20of%20Invoice%3A",
};
/**
 * Returns the decoded text of the element that directly follows the element
 * whose first text run equals `label`.
 *
 * @param {Array<object>} texts - Text elements of a page; each is read as `el.R[0].T`.
 * @param {string} label - URI-encoded label text to search for.
 * @returns {string} The URI-decoded text following the label.
 * @throws {Error} When the label is absent or is the last element on the page.
 */
const readValueAfterLabel = (texts, label) => {
  const labelIndex = texts.findIndex((el) => el.R[0].T === label);
  // A missing label (-1) must not fall through to index 0, which would
  // silently report the first text on the page as the value.
  const valueElement = labelIndex === -1 ? undefined : texts[labelIndex + 1];
  if (valueElement === undefined) {
    throw new Error(`No value found after label "${label}"`);
  }
  return decodeURIComponent(valueElement.R[0].T);
};

/**
 * Parses one PDF bill from the bills folder and extracts its amount, bill
 * number and date from the first page.
 *
 * @param {string} file - File name relative to the folder given on the command line.
 * @returns {Promise<{amount: string, bill: string, date: string}>} Decoded field values.
 *   Rejects with the parser's error when the PDF cannot be read, or with an
 *   Error naming the file when an expected label/value pair is missing.
 */
const analyzePdf = (file) => new Promise((resolve, reject) => {
  const pdfParser = new PDFParser();
  pdfParser.on("pdfParser_dataError", (errData) => reject(errData.parserError));
  pdfParser.on("pdfParser_dataReady", (pdfData) => {
    // An exception thrown inside an event handler would never settle the
    // promise, so extraction failures are converted into a rejection.
    try {
      const texts = pdfData.Pages[0].Texts;
      resolve({
        amount: readValueAfterLabel(texts, config.amount),
        bill: readValueAfterLabel(texts, config.billNo),
        date: readValueAfterLabel(texts, config.date),
      });
    } catch (err) {
      reject(new Error(`Could not extract bill details from ${file}: ${err.message}`, { cause: err }));
    }
  });
  pdfParser.loadPDF(join(path, file));
});
/**
 * Converts the collected bill records to CSV and writes the result to
 * `./<portal>.csv` relative to the current working directory.
 *
 * The returned promise settles only after the file has been written (or the
 * attempt has failed), so awaiting it really waits for the output. Failures
 * are reported with `console.error` and are not rethrown.
 *
 * @param {Array<object>} data - Records keyed by `date`, `bill`, `amount` and optionally `name`.
 * @returns {Promise<void>}
 */
const convertJSON2CSV = async (data) => {
  // Column definitions: `label` is the CSV header, `value` the record key,
  // `default` the fallback passed to json2csv for that column.
  // NOTE(review): the 'Name of Establishment' fallback is 'Swiggy' even when
  // the portal is 'zomato' - confirm whether it should follow `portal`.
  const fields = [
    { label: 'Date', value: 'date', default: 'NULL' },
    { label: 'Bill No. & Date', value: 'bill', default: 'NULL' },
    { label: 'Entertainment Amount', value: 'amount', default: 'NULL' },
    { label: 'Name of Establishment', value: 'name', default: 'Swiggy' },
  ];

  try {
    const csv = await parseAsync(data, { fields });
    await fs.promises.writeFile(`./${portal}.csv`, csv);
  } catch (err) {
    console.error(err);
  }
};
// Entry point: list the bills folder, parse every PDF in it and write the
// combined records out as CSV.
fs.readdir(path, async (err, files) => {
  if (err) {
    console.error('Unable to scan directory: ' + err);
    process.exitCode = 1;
    return;
  }

  // Only PDF files are handed to the parser; other directory entries would
  // make it fail.
  const pdfFiles = files.filter((name) => name.toLowerCase().endsWith('.pdf'));

  try {
    // Parsed one at a time on purpose: keeps the log and the CSV rows in
    // directory-listing order.
    for (const file of pdfFiles) {
      console.log(file);
      BillsTotal.push(await analyzePdf(file));
    }
    await convertJSON2CSV(BillsTotal);
    console.log("Done");
  } catch (processingError) {
    // Without this, a rejected parse would surface as an unhandled rejection
    // from inside the callback.
    console.error('Failed to process bills:', processingError);
    process.exitCode = 1;
  }
});
// Usage: node <file>.js <folderPath, e.g. ../Swiggy> <swiggy | zomato>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment