Skip to content

Instantly share code, notes, and snippets.

@Mte90
Last active March 7, 2023 10:14
Show Gist options
  • Save Mte90/a67b870a106eb3243e8024f701f3b987 to your computer and use it in GitHub Desktop.
Save Mte90/a67b870a106eb3243e8024f701f3b987 to your computer and use it in GitHub Desktop.
Calcolo buste paghe da PDF generati da Zucchetti
#!/usr/bin/env python3
from io import StringIO
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser
import sys
import os
# Script overview: read a "CEDOLINI" PDF and a "MOD.F24" PDF, extract a few
# amounts from their text and print a summary.
#
# NOTE(review): the source this was rebuilt from had lost all indentation; the
# nesting of the parsing loops below was reconstructed from the statements
# themselves -- confirm against a known-good run.

CEDOLINI_PREFIX = 'CEDOLINI'
MODF24_PREFIX = 'MOD.F24'
USAGE_MESSAGE = 'Primo parametro Cedolini e poi MOD F24'
NETTO_HEADERS = ('N E T T O DEL M E S E', 'ARROTONDAMENTO')
COMPETENZE_HEADER = 'Imp. INPS Imp. INAIL Imp. IRPEF IRPEF pagata'
SCADENZA_LABEL = 'ON-LINE INTERMED Scad.'


def resolve_input_paths(argv):
    """Return ``(cedolini_path, modf24_path)`` for the given argument vector.

    With two or more arguments they are used verbatim as the two PDF paths.
    With a single argument it is treated as a directory, and the first file
    (in sorted order) whose name starts with ``CEDOLINI`` / ``MOD.F24`` is
    picked for each role.

    Prints the usage message and exits with status 1 when no argument is
    given, the argument is not a directory, or either file is missing.
    """
    if len(argv) >= 3:
        return argv[1], argv[2]

    # Guard before touching argv[1]: the script may be started with no args.
    if len(argv) < 2 or not os.path.isdir(argv[1]):
        print(USAGE_MESSAGE)
        sys.exit(1)

    directory = argv[1]
    # Sorted so the choice is deterministic when several files match.
    names = sorted(os.listdir(directory))
    cedolini = [name for name in names if name.startswith(CEDOLINI_PREFIX)]
    modf24 = [name for name in names if name.startswith(MODF24_PREFIX)]
    if not cedolini or not modf24:
        print(USAGE_MESSAGE)
        sys.exit(1)

    print('File trovati!')
    # os.path.join works whether or not the directory has a trailing separator.
    return (os.path.join(directory, cedolini[0]),
            os.path.join(directory, modf24[0]))


def pdf_to_lines(path):
    """Extract the text of the PDF at *path* and return it as a list of lines."""
    # The converter writes the extracted text into the buffer handed to it,
    # which is why the result is read back from this StringIO afterwards.
    buffer = StringIO()
    with open(path, 'rb') as in_file:
        parser = PDFParser(in_file)
        doc = PDFDocument(parser)
        rsrcmgr = PDFResourceManager()
        device = TextConverter(rsrcmgr, buffer, laparams=LAParams())
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        for page in PDFPage.create_pages(doc):
            interpreter.process_page(page)
    return buffer.getvalue().splitlines()


def to_amount(text, space_as_comma=False):
    """Convert a number written with ``.`` thousands and ``,`` decimals to float.

    When *space_as_comma* is true, a space is treated as the decimal
    separator as well (e.g. ``'1.234 56'`` -> ``1234.56``).

    Raises ValueError if the cleaned text is not a valid number.
    """
    cleaned = text.strip()
    if space_as_comma:
        cleaned = cleaned.replace(' ', ',')
    return float(cleaned.replace('.', '').replace(',', '.'))


def parse_cedolini(lines):
    """Scan the payslip text lines and return ``(netto, competenze)`` lists.

    * ``netto``: after a line equal to one of ``NETTO_HEADERS``, the first
      following line whose first non-blank character is a digit.
    * ``competenze``: the 4th line after a ``T.F.R.`` line and the 3rd line
      after a ``COMPETENZE_HEADER`` line. If that target line is empty the
      value is skipped.

    Raises ValueError if a selected line is not a valid number.
    """
    netto = []
    competenze = []
    awaiting_netto = False
    countdown = -1

    for line in lines:
        if line.strip() == 'T.F.R.':
            # The header line itself consumes one tick below.
            countdown = 5

        if awaiting_netto:
            stripped = line.strip()
            # Checking `stripped` (not `line`) avoids an IndexError on
            # whitespace-only lines.
            if stripped and stripped[0].isdigit():
                netto.append(to_amount(stripped))
                awaiting_netto = False

        if countdown > 0:
            countdown -= 1
            # NOTE(review): `line` is deliberately rebound here, as in the
            # original, so the header checks below see the normalised text
            # while a countdown is running -- confirm this is intended.
            line = line.strip().replace('.', '').replace(',', '.')
            if countdown == 0 and line:
                competenze.append(float(line))
                countdown = -1

        if line in NETTO_HEADERS:
            awaiting_netto = True
        if line.strip() == COMPETENZE_HEADER:
            countdown = 3

    return netto, competenze


def parse_saldo(lines):
    """Scan the F24 text lines and return the balance, or None if not found.

    The balance is the first line that contains a ``.`` and starts with a
    digit, or else the 4th line after a line starting with ``H`` (with the
    space read as the decimal separator). Any ``ON-LINE INTERMED Scad.`` line
    met before the balance is printed as the due date.

    Raises ValueError if a selected line is not a valid number.
    """
    countdown = 0
    for line in lines:
        if '.' in line and line[0].isdigit():
            return to_amount(line, space_as_comma=True)

        if line.startswith('H'):
            # The 'H' line itself consumes one tick below.
            countdown = 5
        if countdown > 0:
            countdown -= 1
            # Blank (including whitespace-only) target lines are skipped
            # instead of crashing in float('').
            if countdown == 0 and line.strip():
                return float(line.strip().replace(' ', '.'))

        if SCADENZA_LABEL in line:
            remainder = line.replace(SCADENZA_LABEL, '')
            # split/join drops the leading separator the label left behind.
            print('Scadenza: ' + '/'.join(remainder.split()))
    return None


def ask_saldo():
    """Prompt the user for the balance and return it as a float."""
    answer = input('Il dato (saldo) non era presente nel PDF inseriscilo manualmente con la virgola: ')
    return to_amount(answer, space_as_comma=True)


def build_report(netto, competenze, saldo):
    """Return the summary as a list of printable lines.

    Uses the first two entries of *netto* and of *competenze*; sums are
    rounded to two decimals so float artefacts do not leak into the output.
    """
    importo_netto = netto[0] + netto[1]
    primo = importo_netto + saldo
    secondo = competenze[0] + competenze[1]
    dolce = primo - secondo
    return [
        'Netto: ' + str(netto),
        'Competenze: ' + str(competenze),
        'Saldo F24: ' + str(saldo),
        'Somma netti e saldo finale: ' + str(round(primo, 2)),
        'Tasse: ' + str(round(dolce, 2)),
        'Importo Netto: ' + str(round(importo_netto, 2)).replace('.', ','),
        'Importo Lordo: ' + str(round(secondo, 2)).replace('.', ','),
    ]


def main(argv=None):
    """Run the whole calculation; *argv* defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv
    cedolinipdf, modf24pdf = resolve_input_paths(argv)

    netto, competenze = parse_cedolini(pdf_to_lines(cedolinipdf))
    # Fail with a readable message rather than an IndexError further down.
    if len(netto) < 2 or len(competenze) < 2:
        print('Impossibile trovare due importi netti e due competenze nel PDF dei cedolini')
        sys.exit(1)

    saldo = parse_saldo(pdf_to_lines(modf24pdf))
    if saldo is None:
        saldo = ask_saldo()

    for row in build_report(netto, competenze, saldo):
        print(row)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment