Skip to content

Instantly share code, notes, and snippets.

@wynemo
Created October 9, 2017 08:52
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save wynemo/c66ce792c0ca11da9690f4829761e959 to your computer and use it in GitHub Desktop.
Save wynemo/c66ce792c0ca11da9690f4829761e959 to your computer and use it in GitHub Desktop.
计算某目录下电子发票总金额
#!/usr/bin/env python
#coding:utf-8
# Requires the `pdftotext` command from poppler-utils: sudo yum install poppler-utils
import os
import re
import subprocess
from decimal import *
def scrape_text(src):
    """Return the raw text that the ``pdftotext`` tool extracts from *src*.

    ``pdftotext`` is invoked with ``-`` as the output file so the text goes
    to stdout, which is captured and returned unchanged, and with ``-q`` to
    keep the tool quiet.

    Raises:
        subprocess.CalledProcessError: if ``pdftotext`` exits non-zero.
    """
    command = ['pdftotext', src, '-', '-q']
    return subprocess.check_output(command)
# The patterns are unicode so they can be matched against decoded pdftotext
# output on both Python 2 and Python 3.  Full-width punctuation is written
# with escapes so each variant stays distinct from its ASCII twin:
# U+FF1A full-width colon, U+FF08 / U+FF09 full-width parentheses,
# U+00A5 yen sign, U+FFE5 full-width yen sign.
_INVOICE_NUMBER_RE = re.compile(
    u'发票号码\\s*[:\uff1a]\\s*([0-9]+)', re.UNICODE)
_INVOICE_AMOUNT_RE = re.compile(
    u'[(\uff08]小写[)\uff09]\\s*[\u00a5\uffe5]?\\s*([0-9]+[.][0-9]+)',
    re.UNICODE)


def _find_invoice_number(text):
    """Return the invoice number found in *text* as a str, or None."""
    found = _INVOICE_NUMBER_RE.search(text)
    # The pattern only admits ASCII digits, so str() is a safe narrowing
    # from unicode on Python 2 and a no-op on Python 3.
    return str(found.group(1)) if found else None


def _find_invoice_amount(text):
    """Return the amount matched by _INVOICE_AMOUNT_RE as a Decimal, or None."""
    found = _INVOICE_AMOUNT_RE.search(text)
    return Decimal(str(found.group(1))) if found else None


def main():
    """Print every PDF invoice in the current directory and their total.

    Each ``.pdf`` file in the working directory is converted to text with
    ``scrape_text`` and its invoice number and amount are parsed from that
    text.  A file is reported and skipped when its text cannot be
    extracted, when its number or amount cannot be found, or when its
    invoice number was already seen.  After the loop the total is printed
    and the function waits for Enter before returning.
    """
    total = Decimal(0)
    seen_numbers = set()
    for src in os.listdir('.'):
        # Compare case-insensitively so "INVOICE.PDF" is not silently skipped.
        if not (os.path.isfile(src) and src.lower().endswith('.pdf')):
            continue
        try:
            raw = scrape_text(src)
        except subprocess.CalledProcessError:
            # One unreadable PDF should not abort the whole tally.
            print('%s error extract text --------------' % src)
            continue
        # check_output returns bytes; decode once so the unicode patterns
        # apply (pdftotext's documented default output encoding is UTF-8).
        if isinstance(raw, bytes):
            text = raw.decode('utf-8', 'replace')
        else:
            text = raw

        number = _find_invoice_number(text)
        if number is None:
            print('%s error get tax num --------------' % src)
            continue
        if number in seen_numbers:
            print('%s already exists' % src)
            continue
        # Recorded before the amount is parsed, so a later copy of the same
        # invoice is reported as a duplicate even if this one fails below.
        seen_numbers.add(number)

        amount = _find_invoice_amount(text)
        if amount is None:
            print('%s error get tax value --------------' % src)
            continue
        print('%s %s %s' % (src, number, amount))
        total += amount

    print('total is %s' % total)
    # raw_input exists only on Python 2; input is its Python 3 equivalent.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    try:
        wait_for_enter('plz input enter to close this')
    except EOFError:
        # Non-interactive run (stdin closed): there is nothing to wait for.
        pass
# Run the tally only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment