Created
October 9, 2017 08:52
-
-
Save wynemo/c66ce792c0ca11da9690f4829761e959 to your computer and use it in GitHub Desktop.
计算某目录下电子发票总金额
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# coding: utf-8
# Requires the `pdftotext` command from poppler-utils, e.g.: sudo yum install poppler-utils
import os | |
import re | |
import subprocess | |
from decimal import * | |
def scrape_text(src):
    """Return the text of the PDF file *src* as produced by ``pdftotext``.

    The command run is ``pdftotext <src> - -q``: the ``-`` output name sends
    the extracted text to stdout and ``-q`` silences pdftotext's own
    messages.  The captured stdout is returned unmodified.

    Raises ``subprocess.CalledProcessError`` if pdftotext exits non-zero.
    """
    command = ['pdftotext', src, '-', '-q']
    extracted = subprocess.check_output(command)
    return extracted
# "发票号码" (invoice number) label, an ASCII or full-width (U+FF1A) colon,
# then the invoice number digits.
_INVOICE_NUMBER_RE = re.compile(
    u'发票号码\\s*[:\uff1a]\\s*(?P<number>\\d+)')

# "(小写)" (amount in figures) label with ASCII or full-width parentheses
# (U+FF08 / U+FF09), an optional yen sign (U+00A5 or full-width U+FFE5),
# then the decimal amount.  Named groups keep the amount lookup independent
# of how many other groups the pattern contains.
_AMOUNT_RE = re.compile(
    u'[(\uff08]小写[)\uff09]\\s*[\u00a5\uffe5]?\\s*(?P<amount>\\d+[.]\\d+)')


def main():
    """Sum the amounts of all electronic-invoice PDFs in the current directory.

    For every regular file in the working directory whose name ends with
    ``.pdf`` (case-insensitive), the text is extracted with ``scrape_text``
    and searched for the invoice number and the amount in figures.  Each
    invoice number is counted once: a file repeating an already seen number
    is reported and skipped.  Files whose text cannot be extracted, or in
    which the number or the amount cannot be found, are reported and skipped
    as well.  The grand total is printed at the end, after which the function
    waits for Enter so a console window opened just for the script stays
    visible.

    The body runs unchanged under Python 2 and Python 3.
    """
    total = Decimal(0)
    seen_numbers = set()

    # A unicode path makes Python 2 return unicode file names as well, so
    # they can be mixed with the decoded PDF text when building messages.
    for src in os.listdir(u'.'):
        if not (os.path.isfile(src) and src.lower().endswith('.pdf')):
            continue

        try:
            text = scrape_text(src)
        except subprocess.CalledProcessError:
            # One unreadable PDF must not abort the whole run.
            print('%s error run pdftotext --------------' % src)
            continue

        # check_output returns bytes; pdftotext emits UTF-8 by default.
        if isinstance(text, bytes):
            text = text.decode('utf-8', 'replace')

        number_match = _INVOICE_NUMBER_RE.search(text)
        if number_match is None:
            print('%s error get tax num --------------' % src)
            continue
        num = number_match.group('number')
        if num in seen_numbers:
            print('%s already exists' % src)
            continue
        seen_numbers.add(num)

        amount_match = _AMOUNT_RE.search(text)
        if amount_match is None:
            print('%s error get tax value --------------' % src)
            continue
        amount = Decimal(amount_match.group('amount'))

        print('%s %s %s' % (src, num, amount))
        total += amount

    print('total is %s' % total)

    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    wait_for_enter('plz input enter to close this')
# Run the invoice summation only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment