Skip to content

Instantly share code, notes, and snippets.

@iamsk
Created January 19, 2019 06:59
Show Gist options
  • Save iamsk/16bee6e2bae7e1520b6f9930dc64f928 to your computer and use it in GitHub Desktop.
Save iamsk/16bee6e2bae7e1520b6f9930dc64f928 to your computer and use it in GitHub Desktop.
#!-*- coding:utf-8 -*-
import re
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1
from pdfrw import PdfReader
class PDFSignedInfo(object):
    """Collect the signer names stored in a PDF's signed form fields.

    Two interchangeable extractors are provided: ``use_pdfminer`` (pdfminer)
    and ``get_main_body`` (pdfrw).  Both walk the AcroForm fields, take the
    ``Contents`` entry of each field value and hand it to
    ``get_main_body_name``, which pulls a company or personal name out of it.

    NOTE(review): the ``@`` / ``&`` delimiters look specific to one signing
    provider's certificate subject format -- confirm before reusing elsewhere.
    """

    # "\u516c\u53f8" is the Chinese word for "company".  Each pattern is kept
    # in a text and a UTF-8 bytes flavour so that both ``str`` and ``bytes``
    # input can be searched without a str/bytes mismatch on Python 3.
    _COMPANY_TEXT = re.compile(u'[@&]([^@&]*\u516c\u53f8)')
    _PERSON_TEXT = re.compile(u'@([^@&]*)@')
    _COMPANY_BYTES = re.compile(
        u'[@&]([^@&]*\u516c\u53f8)'.encode('utf-8'))
    _PERSON_BYTES = re.compile(b'@([^@&]*)@')

    def use_pdfminer(self, filename):
        """Return the signer name of every signed field, read with pdfminer.

        :param filename: path of the PDF file to inspect.
        :returns: list with one entry per field that carries a signature
            ``Contents`` value; an entry is ``None`` when no name could be
            extracted.  The list is empty when the document has no AcroForm.
        """
        signed_main_body_list = []
        # pdfminer resolves objects lazily from the open file, so all the
        # resolving has to happen before the ``with`` block closes it.
        with open(filename, 'rb') as fp:
            doc = PDFDocument(PDFParser(fp))
            acro_form = resolve1(doc.catalog.get('AcroForm'))
            if not isinstance(acro_form, dict):
                return signed_main_body_list
            for ref in resolve1(acro_form.get('Fields')) or []:
                field = resolve1(ref)
                if not isinstance(field, dict):
                    continue
                # Fields without a value, or whose value is not a
                # dictionary, carry no signature: skip them.
                signature = resolve1(field.get('V'))
                if not isinstance(signature, dict):
                    continue
                contents = resolve1(signature.get('Contents'))
                if contents is None:
                    continue
                signed_main_body_list.append(
                    self.get_main_body_name(contents))
        return signed_main_body_list

    def get_main_body(self, filename):
        """Return the signer name of every signed field, read with pdfrw.

        :param filename: path of the PDF file to inspect.
        :returns: list with one entry per field that carries a signature
            ``Contents`` value; an entry is ``None`` when no name could be
            extracted.  The list is empty when the document has no AcroForm.
        """
        reader = PdfReader(filename)
        acro_form = reader.Root.AcroForm
        fields = (acro_form.Fields if acro_form else None) or []
        signed_main_body_list = []
        for field in fields:
            # ``getattr`` with a default also covers a missing value (None)
            # and plain string values, neither of which has ``Contents``.
            contents = getattr(field.V, 'Contents', None)
            if contents is None:
                continue
            signed_main_body_list.append(
                self.get_main_body_name(contents.to_bytes()))
        return signed_main_body_list

    @classmethod
    def get_main_body_name(cls, data):
        """Extract the signer name from raw signature data.

        A company name (text following ``@`` or ``&`` and ending with the
        Chinese word for "company") is preferred; otherwise the text
        enclosed between two ``@`` characters is returned.

        :param data: signature data as ``bytes`` or text.
        :returns: the matched name, of the same type as ``data`` (UTF-8
            encoded for bytes input), or ``None`` when ``data`` is empty or
            contains neither pattern.
        """
        if not data:
            return None
        if isinstance(data, (bytes, bytearray)):
            patterns = (cls._COMPANY_BYTES, cls._PERSON_BYTES)
        else:
            patterns = (cls._COMPANY_TEXT, cls._PERSON_TEXT)
        for pattern in patterns:
            match = pattern.search(data)
            if match:
                return match.group(1)
        return None
if __name__ == '__main__':
    # Ad-hoc demo: print the signer names found in a sample PDF using the
    # pdfrw-based extractor.  ``p.use_pdfminer('a.pdf')`` is the
    # pdfminer-based alternative.
    p = PDFSignedInfo()
    # Single-argument parenthesized print works on both Python 2 and 3.
    print(p.get_main_body('c.pdf'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment