Skip to content

Instantly share code, notes, and snippets.

@B44ken
Last active April 26, 2020 02:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save B44ken/037f802490129f8fb7c2f0c8c326f8ce to your computer and use it in GitHub Desktop.
Save B44ken/037f802490129f8fb7c2f0c8c326f8ce to your computer and use it in GitHub Desktop.
bountify.co/xt
import sys
from tika import parser
def get_content(file):  # parse the file
    """Extract the text of ``file`` with Tika and parse every company section.

    The extracted text is cut on a separator made of 105 consecutive
    hyphens. Whatever precedes the first separator is discarded; each
    remaining chunk is handed to ``parse_group``.

    Args:
        file: Path of the document passed to ``parser.from_file``.

    Returns:
        A list with one ``parse_group`` result per chunk, in document
        order. The list is empty when no text was extracted or when the
        text contains no separator.
    """
    content = parser.from_file(file)['content']
    if not content:
        # Tika reports ``None`` as content when it extracts no text; calling
        # ``.split`` on it would raise AttributeError, so treat it like an
        # empty document (an empty string already produced an empty list).
        return []
    groups = content.split("-" * 105)[1:]
    return [parse_group(group) for group in groups]
def parse_group(group):  # parse all proposals from one company
    """Parse one company's section into header fields plus its proposals.

    Everything from the first "SIGNATURES" marker onward is ignored. The
    header fields are read positionally from the newline-separated text:
    the issuer is line index 2 with its first 9 characters removed and the
    rest stripped; the ticker and cusip are the space-separated tokens 2
    and 4 of line index 3; the meeting date is token 3 of line index 4.
    Lines from index 7 onward are passed to ``parse_proposal`` and kept
    unless it returns ``None``.

    Args:
        group: Text of a single section.

    Returns:
        A dict with the keys 'issuer', 'ticker', 'cusip', 'meetingDate'
        and 'proposals' (a list of ``parse_proposal`` results).

    Raises:
        IndexError: If the section has fewer lines or tokens than the
            positional lookups require.
    """
    body, _, _ = group.partition("SIGNATURES")
    rows = body.split("\n")

    issuer = rows[2][9:].strip()
    ticker_fields = rows[3].split(" ")
    ticker = ticker_fields[2]
    cusip = ticker_fields[4]
    meeting_date = rows[4].split(" ")[3]

    parsed_rows = (parse_proposal(row) for row in rows[7:])
    return {
        'issuer': issuer,
        'ticker': ticker,
        'cusip': cusip,
        'meetingDate': meeting_date,
        'proposals': [entry for entry in parsed_rows if entry is not None],
    }
def parse_proposal(proposal):  # parse an individual proposal
    """Split one proposal line into its fields.

    The line is split on single spaces. Lines with fewer than five tokens
    are not treated as proposals. Counting from the end, tokens -5 to -2
    are the proposer, the for/against-management flag, the vote cast and
    the voted flag; the final token is not used.
    NOTE(review): the 'proposal' text is every token except the last four,
    so it still contains the 'proposedBy' token -- confirm this is intended.

    Args:
        proposal: A single line of text.

    Returns:
        A dict with the keys 'voted', 'voteCast', 'forAgainstMgmt',
        'proposal' and 'proposedBy', or ``None`` when the line has fewer
        than five space-separated tokens.
    """
    tokens = proposal.split(" ")
    if len(tokens) < 5:
        return None

    # Tokens -5..-2 in order; the last token (-1) is deliberately skipped.
    proposed_by, for_against, vote_cast, voted = tokens[-5:-1]
    return {
        'voted': voted,
        'voteCast': vote_cast,
        'forAgainstMgmt': for_against,
        'proposal': " ".join(tokens[:-4]),
        'proposedBy': proposed_by,
    }
# Script entry: when a command-line argument is given, treat the first one as
# the path of the file to parse and print the parsed result.
if sys.argv[1:]:
    result = get_content(sys.argv[1])
    print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment