Skip to content

Instantly share code, notes, and snippets.

@h4ck3rm1k3
Created June 30, 2018 09:56
Show Gist options
  • Save h4ck3rm1k3/a9dfd904c8fb217116cb17a9e5dff1c5 to your computer and use it in GitHub Desktop.
Save h4ck3rm1k3/a9dfd904c8fb217116cb17a9e5dff1c5 to your computer and use it in GitHub Desktop.
Parse Foreclosure Data from softcode
# for parsing the output of https://www.tylertech.com/solutions-products/softcode-product-suite
# parses the foreclosure PDF output and produces a CSV file with the location, amount owed, lender and auction date
# first run pdf2html and then this script
import csv
import pprint
import re
import sys

import bs4
# Load the whole HTML file into memory as a list of lines.  The parsing code
# looks ahead from a label line to the lines that follow it, so the lines
# must be addressable by index.  The context manager closes the file even if
# reading fails.
with open("sheriff_foreclosuresales_ls.html") as f:
    lines = f.readlines()
# Parsed sale records, keyed by the occurrence counter (starting at 1) that
# the label-scanning loop assigns to each record.
objects = {}

# Markup fragments that are stripped from every extracted value.
junk = [
    '<br/>',
    '<br>',
    '</br>',
    '\n',
    '<b>',
    '</b>',
    '&#160;',
]


def clean(value):
    """Return value with every fragment listed in junk removed.

    Fragments are removed one after another, in list order, so each removal
    operates on the result of the previous one.
    """
    for fragment in junk:
        value = value.replace(fragment, '')
    return value
# Compiled once instead of on every iteration.  A value made up only of
# digits, '+', ',' and '.' is treated as a candidate amount owed rather than
# a plaintiff name.
_AMOUNT_RE = re.compile(r'[\d+,\.]+$')


def _cleaned_line(index):
    """Return clean(lines[index]), or '' when index is past the last line."""
    if index < len(lines):
        return clean(lines[index])
    return ''


# One scan of the file per label.  The n-th line containing a label is
# attached to record n, so records are matched up purely by occurrence order.
for key in ('Location', 'Current Sale', 'Plaintiff'):
    n = 0
    # NOTE(review): the scan starts at index 1, so the first line of the file
    # is never checked for a label -- confirm this is intended.
    for x in range(1, len(lines)):
        if key not in lines[x]:
            continue
        n = n + 1
        # The value for a label sits on the line that follows it; a label on
        # the very last line yields an empty value instead of an IndexError.
        value = _cleaned_line(x + 1)
        # Same handling whether or not record n already exists, so the
        # amount/plaintiff split below is never skipped.
        record = objects.setdefault(n, {})
        nkey = key
        if key == 'Plaintiff' and _AMOUNT_RE.match(value):
            try:
                amount = float(value.replace(',', ''))
            except ValueError:
                # Matched the character class but is not a number (for
                # example "," or "1.2.3"); keep it as the plaintiff text.
                pass
            else:
                # The amount owed comes first; just grab the next line for
                # the plaintiff name.
                nkey = 'Owed'
                value = amount
                record['Plaintiff'] = _cleaned_line(x + 2)
        record[nkey] = value
# The csv module does its own line-ending handling, so the output file must
# be opened without newline translation: binary mode on Python 2,
# newline='' on Python 3.
if sys.version_info[0] < 3:
    csvfile = open('results.csv', 'wb')
else:
    csvfile = open('results.csv', 'w', newline='')

with csvfile:
    fieldnames = ['Location', 'Owed', 'Current Sale', 'Plaintiff']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    # Write rows in ascending record number rather than in dict iteration
    # order; fields missing from a record are left blank by DictWriter.
    for x in sorted(objects):
        writer.writerow(objects[x])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment