Created
June 30, 2018 09:56
-
-
Save h4ck3rm1k3/a9dfd904c8fb217116cb17a9e5dff1c5 to your computer and use it in GitHub Desktop.
Parse Foreclosure Data from softcode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# for parsing the output of https://www.tylertech.com/solutions-products/softcode-product-suite
# parses the foreclosure PDF output and produces a CSV file with the location, amount owed, lender and auction date
# first run pdf2html and then this script
import bs4 | |
import pprint | |
import re | |
import csv | |
# Load the converted HTML page into memory as a list of raw lines (trailing
# newlines kept). The scan further down reads lines[x + 1] and lines[x + 2],
# so it needs random access rather than a single streaming pass.
# The `with` block guarantees the file handle is closed once reading is done.
with open("sheriff_foreclosuresales_ls.html") as f:
    lines = f.readlines()

# Parsed records, keyed by 1-based occurrence number; each value is a dict
# mapping a column name to the value extracted for that record.
objects = {}
# Fragments that are deleted from every extracted value: line-break and bold
# tags, newlines, and the final one-character entry.
# NOTE(review): the last entry appears here as a single space, which means
# interior spaces are stripped from values too; it may originally have been an
# HTML space entity -- confirm against the real input.
junk = [
    '<br/>',
    '<br>',
    '</br>',
    '\n',
    '<b>',
    '</b>',
    ' ',
]


def clean(value):
    """Return *value* with every fragment listed in ``junk`` removed.

    Fragments are removed one after another in list order, each pass
    deleting all occurrences of that fragment from the running result.
    """
    cleaned = value
    for fragment in junk:
        cleaned = cleaned.replace(fragment, '')
    return cleaned
# Matches a value that starts with a run of digits, '+', ',' and '.' reaching
# the end of the string (for example "12,345.67"). Written as a raw string so
# the backslashes reach the regex engine untouched, and compiled once instead
# of on every matching line.
amount_re = re.compile(r'[\d+,\.]+$')

# One pass over the page per label. The n-th line that mentions a label is
# treated as belonging to record n, and the label's value is taken from the
# line that follows it.
for key in ('Location', 'Current Sale', 'Plaintiff'):
    n = 0
    # range() works on both Python 2 and 3 (xrange() exists only on 2).
    # Index 0 is deliberately left out, exactly as in the original scan.
    for x in range(1, len(lines)):
        if key not in lines[x]:
            continue
        if x + 1 >= len(lines):
            # The label sits on the very last line, so no value line follows.
            continue
        n = n + 1
        value = clean(lines[x + 1])
        # Create the record on first sight; later labels add to the same dict.
        # Going through one code path means a record first seen via
        # 'Plaintiff' still gets the amount/lender split below.
        record = objects.setdefault(n, {})
        nkey = key
        if key == 'Plaintiff' and amount_re.match(value):
            # The line after the label holds the amount owed rather than the
            # lender name: store it as a number under 'Owed' ...
            nkey = 'Owed'
            value = float(value.replace(',', ''))
            # ... and just grab the next line as the plaintiff, if it exists.
            if x + 2 < len(lines):
                record['Plaintiff'] = clean(lines[x + 2])
        record[nkey] = value
# Write one CSV row per parsed record to results.csv, preceded by a header row.
# NOTE(review): the Python 3 csv documentation recommends opening the file
# with newline=''; it is not added here because Python 2's open() does not
# accept that keyword -- confirm which interpreter this script targets.
with open('results.csv', 'w') as csvfile:
    fieldnames = ['Location', 'Owed', 'Current Sale', 'Plaintiff']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    # Iterate the record numbers in sorted order so rows are emitted in the
    # order the records were numbered, independent of dict iteration order.
    for x in sorted(objects):
        writer.writerow(objects[x])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment