Skip to content

Instantly share code, notes, and snippets.

@ryanhallcs
Created August 8, 2016 12:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryanhallcs/8bef9a25ac7e5e81b37df297909ed890 to your computer and use it in GitHub Desktop.
Save ryanhallcs/8bef9a25ac7e5e81b37df297909ed890 to your computer and use it in GitHub Desktop.
Portion of a Python script for a Selenium crawler
def search_record_of_deeds_pin(self, rawPin, fileLock):
    """Search the site at ``self.base_url`` for one PIN and save its deeds.

    Clears cookies, loads the search page, types the five dash-separated
    PIN segments into their text boxes and submits the form. Every result
    row whose document type contains "MORTGAGE" or "WARRANTY DEED" is then
    opened by running the row's javascript link, and a ``DeedRecord`` is
    filled in from the detail view. The records are sorted by executed
    date and handed to ``self.outputToCsv``.

    Uses the legacy ``find_element_by_*`` Selenium calls.

    Args:
        rawPin: PIN as a dash-separated string; its first five segments
            are entered in the search form.
        fileLock: Passed through unchanged to ``self.outputToCsv``
            (presumably guards the shared output file -- confirm there).

    Returns:
        None. Results leave this method only through ``self.outputToCsv``.

    Raises:
        IndexError: If ``rawPin`` has fewer than five segments.
        ValueError: If a grantor/grantee count label does not end in digits.
    """

    def trailing_count(label):
        # The count is the whole run of digits at the end of the label.
        # Reading just the last character would turn 12 into 2.
        digits = label[len(label.rstrip("0123456789")):]
        return int(digits)

    def party_id(kind, row):
        # Row ids are assumed to follow ASP.NET auto-naming, which pads the
        # index to two digits (ctl02 ... ctl09, ctl10) -- a literal "ctl0"
        # prefix would produce ctl010 for the ninth party.
        return 'DocDetails1_GridView_{}_ctl{:02d}_ctl00'.format(kind, row)

    driver = self.driver
    driver.delete_all_cookies()
    driver.get(self.base_url + "/i2/default.aspx?AspxAutoDetectCookieSupport=1")
    pin = rawPin.split("-")
    log("Collecting data for PIN {}".format(rawPin))

    # Enter pin and search: one text box per PIN segment.
    for i in range(5):
        driver.find_element_by_id("SearchFormEx1_PINTextBox" + str(i)).send_keys(pin[i])
    driver.find_element_by_id("SearchFormEx1_btnSearch").click()

    # Get all result rows (regular rows first, then alternating rows; the
    # final sort makes the page order irrelevant).
    search_rows = (
        driver.find_elements_by_class_name("DataGridRow")
        + driver.find_elements_by_class_name("DataGridAlternatingRow")
    )

    # Collect the javascript for each relevant row up front: running it
    # replaces the page content, so the row elements cannot be reused later.
    doc_links = []
    for row in search_rows:
        type_link = row.find_element_by_xpath('.//td[4]/a')
        doc_type = type_link.text
        # For now, just grab MORTGAGEs and WARRANTY DEEDs
        if "MORTGAGE" not in doc_type and "WARRANTY DEED" not in doc_type:
            continue
        doc_links.append({
            'link': type_link.get_attribute('href').replace('javascript:', '') + ';',
            'docNumber': row.find_element_by_xpath('.//td[5]/a').text,
            'docType': doc_type,
        })

    # For each relevant row, extract the rest of the details
    deeds = []
    for document in doc_links:
        driver.execute_script(str(document['link']))
        # Block until the detail view shows this document's number.
        self.waitForIdTextToMatch('DocDetails1_GridView_Details_ctl02_ctl00', document['docNumber'])

        # rawPin is identical to re-joining its own split segments.
        record = DeedRecord(rawPin, document['docNumber'], document['docType'])
        record.executedDate = parse(self.getTextFromId('DocDetails1_GridView_Details_ctl02_ctl01', ''))
        record.recordedDate = parse(self.getTextFromId('DocDetails1_GridView_Details_ctl02_ctl02', ''))
        record.amount = self.getTextFromId('DocDetails1_GridView_Details_ctl02_ctl05', '')

        # Grantors and grantees take a little more finesse: the table's
        # first and third rows hold labels ending in the number of entries.
        grant_table = driver.find_element_by_id('DocDetails1_GrantorGrantee_Table')
        grantor_count = trailing_count(grant_table.find_element_by_xpath('.//tbody/tr[1]/td/span').text)
        grantee_count = trailing_count(grant_table.find_element_by_xpath('.//tbody/tr[3]/td/span').text)

        # Data rows start at index 2 in both grids.
        for row in range(2, 2 + grantor_count):
            record.grantors.append(self.getTextFromId(party_id('Grantor', row), ''))
        for row in range(2, 2 + grantee_count):
            record.grantees.append(self.getTextFromId(party_id('Grantee', row), ''))
        deeds.append(record)

    # Sort and save to a csv file
    deeds.sort(key=lambda deed: deed.executedDate)
    self.outputToCsv(deeds, fileLock)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment