Created
August 8, 2016 12:43
-
-
Save ryanhallcs/8bef9a25ac7e5e81b37df297909ed890 to your computer and use it in GitHub Desktop.
Portion of a Python script for a Selenium crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def search_record_of_deeds_pin(self, rawPin, fileLock):
    """Collect mortgage and warranty-deed records for one PIN and save them.

    Clears cookies, loads the search page in ``self.driver``, types the
    first five dash-separated segments of ``rawPin`` into the search form
    and submits it. Every result row whose document type contains
    "MORTGAGE" or "WARRANTY DEED" is then opened by running the JavaScript
    from its link, and its details are read into a ``DeedRecord``. The
    records are sorted by ``executedDate`` and handed to
    ``self.outputToCsv``.

    Args:
        rawPin: PIN as a dash-separated string with at least five segments.
        fileLock: Passed through unchanged to ``self.outputToCsv``.

    Raises:
        IndexError: If ``rawPin`` has fewer than five dash-separated
            segments.
        ValueError: If a grantor/grantee count label does not end in a
            number.
    """

    def trailing_count(label):
        # The count is taken from the run of digits that ends the label.
        # Reading only the last character would truncate counts >= 10.
        stripped = label.strip()
        digits = stripped[len(stripped.rstrip("0123456789")):]
        if not digits:
            raise ValueError(
                "No trailing count in label {!r}".format(label))
        return int(digits)

    driver = self.driver
    driver.delete_all_cookies()
    driver.get(self.base_url + "/i2/default.aspx?AspxAutoDetectCookieSupport=1")

    pin = rawPin.split("-")
    log("Collecting data for PIN {}".format(rawPin))

    # Enter pin and search: one text box per PIN segment.
    for i in range(5):
        elemName = "SearchFormEx1_PINTextBox" + str(i)
        driver.find_element_by_id(elemName).send_keys(pin[i])
    driver.find_element_by_id("SearchFormEx1_btnSearch").click()

    # Get all result rows (regular and alternating rows use separate classes).
    searchResults = (
        driver.find_elements_by_class_name("DataGridRow")
        + driver.find_elements_by_class_name("DataGridAlternatingRow")
    )

    # Iterate each row, and extract the necessary javascript to run to get
    # each document's details. Everything needed is copied out of the row
    # elements here, before any link script is executed.
    jsDocLinks = []
    for element in searchResults:
        docTypeChild = element.find_element_by_xpath('.//td[4]/a')
        docType = docTypeChild.text
        # For now, just grab MORTGAGEs and WARRANTY DEEDs
        if ("MORTGAGE" in docType) or ("WARRANTY DEED" in docType):
            attr = docTypeChild.get_attribute('href').replace('javascript:', '') + ';'
            docNumber = element.find_element_by_xpath('.//td[5]/a').text
            jsDocLinks.append({
                'link': attr,
                'docNumber': docNumber,
                'docType': docType,
            })

    # For each relevant row, extract the rest of the details
    deeds = []
    for document in jsDocLinks:
        driver.execute_script(str(document['link']))
        # NOTE(review): presumably blocks until the details panel shows this
        # document number - confirm against waitForIdTextToMatch.
        self.waitForIdTextToMatch('DocDetails1_GridView_Details_ctl02_ctl00', document['docNumber'])

        newRecord = DeedRecord("-".join(pin), document['docNumber'], document['docType'])
        newRecord.executedDate = parse(self.getTextFromId('DocDetails1_GridView_Details_ctl02_ctl01', ''))
        newRecord.recordedDate = parse(self.getTextFromId('DocDetails1_GridView_Details_ctl02_ctl02', ''))
        newRecord.amount = self.getTextFromId('DocDetails1_GridView_Details_ctl02_ctl05', '')

        # Grantors and grantees take a little more finesse: the number of
        # each is read from a label in the grantor/grantee table.
        grantElement = driver.find_element_by_id('DocDetails1_GrantorGrantee_Table')
        numGrantors = grantElement.find_element_by_xpath('.//tbody/tr[1]/td/span').text
        numGrantees = grantElement.find_element_by_xpath('.//tbody/tr[3]/td/span').text

        # Row ids start at ctl02 and use a two-digit, zero-padded index, so
        # the ninth row is ctl10 (not ctl010).
        for i in range(trailing_count(numGrantors)):
            newRecord.grantors.append(self.getTextFromId('DocDetails1_GridView_Grantor_ctl{:02d}_ctl00'.format(2 + i), ''))
        for i in range(trailing_count(numGrantees)):
            newRecord.grantees.append(self.getTextFromId('DocDetails1_GridView_Grantee_ctl{:02d}_ctl00'.format(2 + i), ''))

        deeds.append(newRecord)

    # Sort and save to a csv file
    deeds.sort(key=lambda x: x.executedDate)
    self.outputToCsv(deeds, fileLock)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment