Created
July 1, 2017 09:03
-
-
Save ethanwillis/8982b9be65bd0c902e41276f19b2ff14 to your computer and use it in GitHub Desktop.
tnbear business scraper.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * tnbear business scraper.
 *
 * Intended to be run on a tnbear.tn.gov business search results page (for
 * example pasted into the browser console). It injects jQuery 3.2.1 from the
 * Google CDN and, once the library has loaded, reads every result row,
 * fetches that row's details page, and logs one JSON string per business
 * via console.log.
 */
(function() {
    // Prefix joined with each row's relative link to form the details page URL.
    var DETAILS_BASE_URL = "https://tnbear.tn.gov/Ecommerce/";

    // Matches both the regular and the highlighted rows of the result table.
    var RESULT_ROWS_SELECTOR = '#ctl00_MainContent_SearchResultList table tbody .rowRegular, #ctl00_MainContent_SearchResultList table tbody .rowHighlight';

    // [output key, element selector] pairs read from each details page.
    // Kept as an ordered list so the keys are added to the JSON object in a
    // fixed order.
    // NOTE(review): 'txtOfficeAddresss' (three s's) is kept exactly as it was
    // in the original selector - verify against the live page markup.
    var DETAIL_FIELDS = [
        ['entity_name', '#ctl00_MainContent_txtName'],
        ['formed_in', '#ctl00_MainContent_txtFormation'],
        ['fiscal_year_close', '#ctl00_MainContent_txtFYC'],
        ['term_of_duration', '#ctl00_MainContent_txtDuration'],
        ['principal_office', '#ctl00_MainContent_txtOfficeAddresss'],
        ['mailing_address', '#ctl00_MainContent_txtMailAddress'],
        ['ar_exempt', '#ctl00_MainContent_txtARExempt'],
        ['managed_by', '#ctl00_MainContent_txtOptional1'],
        ['initial_filing_date', '#ctl00_MainContent_txtInitialDate'],
        ['delayed_effective_date', '#ctl00_MainContent_txtDelayedDate'],
        ['ar_due_date', '#ctl00_MainContent_txtARDueDate'],
        ['inactive_date', '#ctl00_MainContent_txtInactiveDate'],
        ['obligated_member_entity', '#ctl00_MainContent_txtOME'],
        ['number_of_members', '#ctl00_MainContent_txtOptional2']
    ];

    /**
     * Scrape every result row on the current page and log the enriched record.
     *
     * @param {Function} $ - the jQuery function to use for DOM access and AJAX.
     */
    function scrapeBusinesses($) {
        // .each() rather than $.map(): the callback is run only for its side
        // effects and no mapped array is needed.
        $(RESULT_ROWS_SELECTOR).each(function(index, row) {
            var cells = $(row).children("td");

            // Declared with var inside the callback so every row owns its own
            // object. As an implicit global, all pending AJAX callbacks would
            // write to (and log) the object of the last row.
            // The cell at index 2 is not read.
            var business_json = {
                more_info_url: DETAILS_BASE_URL + cells.eq(0).children("a").attr("href"),
                control_number: cells.eq(0).text(),
                entity_type: cells.eq(1).text(),
                name_type: cells.eq(3).text(),
                name_status: cells.eq(4).text(),
                entity_filing_date: cells.eq(5).text(),
                entity_status: cells.eq(6).text()
            };

            // Fetch the business details page and enrich the record with it.
            $.get(business_json.more_info_url, function(data) {
                // Parse the raw HTML into detached DOM nodes to query with jQuery.
                var dom_nodes = $.parseHTML(data);

                // NOTE(review): .text() assumes these elements hold their value
                // as text content rather than in a value attribute - confirm.
                $.each(DETAIL_FIELDS, function(fieldIndex, field) {
                    business_json[field[0]] = $(field[1], dom_nodes).text();
                });

                console.log(JSON.stringify(business_json));
            }).fail(function(jqXHR, textStatus, errorThrown) {
                // Report the failed row instead of dropping it silently.
                console.error('Failed to fetch ' + business_json.more_info_url + ': ' + textStatus, errorThrown);
            });
        });
    }

    // Load in a modern version of jQuery from Google CDN.
    var script = document.createElement("SCRIPT");
    script.src = 'https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js';
    script.type = 'text/javascript';
    script.onload = function() {
        // The script loads asynchronously, so scraping must start here;
        // window.jQuery is not the freshly loaded library before this point.
        scrapeBusinesses(window.jQuery);
    };
    script.onerror = function() {
        console.error('Failed to load jQuery from ' + script.src);
    };
    document.getElementsByTagName("head")[0].appendChild(script);
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment