// tnbear business scraper.
/**
 * Scrapes the business search results shown on the current tnbear.tn.gov page.
 *
 * Injects jQuery 3.2.1 from the Google CDN and, once it has loaded, reads every
 * result row, fetches that row's details page and logs one JSON string per
 * business to the console. Nothing is returned; the console output is the
 * result.
 */
(function() {
  'use strict';

  var JQUERY_URL = 'https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js';
  var DETAILS_BASE_URL = 'https://tnbear.tn.gov/Ecommerce/';
  var ROW_SELECTOR =
    '#ctl00_MainContent_SearchResultList table tbody .rowRegular, ' +
    '#ctl00_MainContent_SearchResultList table tbody .rowHighlight';

  // Output key -> selector of the element on the details page holding its value.
  // Key order here is the key order of the logged JSON.
  var DETAIL_FIELDS = {
    entity_name: '#ctl00_MainContent_txtName',
    formed_in: '#ctl00_MainContent_txtFormation',
    fiscal_year_close: '#ctl00_MainContent_txtFYC',
    term_of_duration: '#ctl00_MainContent_txtDuration',
    // NOTE(review): "Addresss" (three s) is kept exactly as in the original
    // script; confirm against the live page before "correcting" it.
    principal_office: '#ctl00_MainContent_txtOfficeAddresss',
    mailing_address: '#ctl00_MainContent_txtMailAddress',
    ar_exempt: '#ctl00_MainContent_txtARExempt',
    managed_by: '#ctl00_MainContent_txtOptional1',
    initial_filing_date: '#ctl00_MainContent_txtInitialDate',
    delayed_effective_date: '#ctl00_MainContent_txtDelayedDate',
    ar_due_date: '#ctl00_MainContent_txtARDueDate',
    inactive_date: '#ctl00_MainContent_txtInactiveDate',
    obligated_member_entity: '#ctl00_MainContent_txtOME',
    number_of_members: '#ctl00_MainContent_txtOptional2'
  };

  /**
   * Builds the base record for one result row from its table cells.
   * Returns null when the row has no details link in its first cell.
   */
  function readRow($, row) {
    var cells = $(row).children('td');
    var href = cells.eq(0).children('a').attr('href');
    if (!href) {
      return null;
    }
    return {
      more_info_url: DETAILS_BASE_URL + href,
      control_number: cells.eq(0).text(),
      entity_type: cells.eq(1).text(),
      // Cell index 2 is intentionally not read, matching the original script.
      name_type: cells.eq(3).text(),
      name_status: cells.eq(4).text(),
      entity_filing_date: cells.eq(5).text(),
      entity_status: cells.eq(6).text()
    };
  }

  /**
   * Copies every DETAIL_FIELDS value out of the details-page HTML into `business`.
   */
  function addDetails($, business, html) {
    // Wrap the parsed nodes in a container so .find() also matches elements
    // that end up as top-level nodes of the parsed fragment.
    var page = $('<div>').append($.parseHTML(html));
    Object.keys(DETAIL_FIELDS).forEach(function(key) {
      business[key] = page.find(DETAIL_FIELDS[key]).text();
    });
  }

  /**
   * Scrapes all result rows on the current page. Must only run after jQuery
   * has finished loading.
   */
  function scrape($) {
    $(ROW_SELECTOR).each(function(index, row) {
      // Declared per row: the AJAX callbacks run later, so a shared variable
      // would make every callback write to the last row's record.
      var business = readRow($, row);
      if (business === null) {
        console.warn('Skipping result row ' + index + ': no details link found.');
        return;
      }

      // Fetch the details page and enrich the record with more information.
      $.get(business.more_info_url, function(data) {
        addDetails($, business, data);
        console.log(JSON.stringify(business));
      }).fail(function(jqXHR, textStatus) {
        console.error('Failed to fetch ' + business.more_info_url + ': ' + textStatus);
      });
    });
  }

  // Load a modern version of jQuery from the Google CDN and start scraping
  // only once it is available.
  var script = document.createElement('SCRIPT');
  script.src = JQUERY_URL;
  script.type = 'text/javascript';
  script.onload = function() {
    scrape(window.jQuery);
  };
  script.onerror = function() {
    console.error('Could not load jQuery from ' + JQUERY_URL);
  };
  document.getElementsByTagName('head')[0].appendChild(script);
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment