Skip to content

Instantly share code, notes, and snippets.

@ethanwillis
Created July 1, 2017 09:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ethanwillis/8982b9be65bd0c902e41276f19b2ff14 to your computer and use it in GitHub Desktop.
Save ethanwillis/8982b9be65bd0c902e41276f19b2ff14 to your computer and use it in GitHub Desktop.
tnbear business scraper.
/**
 * tnbear business scraper.
 *
 * Intended to be pasted into the browser console on a tnbear.tn.gov search
 * results page. It injects jQuery 3.2.1 from the Google CDN and, once that
 * script has loaded, converts every result row on the current page into a
 * JSON record, fetches the row's detail page to enrich the record, and logs
 * each finished record with console.log(JSON.stringify(...)).
 */
(function() {
  'use strict';

  var JQUERY_URL = 'https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js';
  var DETAILS_BASE_URL = "https://tnbear.tn.gov/Ecommerce/";
  var RESULT_ROWS_SELECTOR =
    '#ctl00_MainContent_SearchResultList table tbody .rowRegular, ' +
    '#ctl00_MainContent_SearchResultList table tbody .rowHighlight';

  // Maps each output field to the element id on the details page that holds it.
  // Key order here is the order in which the fields appear in the logged JSON.
  var DETAIL_FIELD_SELECTORS = {
    entity_name: '#ctl00_MainContent_txtName',
    formed_in: '#ctl00_MainContent_txtFormation',
    fiscal_year_close: '#ctl00_MainContent_txtFYC',
    term_of_duration: '#ctl00_MainContent_txtDuration',
    // NOTE(review): the triple "s" is kept exactly as in the original script;
    // confirm against the details page markup before "correcting" it.
    principal_office: '#ctl00_MainContent_txtOfficeAddresss',
    mailing_address: '#ctl00_MainContent_txtMailAddress',
    ar_exempt: '#ctl00_MainContent_txtARExempt',
    managed_by: '#ctl00_MainContent_txtOptional1',
    initial_filing_date: '#ctl00_MainContent_txtInitialDate',
    delayed_effective_date: '#ctl00_MainContent_txtDelayedDate',
    ar_due_date: '#ctl00_MainContent_txtARDueDate',
    inactive_date: '#ctl00_MainContent_txtInactiveDate',
    obligated_member_entity: '#ctl00_MainContent_txtOME',
    number_of_members: '#ctl00_MainContent_txtOptional2'
  };

  /**
   * Builds the base record for one result row from its table cells.
   *
   * @param {Function} $ - The jQuery function to use.
   * @param {Element} row - A result-table row element.
   * @returns {Object} Record with the link and the text of the summary columns.
   */
  function readRow($, row) {
    var cells = $(row).children("td");
    return {
      more_info_url: DETAILS_BASE_URL + cells.eq(0).children("a").attr("href"),
      control_number: cells.eq(0).text(),
      entity_type: cells.eq(1).text(),
      name_type: cells.eq(3).text(),
      name_status: cells.eq(4).text(),
      entity_filing_date: cells.eq(5).text(),
      entity_status: cells.eq(6).text()
    };
  }

  /**
   * Fetches the details page for a record, copies the detail fields onto it
   * and logs the enriched record as JSON.
   *
   * @param {Function} $ - The jQuery function to use.
   * @param {Object} business - Record produced by readRow; mutated in place.
   */
  function enrichAndLog($, business) {
    $.get(business.more_info_url, function(data) {
      // Parse the raw html data into DOM nodes to query with jQuery.
      var detailNodes = $.parseHTML(data);
      Object.keys(DETAIL_FIELD_SELECTORS).forEach(function(field) {
        business[field] = $(DETAIL_FIELD_SELECTORS[field], detailNodes).text();
      });
      console.log(JSON.stringify(business));
    }).fail(function(jqXHR, textStatus) {
      // Report the failure instead of silently dropping the record.
      console.error('Failed to fetch ' + business.more_info_url + ': ' + textStatus);
    });
  }

  /**
   * Scrapes every result row on the current page.
   *
   * @param {Function} $ - The jQuery function to use.
   */
  function scrapeBusinesses($) {
    $(RESULT_ROWS_SELECTOR).each(function(index, row) {
      // Each row gets its own record object. The detail requests complete
      // after this loop has finished, so a shared variable would leave every
      // callback writing to (and logging) the last row's record.
      var business = readRow($, row);
      enrichAndLog($, business);
    });
  }

  // Load in a modern version of jQuery from the Google CDN.
  var script = document.createElement("SCRIPT");
  script.src = JQUERY_URL;
  script.type = 'text/javascript';
  script.onload = function() {
    // Only start scraping once the injected jQuery is actually available.
    scrapeBusinesses(window.jQuery);
  };
  script.onerror = function() {
    console.error('Failed to load jQuery from ' + JQUERY_URL);
  };
  document.getElementsByTagName("head")[0].appendChild(script);
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment