Skip to content

Instantly share code, notes, and snippets.

@lgrayland
Created February 24, 2017 16:56
Show Gist options
  • Save lgrayland/0cec5c278ea60bca386736614481121f to your computer and use it in GitHub Desktop.
Save lgrayland/0cec5c278ea60bca386736614481121f to your computer and use it in GitHub Desktop.
Uses a regex to find email addresses on a page.
/**
 * Crawler app that reports the email addresses found in each page it is given
 * and only follows links that stay on the same domain as that page.
 *
 * The instance relies on `parseHtml` and `makeLink`, which are not defined
 * here; they are expected to come from the prototype that is assigned to
 * `EightyApp` after this definition.
 */
var EightyApp = function() {
    // Email-like tokens, e.g. "first.last@mail.example.com" (case-insensitive).
    // Safe to share although it is global: String.prototype.match resets
    // lastIndex before scanning.
    var EMAIL_PATTERN = /[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9]+)*(\.[a-z]{2,})/gi;

    // Captures everything between "://" and the next "/" (the authority part
    // of a URL, which may include a port).
    var DOMAIN_PATTERN = /:\/\/(.[^/]+)/;

    /**
     * Extracts every email-like token from the raw page source.
     *
     * The regex runs on the HTML string itself, so no DOM parse is needed and
     * the `jQuery` argument is not used.
     *
     * @param {string} html - Raw page source.
     * @param {string} url - URL of the page (unused).
     * @param {Object} headers - Response headers (unused).
     * @param {number} status - Response status (unused).
     * @param {Function} jQuery - jQuery-like function (unused).
     * @returns {string} JSON of the form `{"emailList": [...]}`; the list is
     *     empty when nothing matches. Duplicates are kept, in page order.
     */
    this.processDocument = function(html, url, headers, status, jQuery) {
        // match() yields null rather than [] when there is no match, so
        // normalise to an array to keep the output shape stable.
        var emailList = (typeof html === 'string' && html.match(EMAIL_PATTERN)) || [];
        return JSON.stringify({ emailList: emailList });
    };

    /**
     * Collects the links on the page that point at the same domain as `url`.
     *
     * @param {string} html - Raw page source.
     * @param {string} url - URL of the page; its domain is the filter.
     * @param {Object} headers - Response headers (unused).
     * @param {number} status - Response status (unused).
     * @param {Function} jQuery - jQuery-like function used to query anchors.
     * @returns {string[]} Links, as returned by `makeLink`, whose domain
     *     equals the page's domain. Empty when `url` has no "://" part.
     */
    this.parseLinks = function(html, url, headers, status, jQuery) {
        var app = this;
        var $ = jQuery;
        var links = [];

        // Without a recognisable domain there is nothing to compare against.
        var urlMatch = DOMAIN_PATTERN.exec(url);
        if (!urlMatch) {
            return links;
        }
        var urlDomain = urlMatch[1];

        var $html = app.parseHtml(html, $);
        $html.find('a').each(function(i, element) {
            // makeLink presumably resolves the href against the page URL;
            // it may yield null/undefined, which is skipped here.
            var link = app.makeLink(url, $(element).attr('href'));
            if (link == null) {
                return;
            }
            var linkMatch = DOMAIN_PATTERN.exec(link);
            if (linkMatch && linkMatch[1] === urlDomain) {
                links.push(link);
            }
        });
        return links;
    };
};
try {
// Testing
module.exports = function(EightyAppBase) {
EightyApp.prototype = new EightyAppBase();
return new EightyApp();
}
} catch(e) {
// Production
console.log("Eighty app exists.");
EightyApp.prototype = new EightyAppBase();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment