Created
February 24, 2017 16:56
-
-
Save lgrayland/0cec5c278ea60bca386736614481121f to your computer and use it in GitHub Desktop.
uses regex to find email addresses on a page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Returns a list of emails for every page on the domains of the URL list.
//
// EightyApp is a constructor whose instances expose two crawler hooks:
//   - processDocument: extracts email-like strings from a page's raw HTML.
//   - parseLinks: collects the page's links that stay on the same domain.
// Both hooks rely on helpers (parseHtml, makeLink) that are not defined here;
// they are expected to come from the prototype assigned to EightyApp
// elsewhere in the file.
var EightyApp = function() {
    /**
     * Scans the raw page source for email addresses.
     *
     * @param {string} html - Raw page source to scan.
     * @param {string} url - Page URL (unused).
     * @param {*} headers - Response headers (unused).
     * @param {*} status - Response status (unused).
     * @param {*} jQuery - jQuery-like function (unused; the scan is purely textual).
     * @returns {string} JSON text of the form {"emailList": [...]}. The list is
     *     empty when nothing matches and may contain duplicates.
     */
    this.processDocument = function(html, url, headers, status, jQuery) {
        var object = {};
        // Get emails.
        // String.prototype.match yields null (not an empty array) when a global
        // pattern finds nothing, so fall back to [] to keep the output a list.
        var emailList = html.match(/[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9]+)*(\.[a-z]{2,})/gi) || [];
        object.emailList = emailList;
        return JSON.stringify(object);
    };

    /**
     * Collects the links of a page that point to the same domain as the page.
     *
     * The "domain" is whatever sits between "://" and the next "/", so a port
     * or userinfo part, if present, takes part in the comparison.
     *
     * @param {string} html - Raw page source.
     * @param {string} url - URL of the page being processed.
     * @param {*} headers - Response headers (unused).
     * @param {*} status - Response status (unused).
     * @param {*} jQuery - jQuery-like function used to query the parsed page.
     * @returns {string[]} Links whose domain equals the domain of `url`; empty
     *     when `url` contains no "://" part to compare against.
     */
    this.parseLinks = function(html, url, headers, status, jQuery) {
        var app = this;
        var $ = jQuery;
        var links = [];
        var r = /:\/\/(.[^/]+)/;

        // Without a recognizable domain in the page URL nothing can be "same
        // domain"; return early instead of dereferencing a null match.
        var urlMatch = url.match(r);
        if (!urlMatch) {
            return links;
        }
        var urlDomain = urlMatch[1];

        var $html = app.parseHtml(html, $);
        // Gets all links in the html document.
        $html.find('a').each(function(i, obj) {
            // NOTE(review): makeLink presumably resolves the href against the
            // page URL and may return null for unusable hrefs -- confirm
            // against the base class.
            var link = app.makeLink(url, $(this).attr('href'));
            if (link == null) {
                return;
            }
            var linkMatch = link.match(r);
            if (linkMatch && linkMatch[1] === urlDomain) {
                links.push(link);
            }
        });
        return links;
    };
};
try { | |
// Testing | |
module.exports = function(EightyAppBase) { | |
EightyApp.prototype = new EightyAppBase(); | |
return new EightyApp(); | |
} | |
} catch(e) { | |
// Production | |
console.log("Eighty app exists."); | |
EightyApp.prototype = new EightyAppBase(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment