Created
February 6, 2019 00:25
-
-
Save geotheory/cea66b3bf100bb98fab39c0c4768ae1b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const Readability = require('readability');
const fs = require('fs');
const os = require('os');
const path = require('path');
const JSDOM = require('jsdom').JSDOM;

// URL the saved HTML originally came from; handed to JSDOM as the document URL.
const url = 'https://www.example.com/the-page-i-got-the-source-from';
const filename = '~/Downloads/test.html';

//------------------------------------------------------------------
// FROM FILE

// Node's fs does not expand a leading "~" (that is a shell feature), so
// resolve it against the current user's home directory before reading.
const resolvedFilename = filename.startsWith('~/')
  ? path.join(os.homedir(), filename.slice(2))
  : filename;

// Read synchronously: the HTML must be available before JSDOM is constructed.
// (An asynchronous fs.readFile callback would not have run yet at that point.)
// `txt0` and `doc` stay `var` because the FROM URL section below redeclares them.
var txt0 = fs.readFileSync(resolvedFilename, 'utf8');
var doc = new JSDOM(txt0, { url: url });

// `reader` and `article` are reassigned by the FROM URL section, hence `let`.
let reader = new Readability(doc.window.document);
let article = reader.parse();

// Bare expressions: evaluate these in a REPL to inspect the parsed fields.
article.siteName;
article.title;
article.byline;
article.content.replace(/\n/gi, '');
//------------------------------------------------------------------- | |
// FROM URL | |
/**
 * Download the body of `url` with a GET request and return it as text.
 *
 * The `https` module is used when the URL's string form starts with "https";
 * otherwise the plain `http` module is used.
 *
 * @param {string|URL} url - Address to fetch.
 * @returns {Promise<string>} Resolves with the complete response body decoded
 *   as UTF-8; rejects with the error emitted by the request or the response.
 */
const getScript = (url) => {
  return new Promise((resolve, reject) => {
    const http = require('http');
    const https = require('https');
    const client = url.toString().startsWith('https') ? https : http;

    client
      .get(url, (resp) => {
        let data = '';

        // Decode at the stream level so a multi-byte UTF-8 character that is
        // split across two chunks is reassembled instead of being corrupted
        // by stringifying each Buffer chunk on its own.
        resp.setEncoding('utf8');

        // A chunk of data has been received.
        resp.on('data', (chunk) => {
          data += chunk;
        });

        // The whole response has been received; hand the body to the caller.
        resp.on('end', () => {
          resolve(data);
        });

        // A failure while reading the body must not leave the promise pending.
        resp.on('error', reject);
      })
      .on('error', reject);
  });
};
// Fetch the page and parse it only once the HTML has actually arrived.
// Building the JSDOM right after calling getScript() would run before the
// promise settles, i.e. with `txt` still undefined.
// `reader` and `article` are the bindings declared in the FROM FILE section.
var txt0 = getScript(url);
var txt;
var doc;
txt0
  .then(function (result) {
    txt = result;
    doc = new JSDOM(txt, { url: url });
    reader = new Readability(doc.window.document);
    article = reader.parse();

    // Printed explicitly: a REPL does not echo expressions evaluated inside
    // a callback.
    console.log(article.siteName);
    console.log(article.title);
    console.log(article.byline);
    console.log(article.content.replace(/\n/gi, ''));
  })
  .catch(function (err) {
    // Surface download/parse failures instead of leaving an unhandled rejection.
    console.error(err);
  });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment