Skip to content

Instantly share code, notes, and snippets.

@danielyaa5
Created September 26, 2017 05:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danielyaa5/e3f466dfa1c921d066418ebcda39992e to your computer and use it in GitHub Desktop.
Save danielyaa5/e3f466dfa1c921d066418ebcda39992e to your computer and use it in GitHub Desktop.
Steemit Scraper
const util = require('util');
const request = require('request-promise');
const cheerio = require('cheerio');
const imageDataURI = require('image-data-uri')
const _ = require('lodash');
/**
 * Encodes every image URL in a list via `imageDataURI.encodeFromURL`.
 *
 * @param {string[]|null|undefined} urls - Image URLs to encode. A nullish
 *   value is treated as an empty list.
 * @returns {Promise<Array>} Resolves with one encoder result per URL, in the
 *   same order as the input (presumably data-URI strings, going by the
 *   library name); rejects as soon as any single encode rejects.
 */
function getImgUris(urls) {
  // A post may carry no image list at all; resolve to [] instead of
  // throwing a TypeError on `.map`.
  const list = urls == null ? [] : urls;
  // Call through an arrow function so `map`'s extra (index, array) arguments
  // are not forwarded to the encoder and the method keeps its receiver.
  return Promise.all(list.map((url) => imageDataURI.encodeFromURL(url)));
}
/**
 * Downloads the page at `postUrl` and pulls the post fields out of its HTML
 * with CSS selectors.
 *
 * @param {string} postUrl - Address of the page to download.
 * @returns {Promise<{meta: *, body: string, author: string, url: (string|undefined), title: string}>}
 *   `meta` is the JSON-parsed text of the first table row's cell(s); the
 *   remaining fields are the text (or `href` attribute, for `url`) of the
 *   matched elements.
 * @throws {SyntaxError} When the metadata cell text is not valid JSON.
 */
async function fetchPost(postUrl) {
  const $ = cheerio.load(await request(postUrl));

  // Parsed before anything else is read so malformed metadata fails first.
  // NOTE(review): assumes this cell holds the post's JSON metadata — confirm
  // against the page layout being scraped.
  const metaText = $('table > tbody > tr:nth-child(1) > td').text();

  return {
    meta: JSON.parse(metaText),
    body: $('div.md > div > pre').text(),
    author: $('div.post-head-bar > div > a').text(),
    url: $('div.post-head-bar > a').attr('href'),
    title: $('div h3:first-of-type').text(),
  };
}
/**
 * Fetches a post and optionally attaches encoded versions of its images.
 *
 * @param {string} url - Address of the post page, passed through to
 *   `fetchPost`.
 * @param {Object} [opts] - Optional settings; any falsy value means
 *   "no options".
 * @param {boolean} [opts.imageUris] - When truthy, the URLs listed in
 *   `meta.image` are encoded with `getImgUris` and the result is stored on
 *   `meta.imageUri`.
 * @returns {Promise<Object>} The object produced by `fetchPost`, with
 *   `meta.imageUri` added when requested.
 */
async function getPost(url, opts) {
  // Use a local rather than reassigning the parameter; `||` (not a default
  // parameter) keeps accepting an explicit `null`.
  const options = opts || {};
  const rawPost = await fetchPost(url);
  // NOTE(review): looks like leftover debug output; kept so the function's
  // observable behavior is unchanged.
  console.log(rawPost);
  if (options.imageUris) {
    // `meta.image` can be absent from the parsed metadata; fall back to an
    // empty list so the request yields `imageUri: []` instead of crashing.
    rawPost.meta.imageUri = await getImgUris(rawPost.meta.image || []);
  }
  return rawPost;
}
// Public API of this module: only `getPost` is exposed to callers.
module.exports = { getPost: getPost };
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment