Skip to content

Instantly share code, notes, and snippets.

@dlwr
Last active July 5, 2016 15:44
Show Gist options
  • Save dlwr/c1da36161e6f151af5c3e2c897dd56db to your computer and use it in GitHub Desktop.
Save dlwr/c1da36161e6f151af5c3e2c897dd56db to your computer and use it in GitHub Desktop.
'use strict'
const fetch = require('node-fetch')
const client = require('cheerio-httpcli')
// Root of the wikihub site; the community index page lives under it.
const BASE_URL = 'https://wikihub.io'
const COMMUNITIES_URL = `${BASE_URL}/communities`
// API token sent as a Bearer credential on every API request.
// Prefer supplying it through the WIKIHUB_TOKEN environment variable; the
// literal below is kept only as a backward-compatible fallback.
// NOTE(review): a credential committed to source should be rotated and removed.
const TOKEN = process.env.WIKIHUB_TOKEN || '0b8c09f94a89e1ce3d0c1fbd35cd6ac327a104b3'
// Accumulated results keyed by community name:
// { [community]: { title, pages: [pageTitle, ...], articles: { '@user': { timestamp: title } } } }
const paths = {}
/**
 * Collects the titles of all pages of a community by walking the paginated
 * `/api/v1/pages` endpoint, appending each title to `paths[community].pages`.
 *
 * Pagination follows the `rel="next"` entry of the response's `Link` header;
 * the walk stops when there is no such entry or when a response status is
 * not 200.
 *
 * @param {string} community - Community name, used as the host prefix of the API URL.
 * @param {string} [url] - Optional URL to start from; defaults to the first
 *   page of the community's pages endpoint.
 * @returns {Promise<boolean>} Resolves to `true` once the walk ends; rejects
 *   if a request or JSON decoding fails.
 */
function fetchPages(community, url) {
  console.log(`begin fetch ${community} pages`)
  if (!paths[community]) paths[community] = {}
  if (!paths[community].pages) paths[community].pages = []

  // Captures the URL of the `rel="next"` entry in a Link header.
  const nextLinkPattern = /<(https:\/\/[^<>]*\.wikihub\.io\/[^<>]*)>;\s*rel="next"/

  function fetchFrom(pageUrl) {
    console.log(`fetch ${pageUrl}`)
    return fetch(pageUrl, {
      headers: { Authorization: `Bearer ${TOKEN}` }
    }).then(response => {
      // A non-200 answer ends the walk without touching the body.
      if (response.status !== 200) return true
      // The Link header may be absent (e.g. on the last page).
      const linkHeader = response.headers.get('link')
      const match = linkHeader ? linkHeader.match(nextLinkPattern) : null
      const nextUrl = match ? match[1] : null
      return response.json().then(json => {
        json.forEach(page => {
          paths[community].pages.push(page.title)
        })
        return nextUrl ? fetchFrom(nextUrl) : true
      })
    })
  }

  return fetchFrom(url || `https://${community}.wikihub.io/api/v1/pages`)
}
/**
 * Collects all articles of a community by walking the paginated
 * `/api/v1/articles` endpoint. Each article title is stored at
 * `paths[community].articles['@' + userName][timestamp]`, where `timestamp`
 * is the article's `created_at` rendered in UTC as `YYYYMMDDhhmmss`.
 *
 * Pagination follows the `rel="next"` entry of the response's `Link` header;
 * the walk stops when there is no such entry or when a response status is
 * not 200.
 *
 * @param {string} community - Community name, used as the host prefix of the API URL.
 * @returns {Promise<boolean>} Resolves to `true` once the walk ends; rejects
 *   if a request or JSON decoding fails.
 */
function fetchArticles(community) {
  console.log(`begin fetch ${community} articles`)
  if (!paths[community]) paths[community] = {}
  if (!paths[community].articles) paths[community].articles = {}

  // Captures the URL of the `rel="next"` entry in a Link header.
  const nextLinkPattern = /<(https:\/\/[^<>]*\.wikihub\.io\/[^<>]*)>;\s*rel="next"/

  const twoDigits = value => ('0' + value).slice(-2)

  // Uses the UTC accessors directly; shifting the time by the local timezone
  // offset and reading local fields is wrong around DST transitions.
  function utcTimestamp(createdAt) {
    const date = new Date(Date.parse(createdAt))
    return [
      date.getUTCFullYear(),
      twoDigits(date.getUTCMonth() + 1),
      twoDigits(date.getUTCDate()),
      twoDigits(date.getUTCHours()),
      twoDigits(date.getUTCMinutes()),
      twoDigits(date.getUTCSeconds())
    ].join('')
  }

  function fetchFrom(pageUrl) {
    console.log(`fetch ${pageUrl}`)
    return fetch(pageUrl, {
      headers: { Authorization: `Bearer ${TOKEN}` }
    }).then(response => {
      // A non-200 answer ends the walk without touching the body.
      if (response.status !== 200) return true
      // The Link header may be absent (e.g. on the last page).
      const linkHeader = response.headers.get('link')
      const match = linkHeader ? linkHeader.match(nextLinkPattern) : null
      const nextUrl = match ? match[1] : null
      return response.json().then(json => {
        const articles = paths[community].articles
        json.forEach(article => {
          const author = `@${article.user.name}`
          if (!articles[author]) articles[author] = {}
          articles[author][utcTimestamp(article.created_at)] = article.title
        })
        return nextUrl ? fetchFrom(nextUrl) : true
      })
    })
  }

  return fetchFrom(`https://${community}.wikihub.io/api/v1/articles`)
}
// Entry point: scrape the community index, then fetch pages and articles for
// every community and print the accumulated `paths` object as JSON.
client.fetch(COMMUNITIES_URL)
  .then(result => {
    const communities = []
    result.$('a.list-group-item').each((_, link) => {
      // Strip the scheme and the `.wikihub.io` suffix from the link target;
      // what remains is used as the community name (presumably the subdomain).
      const community = link.attribs.href.replace(/(https?:\/\/|\.wikihub\.io\/?)/g, '')
      paths[community] = { title: result.$(link).find('.lgi-heading').text(), pages: [], articles: {} }
      communities.push(community)
    })
    return communities
  })
  .then(communities => {
    // Start all page fetches first, then all article fetches; they run concurrently.
    const pageTasks = communities.map(community => fetchPages(community))
    const articleTasks = communities.map(community => fetchArticles(community))
    return Promise.all(pageTasks.concat(articleTasks))
  })
  .then(() => {
    console.log(JSON.stringify(paths))
    console.log('finished')
  })
  .catch(error => {
    // Report the failure instead of leaving an unhandled rejection.
    console.error(error)
    process.exitCode = 1
  })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment