Skip to content

Instantly share code, notes, and snippets.

@jon49
Created January 30, 2018 09:02
Show Gist options
  • Save jon49/027b0611d3255726551893a1fdf91b5b to your computer and use it in GitHub Desktop.
Save jon49/027b0611d3255726551893a1fdf91b5b to your computer and use it in GitHub Desktop.
Last YouTube videos on The Survival Podcast
var fs = require('fs'),
cheerio = require('cheerio'),
fetch = require('node-fetch')
// https://gist.github.com/endel/321925f6cafa25bbfbde
/**
 * Creates a formatter that left-pads a value with "0" characters.
 *
 * @param {number} [size] - Minimum length of the result; any falsy value
 *   (undefined, 0, ...) falls back to 2.
 * @returns {function(*): string} Function that converts its argument with
 *   String() and prefixes zeros until the minimum length is reached. Values
 *   that are already long enough are returned unchanged.
 */
const Pad = size => number => {
  const width = size || 2
  const digits = String(number)
  let zeros = ""
  // One "0" per missing character, counted from the current length.
  for (let length = digits.length; length < width; length += 1) {
    zeros += "0"
  }
  return zeros + digits
}

// Three-character padding, used for the cached page file names.
const Pad3 = Pad(3)
/**
 * Loads the HTML of one listing page of thesurvivalpodcast.com, using a local
 * file as a cache.
 *
 * The page is first read from `./TSP-NNN.html` (NNN is the page number padded
 * by Pad3). When that file does not exist the page is downloaded, written to
 * the cache file and passed to the callback.
 *
 * @param {number} pageNumber - Page to load; page 1 maps to the site root,
 *   every other value to `/page/<pageNumber>`.
 * @param {function(string=): void} callback - Receives the page HTML on
 *   success, or no argument after a failure has been logged to stderr.
 */
function GetHTML(pageNumber, callback){
  const fileName = `./TSP-${Pad3(pageNumber)}.html`

  // Read the cache directly instead of probing with the deprecated fs.exists:
  // a missing file is reported as ENOENT, and there is no window between the
  // existence check and the read.
  fs.readFile(fileName, 'utf8', (err, data) => {
    if (!err) {
      callback(data)
      return
    }

    if (err.code !== 'ENOENT') {
      console.error('Page:', pageNumber, err.message)
      // Return here so the callback is not invoked a second time with data
      // that does not exist.
      callback()
      return
    }

    const url =
      (pageNumber === 1)
        ? 'http://www.thesurvivalpodcast.com/'
        : `http://www.thesurvivalpodcast.com/page/${pageNumber}`

    fetch(url)
      .then(response => response.text())
      .then(text => {
        // fs.writeFile requires a callback; a failed cache write is only
        // logged because the downloaded text is still usable.
        fs.writeFile(fileName, text, writeErr => {
          if (writeErr) {
            console.error('Page:', pageNumber, writeErr.message)
          }
        })
        callback(text)
      })
      .catch(fetchErr => {
        console.error('Page:', pageNumber, fetchErr)
        callback()
      })
  })
}
// Matches post titles that start with "Episode-<digits>".
const isEpisode = /^Episode-\d+/
// Matches the paragraph that starts with "Resources for".
const isResource = /^Resources for/

/**
 * Extracts one YouTube link per episode post from a listing page.
 *
 * For every `.entry-title` whose text matches isEpisode, the sibling
 * `.entry-content` elements are searched for a `<p>` matching isResource that
 * is directly followed by a `<ul>`. Among that list's `<li>` children that
 * hold exactly one direct YouTube anchor, only the last one is kept.
 *
 * @param {string} html - Markup of a listing page.
 * @returns {Array<{show: string, youtube: string, title: string}>} One entry
 *   per kept list item: `show` is the text of the element two siblings before
 *   the `.entry-content`, `youtube` the anchor's href and `title` the text of
 *   the list item.
 */
function GetAllYoutubeMusicLinks(html){
  const list = []
  const $ = cheerio.load(html)
  const youtubeAnchor = 'a[href*=youtube]'

  // .each rather than .filter: the callback is run only for its side effects.
  $('.entry-title').each((headingIdx, headingEl) => {
    const heading = $(headingEl)
    if (!isEpisode.test(heading.text())) {
      return
    }

    heading.siblings('.entry-content').each((entryIdx, entryEl) => {
      $(entryEl)
        .children('p')
        .filter((pIdx, pEl) => isResource.test($(pEl).text()))
        .next('ul')
        .children('li')
        .filter((liIdx, liEl) => $(liEl).children(youtubeAnchor).length === 1)
        .last()
        .each((liIdx, liEl) => {
          const li = $(liEl)
          const show = li.closest('.entry-content').prev().prev().text()
          // Read the href from the YouTube anchor itself; the item may hold
          // other anchors in front of it.
          const youtube = li.children(youtubeAnchor).first().attr('href')
          const title = li.text()
          list.push({show, youtube, title})
        })
    })
  })

  return list
}
const AllSongsFileName = './AllSongs.csv'
// Upper bound on the number of listing pages that are visited.
const MaxPages = 1000

/**
 * Formats a value as a CSV field, quoting it when it contains a comma, a
 * double quote or a line break (embedded quotes are doubled).
 */
const CsvField = value => {
  const text = String(value)
  return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text
}

/**
 * Builds the CSV row `episodeNumber,title,youtube,show` for one link.
 * The episode number is left empty when `show` has no "Episode-<digits>" part.
 */
function ToCsvRow(link) {
  // match() returns null when nothing matches, so test it before indexing.
  const showNumberMatch = link.show.match(/Episode-(\d+)/)
  const episodeNumber = showNumberMatch ? +showNumberMatch[1] : ''
  const fields = [episodeNumber, link.title, link.youtube, link.show]
  return `${fields.map(CsvField).join(',')}\r\n`
}

/**
 * Processes listing pages one after another, starting at `pageNumber`.
 *
 * The next page is requested only from inside the callback of the current
 * one, so the stop condition (a page without links) is seen before any
 * further request is made. Pages that could not be loaded are skipped.
 */
function ProcessPage(pageNumber) {
  if (pageNumber > MaxPages) {
    return
  }

  GetHTML(pageNumber, content => {
    if (!content) {
      ProcessPage(pageNumber + 1)
      return
    }

    const links = GetAllYoutubeMusicLinks(content)
    if (links.length === 0) {
      // A page without links ends the run.
      return
    }

    // One append per page keeps the rows of a page together and in order.
    const rows = links.map(ToCsvRow).join('')
    fs.appendFile(AllSongsFileName, rows, err => {
      if (err) {
        console.error('File append error:', err.message)
      }
      ProcessPage(pageNumber + 1)
    })
  })
}

ProcessPage(1)
// fs.writeFile(AllSongsFileName, '')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment