Created
January 30, 2018 09:02
-
-
Save jon49/027b0611d3255726551893a1fdf91b5b to your computer and use it in GitHub Desktop.
Last YouTube videos on The Survival Podcast
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Dependencies: file-system access, HTML parsing/traversal, and HTTP download.
const fs = require('fs')
const cheerio = require('cheerio')
const fetch = require('node-fetch')
// https://gist.github.com/endel/321925f6cafa25bbfbde
/**
 * Builds a zero-padding formatter.
 *
 * @param {number} [size] Minimum width of the formatted value. Any falsy
 *   value (undefined, 0, ...) falls back to a width of 2.
 * @returns {function(*): string} Function that converts its argument to a
 *   string and left-pads it with "0" up to `size` characters. Values that are
 *   already long enough are returned as-is (never truncated).
 */
const Pad = size => number => String(number).padStart(size || 2, '0')

// Three-character formatter, e.g. 7 -> "007".
const Pad3 = Pad(3)
/**
 * Loads the HTML of one listing page, using a local file as a cache.
 *
 * The page is first looked up in `./TSP-<NNN>.html` (page number zero-padded
 * by `Pad3`). When that file does not exist, the page is downloaded, written
 * to that file, and handed to the callback.
 *
 * @param {number} pageNumber 1-based page index; page 1 is the site root.
 * @param {function(string=): void} callback Called exactly once: with the
 *   page HTML on success, or with no argument when reading or downloading
 *   failed (the failure is logged to stderr).
 */
function GetHTML(pageNumber, callback) {
  const fileName = `./TSP-${Pad3(pageNumber)}.html`

  // Read directly instead of checking existence first: fs.exists is
  // deprecated and a separate check can race with the read.
  fs.readFile(fileName, 'utf8', (readErr, data) => {
    if (!readErr) {
      callback(data)
      return
    }

    if (readErr.code !== 'ENOENT') {
      // The cache file exists but cannot be read. Report it and stop here so
      // the callback is not invoked a second time with undefined data.
      console.error('Page:', pageNumber, readErr.message)
      callback()
      return
    }

    // Cache miss: download the page.
    const url = pageNumber === 1
      ? 'http://www.thesurvivalpodcast.com/'
      : `http://www.thesurvivalpodcast.com/page/${pageNumber}`

    fetch(url)
      .then(response => response.text())
      // Two-argument then(): an exception thrown by `callback` itself must
      // not be treated as a download failure and trigger a second callback.
      .then(
        text => {
          // Caching is best-effort; a failed write is reported but does not
          // prevent the caller from receiving the downloaded text.
          fs.writeFile(fileName, text, writeErr => {
            if (writeErr) console.error('Page:', pageNumber, writeErr.message)
          })
          callback(text)
        },
        err => {
          console.error('Page:', pageNumber, err)
          callback()
        })
  })
}
// Entry titles that denote an episode start with "Episode-<digits>".
const isEpisode = /^Episode-\d+/
// Paragraph that introduces an episode's resource list.
const isResource = /^Resources for/

/**
 * Extracts, for each episode entry on a listing page, the last resource-list
 * item that links to YouTube.
 *
 * For every `.entry-title` whose text matches `isEpisode`, each sibling
 * `.entry-content` is searched for `<p>` children starting with
 * "Resources for". From the `<ul>` directly following such a paragraph, the
 * `<li>` items with exactly one child anchor whose href contains "youtube"
 * are collected, and only the last of them is recorded.
 *
 * @param {string} html Markup of one listing page.
 * @returns {Array<{show: string, youtube: string, title: string}>}
 *   `show` is the text of the element two siblings before the
 *   `.entry-content` (presumably the entry title; confirm against the page
 *   markup), `youtube` the link target, `title` the list item's text.
 */
function GetAllYoutubeMusicLinks(html) {
  const list = []
  const $ = cheerio.load(html)

  $('.entry-title').each((titleIdx, titleEl) => {
    const entryTitle = $(titleEl)
    if (!isEpisode.test(entryTitle.text())) return

    entryTitle.siblings('.entry-content').each((contentIdx, contentEl) => {
      $(contentEl)
        .children('p')
        .filter((pIdx, p) => isResource.test($(p).text()))
        .next('ul')
        .children('li')
        .filter((liIdx, li) => $(li).children('a[href*=youtube]').length === 1)
        .last()
        .each((liIdx, liEl) => {
          const li = $(liEl)
          const show = li.closest('.entry-content').prev().prev().text()
          // Read the href from the anchor that matched the YouTube filter;
          // the first <a> in the item is not necessarily that anchor.
          const youtube = li.children('a[href*=youtube]').attr('href')
          const title = li.text()
          list.push({ show, youtube, title })
        })
    })
  })

  return list
}
const AllSongsFileName = './AllSongs.csv'
// Hard upper bound on the number of pages visited, in case the
// "page without links" stop condition is never reached.
const MaxPages = 1000

/**
 * Renders a value as one CSV field, quoting it (and doubling embedded
 * quotes) only when it contains a comma, a double quote or a line break.
 */
const CsvField = value => {
  const text = String(value)
  return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text
}

/**
 * Formats one extracted link as a CSV row:
 * episode number, link title, YouTube URL, show title.
 * The episode number is left empty when the show title carries none.
 */
const CsvRow = link => {
  const showNumberMatch = link.show.match(/Episode-(\d+)/)
  const episodeNumber = (showNumberMatch && +showNumberMatch[1]) || ''
  return [episodeNumber, link.title, link.youtube, link.show]
    .map(CsvField)
    .join(',') + '\r\n'
}

/**
 * Processes listing pages one at a time, starting at `pageNumber`, and
 * appends their links to the CSV file.
 *
 * Pages are handled sequentially: GetHTML reports asynchronously, so the
 * decision to stop can only be taken after a page's result has arrived.
 * The walk ends at the first page that yields no links or after MaxPages.
 * A page that could not be loaded is skipped.
 */
function ProcessPage(pageNumber) {
  if (pageNumber > MaxPages) return

  GetHTML(pageNumber, content => {
    if (!content) {
      ProcessPage(pageNumber + 1)
      return
    }

    const links = GetAllYoutubeMusicLinks(content)
    if (links.length === 0) return

    // One append per page keeps the rows in page order; separate concurrent
    // appends carry no ordering guarantee.
    fs.appendFile(AllSongsFileName, links.map(CsvRow).join(''), err => {
      if (err) console.error('File append error:', err.message)
      ProcessPage(pageNumber + 1)
    })
  })
}

ProcessPage(1)

// Left disabled by the author; running it would reset AllSongs.csv to an
// empty file.
// fs.writeFile(AllSongsFileName, '')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment