Created
January 5, 2017 00:36
-
-
Save justinwolfe/93ff78f54c47c18cfebdd97d1e765aa4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//tiny_ebook by justin wolfe
//downloads your TinyLetter archive, processes it, and turns it into an .epub ebook.
//required modules
//node built-ins
var crypto = require('crypto');
var fs = require('fs');
var http = require('http');
//third-party packages
var async = require('async');
var cheerio = require('cheerio');
var mkdirp = require('mkdirp');
var s = require('string');
var zipFolder = require('zip-folder');
//settings and processing variables
const tinySettings = {};
//title and author information
tinySettings.title = 'thank you notes, volume 2';
tinySettings.authorFirst = 'justin';
tinySettings.authorLast = 'wolfe';
//whether or not to strip out certain HTML tags when processing letters
tinySettings.strip = true;
//list of tags (comma separated) that should be stripped out during letter processing if tinySettings.strip is true
//i strip out span tags because sometimes they get included when pasting in gmail and mess up the uniformity of the styling
tinySettings.stripTags = 'span';
//tinySettings.recs needs to be larger than the number of public TL issues you have (but can be set to a smaller number here for testing)
tinySettings.recs = 500;
//your tinyletter account name (first command line argument)
tinySettings.account = process.argv[2];
tinySettings.url = `http://www.tinyletter.com/${tinySettings.account}/archive?page=1&recs=${tinySettings.recs}&sort=asc&q=`;
//the folder where you want the ebook to be generated (second command line argument)
//a missing argument leaves just '/' here rather than 'undefined/', so the "no folder given" check can actually detect it
tinySettings.folder = (process.argv[3] || '') + '/';
tinySettings.links = [];
tinySettings.letters = [];
tinySettings.toc = [];
//random identifier for the book, regenerated on every run
//the leading letters keep it usable as an XML id attribute, which may not start with a digit
tinySettings.uid = 'id-' + crypto.randomBytes(16).toString('hex');
//delay in milliseconds between get requests for the letters, for politeness
//a random value between 6 and 8 seconds, picked once per run
tinySettings.delay = (6 + (2 * Math.random())) * 1000;
//template files that will need to be created for the epub — first array element is filename/path, second is file contents (if applicable)
tinySettings.files = [];
tinySettings.files[0] = ['mimetype', 'application/epub+zip'];
tinySettings.files[1] = ['page.css', ''];
tinySettings.files[2] = ['book.css', ''];
//every element is closed explicitly because the file is served as XHTML, which has to be well-formed
tinySettings.files[3] = ['titlepage.xhtml', `
<body>
<h4>thank you notes</h4>
<h4>1/4/2016 - 1/4/2017</h4>
<p>by justin, esmé, l, julia, patrick, m.o., pierce, ashley, k, fsa, l, madeleine, r, julia w., kflo, klark, tj, jhe, sydney, c, helene, katelyn, moe, zilla, dana, marco, sarah, krs, jerome, s, clare, scarlett, sherry, micha, k, kw, y, chloe, mt, l, j, and other contributors</p>
<br/>
<h4>subscribe at <a href="http://www.tinyletter.com/thankyounotes">tinyletter.com/thankyounotes</a></h4>
<h4>ebook generated with <a href="http://www.github.com">tiny_ebook</a></h4>
</body>
`];
//this is the first part of this file, the list entries and closing tags get appended after the letters get pulled down
tinySettings.files[4] = ['toc.xhtml', `
<html xmlns:epub="http://www.idpf.org/2007/ops" xmlns="http://www.w3.org/1999/xhtml">
<nav epub:type="index" id="toc">
<h4>table of contents</h4>
<ol>
`];
//this is the first part of this file, the rest gets added after the letters get pulled down
tinySettings.files[5] = ['content.opf', `<?xml version='1.0' encoding='utf-8'?>
<package xmlns="http://www.idpf.org/2007/opf" version="2.0" unique-identifier="${tinySettings.uid}">
<metadata xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata" xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:language>en</dc:language>
<dc:title>${tinySettings.title}</dc:title>
<dc:creator opf:file-as="${tinySettings.authorLast}, ${tinySettings.authorFirst}" opf:role="aut">${tinySettings.authorFirst} ${tinySettings.authorLast}</dc:creator>
<meta name="cover" content="cover"/>
<dc:date>0101-01-01T00:00:00+00:00</dc:date>
<dc:contributor opf:role="bkp"></dc:contributor>
<dc:identifier id="${tinySettings.uid}" opf:scheme="uuid">${tinySettings.uid}</dc:identifier>
</metadata>
<manifest>
<item href="page.css" id="page_css" media-type="text/css"/>
<item href="book.css" id="css" media-type="text/css"/>
<item href="titlepage.xhtml" id="titlepage" media-type="application/xhtml+xml"/>
<item href="toc.xhtml" media-type="application/xhtml+xml" id="toc"/>`
];
tinySettings.files[6] = ['META-INF/container.xml', `<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>`
];
//functions
//kick everything off: bookIt first makes sure the required command line arguments are there, then starts the download
bookIt();
//entry point: checks the command line arguments and, when they are usable, starts the archive download
//reads process.argv[3] (output folder) and process.argv[4] (optional "true"/"false" for tag stripping)
//along with tinySettings.account and tinySettings.folder; prints usage help and does nothing else when something is missing
function bookIt() {
  const hasAccount = Boolean(tinySettings.account);
  //the folder setting is argv[3] with a '/' appended, so check the raw argument too:
  //a missing argument would otherwise show up as the non-empty string 'undefined/'
  const hasFolder = Boolean(process.argv[3]) && tinySettings.folder !== '/';
  if (!hasAccount || !hasFolder) {
    console.log('missing required parameters, try again');
    console.log('syntax should be "node tl.js [accountName] [outputFolderPath] [clean HTML true/false]"');
    console.log('example: node tl.js thankyounotes thankyoubook true');
    return;
  }
  console.log('\nrequired parameters available, going to work!\n');
  if (process.argv[4] !== undefined) {
    //command line arguments are always strings; turn the flag into a real boolean
    //so that later comparisons against true behave as the user intended
    tinySettings.strip = String(process.argv[4]).trim().toLowerCase() === 'true';
  }
  downloadDirectory();
}
//pulls down the contents of your TL archive page (tinySettings.url) and uses cheerio to select the letter link elements
//hands the selection to filterLinks; prints a message and stops if the page is the tinyletter error page or the request fails
function downloadDirectory() {
  const request = http.get(tinySettings.url, (response) => {
    let page = '';
    //let the stream decode UTF-8 so a multi-byte character split across two chunks is not corrupted
    response.setEncoding('utf8');
    response.on('data', (chunk) => {
      page += chunk;
    });
    response.on('end', () => {
      //local binding: the parsed page should not leak into the global scope
      const $ = cheerio.load(page);
      //checks whether the get request returned the tinyletter 404 page (which includes this particular CSS class)
      if ($('.load-error').length) {
        console.log("invalid archive path - did you type your account name correctly? try again");
        return;
      }
      console.log("valid archive path - letter links coming!\n");
      filterLinks($('.message-link'));
    });
  });
  //without a listener a connection failure is thrown as an unhandled 'error' event
  request.on('error', (err) => {
    console.log('could not download the archive page', err);
  });
}
//collects the href of every letter link found by downloadDirectory into tinySettings.links, then starts building the output folders
//directory: array-like of parsed link elements, each exposing its attributes as attribs (for example a cheerio selection)
function filterLinks(directory) {
  //the elements are already in memory, so a plain synchronous loop does the job
  Array.from(directory).forEach((link) => {
    const href = link && link.attribs && link.attribs.href;
    //skip anchors without an href instead of queueing an undefined url for download
    if (href) {
      tinySettings.links.push(href);
    }
  });
  console.log('got all the letter links');
  console.log(tinySettings.links.length + ' links total\n');
  makeContainer1();
}
//creates the output folder (tinySettings.folder) that will hold the ebook files
//on success moves on to makeContainer2; on failure the error is logged and the run ends here
function makeContainer1() {
  mkdirp(tinySettings.folder, (mkdirError) => {
    if (mkdirError) {
      console.log(mkdirError);
      return;
    }
    console.log('made outer directory');
    makeContainer2();
  });
}
//creates the META-INF folder inside the output folder, which holds the XML metadata file
//on success moves on to setupContainer; on failure the error is logged and the run ends here
//(could be merged with makeContainer1 into one series at some point)
function makeContainer2() {
  const metaInfPath = tinySettings.folder + 'META-INF';
  mkdirp(metaInfPath, (mkdirError) => {
    if (mkdirError) {
      console.log(mkdirError);
      return;
    }
    console.log('made inner META-INF directory\n');
    setupContainer();
  });
}
//writes every template file listed in tinySettings.files into the output folder, one after another
//each entry is [relative path, contents]; once all of them are written the letter download starts
function setupContainer() {
  console.log("making template files");

  const writeTemplate = (template, position, next) => {
    const relativePath = template[0];
    const contents = template[1];
    console.log(`${position}: ${relativePath}`);
    fs.writeFile(tinySettings.folder + relativePath, contents, (writeError) => {
      if (writeError) {
        //only logged: next is never called, so the remaining files and steps are not attempted
        console.log(writeError);
        return;
      }
      console.log(`made template file ${position} of ${tinySettings.files.length - 1}`);
      next();
    });
  };

  const afterTemplates = () => {
    console.log('made all the template files\n');
    getLetters();
  };

  async.eachOfSeries(tinySettings.files, writeTemplate, afterTemplates);
}
//downloads every letter in tinySettings.links, one at a time with tinySettings.delay ms before each request
//extracts index, title, date and body from each page with cheerio and pushes the result onto tinySettings.letters
//when all letters are in, hands over to outputLetters; a failed request is logged and stops the run
function getLetters() {
  console.log("downloading letters with a(n) " + (tinySettings.delay / 1000) + " second delay in between");
  //accept the boolean setting as well as a 'true' string coming straight from the command line
  const shouldStrip = tinySettings.strip === true || tinySettings.strip === 'true';
  //the setting is a comma separated list, stripTags wants one argument per tag
  const tagsToStrip = String(tinySettings.stripTags)
    .split(',')
    .map((tag) => tag.trim())
    .filter(Boolean);

  async.eachOfSeries(tinySettings.links, (item, key, callback) => {
    //make sure the series callback fires exactly once, whichever of 'end' and 'error' comes first
    let settled = false;
    const finish = (err) => {
      if (settled) {
        return;
      }
      settled = true;
      callback(err);
    };

    setTimeout(() => {
      const request = http.get(item, (response) => {
        let page = '';
        //let the stream decode UTF-8 so a multi-byte character split across two chunks is not corrupted
        response.setEncoding('utf8');
        response.on('data', (chunk) => {
          page += chunk;
        });
        response.on('end', () => {
          //local binding: the parsed page should not leak into the global scope
          const $ = cheerio.load(page);
          const letter = {};
          letter.index = key + 1;
          letter.title = $('.subject').text();
          letter.date = $('.date').text();
          if (shouldStrip) {
            $('.message-body *').removeAttr('style');
          }
          //html() gives null when the page has no .message-body; fall back to an empty body
          const rawBody = $('.message-body').html() || '';
          //String() unwraps the string.js wrapper so the letter holds a plain string
          letter.body = shouldStrip
            ? String(s(rawBody).stripTags(...tagsToStrip))
            : rawBody;
          console.log('downloaded letter ' + letter.index + ': ' + letter.title + ' - ' + letter.date);
          tinySettings.letters.push(letter);
          finish();
        });
      });
      //without a listener a connection failure is thrown as an unhandled 'error' event
      request.on('error', finish);
    }, tinySettings.delay);
  }, (err) => {
    if (err) {
      console.log('could not download all the letters, stopping', err);
      return;
    }
    console.log('downloaded all the letters!\n');
    outputLetters();
  });
}
//writes every letter in tinySettings.letters to its own chapter file (ch-<index>.xhtml) in the output folder
//while doing so builds the manifest, spine and table of contents entries,
//then passes them to updateContent to be appended to content.opf and toc.xhtml
//a failed write is logged and stops the run
function outputLetters() {
  console.log("making chapter files");
  //content.opf ends right after its last template item, so start on a fresh line
  let manifest = '\n';
  //the titlepage and toc references are added once, when the spine is assembled below
  let spine = '';
  let toc = '';

  async.eachOfSeries(tinySettings.letters, (item, key, callback) => {
    const chapterId = 'ch-' + item.index;
    const chapterFile = chapterId + '.xhtml';
    //titles are plain text and may contain characters such as & or < that are not allowed raw in XML
    const safeTitle = escapeXml(item.title);
    const chapter = '<h3>' + safeTitle + '</h3><br/>' + item.body;

    fs.writeFile(tinySettings.folder + chapterFile, chapter, (err) => {
      if (err) {
        //hand the error to the series so it ends through the final callback
        callback(err);
        return;
      }
      console.log('made chapter file ' + key + ' of ' + (tinySettings.letters.length - 1));
      //add entry to manifest
      manifest += `<item id="${chapterId}" href="${chapterFile}" media-type="application/xhtml+xml" /> \n`;
      //add entry to spine (reading order)
      spine += `<itemref idref="${chapterId}"/> \n`;
      //add entry to table of contents
      toc += `
<li>
<a href="${chapterFile}">${safeTitle}</a>
</li>`;
      callback();
    });
  }, (err) => {
    if (err) {
      console.log(err);
      return;
    }
    manifest += '</manifest>\n<spine>\n<itemref idref="titlepage"/>\n<itemref idref="toc"/>\n';
    manifest += spine;
    manifest += `</spine>
<guide>
<reference href="titlepage.xhtml" type="cover" title="Cover"/>
</guide>
</package>`;
    //also close the html element that the toc.xhtml template opens
    toc += '</ol>\n</nav>\n</html>';
    console.log('made all the chapter files\n');
    const updates = [
      [tinySettings.folder + 'content.opf', manifest],
      [tinySettings.folder + 'toc.xhtml', toc],
    ];
    updateContent(updates);
  });
}

//escapes the characters that have a special meaning in XML text and attribute values
function escapeXml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}
//appends the generated manifest/spine and table of contents text to the files written earlier
//updates: array of [file path, text to append]; processed in order, then the folder gets zipped
function updateContent(updates) {
  const appendUpdate = (update, position, next) => {
    const targetPath = update[0];
    const addition = update[1];
    fs.appendFile(targetPath, addition, (appendError) => {
      if (appendError) {
        //only logged: next is never called, so nothing after this file is attempted
        console.log(appendError);
        return;
      }
      console.log('appended data to ' + targetPath);
      next();
    });
  };

  const afterUpdates = () => {
    console.log('updates to manifest, spine, and table of contents completed \n');
    zipDirectory();
  };

  async.eachOfSeries(updates, appendUpdate, afterUpdates);
}
//zips the output folder into "<title>-<first>_<last>.epub" and reports whether that worked
function zipDirectory() {
  const epubName = `${tinySettings.title}-${tinySettings.authorFirst}_${tinySettings.authorLast}.epub`;
  zipFolder(tinySettings.folder, epubName, (zipError) => {
    if (zipError) {
      console.log('zip didn\'t work', zipError);
      return;
    }
    console.log('epub generated! all done :) \n');
  });
}
//NOT CURRENTLY USING
//debugging helper: writes the first downloaded letter (heading plus body) to testoutput.html in the output folder
function testOutput() {
  console.log("testing output");
  const firstLetter = tinySettings.letters[0];
  const markup = '<h3>' + firstLetter.title + '</h3><br>' + firstLetter.body;
  const targetPath = tinySettings.folder + "testoutput.html";
  fs.writeFile(targetPath, markup, (writeError) => {
    if (writeError) {
      console.log(writeError);
      return;
    }
    console.log('saved that test output');
  });
}
//NOT CURRENTLY USING
//writes the collected letter links to testlinks.txt, one per line, in case you'd like to have them for some reason
function saveLinks() {
  const linkList = tinySettings.links.join('\n');
  fs.writeFile('testlinks.txt', linkList, (writeError) => {
    if (writeError) {
      console.log(writeError);
      return;
    }
    console.log('saved the links');
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment