Skip to content

Instantly share code, notes, and snippets.

@justinwolfe
Created January 5, 2017 00:36
Show Gist options
  • Save justinwolfe/93ff78f54c47c18cfebdd97d1e765aa4 to your computer and use it in GitHub Desktop.
Save justinwolfe/93ff78f54c47c18cfebdd97d1e765aa4 to your computer and use it in GitHub Desktop.
//tiny_ebook by justin wolfe
//downloads your tinyletter archive, processes it, and makes it into an .epub ebook.
//required modules
var http = require('http');
var cheerio = require('cheerio');
var s = require('string');
var fs = require('fs');
var async = require('async');
var mkdirp = require('mkdirp');
var zipFolder = require('zip-folder');
//settings and processing variables
//everything the script needs lives on this one object: user-editable settings, command-line
//arguments, working arrays that get filled in while the script runs, and the epub template files
const tinySettings = {
  //title and author information
  title: 'thank you notes, volume 2',
  authorFirst: 'justin',
  authorLast: 'wolfe',
  //whether or not to strip out certain HTML tags when processing letters
  strip: true,
  //list of tags (comma separated) that should be stripped out during letter processing when strip is true
  //i strip out span tags because sometimes they get included when pasting in gmail and mess up the uniformity of the styling
  stripTags: 'span',
  //recs needs to be larger than the number of public TL issues you have (but can be set to a smaller number here for testing)
  recs: 500,
  //your tinyletter account name (first command-line argument)
  account: process.argv[2],
  //the folder where you want the ebook to be generated (second command-line argument)
  //a missing argument yields just '/' rather than the literal text 'undefined/', so the startup check against '/' can catch it
  folder: (process.argv[3] || '') + '/',
  //working arrays, filled in while the script runs
  links: [],
  letters: [],
  toc: [],
  //placeholder identifier - replace with a function that actually generates a random string
  uid: 'fdskaj3r3fjw0jfds0afoj30fjo0efjo0dfdfd023jf',
  //delay in milliseconds between get requests for the letters, for politeness
  //a random value between 6 and 8 seconds, picked once when the script starts
  delay: (6 + (2 * Math.random())) * 1000,
  //template files that will need to be created for the epub - first array element is filename/path, second is file contents (if applicable)
  files: [],
};
//archive listing url, built from the account name and the record limit
tinySettings.url = 'http://www.tinyletter.com/' + tinySettings.account + '/archive?page=1&recs=' + tinySettings.recs + '&sort=asc&q=';
tinySettings.files[0] = ['mimetype', 'application/epub+zip'];
tinySettings.files[1] = ['page.css', ''];
tinySettings.files[2] = ['book.css', ''];
//every tag is closed here because the file is served as XHTML
tinySettings.files[3] = ['titlepage.xhtml', `
<body>
<h4>thank you notes</h4>
<h4>1/4/2016 - 1/4/2017</h4>
<p>by justin, esmé, l, julia, patrick, m.o., pierce, ashley, k, fsa, l, madeleine, r, julia w., kflo, klark, tj, jhe, sydney, c, helene, katelyn, moe, zilla, dana, marco, sarah, krs, jerome, s, clare, scarlett, sherry, micha, k, kw, y, chloe, mt, l, j, and other contributors</p>
<br/>
<h4>subscribe at <a href="http://www.tinyletter.com/thankyounotes">tinyletter.com/thankyounotes</a></h4>
<h4>ebook generated with <a href="http://www.github.com">tiny_ebook</a></h4>
</body>
`];
//this is only the opening of the table of contents; the entries and closing tags get appended after the letters are written
tinySettings.files[4] = ['toc.xhtml', `
<html xmlns:epub="http://www.idpf.org/2007/ops" xmlns="http://www.w3.org/1999/xhtml">
<nav epub:type="index" id="toc">
<h4>table of contents</h4>
<ol>
`];
//this is the first part of this file, the rest gets added after the letters get pulled down
//the package's unique-identifier attribute points at the id of the dc:identifier element, and that element carries the identifier value as its text
tinySettings.files[5] = ['content.opf', `<?xml version='1.0' encoding='utf-8'?>
<package xmlns="http://www.idpf.org/2007/opf" version="2.0" unique-identifier="${tinySettings.uid}">
<metadata xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:calibre="http://calibre.kovidgoyal.net/2009/metadata" xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:language>en</dc:language>
<dc:title>${tinySettings.title}</dc:title>
<dc:creator opf:file-as="${tinySettings.authorLast}, ${tinySettings.authorFirst}" opf:role="aut">${tinySettings.authorFirst} ${tinySettings.authorLast}</dc:creator>
<meta name="cover" content="cover"/>
<dc:date>0101-01-01T00:00:00+00:00</dc:date>
<dc:contributor opf:role="bkp"></dc:contributor>
<dc:identifier id="${tinySettings.uid}" opf:scheme="uuid">${tinySettings.uid}</dc:identifier>
</metadata>
<manifest>
<item href="page.css" id="page_css" media-type="text/css"/>
<item href="book.css" id="css" media-type="text/css"/>
<item href="titlepage.xhtml" id="titlepage" media-type="application/xhtml+xml"/>
<item href="toc.xhtml" media-type="application/xhtml+xml" id="toc"/>`
];
tinySettings.files[6] = ['META-INF/container.xml', `<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>`
];
//functions
//entry point: start the ball rolling
//bookIt is written as a function declaration further down, so it is hoisted and can be called here before its definition
bookIt()
//checks that the required command-line information is available before any work starts
//reads: tinySettings.account, tinySettings.folder, process.argv[3], process.argv[4]
//writes: tinySettings.strip (only when a third argument was given)
//on success hands off to downloadDirectory(); otherwise prints the usage text and does nothing else
function bookIt(){
  var hasAccount = Boolean(tinySettings.account);
  //look at the raw argument as well, so a missing folder is caught no matter how tinySettings.folder was assembled
  var hasFolder = Boolean(process.argv[3]) && tinySettings.folder !== '/';
  if (!hasAccount || !hasFolder){
    console.log('missing required parameters, try again');
    console.log('syntax should be "node tl.js [accountName] [outputFolderPath] [clean HTML true/false]"');
    console.log('example: node tl.js thankyounotes thankyoubook true');
    return;
  }
  console.log('\nrequired parameters available, going to work!\n');
  if (process.argv[4]){
    //command-line arguments are always strings, so turn the text into a real boolean
    //('true' and '1' switch cleaning on, anything else switches it off)
    tinySettings.strip = /^(true|1)$/i.test(process.argv[4]);
  }
  downloadDirectory();
}
//pulls down the contents of your TL archive page and uses cheerio to collect the elements that hold the letter links
//reads: tinySettings.url
//on success passes the matched '.message-link' elements to filterLinks(); on any failure logs a message and stops
function downloadDirectory(){
  var request = http.get(tinySettings.url, function(response){
    var page = '';
    //decode as utf-8 on the stream itself so a multi-byte character split across two chunks is not corrupted
    response.setEncoding('utf8');
    response.on('data', function(chunk){
      page += chunk;
    });
    response.on('error', function(err){
      console.log('problem while reading the archive page', err);
    });
    response.on('end', function(){
      //keep the cheerio handle local instead of leaking it as a global
      var $ = cheerio.load(page);
      //checks whether the get request returned the tinyletter 404 page (which includes this particular CSS class); if it did, report it and stop
      if ($('.load-error').length){
        console.log("invalid archive path - did you type your account name correctly? try again");
        return;
      }
      console.log("valid archive path - letter links coming!\n");
      filterLinks($('.message-link'));
    });
  });
  //without this handler a connection failure would surface as an uncaught exception
  request.on('error', function(err){
    console.log('could not download the archive page', err);
  });
}
//collects the letter urls from the elements found by downloadDirectory
//directory: array-like collection of parsed elements, each expected to carry its attributes under .attribs
//writes: appends every href it finds to tinySettings.links, then moves on to makeContainer1()
function filterLinks(directory){
  //this is plain synchronous work, so an ordinary loop is all that is needed
  for (var i = 0; i < directory.length; i++){
    var element = directory[i];
    var href = element && element.attribs && element.attribs.href;
    //skip anything without an href so that no undefined url reaches the download step
    if (href){
      tinySettings.links.push(href);
    }
  }
  console.log('got all the letter links');
  console.log(tinySettings.links.length + ' links total\n');
  makeContainer1();
  //saveLinks();
}
//creates the output folder (tinySettings.folder) that will hold all the ebook files
//if the folder cannot be made the error is logged and the chain stops; otherwise makeContainer2() runs next
function makeContainer1(){
  var onOuterFolder = function(failure){
    if (failure){
      console.log(failure);
      return;
    }
    console.log('made outer directory');
    makeContainer2();
  };
  mkdirp(tinySettings.folder, onOuterFolder);
}
//creates the META-INF folder inside the output folder; it holds the XML metadata file
//(this could be merged with the previous step, e.g. by looping over both paths)
//if the folder cannot be made the error is logged and the chain stops; otherwise setupContainer() runs next
function makeContainer2(){
  var metaFolder = tinySettings.folder + 'META-INF';
  var onInnerFolder = function(failure){
    if (failure){
      console.log(failure);
      return;
    }
    console.log('made inner META-INF directory\n');
    setupContainer();
  };
  mkdirp(metaFolder, onInnerFolder);
}
//writes out each epub template file listed in tinySettings.files, one after another
//each entry is [path relative to the output folder, file contents]
//a failed write is logged and the chain stops there; once every file is written getLetters() runs next
function setupContainer(){
  console.log("making template files");
  var writeTemplate = function(entry, position, next){
    var relativePath = entry[0];
    var contents = entry[1];
    console.log(`${position}: ${relativePath}`);
    fs.writeFile(tinySettings.folder + relativePath, contents, function(failure){
      if (failure){
        console.log(failure);
        return;
      }
      console.log(`made template file ${position} of ${tinySettings.files.length - 1}`);
      next();
    });
  };
  var onAllWritten = function(){
    console.log('made all the template files\n');
    getLetters();
  };
  async.eachOfSeries(tinySettings.files, writeTemplate, onAllWritten);
}
//downloads every letter listed in tinySettings.links, one at a time, and processes each into a record
//reads: tinySettings.links, tinySettings.delay, tinySettings.strip, tinySettings.stripTags
//writes: pushes { index, title, date, body } onto tinySettings.letters for every letter
//when all letters are in, outputLetters() runs next; a failed download is logged and stops the run
function getLetters(){
  console.log("downloading letters with a(n) " + (tinySettings.delay/1000) + " second delay in between");
  //the flag can arrive as a real boolean or as text copied from the command line, so accept both forms
  var shouldStrip = [true, 1, 'true', '1'].indexOf(tinySettings.strip) !== -1;
  //the setting is a comma separated list, but stripTags wants one argument per tag name
  var tagsToStrip = String(tinySettings.stripTags || '').split(',').map(function(tag){
    return tag.trim();
  }).filter(Boolean);
  async.eachOfSeries(tinySettings.links, function(item, key, callback){
    //make sure the series callback fires only once, even if both the request and the response report a problem
    var finished = false;
    var finish = function(err){
      if (finished){
        return;
      }
      finished = true;
      callback(err);
    };
    setTimeout(function(){
      var request = http.get(item, function(response){
        var page = "";
        //decode as utf-8 on the stream itself so a multi-byte character split across two chunks is not corrupted
        response.setEncoding('utf8');
        response.on('data', function(chunk){
          page += chunk;
        });
        response.on('error', finish);
        response.on('end', function(){
          //keep the cheerio handle local instead of leaking it as a global
          var $ = cheerio.load(page);
          var letter = {};
          letter.index = key + 1;
          letter.title = $('.subject').text();
          letter.date = $('.date').text();
          if (shouldStrip){
            $('.message-body *').removeAttr('style');
            var wrapped = s($('.message-body').html() || '');
            //store a plain string rather than the string.js wrapper object
            letter.body = wrapped.stripTags.apply(wrapped, tagsToStrip).toString();
          } else {
            //a page without a message body yields null; store an empty string so "null" never ends up in a chapter
            letter.body = $('.message-body').html() || '';
          }
          console.log('downloaded letter ' + letter.index + ': ' + letter.title + ' - ' + letter.date);
          tinySettings.letters.push(letter);
          finish();
        });
      });
      //without this handler a connection failure would surface as an uncaught exception
      request.on('error', finish);
    }, tinySettings.delay);
  }, function(err){
    if (err){
      console.log('could not download all the letters', err);
      return;
    }
    console.log('downloaded all the letters!\n');
    outputLetters();
  });
}
//writes every letter in tinySettings.letters to its own chapter file (ch-<index>.xhtml in the output folder)
//and builds the text that still has to be appended to content.opf (manifest entries, spine, guide) and to toc.xhtml (entries, closing tags)
//when every chapter is written, hands both pieces of text to updateContent(); a failed write is logged and the chain stops there
function outputLetters(){
  console.log("making chapter files");
  //titles are plain text, so the XML special characters in them have to be escaped before they go into markup
  var escapeXml = function(text){
    return String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');
  };
  var manifest = "";
  //starts empty: the title page and the table of contents are put at the front of the spine once, further down
  var spine = "";
  var toc = "";
  async.eachOfSeries(tinySettings.letters, function(item, key, callback){
    var chapterFile = "ch-" + item.index + ".xhtml";
    var safeTitle = escapeXml(item.title);
    var letter = '<h3>' + safeTitle + '</h3><br/>' + item.body;
    fs.writeFile(tinySettings.folder + chapterFile, letter, function (err) {
      if (err){
        return console.log(err);
      }
      console.log('made chapter file ' + key + ' of ' + (tinySettings.letters.length-1));
      //add entry to manifest
      //example syntax: <item id="chapter01" href="chap01.xhtml" media-type="application/xhtml+xml" />
      manifest += '<item id="ch-' + item.index + '" href="' + chapterFile + '" media-type="application/xhtml+xml" /> \n';
      //add entry to spine (reading order)
      //example syntax: <itemref idref="titlepage" />
      spine += '<itemref idref="ch-' + item.index + '"/> \n';
      //add entry to table of contents
      toc += `
<li>
<a href="${chapterFile}">${safeTitle}</a>
</li>`;
      callback();
    });
  }, function(){
    //close the manifest, then list the reading order: title page, table of contents, chapters
    manifest += '</manifest>\n<spine>\n<itemref idref="titlepage"/>\n<itemref idref="toc"/>\n';
    manifest += spine;
    manifest += `</spine>
<guide>
<reference href="titlepage.xhtml" type="cover" title="Cover"/>
</guide>
</package>`;
    //also close the html element that the toc template opened
    toc += '</ol>\n</nav>\n</html>';
    console.log('made all the chapter files\n');
    var updates = [];
    updates[0] = [tinySettings.folder + 'content.opf', manifest];
    updates[1] = [tinySettings.folder + 'toc.xhtml', toc];
    updateContent(updates);
  });
}
//appends the generated text to the files that were started from templates (the manifest/spine file and the table of contents)
//updates: list of [file path, text to append] pairs, handled one after another
//a failed append is logged and the chain stops there; once every append is done zipDirectory() runs next
function updateContent(updates){
  var appendOne = function(entry, position, next){
    var target = entry[0];
    var addition = entry[1];
    fs.appendFile(target, addition, function(failure){
      if (failure){
        console.log(failure);
        return;
      }
      console.log('appended data to ' + target);
      next();
    });
  };
  var onAllAppended = function(){
    console.log('updates to manifest, spine, and table of contents completed \n');
    zipDirectory();
  };
  async.eachOfSeries(updates, appendOne, onAllAppended);
}
//zips the output folder into "<title>-<first>_<last>.epub" and reports whether that worked
//the archive name is a bare file name, not a path inside tinySettings.folder
function zipDirectory(){
  var archiveName = `${tinySettings.title}-${tinySettings.authorFirst}_${tinySettings.authorLast}.epub`;
  var onZipped = function(failure){
    if (failure){
      console.log('zip didn\'t work', failure);
      return;
    }
    console.log('epub generated! all done :) \n');
  };
  zipFolder(tinySettings.folder, archiveName, onZipped);
}
//NOT CURRENTLY USING
//debugging helper: writes only the first downloaded letter (heading plus body) to testoutput.html in the output folder
function testOutput(){
  console.log("testing output");
  var first = tinySettings.letters[0];
  var markup = '<h3>' + first.title + '</h3><br>' + first.body;
  var target = tinySettings.folder + "testoutput.html";
  fs.writeFile(target, markup, function(failure){
    if (failure){
      console.log(failure);
      return;
    }
    console.log('saved that test output');
  });
}
//NOT CURRENTLY USING
//writes the collected letter links, one per line, to testlinks.txt, in case you'd like to have that list for some reason
function saveLinks(){
  var listing = tinySettings.links.join('\n');
  fs.writeFile('testlinks.txt', listing, function(failure){
    if (failure){
      console.log(failure);
      return;
    }
    console.log('saved the links');
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment