Skip to content

Instantly share code, notes, and snippets.

@SecretDeveloper
Last active November 10, 2016 18:34
Show Gist options
  • Save SecretDeveloper/e0ec251820d41f8f06a462a3c109e756 to your computer and use it in GitHub Desktop.
Save SecretDeveloper/e0ec251820d41f8f06a462a3c109e756 to your computer and use it in GitHub Desktop.
unsong.fs
// Scrape every chapter of unsongbook.com and write the whole book to the console as a single HTML document (e.g. to feed into an epub converter).
open FSharp.Data
open System.Collections.Generic
/// One chapter of the book: its title, its body markup and the URL it came from.
type Chapter(title: string, body: string, url: string) =
    /// Title passed to the constructor.
    member this.Title = title

    /// Body passed to the constructor.
    member this.Body = body

    /// URL passed to the constructor.
    member this.URL = url
/// Mutable, initially empty list that accumulates Chapter values for the whole program.
let chapters : List<Chapter> = List<Chapter>()
/// Turn a title into an anchor name: spaces become underscores, and
/// newlines plus the characters : ? ' # = , are removed.
let slug (value: string) : string =
    // Substrings that are deleted outright, in the order they are applied.
    let dropped = [ "\n"; ":"; "?"; "'"; "#"; "="; "," ]
    let underscored = value.Replace(" ", "_")
    dropped
    |> List.fold (fun (acc: string) (junk: string) -> acc.Replace(junk, "")) underscored
/// Load the page at `url`, append it to `chapters` as a Chapter, then follow
/// the page's rel="next" link and repeat.
///
/// Stops and returns normally when a page has no "next" link (or the link has
/// an empty href). Previously the missing link caused an exception on the
/// final page, so the caller never got to print anything.
///
/// Still throws if the page cannot be loaded, or if it lacks the
/// `h1.pjgm-posttitle` / `div.pjgm-postcontent` elements.
let rec gatherPages (url: string) : unit =
    let page = HtmlDocument.Load(url)
    let content = page.CssSelect("div#pjgm-content")
    let title = content.CssSelect("h1.pjgm-posttitle").[0].InnerText()
    let body =
        content.CssSelect("div.pjgm-postcontent").[0].Elements()
        // Skip children carrying the "sharedaddy" class (presumably share-widget markup).
        |> Seq.filter (fun (t: HtmlNode) -> not (t.HasClass("sharedaddy")))
        |> Seq.map (fun s -> s.ToString())
        // String.concat joins with " " like the old reduce did, but yields ""
        // for an empty post instead of throwing.
        |> String.concat " "
    chapters.Add(new Chapter(title = title, body = body, url = url))
    // Base case: only recurse while the page actually offers a next link.
    let nextLinks = content.CssSelect("a[rel|=next]")
    if not (Seq.isEmpty nextLinks) then
        let nextUrl = nextLinks.[0].AttributeValue("href")
        if nextUrl <> "" then
            gatherPages nextUrl
/// Entry point: gathers chapters starting from the prologue URL, then prints
/// one HTML document to stdout — a table of contents linking to each chapter
/// by its slug, followed by every chapter's title and body. Returns exit code 0.
[<EntryPoint>]
let main argv =
    gatherPages "http://unsongbook.com/prologue-2/"
    printfn "<html><head><title>Unsong book</title></head><body>"
    //TOC
    printfn "<h1>Table of Contents</h1><ol>"
    for chap in chapters do
        let s = slug chap.Title
        printfn "<li><h2><a href='#%s'>%s</a></h2></li>\n" s chap.Title
    printfn "</ol>"
    //CONTENT
    for chap in chapters do
        // Same slug as in the TOC so the '#...' links resolve to this heading's id.
        let s = slug chap.Title
        // The wrapper div is now closed with </div>; it used to emit a second
        // opening <div>, leaving one unclosed element per chapter.
        printfn "<div><h2 id='%s'>%s</h2>\n<div>%s</div>\n</div>\n" s chap.Title chap.Body
    printfn "</body></html>"
    0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment