Skip to content

Instantly share code, notes, and snippets.

@rosalogia
Created March 28, 2021 08:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rosalogia/5948d18e79685f2ba704abf50d6ce3ca to your computer and use it in GitHub Desktop.
Save rosalogia/5948d18e79685f2ba704abf50d6ce3ca to your computer and use it in GitHub Desktop.
Script to grab all the developer blog entries from the Veloren site and convert their content to markdown files (for the purpose of searchability with grep)
#r "nuget: FSharp.Data"
open FSharp.Data
open System.IO
// Raw process command line. The page range is read from indices 2 and 3,
// so at least four entries must be present.
// NOTE(review): this presumably matches `dotnet fsi <script>.fsx <first> <last>`,
// where indices 0 and 1 are the host and the script path — confirm for other launchers.
let args = System.Environment.GetCommandLineArgs()

// Fail early with a usage hint instead of an IndexOutOfRangeException
// when the range arguments are missing.
if args.Length < 4 then
    failwithf
        "Expected <first> and <last> devblog numbers as arguments, but got %d command-line entries."
        args.Length

/// Number of the first devblog entry to fetch (inclusive).
let first = int args.[2]

/// Number of the last devblog entry to fetch (inclusive).
let last = int args.[3]
/// Builds an async computation that loads devblog page number `i` from
/// veloren.net and yields its `p`, `h1`, `h2` and `h3` descendant nodes.
let fetchPageAsync i =
    let url = sprintf "https://veloren.net/devblog-%i" i

    async {
        let! document = HtmlDocument.AsyncLoad url
        return document.Descendants [ "p"; "h1"; "h2"; "h3" ]
    }
/// Renders one HTML node as a line of markdown: the node's trimmed direct
/// inner text, prefixed with `#`, `##` or `###` for `h1`, `h2` and `h3`
/// nodes respectively, and left unprefixed for any other node.
let parseNode (node: HtmlNode) =
    let text = node.DirectInnerText().Trim()

    let prefix =
        match node.Name() with
        | "h1" -> "# "
        | "h2" -> "## "
        | "h3" -> "### "
        | _ -> ""

    prefix + text
/// Markdown text of every devblog page in the range `first..last`.
/// All pages are fetched in parallel, and the script blocks here until
/// every fetch has completed.
let pages =
    // Each node becomes one markdown fragment; fragments are separated by a blank line.
    let renderPage nodes =
        nodes |> Seq.map parseNode |> String.concat "\n\n"

    [ for i in first..last -> fetchPageAsync i ]
    |> Async.Parallel
    |> Async.RunSynchronously
    |> Array.map renderPage
    |> Array.toList
/// Output path for each devblog number in `first..last`, in the same order.
let filepaths =
    [ for i in first..last -> sprintf "./veloren_blogs/devblog_%i.md" i ]
// Make sure every target directory exists before writing; without this the
// first write fails with DirectoryNotFoundException on a fresh checkout.
// Directory.CreateDirectory does nothing when the directory is already there.
filepaths
|> List.map Path.GetDirectoryName
|> List.distinct
|> List.filter (fun dir -> not (System.String.IsNullOrEmpty dir))
|> List.iter (fun dir -> Directory.CreateDirectory dir |> ignore)

// Write each page's markdown to its matching path. File.WriteAllText creates
// the file or overwrites an existing one, so no separate File.Create pass is needed.
List.zip filepaths pages
|> List.iter (fun (path, content) -> File.WriteAllText(path, content))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment