Created
March 28, 2021 08:11
-
-
Save rosalogia/5948d18e79685f2ba704abf50d6ce3ca to your computer and use it in GitHub Desktop.
Script to grab all the developer blog entries from the Veloren site and convert their content to markdown files (for the purpose of searchability with grep)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "nuget: FSharp.Data" | |
open FSharp.Data | |
open System.IO | |
// Raw process command line. The two blog numbers are read from indices 2 and 3
// (presumably indices 0 and 1 hold the host and the script path when run via
// `dotnet fsi` -- confirm for your invocation).
let args = System.Environment.GetCommandLineArgs()

// Fail early with a usage hint instead of an opaque IndexOutOfRangeException
// when either bound is missing.
if args.Length < 4 then
    failwith "usage: <script> <first> <last>  (inclusive range of devblog numbers to fetch)"

// Inclusive bounds of the devblog entries to download.
// `int` raises System.FormatException if an argument is not a valid integer.
let first = int args.[2]
let last = int args.[3]
/// Builds an async workflow that downloads developer blog entry number `i`
/// from veloren.net and yields the page's `p`, `h1`, `h2` and `h3` elements.
/// Nothing is fetched until the returned workflow is run.
let fetchPageAsync i =
    async {
        let url = sprintf "https://veloren.net/devblog-%i" i
        let! document = HtmlDocument.AsyncLoad url
        // Only paragraphs and the first three heading levels are kept.
        return document.Descendants [ "p"; "h1"; "h2"; "h3" ]
    }
/// Renders one HTML node as a line of markdown: `h1`/`h2`/`h3` elements get a
/// `#`/`##`/`###` heading prefix, every other element is emitted as plain text.
/// The text is the node's trimmed direct inner text.
// NOTE(review): DirectInnerText presumably excludes text inside nested
// elements (links, emphasis) -- confirm against FSharp.Data if that matters.
let parseNode (node: HtmlNode) =
    let text = node.DirectInnerText().Trim()

    let prefix =
        match node.Name() with
        | "h1" -> "# "
        | "h2" -> "## "
        | "h3" -> "### "
        | _ -> ""

    prefix + text
/// Markdown text of every blog entry in `first..last`, one string per entry.
/// All pages are fetched in parallel and the script blocks here until every
/// download has finished.
let pages =
    // Turn one page's nodes into markdown, separating nodes with a blank line.
    let renderPage nodes =
        nodes |> Seq.map parseNode |> String.concat "\n\n"

    let fetched =
        [ for i in first..last -> fetchPageAsync i ]
        |> Async.Parallel
        |> Async.RunSynchronously

    // Async.Parallel yields results in input order, so entry k of the list
    // corresponds to blog number `first + k`.
    fetched |> Array.map renderPage |> Array.toList
/// Output path for each blog entry in `first..last`, in ascending order.
let filepaths =
    [ for i in first..last -> sprintf "./veloren_blogs/devblog_%i.md" i ]
// Make sure every target directory exists before writing; without this the
// script throws DirectoryNotFoundException on a fresh checkout.
// Directory.CreateDirectory is a no-op for directories that already exist.
filepaths
|> List.map Path.GetDirectoryName
|> List.filter (fun dir -> not (System.String.IsNullOrEmpty dir))
|> List.distinct
|> List.iter (fun dir -> Directory.CreateDirectory dir |> ignore)

// Write each rendered page to its matching path. File.WriteAllText creates the
// file (or overwrites an existing one), so no separate File.Create pass is
// needed. List.iter2 raises if the two lists differ in length.
List.iter2 (fun (path: string) (content: string) -> File.WriteAllText(path, content)) filepaths pages
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment