Skip to content

Instantly share code, notes, and snippets.

@citizen428
Last active July 4, 2019 14:48
Show Gist options
  • Save citizen428/212b4396e008c72e44adaf767b8cbb92 to your computer and use it in GitHub Desktop.
Save citizen428/212b4396e008c72e44adaf767b8cbb92 to your computer and use it in GitHub Desktop.
A simple asynchronous dev.to scraper
#r "fsharp-data/lib/net45/FSharp.Data.dll"
open FSharp.Data
/// Returns the trimmed direct inner text of the first node under `post`
/// that matches the CSS `selector`.
/// The first match is taken by index, so a selector that matches nothing
/// raises instead of producing an empty string.
let firstChildText selector (post : HtmlNode) =
    let matches = post.CssSelect(selector)
    let first = matches.[0]
    first.DirectInnerText().Trim()
/// Removes every "・" separator character from `name`.
let cleanName (name : string) =
    let separator = "・"
    name.Replace(separator, System.String.Empty)
/// Builds an async workflow that loads
/// `https://dev.to/t/<tag>/top/infinity` and returns the nodes matching
/// the `.single-article` CSS selector.
/// `tag` is percent-escaped before being placed in the URL path.
let fetchTagAsync (tag : string) =
    async {
        // Escape the tag so characters such as '#', '/' or '?' cannot
        // change the path, query or fragment of the request URL.
        let escapedTag = System.Uri.EscapeDataString tag
        let url = sprintf "https://dev.to/t/%s/top/infinity" escapedTag
        let! doc = HtmlDocument.AsyncLoad(url)
        return doc.CssSelect(".single-article")
    }
/// Starts one `fetchTagAsync` download per entry in `tags`, runs them
/// through `Async.Parallel`, and blocks the calling thread until the
/// combined result (one array element per tag) is available.
let getPosts tags =
    let downloads = List.map fetchTagAsync tags
    Async.RunSynchronously (Async.Parallel downloads)
/// Up to five (title, author, reactions) triples taken from the posts
/// scraped for the listed tags, ordered by reaction count, highest first.
/// Yields fewer than five entries when fewer posts were scraped.
let top5 =
    ["fsharp"; "elm"; "haskell"]
    |> getPosts
    |> Seq.collect (fun posts ->
        posts
        |> Seq.map (fun post ->
            post |> firstChildText ".content h3",
            post |> firstChildText "h4 > a" |> cleanName,
            post |> firstChildText ".reactions-count .engagement-count-number"))
    // Negating the parsed count turns the ascending sort into "highest first".
    |> Seq.sortBy (fun (_, _, score) -> -(int score))
    // Seq.truncate returns at most five items; Seq.take would throw
    // InvalidOperationException if the scrape produced fewer than five posts.
    |> Seq.truncate 5
// Print every entry of top5 as: "<title>" (<author>): <reactions>
top5
|> Seq.iter (fun (title, author, reactions) ->
    printf "\"%s\" (%s): %s\n" title author reactions)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment