Skip to content

Instantly share code, notes, and snippets.

@btbytes
Created February 25, 2018 02:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save btbytes/4c894cf1ab5dd8cc99f5161b7b5ab92d to your computer and use it in GitHub Desktop.
Save btbytes/4c894cf1ab5dd8cc99f5161b7b5ab92d to your computer and use it in GitHub Desktop.
Download documents matching a pattern from a webpage.
#[
downthemall.nim
Download documents matching a pattern from a webpage.
Build:
nim c -r -d:ssl -d:release downthemall.nim
Example: (downloads all the PDFs linked in the URL)
./downthemall https://www.btbytes.com/pl.html -A .pdf
]#
import htmlparser
import httpclient
import xmltree
import strtabs
import os
import strutils
import streams
import parseopt2
import uri
when isMainModule:
  # Command-line entry point.
  #
  # Fetches the page given as the positional URL argument, scans every <a>
  # element, and downloads each link whose href ends with one of the
  # extensions supplied through -A. Files are saved in the current directory
  # under the last path segment of the href.
  #
  # Accepted option spellings: `-A .pdf`, `-A:.pdf` and `-A=.pdf`; -A may be
  # repeated to match several extensions.
  var url = ""
  var extns: seq[string] = @[]
  # getopt() only attaches a value written as `-A:.pdf` / `-A=.pdf`. For the
  # documented `-A .pdf` form the value arrives as the next plain argument,
  # so remember that a bare -A is still waiting for its extension.
  var wantExtn = false
  for kind, key, val in getopt():
    case kind
    of cmdArgument:
      if wantExtn:
        extns.add(key)
        wantExtn = false
      else:
        url = key
    of cmdShortOption:
      if key == "A":
        if val.len > 0:
          extns.add(val)
        else:
          wantExtn = true
    of cmdLongOption, cmdEnd:
      discard
  if url.len == 0 or extns.len == 0:
    echo "usage: downthemall URL -A .extension [-A .another]"
    quit(0)
  let client = newHttpClient()
  try:
    let base = parseUri(url)
    let html = parseHtml(newStringStream(client.getContent(url)))
    for a in html.findAll("a"):
      # attr() yields "" for anchors without an href (or without any
      # attributes at all), instead of raising like attrs["href"] does.
      let href = a.attr("href")
      if href.len == 0:
        continue
      for extn in extns:
        if href.endsWith(extn):
          # Resolve relative links against the page URL; absolute links
          # pass through unchanged.
          let target = $combine(base, parseUri(href))
          let fname = href.split('/')[^1]
          client.downloadFile(target, fname)
          echo "Downloaded: $1 to $2" % [target, fname]
          # One match is enough; avoid fetching the same link once per
          # matching extension.
          break
  finally:
    client.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment