# xbns/download-pdfs.py

## download-pdfs.py
"""Print the URL of every PDF linked from the AWS whitepapers page.

The page is fetched once, every ``<a href=...>`` whose target ends in
``.pdf`` is selected, and one absolute URL per line is written to stdout.

Usage:
    $ python download-pdfs.py > aws-whitepapers.txt
    then..
    $ parallel -j 20 --gnu -a aws-whitepapers.txt wget -nc

    -nc, --no-clobber: skip downloads that would download to existing files
"""
from collections.abc import Iterator
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

WHITEPAPERS_URL = "https://aws.amazon.com/whitepapers/"

# Without a timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30


def iter_pdf_links(page_url: str = WHITEPAPERS_URL) -> Iterator[str]:
    """Yield the absolute URL of each PDF link found on *page_url*.

    Args:
        page_url: Address of the HTML page to scan for anchors.

    Yields:
        One URL per anchor whose ``href`` ends in ``.pdf`` (compared
        case-insensitively), in document order. Relative hrefs are resolved
        against *page_url*; absolute hrefs are passed through unchanged.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly rather than scraping an error page and printing nothing.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "lxml")
    # href=True restricts the search to anchors that carry an href attribute.
    for anchor in soup.find_all("a", href=True):
        href = anchor["href"]
        # Skip every link except the PDF ones.
        if not href.lower().endswith(".pdf"):
            continue
        # wget needs a full URL, so resolve site-relative links here.
        yield urljoin(page_url, href)


def main() -> None:
    """Write each PDF URL from the whitepapers page to stdout, one per line."""
    for pdf_url in iter_pdf_links():
        print(pdf_url)


if __name__ == "__main__":
    main()