Skip to content

Instantly share code, notes, and snippets.

@raek
Created March 26, 2015 22:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save raek/bc5d237be26bcaf8ce46 to your computer and use it in GitHub Desktop.
Save raek/bc5d237be26bcaf8ce46 to your computer and use it in GitHub Desktop.
List all of Bartosz Milewski's blog posts
# Run with python2
#
# Dependencies:
# pip install requests beautifulsoup4
import requests
import bs4
def main():
    """Print an HTML page that links to every post found via the archives.

    For each archive URL returned by ``find_archives`` and each
    ``(url, title)`` pair returned by ``find_posts``, one ``<li>`` link is
    written to standard output. All output is UTF-8 encoded.
    """
    import sys
    try:
        from html import escape  # Python 3
    except ImportError:
        from cgi import escape  # Python 2

    # Write bytes so the output is UTF-8 regardless of the console encoding;
    # Python 3 exposes the byte stream as ``sys.stdout.buffer``, Python 2's
    # ``sys.stdout`` already accepts bytes.
    stream = getattr(sys.stdout, "buffer", sys.stdout)

    def emit(markup):
        stream.write(markup.encode("utf-8") + b"\n")

    emit(u"<!doctype html>")
    emit(u"<html><head><meta charset=\"utf-8\"></head><body><ul>")
    for archive_url in find_archives("http://bartoszmilewski.com/"):
        for post_url, post_title in find_posts(archive_url):
            if not post_url:
                # An anchor without an href cannot be linked to.
                continue
            # Titles and URLs come from a scraped page: escape them so
            # characters such as '<', '&' and '"' cannot break the markup.
            emit(u"<li><a href=\"%s\">%s</a></li>"
                 % (escape(post_url, True), escape(post_title, True)))
    emit(u"</ul></body></html>")
def find_archives(site_url):
    """Return the archive page URLs linked from the page at ``site_url``.

    Fetches ``site_url`` and collects the ``href`` of every ``<a>`` inside
    the element with id ``archives-3``. Anchors without an ``href`` are
    skipped.
    """
    response = requests.get(site_url)
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed, so results can differ between machines (and bs4 warns).
    soup = bs4.BeautifulSoup(response.text, "html.parser")
    hrefs = (a.attrs.get("href") for a in soup.select("#archives-3 a"))
    return [href for href in hrefs if href]
def find_posts(archive_url):
    """Return ``(url, title)`` pairs for the posts listed at ``archive_url``.

    Fetches ``archive_url`` and, for every ``<a>`` matched by the selector
    ``.post-title a``, yields its ``href`` together with its text content.
    Anchors without an ``href`` are skipped.
    """
    response = requests.get(archive_url)
    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed, so results can differ between machines (and bs4 warns).
    soup = bs4.BeautifulSoup(response.text, "html.parser")
    return [(a.attrs.get("href"), a.get_text())
            for a in soup.select(".post-title a")
            if a.attrs.get("href")]
# Script entry point: build and print the post index when this file is run.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also triggers the scrape.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment