Skip to content

Instantly share code, notes, and snippets.

@grena
Created October 15, 2015 08:04
Show Gist options
  • Save grena/328925f87bc44141bd6b to your computer and use it in GitHub Desktop.
Save grena/328925f87bc44141bd6b to your computer and use it in GitHub Desktop.
Scraper for http://downloads.khinsider.com/, a game OST download website.
import json
import re
import urllib.request
from datetime import datetime
from urllib.parse import urljoin

import wget
from bs4 import BeautifulSoup
# CONFIGURATION
MAIN_URL = 'http://downloads.khinsider.com/game-soundtracks/album/world-of-warcraft-direct-game-rip-'
# END CONFIGURATION


def unique_links(hrefs, base_url):
    """Return absolute, de-duplicated URLs in first-seen order.

    Args:
        hrefs: Iterable of raw ``href`` values; ``None`` and empty strings
            (anchors without a usable target) are skipped.
        base_url: URL of the page the links came from, used to resolve
            relative links.

    Returns:
        A list of absolute URLs with duplicates removed.
    """
    resolved = (urljoin(base_url, href) for href in hrefs if href)
    # dict.fromkeys drops duplicates while keeping a stable order, so the
    # download order no longer depends on set hashing.
    return list(dict.fromkeys(resolved))


def fetch_soup(url):
    """Download *url* and parse it with the lxml parser.

    The HTTP response is closed as soon as parsing has consumed it.
    """
    with urllib.request.urlopen(url) as response:
        return BeautifulSoup(response, "lxml")


def main():
    """Download the audio file referenced by every page linked from MAIN_URL.

    Raises:
        SystemExit: If the album page has no centered table of links.
    """
    album = fetch_soup(MAIN_URL)
    # All page links live in the table that is centered on the album page.
    table = album.find("table", align=["center"])
    if table is None:
        raise SystemExit(f"No track table found at {MAIN_URL}")

    hrefs = [anchor.get("href") for anchor in table.find_all("a")]
    for url in unique_links(hrefs, MAIN_URL):
        audio = fetch_soup(url).find("audio")
        src = audio.get("src") if audio is not None else None
        if not src:
            # Not every linked page is a track page; skip instead of crashing.
            print(f"No audio source found on {url}, skipping")
            continue
        # The src may be relative to the page it was found on.
        wget.download(urljoin(url, src))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment