Skip to content

Instantly share code, notes, and snippets.

@wareya
Last active March 30, 2018 21:26
Show Gist options
  • Save wareya/4305e2f971a78c960402ac69f308128c to your computer and use it in GitHub Desktop.
Save wareya/4305e2f971a78c960402ac69f308128c to your computer and use it in GitHub Desktop.
#!python
from bs4 import BeautifulSoup
import urllib
from urllib.parse import urljoin
import sys
def get_top_300(url):
    """Return the novel identifiers linked from a ranking page.

    Downloads *url*, parses the response as HTML and collects the ``href``
    of every anchor matched by the CSS selector ``.ranking_list .rank_h a``.
    Each href is reduced to its last path segment, ignoring trailing
    slashes, so ``"http://example.com/abc/"`` becomes ``"abc"``.

    Args:
        url: Address of the ranking page to fetch.

    Returns:
        A list of strings, one per matched anchor that carries an ``href``
        attribute, in the order the anchors were selected.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
    """
    # A bare ``import urllib`` does not load the ``request`` submodule, so
    # import it explicitly instead of relying on some other module having
    # loaded it as a side effect.
    import urllib.request

    # The context manager closes the response even if ``read()`` raises.
    with urllib.request.urlopen(url) as response:
        data = response.read()

    soup = BeautifulSoup(data, "html.parser")
    hrefs = (a.get("href") for a in soup.select(".ranking_list .rank_h a"))
    # ``get`` returns None for an anchor without an href; skip those rather
    # than failing on ``None.rstrip``.
    return [
        href.rstrip("/").rsplit("/", 1)[-1]
        for href in hrefs
        if href is not None
    ]
if __name__ == "__main__":
    # The ranking page URL is the first command-line argument; without it
    # there is nothing to fetch, so exit with a usage message (written to
    # stderr, exit status 1) instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("usage: {} RANKING_URL".format(sys.argv[0]))
    novels = get_top_300(sys.argv[1])
    # Emit all identifiers on a single line, separated by spaces.
    print(" ".join(novels))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment