Skip to content

Instantly share code, notes, and snippets.

@asukaminato0721
Last active December 31, 2021 20:31
Show Gist options
  • Save asukaminato0721/e08fb715bb28f72ab1c3f2b306ef46cf to your computer and use it in GitHub Desktop.
Save asukaminato0721/e08fb715bb28f72ab1c3f2b306ef46cf to your computer and use it in GitHub Desktop.
Script to fetch the Douban Top 250 and IMDb Top 250 lists
from typing import List
import requests
from lxml.etree import HTML
# Fetch one page of a Douban "doulist" and print the title of every entry.

# Send a browser User-Agent header with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0",
}

# Query string for the list page. "start" selects which page is fetched;
# edit it by hand to fetch another page.
params = (
    ("start", "75"),  # ["0", "25", "50", "75"]
    ("sort", "seq"),
    ("playable", "0"),
    ("sub_type", ""),
)

response = requests.get(
    "https://www.douban.com/doulist/968362/",
    headers=headers,
    params=params,
    # requests has no default timeout; without one a stalled connection
    # would block the script forever.
    timeout=10,
)
# Fail loudly on 4xx/5xx instead of parsing an error page and printing nothing.
response.raise_for_status()

html = HTML(response.text)
# Text nodes of the link inside each <div class="title">.
result: List[str] = html.xpath("//div[@class='title']/a/text()")

for raw_title in result:
    title = raw_title.strip()
    # Skip text nodes that contain only whitespace.
    if title:
        print(title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment