Skip to content

Instantly share code, notes, and snippets.

@AtomicVar
Created February 27, 2019 12:50
Show Gist options
  • Save AtomicVar/f87205f6d4cddd2e2bb2fdb1f2e6c1ed to your computer and use it in GitHub Desktop.
Save AtomicVar/f87205f6d4cddd2e2bb2fdb1f2e6c1ed to your computer and use it in GitHub Desktop.
博客园编程语言文章数量 爬虫
import requests
from bs4 import BeautifulSoup
# Script: for each programming language name, query the cnblogs search page
# and print the text of the element with id "CountOfResults".

# Search endpoint; the keyword is passed in the `w` query parameter.
base_url = 'https://zzk.cnblogs.com/s/blogpost'

# Cookie sent with every request.
# NOTE(review): judging by its name this looks like an anti-robot token
# copied from a browser session; it may expire — confirm and refresh it if
# the script starts printing URLs instead of counts.
cookies = dict(
    ZzkNoRobotCookie=
    'CfDJ8KlpyPucjmhMuZTmH8oiYTOsZWLqcxSx0sRuNdfc35P334ttmwTqPekgb1OOGtp_JeXby7PZgQla4HC63Y3_nnWwF8kvdklA71DbLOQ2ADfziUqy4BkuXQUgJEB4y2kj6w'
)

# Keywords to search for, one request per entry.
languages = [
    'Python', '.NET', 'JavaScript', 'C++', 'Java', 'Erlang', 'Scala', 'PHP',
    'Haskell', 'Lisp', 'C#', 'Julia', 'Nodejs',
]

# Seconds to wait for the server before giving up on a single request;
# without a timeout `requests.get` can block indefinitely.
REQUEST_TIMEOUT = 10

for lang in languages:
    try:
        r = requests.get(
            base_url,
            params={'w': lang},
            cookies=cookies,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        # A network failure for one keyword should not abort the others.
        print(f'Request failed for {lang}: {exc}')
        continue

    if r.status_code != 200:
        print(f'Error code: {r.status_code}')
        continue

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = BeautifulSoup(r.text, 'html.parser')

    count_tag = soup.find(id="CountOfResults")
    if count_tag is None:
        # The expected element is missing from the response; print the
        # requested URL (same fallback output as before) and move on.
        print(r.url)
        continue

    print(f'{lang}: {count_tag.get_text()}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment