Skip to content

Instantly share code, notes, and snippets.

@cocomoff
Created January 5, 2019 13:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cocomoff/e97efcf4dcb3dc7de013bf4a2d3467ac to your computer and use it in GitHub Desktop.
Save cocomoff/e97efcf4dcb3dc7de013bf4a2d3467ac to your computer and use it in GitHub Desktop.
hatenablog-title-count
from bs4 import BeautifulSoup
import requests
import lxml
from collections import defaultdict, Counter
# Exclusive upper bound on the archive page numbers to fetch: pages 1..N-1.
N = 100


def extract_year(text):
    """Return the leading year of an entry text, or None if it has none.

    The text is expected to start with a slash-separated date whose first
    field is the year (e.g. "2018/01/05 some title"). Texts that do not
    start with an integer year yield None instead of raising, so a single
    odd entry cannot abort the whole run.
    """
    head = text.split(" ")[0].split("/")[0]
    try:
        return int(head)
    except ValueError:
        return None


def strip_date(text):
    """Return the part of an entry text after its first space.

    Only the first space is treated as the date/title separator, so titles
    that themselves contain spaces are kept intact. A text with no space
    yields an empty title.
    """
    _date, _sep, title = text.partition(" ")
    return title


def count_titles(texts, year=2018):
    """Count how often each title occurs among entries dated in *year*.

    Args:
        texts: iterable of entry texts of the form "<date> <title>".
        year: only entries whose leading year equals this are counted.

    Returns:
        A collections.Counter mapping title -> number of occurrences.
    """
    return Counter(strip_date(t) for t in texts if extract_year(t) == year)


def fetch_entry_texts(pages):
    """Yield the text of every "entry-title-link" element on the given pages.

    One HTTP GET is issued per page number in *pages*; the response body is
    parsed with the lxml parser.
    """
    for i in pages:
        r = requests.get("http://URL/archive/?page={}".format(i))
        soup = BeautifulSoup(r.text, 'lxml')
        for e in soup.find_all(class_="entry-title-link"):
            yield e.text


def main():
    """Scrape the archive pages and print title frequencies, most common first."""
    # The counter must be built before it is printed.
    counter = count_titles(fetch_entry_texts(range(1, N)))
    print(counter.most_common())


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment