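# Gist "Hatena" by @lacucaracha-jp (last active May 3, 2021).
# The lines originally here ("Skip to content", "Instantly share code, notes,
# and snippets.", "Embed", "What would you like to do?") were GitHub page
# navigation text captured together with the script; they are not program text.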
import sys
import requests
import urllib.parse
import json
import sqlite3
import time
from bs4 import BeautifulSoup
from datetime import datetime as dt
# Seconds to wait for any single HTTP response before giving up.
REQUEST_TIMEOUT = 30
# Maximum number of attempts when fetching the stars of one bookmark.
MAX_STAR_ATTEMPTS = 10


def parse_star_rows(payload, bookmark_user, eid):
    """Convert a decoded star API response into rows for the Star table.

    Args:
        payload: Decoded JSON of the star API; must contain an "entries" list.
        bookmark_user: User name of the bookmark the stars were given to.
        eid: Entry id of the bookmarked page.

    Returns:
        A list of (STARUSER, COLOR, BOOKMARKUSER, EID, QUOTE) tuples. Plain
        stars come first with COLOR set to None, followed by colored stars.
        Empty when the response holds no entry.

    Raises:
        KeyError, TypeError: if the payload does not have the expected shape.
    """
    entries = payload["entries"]
    if not entries:
        return []
    entry = entries[0]
    rows = [
        (star["name"], None, bookmark_user, eid, star["quote"])
        for star in entry["stars"]
    ]
    if "colored_stars" in entry:
        for group in entry["colored_stars"]:
            rows.extend(
                (star["name"], group["color"], bookmark_user, eid, star["quote"])
                for star in group["stars"]
            )
    return rows


def fetch_star_rows(star_url, bookmark_user, eid):
    """Download the stars of one bookmark, retrying with a growing delay.

    Every attempt builds its rows from scratch, so a failure in the middle of
    parsing can never leave duplicated rows behind for the next attempt.

    Returns:
        The rows produced by parse_star_rows, or an empty list when all
        MAX_STAR_ATTEMPTS attempts failed (best effort: the bookmark is then
        stored with a star count of 0).
    """
    for attempt in range(1, MAX_STAR_ATTEMPTS + 1):
        try:
            payload = requests.get(star_url, timeout=REQUEST_TIMEOUT).json()
            return parse_star_rows(payload, bookmark_user, eid)
        except (requests.RequestException, ValueError, LookupError, TypeError):
            print(star_url)
            print("エラー発生:" + str(attempt - 1))
            # Back off a little longer after every failure.
            time.sleep(attempt)
    return []


def store_entry(cursor, elem, rank, page_date):
    """Store one hot-entry element with its bookmarks and stars.

    Args:
        cursor: sqlite3 cursor on a database that has Page, Bookmark and Star
            tables.
        elem: The "entrylist-contents" element of the entry.
        rank: 1-based position of the entry in the hot-entry list.
        page_date: Date of the hot-entry list, already formatted as text.
    """
    purl = elem.find("a").get("href")
    burl = "https://b.hatena.ne.jp/entry/jsonlite/?url=" + urllib.parse.quote(purl)
    page = requests.get(burl, timeout=REQUEST_TIMEOUT).json()
    if not page:
        # NOTE(review): the API appears to answer `null` for URLs it has no
        # bookmark data for; skip instead of crashing on page["count"].
        print(str(rank) + ": skipped (no bookmark data)")
        return
    count = page["count"]
    title = page["title"]
    eid = page["eid"]
    category = elem.find("li", class_="entrylist-contents-category").text.strip()

    # A title containing characters the console encoding (e.g. EUC) cannot
    # represent cannot be written to stdout; fall back to printing without it.
    try:
        print(str(rank) + ":" + str(count) + ":" + str(title))
    except UnicodeEncodeError:
        print(str(rank) + ":" + str(count))

    already_stored = cursor.execute(
        'select count(*) "count" from Page where EID = ?', (eid,)
    ).fetchone()[0]
    if already_stored > 0:
        return

    # Remove leftovers of an earlier incomplete run before inserting again.
    cursor.execute("delete from Bookmark where EID = ?", (eid,))
    cursor.execute("delete from Star where EID = ?", (eid,))
    cursor.execute(
        "insert into Page(EID,Date,category,Entryrank,Count,Title,URL) values(?,?,?,?,?,?,?)",
        (eid, page_date, category, rank, count, title, purl),
    )

    for bookmark in page.get("bookmarks") or []:
        buser = bookmark["user"]
        bcomment = bookmark["comment"]
        btimestamp = dt.strptime(bookmark["timestamp"], '%Y/%m/%d %H:%M')
        star_rows = []
        # Stars are only looked up for bookmarks that carry a comment.
        if bcomment:
            # str(eid): the concatenation must not fail if the id is numeric.
            bookmark_url = (
                "https://b.hatena.ne.jp/" + buser + "/"
                + btimestamp.strftime('%Y%m%d') + "#bookmark-" + str(eid)
            )
            surl = "https://s.hatena.com/entry.json?uri=" + urllib.parse.quote(bookmark_url)
            star_rows = fetch_star_rows(surl, buser, eid)
        cursor.executemany(
            'Insert into Star(STARUSER,COLOR,BOOKMARKUSER,EID,QUOTE) values(?,?,?,?,?)',
            star_rows,
        )
        # isoformat(" ") yields the same text sqlite3's implicit datetime
        # adapter would store, without relying on that deprecated adapter.
        cursor.execute(
            'Insert into Bookmark(EID,BOOKMARKUSER,URL,STARCOUNT,TIMESTAMP,COMMENT) values(?,?,?,?,?,?)',
            (eid, buser, purl, len(star_rows), btimestamp.isoformat(" "), bcomment),
        )


def main(argv=None):
    """Scrape the Hatena Bookmark hot-entry list of one day into SQLite.

    Args:
        argv: Argument vector; defaults to sys.argv. argv[1] is the date as
            YYYYMMDD, argv[2] the path of the SQLite database file.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        sys.exit("usage: <script> YYYYMMDD DATABASE")
    pdate = args[1]
    db = args[2]
    # Parse the date up front so a malformed argument fails before any request.
    # isoformat(" ") matches what sqlite3's implicit datetime adapter stores.
    page_date = dt.strptime(pdate, '%Y%m%d').isoformat(" ")

    url = "https://b.hatena.ne.jp/hotentry/all/" + pdate
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Do not parse an HTTP error page as if it were the entry list.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    elems = soup.find_all("div", class_="entrylist-contents")

    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        print("★★★★★" + pdate + "★★★★★")
        for rank, elem in enumerate(elems, start=1):
            store_entry(c, elem, rank, page_date)
            # Commit per entry: a later failure keeps the finished entries,
            # and the half-written one is rolled back when closing.
            conn.commit()
    finally:
        conn.close()


if __name__ == "__main__":
    main()
# (GitHub page footer captured together with the script: "Sign up for free to
# join this conversation on GitHub. Already have an account? Sign in to comment")