Skip to content

Instantly share code, notes, and snippets.

@lacucaracha-jp
Last active May 3, 2021 05:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lacucaracha-jp/36a9198be0ee88d52262dafba2bb2c1d to your computer and use it in GitHub Desktop.
Save lacucaracha-jp/36a9198be0ee88d52262dafba2bb2c1d to your computer and use it in GitHub Desktop.
Hatena
import sys
import requests
import urllib.parse
import json
import sqlite3
import time
from bs4 import BeautifulSoup
from datetime import datetime as dt
# Seconds to wait for any single HTTP response before giving up on it.
REQUEST_TIMEOUT = 30
# How many times the star download for one bookmark is attempted.
MAX_STAR_ATTEMPTS = 10


def build_star_url(buser, btimestamp, eid):
    """Return the Hatena Star API URL for one user's bookmark of an entry.

    buser       -- user name of the bookmarker.
    btimestamp  -- datetime of the bookmark; only the date part is used.
    eid         -- entry id taken from the bookmark API response.
    """
    bookmark_url = (
        "https://b.hatena.ne.jp/" + buser + "/"
        + btimestamp.strftime('%Y%m%d') + "#bookmark-" + str(eid)
    )
    return "https://s.hatena.com/entry.json?uri=" + urllib.parse.quote(bookmark_url)


def parse_star_rows(payload, buser, eid):
    """Turn a decoded Star API response into rows for the Star table.

    Returns a list of (STARUSER, COLOR, BOOKMARKUSER, EID, QUOTE) tuples:
    the plain stars of the first entry (COLOR is None) followed by its
    colored stars.  Returns an empty list when the response has no entries.

    Raises KeyError or TypeError when the payload lacks the expected fields.
    """
    entries = payload["entries"]
    if not entries:
        return []
    entry = entries[0]
    rows = [
        (star["name"], None, buser, eid, star["quote"])
        for star in entry["stars"]
    ]
    if "colored_stars" in entry:
        for group in entry["colored_stars"]:
            rows.extend(
                (star["name"], group["color"], buser, eid, star["quote"])
                for star in group["stars"]
            )
    return rows


def fetch_star_rows(surl, buser, eid):
    """Download and parse the stars of one bookmark, retrying on failure.

    Every attempt builds a fresh row list, so an attempt that fails half-way
    cannot leave partial or duplicated rows behind.  Returns an empty list
    when all MAX_STAR_ATTEMPTS attempts fail.
    """
    for failures in range(MAX_STAR_ATTEMPTS):
        try:
            payload = requests.get(surl, timeout=REQUEST_TIMEOUT).json()
            return parse_star_rows(payload, buser, eid)
        except (requests.RequestException, LookupError, TypeError, ValueError):
            print(surl)
            print("エラー発生:" + str(failures))
            # Linear back-off: wait 1s after the first failure, 2s after the next, ...
            time.sleep(failures + 1)
    return []


def store_entry(c, elem, rank, entry_date):
    """Store one hot-entry element plus its bookmarks and stars.

    c           -- sqlite3 cursor used for all statements.
    elem        -- parsed "entrylist-contents" element of the listing page.
    rank        -- 1-based position of the entry in the listing.
    entry_date  -- datetime of the listing being imported.

    Nothing is written when the Page table already has a row for the entry.
    """
    purl = elem.find("a").get("href")
    burl = "https://b.hatena.ne.jp/entry/jsonlite/?url=" + urllib.parse.quote(purl)
    page = requests.get(burl, timeout=REQUEST_TIMEOUT).json()
    if not page:
        # An empty/null JSON body carries no entry data; indexing it would crash.
        return
    count = page["count"]
    title = page["title"]
    eid = page["eid"]
    category = elem.find("li", class_="entrylist-contents-category").text.strip()
    # A title containing characters the console encoding (e.g. EUC) cannot
    # represent cannot be written to stdout; fall back to printing without it.
    try:
        print(str(rank) + ":" + str(count) + ":" + str(title))
    except UnicodeEncodeError:
        print(str(rank) + ":" + str(count))
    stored = c.execute('select count(*) "count" from Page where EID = ?', (eid,)).fetchone()[0]
    if stored >= 1:
        return
    # Clear rows that may exist for this entry without a matching Page row.
    c.execute("delete from Bookmark where EID = ?", (eid,))
    c.execute("delete from Star where EID = ?", (eid,))
    # Datetimes are bound as "YYYY-MM-DD HH:MM:SS" text explicitly: same value
    # the sqlite3 default adapter produced, without relying on that adapter.
    c.execute(
        "insert into Page(EID,Date,category,Entryrank,Count,Title,URL) values(?,?,?,?,?,?,?)",
        (eid, entry_date.isoformat(" "), category, rank, count, title, purl),
    )
    for bookmark in page["bookmarks"]:
        buser = bookmark["user"]
        bcomment = bookmark["comment"]
        btimestamp = dt.strptime(bookmark["timestamp"], '%Y/%m/%d %H:%M')
        star_rows = []
        # Stars are only looked up for bookmarks that carry a comment.
        if bcomment:
            star_rows = fetch_star_rows(build_star_url(buser, btimestamp, eid), buser, eid)
        c.executemany(
            'Insert into Star(STARUSER,COLOR,BOOKMARKUSER,EID,QUOTE) values(?,?,?,?,?)',
            star_rows,
        )
        c.execute(
            'Insert into Bookmark(EID,BOOKMARKUSER,URL,STARCOUNT,TIMESTAMP,COMMENT) values(?,?,?,?,?,?)',
            (eid, buser, purl, len(star_rows), btimestamp.isoformat(" "), bcomment),
        )


def main(argv=None):
    """Import the Hatena Bookmark hot entries of one day into a SQLite file.

    argv -- argument list shaped like sys.argv (used when None):
            argv[1] is the date as YYYYMMDD, argv[2] the SQLite database path.

    Returns 0 on success and 2 when arguments are missing.
    Raises ValueError when the date does not match YYYYMMDD.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        print("usage: python <script> YYYYMMDD SQLITE_DB", file=sys.stderr)
        return 2
    pdate, db = argv[1], argv[2]
    # Parse the date first so a malformed argument fails before any I/O.
    entry_date = dt.strptime(pdate, '%Y%m%d')

    url = "https://b.hatena.ne.jp/hotentry/all/" + pdate
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of parsing an error page as "no entries".
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    elems = soup.find_all("div", class_="entrylist-contents")
    print("★★★★★" + pdate + "★★★★★")

    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        for rank, elem in enumerate(elems, start=1):
            store_entry(c, elem, rank, entry_date)
            # One commit per entry: its Page, Bookmark and Star rows are
            # stored together, and a later run skips entries already stored.
            conn.commit()
    finally:
        conn.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment