Skip to content

Instantly share code, notes, and snippets.

@Cartman0
Created October 17, 2017 15:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Cartman0/640829438679132e0a936a2a049d6aa2 to your computer and use it in GitHub Desktop.
Save Cartman0/640829438679132e0a936a2a049d6aa2 to your computer and use it in GitHub Desktop.
はてなブログAPI 全記事のentry idを取得
import requests
import bs4
import re
import time
def get_collection_uri(hatena_id, blog_id, password):
    """Return the collection URI advertised by a Hatena Blog AtomPub service document.

    Requests ``https://blog.hatena.ne.jp/<hatena_id>/<blog_id>/atom`` with
    HTTP Basic authentication and reads the ``href`` attribute of the first
    ``<collection>`` element in the XML response.

    Args:
        hatena_id: Hatena account ID; used in the URL and as the Basic-auth
            user name.
        blog_id: Blog identifier; used in the URL.
        password: Basic-auth password sent with the request.

    Returns:
        The collection URI string, or ``False`` when the request is not
        successful or the document carries no usable ``<collection href>``.
    """
    service_doc_uri = "https://blog.hatena.ne.jp/{hatena_id:}/{blog_id:}/atom".format(
        hatena_id=hatena_id, blog_id=blog_id
    )
    response = requests.get(url=service_doc_uri, auth=(hatena_id, password))
    if not response.ok:
        return False

    service_doc = bs4.BeautifulSoup(response.content, features="xml")
    collection = service_doc.collection
    # Attribute-style navigation yields None when the element is absent;
    # report that as the same "not found" value instead of crashing.
    if collection is None:
        return False
    return collection.get("href") or False
def get_entry_id_list(hatena_id, blog_id, password, limit_max_iterations=50, wait_s=0.01, print_collection_uri=True):
    """Collect the entry IDs of all non-draft entries of a Hatena Blog.

    Starts from the collection URI returned by ``get_collection_uri`` and
    follows each page's ``<link rel="next">`` until no next link exists or
    ``limit_max_iterations`` pages have been fetched.

    Args:
        hatena_id: Hatena account ID; used as the Basic-auth user name.
        blog_id: Blog identifier passed on to ``get_collection_uri``.
        password: Basic-auth password sent with every request.
        limit_max_iterations: Maximum number of collection pages to fetch.
        wait_s: Seconds to sleep between consecutive page requests.
        print_collection_uri: When true, print each page URI before it is
            requested.

    Returns:
        A list of entry-ID strings (the digits after the last ``-`` of each
        entry's ``<id>``), in the order the feed lists them -- the original
        author describes this as release order. Returns ``False`` if any
        page request is unsuccessful. Entries marked ``app:draft`` = "yes"
        and entries whose ``<id>`` has no trailing ``-<digits>`` are skipped.

    Raises:
        Exception: If the collection URI cannot be obtained.
    """
    collection_uri = get_collection_uri(hatena_id, blog_id, password)
    if not collection_uri:
        raise Exception("Not get collection uri.")

    # Compiled once; the same pattern is applied to every entry on every page.
    entry_id_pattern = re.compile(r"-(\d+)$")
    entry_id_list = []

    for _ in range(limit_max_iterations):
        if print_collection_uri:
            print(collection_uri)

        # Fetch one page of the entry collection with Basic authentication.
        res_collection = requests.get(collection_uri, auth=(hatena_id, password))
        if not res_collection.ok:
            return False

        # Parse the Atom feed into a navigable tree.
        soup_collection_xml = bs4.BeautifulSoup(res_collection.content, features="xml")

        for entry in soup_collection_xml.find_all("entry"):
            # Skip drafts. An entry with no app:draft element is treated as
            # published rather than raising on the missing element.
            draft = entry.find("app:draft")
            if draft is not None and draft.string == "yes":
                continue

            # Extract the numeric ID; skip entries whose <id> is missing,
            # empty, or does not end in "-<digits>".
            id_tag = entry.id
            id_text = id_tag.string if id_tag is not None else None
            matched = entry_id_pattern.search(id_text) if id_text else None
            if matched is None:
                continue
            entry_id_list.append(matched.group(1))

        # Advance to the next page; no usable next link means we are done.
        link_next = soup_collection_xml.find("link", rel="next")
        next_uri = link_next.get("href") if link_next is not None else None
        if not next_uri:
            return entry_id_list
        collection_uri = next_uri

        # Pause between requests.
        time.sleep(wait_s)

    # The page limit was reached while a next link still existed.
    print("warning: possible to left some entry_id")
    return entry_id_list
# Example invocation. hatena_id, blog_id and password are not defined in the
# visible source; they must be assigned before this statement runs.
get_entry_id_list(
    hatena_id,
    blog_id,
    password,
    limit_max_iterations=50,
    wait_s=0.01,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment