Get the share count on Hatena Bookmark for each post of a Hugo site, and save the counts as JSON files on S3.
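For context, the Hatena Bookmark count API returns the bookmark count for a given URL as a plain-text integer. The short sketch below shows a single request and the JSON shape the Lambda stores per post; the post URL here is a made-up example, not one taken from this gist.

import json
import urllib.parse
import urllib.request

# Hypothetical post URL, for illustration only.
post_url = 'https://michimani.net/post/example-post/'

# The count endpoint returns the bookmark count as a plain-text integer.
api_url = 'https://bookmark.hatenaapis.com/count/entry?url=' + urllib.parse.quote(post_url)
with urllib.request.urlopen(api_url) as res:
    count = int(res.read())

# The Lambda below stores the same value per post on S3 under data/htbcnt/<slug>.json.
print(json.dumps({'cnt': count}, ensure_ascii=False))  # e.g. {"cnt": 12}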
from time import sleep
import boto3
import json
import logging
import re
import traceback
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

HATEBU_CNT_API = 'https://bookmark.hatenaapis.com/count/entry?url='
S3_BUCKET = '<your-s3-bucket-name>'
SITE_MAP_KEY = 'sitemap.xml'
HUGO_HOST = '<your-hugo-site-host>'  # eg) https://michimani.net

s3 = boto3.resource('s3')
logger = logging.getLogger()
logger.setLevel(logging.INFO)


def get_hatebu_count(post_url):
    """Return the Hatena Bookmark count for a post URL (0 on failure)."""
    count = 0
    hatebu_url = HATEBU_CNT_API + urllib.parse.quote(post_url)
    try:
        with urllib.request.urlopen(hatebu_url) as res:
            count = int(res.read())
    except Exception:
        logger.error('Hatebu count request failed: %s', traceback.format_exc())
    return count


def get_post_url_list():
    """Read sitemap.xml from S3 and return the URLs of post pages."""
    post_url_list = []
    try:
        s3_object = s3.Object(bucket_name=S3_BUCKET, key=SITE_MAP_KEY)
        sitemap = s3_object.get()['Body'].read().decode('utf-8')
        xml_root = ET.fromstring(sitemap)
        ns = {'post': 'http://www.sitemaps.org/schemas/sitemap/0.9'}
        reg = re.compile('^' + re.escape(HUGO_HOST + '/post/') + '.+')
        for url_part in xml_root.findall('post:url/post:loc', ns):
            if reg.match(url_part.text):
                post_url_list.append(url_part.text)
    except Exception:
        logger.error('Get post url failed: %s', traceback.format_exc())
    return post_url_list


def put_hatebu_count_file(post_url, hatebu_count):
    """Save the count as a JSON file on S3, keyed by the post slug."""
    try:
        object_key = get_key_from_post_url(post_url)
        s3obj = s3.Object(S3_BUCKET, object_key)
        data = json.dumps({'cnt': hatebu_count}, ensure_ascii=False)
        s3obj.put(Body=data)
    except Exception:
        logger.error('Put count data failed: %s', traceback.format_exc())


def get_key_from_post_url(post_url):
    """Build the S3 object key for a post, e.g. data/htbcnt/<slug>.json."""
    return 'data/htbcnt/{post_key}.json'.format(
        post_key=post_url.replace(HUGO_HOST + '/post/', '').replace('/', ''))


def count_needs_update(post_url, new_count):
    """Return True if the stored count is missing or lower than new_count."""
    res = False
    try:
        object_key = get_key_from_post_url(post_url)
        cnt_data_obj = s3.Object(bucket_name=S3_BUCKET, key=object_key)
        cnt_data_raw = cnt_data_obj.get()['Body'].read().decode('utf-8')
        cnt_data = json.loads(cnt_data_raw)
        if new_count > cnt_data['cnt']:
            res = True
    except Exception:
        logger.info('Hatebu count file does not exist yet.')
        res = True
    return res


def lambda_handler(event, context):
    post_list = get_post_url_list()
    for post_url in post_list:
        sleep(0.5)  # be gentle with the Hatena Bookmark API
        count = get_hatebu_count(post_url)
        if count_needs_update(post_url, count):
            put_hatebu_count_file(post_url, count)
            logger.info('Updated for "{}", new Hatebu count is "{}"'.format(post_url, count))
        else:
            logger.info('No update required for "{}"'.format(post_url))
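The script is written as an AWS Lambda handler, but lambda_handler does not use the event payload, so a quick local test can simply call it directly. This is a minimal sketch, assuming AWS credentials with access to the S3 bucket are configured and the placeholders above are filled in.

# Hypothetical local invocation; event and context are unused by the handler.
if __name__ == '__main__':
    lambda_handler({}, None)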