Get the Hatena Bookmark share count for each post on a Hugo site and save it as JSON files on S3.
from time import sleep
import boto3
import json
import logging
import re
import traceback
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

HATEBU_CNT_API = 'https://bookmark.hatenaapis.com/count/entry?url='
S3_BUCKET = '<your-s3-bucket-name>'
SITE_MAP_KEY = 'sitemap.xml'
HUGO_HOST = '<your-hugo-site-host>'  # e.g. https://michimani.net

s3 = boto3.resource('s3')
logger = logging.getLogger()
logger.setLevel(logging.INFO)


def get_hatebu_count(post_url):
    """Return the Hatena Bookmark count for a single post URL (0 on failure)."""
    count = 0
    hatebu_url = HATEBU_CNT_API + urllib.parse.quote(post_url)
    try:
        with urllib.request.urlopen(hatebu_url) as res:
            count = int(res.read())
    except Exception:
        logger.error('Hatebu count request failed: %s', traceback.format_exc())
    return count


def get_post_url_list():
    """Read sitemap.xml from S3 and return the URLs of posts under /post/."""
    post_url_list = []
    try:
        s3_object = s3.Object(bucket_name=S3_BUCKET, key=SITE_MAP_KEY)
        sitemap = s3_object.get()['Body'].read().decode('utf-8')
        xml_root = ET.fromstring(sitemap)
        ns = {'post': 'http://www.sitemaps.org/schemas/sitemap/0.9'}
        reg = re.compile('^' + re.escape(HUGO_HOST + '/post/') + '.+')
        for url_part in xml_root.findall('post:url/post:loc', ns):
            if reg.match(url_part.text):
                post_url_list.append(url_part.text)
    except Exception:
        logger.error('Get post url failed: %s', traceback.format_exc())
    return post_url_list


def put_hatebu_count_file(post_url, hatebu_count):
    """Write the bookmark count for a post to S3 as a small JSON file."""
    try:
        object_key = get_key_from_post_url(post_url)
        s3obj = s3.Object(S3_BUCKET, object_key)
        data = json.dumps({'cnt': hatebu_count}, ensure_ascii=False)
        s3obj.put(Body=data)
    except Exception:
        logger.error('Put count data failed: %s', traceback.format_exc())


def get_key_from_post_url(post_url):
    """Convert a post URL into an S3 object key under data/htbcnt/."""
    return 'data/htbcnt/{post_key}.json'.format(
        post_key=post_url.replace(HUGO_HOST + '/post/', '').replace('/', ''))


def count_needs_update(post_url, new_count):
    """Return True if the stored count is missing or lower than the new count."""
    res = False
    try:
        object_key = get_key_from_post_url(post_url)
        cnt_data_obj = s3.Object(bucket_name=S3_BUCKET, key=object_key)
        cnt_data_raw = cnt_data_obj.get()['Body'].read().decode('utf-8')
        cnt_data = json.loads(cnt_data_raw)
        if new_count > cnt_data['cnt']:
            res = True
    except Exception:
        logger.info('Hatebu count file does not exist yet.')
        res = True
    return res


def lambda_handler(event, context):
    post_list = get_post_url_list()
    for post_url in post_list:
        sleep(0.5)  # throttle requests to the Hatena Bookmark API
        count = get_hatebu_count(post_url)
        if count_needs_update(post_url, count):
            put_hatebu_count_file(post_url, count)
            logger.info('Updated for "{}", new Hatebu count is "{}"'.format(post_url, count))
        else:
            logger.info('No update required for "{}"'.format(post_url))
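
For a quick check before deploying this as a scheduled Lambda, the handler can be invoked locally. This is a minimal sketch, assuming the placeholder constants above are filled in, AWS credentials with access to the bucket are configured, sitemap.xml is already uploaded, and the code above is saved as lambda_function.py (the filename is an assumption); the empty event dict and None context stand in for the values Lambda would pass.

# local_test.py - hypothetical local invocation sketch
import logging

from lambda_function import lambda_handler  # assumes the script above is saved as lambda_function.py

logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    # The handler ignores both arguments, so stub values are enough for a local run.
    lambda_handler({}, None)

Each run leaves one object per post at data/htbcnt/<post-slug>.json containing {"cnt": <count>}, which the Hugo site can then fetch to display the bookmark count.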