Skip to content

Instantly share code, notes, and snippets.

@ckhung
Created September 4, 2021 02:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ckhung/44cbe8f667c84ab54fc81b37d027cf1a to your computer and use it in GitHub Desktop.
Save ckhung/44cbe8f667c84ab54fc81b37d027cf1a to your computer and use it in GitHub Desktop.
把 「xml-js 所轉出的噗浪 rss => json」 再轉成 html
#!/usr/bin/python3
# Usage:
#   for f in *.xml ; do xml-js $f --compact | jq . > ${f/%xml/json} ; done
#   python3 prj2html.py *.json > new.html
# For details see https://newtoypia.blogspot.com/2021/09/xml-js-jq-rss.html
import argparse, json, re
from warnings import warn
# Command-line interface: the script takes zero or more JSON file names as
# positional arguments and exposes them as the list ``args.rssjson``.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description='把 「xml-js 所轉出的噗浪 rss => json」 再轉成 html',
)
parser.add_argument(
    'rssjson',
    help='rssjson1 rssjson2 ...',
    nargs='*',
)
args = parser.parse_args()
# Lookup table from the qualifier word found in a plurk's text to the
# English word that is emitted next to "qualifier" in the span's class
# attribute.
verbs = {
    # qualifiers written in Chinese
    '分享': 'shares',
    '問': 'asks',
    '愛': 'loves',
    '喜歡': 'likes',
    '好奇': 'wonders',
    '已經': 'has',
    '想要': 'wants',
    '打算': 'will',
    '期待': 'wishes',
    '希望': 'hopes',
    '覺得': 'feels',
    '說': 'says',
    '需要': 'needs',
    '討厭': 'hates',
    # the replurk qualifier appears in three spellings
    '轉噗': 'replurks',
    '转噗': 'replurks',
    '警告!': 'warns',
    'replurks': 'replurks',
}
# Collect the plurks of every input file into one dict keyed by the href of
# the entry's link element; when the same href shows up again (for example
# in another file) the later occurrence overwrites the earlier one.
allplurks = {}

# Both patterns are compiled once instead of on every loop iteration.
# Leading "YYYY-MM-DDTHH:MM" part of an entry's published timestamp.
_published_re = re.compile(r'^(20\d\d)-(\d\d)-(\d\d)T(\d\d:\d\d)')
# "<name> <verb> <rest>"; DOTALL lets <rest> span line breaks so that a
# multi-line text is not silently cut off at its first newline.
_content_re = re.compile(r'^(\w+)\s+(\S+)\s+(.*)', re.DOTALL)

for rjfn in args.rssjson:
    # Be explicit about the encoding: the data contains Chinese text and the
    # platform default used by open() is locale dependent.
    with open(rjfn, encoding='utf-8') as f:
        data = json.load(f)
    if not ('feed' in data and 'entry' in data['feed']):
        warn(f'warning: file "{rjfn}" ignored because it does not have ".feed.entry"')
        continue
    entries = data['feed']['entry']
    if isinstance(entries, dict):
        # xml-js compact output only produces an array for repeated elements;
        # a feed with exactly one entry yields a bare object instead.
        entries = [entries]
    for e in entries:
        pid = e['link']['_attributes']['href']
        when = _published_re.match(e['published']['_text'])
        content = _content_re.match(e['content']['_text'])
        if when is None or content is None:
            # Report and skip instead of dying with AttributeError on None.
            warn(f'entry "{pid}" in file "{rjfn}" skipped because its date or content could not be parsed')
            continue
        v = content.group(2)  # verb
        if v not in verbs:
            warn(f'verb "{v}" not recognized')
            v = '說'  # fall back to the plain "says" qualifier
        allplurks[pid] = {
            'year': when.group(1),
            'month': when.group(2),
            # Pre-rendered list item: dated link, author name, qualifier span, text.
            'text': '<li><a href=\'http://www.plurk.com{}\'>{}-{} {}</a> {} <span class=\'qualifier {}\'>{}</span> {}'.format(
                pid, when.group(2), when.group(3), when.group(4),
                content.group(1), verbs[v], v, content.group(3)
            ),
        }
# Print the collected plurks as an HTML fragment, in descending order of
# their dict key, with one <h3> heading and one <ul> list per calendar month.
last_month = None  # (year, month) of the list that is currently open, if any
for p in sorted(allplurks, reverse=True):
    entry = allplurks[p]
    this_month = (entry['year'], entry['month'])
    # Compare year AND month: looking at the month alone would merge, e.g.,
    # 2021-09 with 2020-09 whenever no other month lies between them.
    if this_month != last_month:
        # Only close a list that was actually opened; before the very first
        # group there is nothing to close.
        closing = '\n</ul>' if last_month is not None else ''
        print('''{}
<h3 class='month'>{}年{}月</h3>
<ul class='plurk'>
'''.format(closing, *this_month))
        last_month = this_month
    print(entry['text'])
if last_month is not None:
    # Close the list of the last month so the fragment is balanced.
    print('</ul>')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment