Skip to content

Instantly share code, notes, and snippets.

@shimizukawa
Last active December 8, 2019 18:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shimizukawa/0074b62a39b4c94fab399438ef3dc112 to your computer and use it in GitHub Desktop.
Save shimizukawa/0074b62a39b4c94fab399438ef3dc112 to your computer and use it in GitHub Desktop.
Export Feedly tagged URLs into Scrapbox style text
"""
Export Feedly tagged URLs into Scrapbox style text
https://scrapbox.io/shimizukawa/Feedly_API%E3%81%A7%E3%83%87%E3%83%BC%E3%82%BF%E3%82%92%E5%8F%96%E5%BE%97%E3%81%99%E3%82%8B
"""
import os
import re
from pprint import pprint
from datetime import datetime
import webbrowser
from urllib.parse import quote
import requests
import bleach
# OAuth access token for the Feedly API, read from the environment
# (sent as the "Authorization: OAuth <token>" header below).
FEEDLY_TOKEN = os.environ['FEEDLY_TOKEN']
# Base URL of the target Scrapbox project; trailing slash stripped so
# page URLs can be built as f"{SCRAPBOX_URL}/{title}".
SCRAPBOX_URL = os.environ['SCRAPBOX_URL'].rstrip('/')
def main():
    """Fetch each labelled Feedly tag stream and open a Scrapbox import URL per tag.

    For every tag that has a ``label``, the tagged entries are fetched,
    de-duplicated by URL, formatted via :func:`format`, and the resulting
    body is URL-quoted into a Scrapbox "create page" link which is printed
    and opened in the browser.
    """
    auth_headers = {
        'Authorization': f'OAuth {FEEDLY_TOKEN}',
    }
    tag_res = requests.get('https://cloud.feedly.com/v3/tags', headers=auth_headers)
    tag_res.raise_for_status()
    for tag in tag_res.json():
        if 'label' not in tag:
            # Entries without a label are not user-visible tags; skip them.
            continue
        stream_res = requests.get(
            'https://cloud.feedly.com/v3/streams/contents',
            {'streamId': tag['id']},
            headers=auth_headers,
        )
        stream_res.raise_for_status()
        seen_urls = set()
        lines = ['imported from #feedly', '']
        for entry in stream_res.json()['items']:
            try:
                url, formatted = format(entry)
            except KeyError:
                # Dump the offending item before propagating, to aid debugging.
                pprint(entry)
                raise
            if url not in seen_urls:
                seen_urls.add(url)
                lines.append(formatted)
                lines.append('')
        page_title = quote(tag['label'])
        page_body = quote('\n'.join(lines))
        scrapbox_url = f"{SCRAPBOX_URL}/{page_title}?body={page_body}"
        print(scrapbox_url)
        webbrowser.open_new_tab(scrapbox_url)
def format(c):
    """Format one Feedly stream item as a Scrapbox text block.

    Emits a ``[url title]`` link line, the published date, the user tags,
    optional keywords, and (via :func:`blockquote`) a short excerpt of the
    entry body.

    :param c: one item dict from the Feedly ``streams/contents`` response.
    :return: ``(url, text)`` — the entry URL and the formatted block.
    :raises RuntimeError: if the item carries neither ``canonicalUrl``
        nor an ``alternate`` link.
    :raises KeyError: if an expected field (``title``, ``published``,
        ``tags``) is missing; the caller dumps the item and re-raises.
    """
    # NOTE: shadows the builtin ``format``; name kept for compatibility.
    formatted_list = []
    if 'canonicalUrl' in c:
        url = c['canonicalUrl']
    elif 'alternate' in c:
        url = c['alternate'][0]['href']
    else:
        raise RuntimeError()
    # Replace characters that break Scrapbox bracket syntax with spaces.
    # Bug fix: the pattern used to be r'[\[\]\t\n]]' (stray trailing ']'),
    # which only matched a class character immediately followed by ']',
    # leaving brackets/tabs/newlines in titles.
    title = re.sub(r'[\[\]\t\n]', ' ', c['title']).strip()
    formatted_list.append(f"[{url} {title}]")
    # Feedly timestamps are milliseconds since the epoch.
    formatted_list.append(f"\tPublished: {datetime.fromtimestamp(int(c['published']) / 1000).strftime('%Y/%m/%d')}")
    # Skip Feedly's built-in system tags (ids containing 'global.').
    formatted_list.append(f"\tTag: " + ', '.join(f"[{t['label']}]" for t in c['tags'] if 'global.' not in t['id']))
    if 'keywords' in c:
        formatted_list.append(f"\tKeyword: " + ', '.join(f"[{k}]" for k in c['keywords']))
    if 'summary' in c:
        formatted_list.append(blockquote(c['summary']['content']))
    elif 'content' in c:
        formatted_list.append(blockquote(c['content']['content']))
    return url, '\n'.join(formatted_list)
def blockquote(text):
    """Return up to three Scrapbox quote lines excerpted from *text*.

    HTML tags are stripped, the result is truncated to ~100 characters,
    and each non-blank line becomes a tab-indented ``>`` quote line.
    """
    plain_excerpt = bleach.clean(text, tags=[], strip=True)[:100]
    quoted_lines = []
    for raw_line in plain_excerpt.split('\n'):
        stripped = raw_line.strip()
        if stripped:
            quoted_lines.append(f'\t>{stripped}')
    return '\n'.join(quoted_lines[:3])
# Run only when executed as a script, not on import.
if __name__ == '__main__':
    main()
"""
Export Feedly tagged URLs into Scrapbox style text
https://scrapbox.io/shimizukawa/Feedly_API%E3%81%A7%E3%83%87%E3%83%BC%E3%82%BF%E3%82%92%E5%8F%96%E5%BE%97%E3%81%99%E3%82%8B
"""
import os
import re
from pprint import pprint
from datetime import datetime
import requests
import bleach
# OAuth access token for the Feedly API, read from the environment
# (sent as the "Authorization: OAuth <token>" header below).
FEEDLY_TOKEN = os.environ['FEEDLY_TOKEN']
def main():
    """Print each labelled Feedly tag stream as Scrapbox-style text on stdout.

    For every tag that has a ``label``, the tagged entries are fetched,
    de-duplicated by URL, formatted via :func:`format`, and printed under
    a per-tag header.
    """
    auth_headers = {
        'Authorization': f'OAuth {FEEDLY_TOKEN}',
    }
    tag_res = requests.get('https://cloud.feedly.com/v3/tags', headers=auth_headers)
    tag_res.raise_for_status()
    for tag in tag_res.json():
        if 'label' not in tag:
            # Entries without a label are not user-visible tags; skip them.
            continue
        stream_res = requests.get(
            'https://cloud.feedly.com/v3/streams/contents',
            {'streamId': tag['id']},
            headers=auth_headers,
        )
        stream_res.raise_for_status()
        print('----------------')
        print(tag['label'])
        print('imported from #feedly')
        print()
        seen_urls = set()
        for entry in stream_res.json()['items']:
            try:
                url, formatted = format(entry)
            except KeyError:
                # Dump the offending item before propagating, to aid debugging.
                pprint(entry)
                raise
            if url not in seen_urls:
                seen_urls.add(url)
                print(formatted)
                print()
def format(c):
    """Format one Feedly stream item as a Scrapbox text block.

    Emits a ``[url title]`` link line, the published date, the user tags,
    optional keywords, and (via :func:`quote`) a short excerpt of the
    entry body.

    :param c: one item dict from the Feedly ``streams/contents`` response.
    :return: ``(url, text)`` — the entry URL and the formatted block.
    :raises RuntimeError: if the item carries neither ``canonicalUrl``
        nor an ``alternate`` link.
    :raises KeyError: if an expected field (``title``, ``published``,
        ``tags``) is missing; the caller dumps the item and re-raises.
    """
    # NOTE: shadows the builtin ``format``; name kept for compatibility.
    formatted_list = []
    if 'canonicalUrl' in c:
        url = c['canonicalUrl']
    elif 'alternate' in c:
        url = c['alternate'][0]['href']
    else:
        raise RuntimeError()
    # Replace characters that break Scrapbox bracket syntax with spaces.
    # Bug fix: the pattern used to be r'[\[\]\t\n]]' (stray trailing ']'),
    # which only matched a class character immediately followed by ']',
    # leaving brackets/tabs/newlines in titles.
    title = re.sub(r'[\[\]\t\n]', ' ', c['title']).strip()
    formatted_list.append(f"[{url} {title}]")
    # Feedly timestamps are milliseconds since the epoch.
    formatted_list.append(f"\tPublished: {datetime.fromtimestamp(int(c['published']) / 1000).strftime('%Y/%m/%d')}")
    # Skip Feedly's built-in system tags (ids containing 'global.').
    formatted_list.append(f"\tTag: " + ', '.join(f"[{t['label']}]" for t in c['tags'] if 'global.' not in t['id']))
    if 'keywords' in c:
        formatted_list.append(f"\tKeyword: " + ', '.join(f"[{k}]" for k in c['keywords']))
    if 'summary' in c:
        formatted_list.append(quote(c['summary']['content']))
    elif 'content' in c:
        formatted_list.append(quote(c['content']['content']))
    return url, '\n'.join(formatted_list)
def quote(text):
    """Return up to three Scrapbox quote lines excerpted from *text*.

    HTML tags are stripped, the result is truncated to ~100 characters,
    and each non-blank line becomes a tab-indented ``>`` quote line.
    """
    plain_excerpt = bleach.clean(text, tags=[], strip=True)[:100]
    quoted_lines = []
    for raw_line in plain_excerpt.split('\n'):
        stripped = raw_line.strip()
        if stripped:
            quoted_lines.append(f'\t>{stripped}')
    return '\n'.join(quoted_lines[:3])
# Run only when executed as a script, not on import.
if __name__ == '__main__':
    main()
bleach==3.1.0
certifi==2019.11.28
chardet==3.0.4
idna==2.8
requests==2.22.0
six==1.13.0
urllib3==1.25.7
webencodings==0.5.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment