Skip to content

Instantly share code, notes, and snippets.

@paulegan
Last active March 14, 2023 23:44
Show Gist options
  • Save paulegan/0cc38660a22465626694c8b3c5aeeb80 to your computer and use it in GitHub Desktop.
Save paulegan/0cc38660a22465626694c8b3c5aeeb80 to your computer and use it in GitHub Desktop.
Script that takes an Instapaper CSV export and imports it to Feedly as new entries under a board/tag

Get a Feedly developer token from here:
https://developer.feedly.com/v3/developer/

Get a Mercury API key from here (used to extract metadata on articles):
https://mercury.postlight.com/web-parser/

Download your Instapaper data here:
https://www.instapaper.com/user

Create a new board in Feedly and record the tag ID from the URL, e.g.
https://feedly.com/i/tag/user/XXX/tag/Instapaper (take user/XXX/tag/Instapaper)

Run this script with the following parameters:

python3 import_instapaper_to_feedly.py --feedly-token XXX --mercury-token XXX --feedly-tag-id user/XXX/tag/Instapaper instapaper-export.csv
import time
import csv
import dateutil.parser
import requests
import logging
import argparse
def read_instapaper_export(filename):
    """Yield article rows from an Instapaper CSV export.

    Instapaper exports rows of (url, title, selection, folder) with a
    header line; only rows whose first column is an http(s) URL are
    yielded, which skips the header automatically.

    :param filename: path to the Instapaper CSV export file
    :yields: list of CSV fields for each article row
    """
    # newline='' is required by the csv module docs for correct quoting;
    # also guard against blank lines, which csv yields as empty lists
    # (the original row[0] would raise IndexError on those).
    with open(filename, newline='') as f:
        for row in csv.reader(f):
            if row and row[0].startswith('http'):
                yield row
def parse_article_content(url, args):
    """Fetch parsed article metadata/content for *url* from the Mercury API.

    :param url: article URL to parse
    :param args: parsed CLI args; reads args.mercury_token
    :returns: dict of article fields (title, author, content, excerpt,
        lead_image_url, ...) with 'url' forced back to the original and
        'published' set to an int epoch timestamp or None
    :raises Exception: if the Mercury parser reports failure
    :raises requests.HTTPError: on a non-2xx API response
    """
    params = {'url': url}
    headers = {'x-api-key': args.mercury_token}
    response = requests.get('https://mercury.postlight.com/parser', params=params, headers=headers)
    response.raise_for_status()
    content = response.json()
    if 'title' not in content and content.get('failed'):
        raise Exception(content['messages'])
    content['url'] = url  # force url back to original
    try:
        # int(dt.timestamp()) replaces strftime('%s'): '%s' is an
        # undocumented platform extension (missing on Windows) while
        # timestamp() has the same local-time semantics for naive
        # datetimes. OverflowError/OSError cover out-of-range dates.
        content['published'] = int(dateutil.parser.parse(content['date_published']).timestamp())
    except (KeyError, TypeError, ValueError, OverflowError, OSError):
        content['published'] = None
    return content
def create_feedly_entry(content, args):
    """Create a single Feedly entry tagged onto the configured board.

    :param content: article dict as produced by parse_article_content
        (keys: url, title, author, excerpt, content, lead_image_url,
        published)
    :param args: parsed CLI args; reads args.feedly_token,
        args.feedly_tag_id and args.feedly_tag_label
    :returns: decoded JSON response from the Feedly entries API
    :raises requests.HTTPError: on a non-2xx, non-429 response
    """
    ts = content['published'] * 1000 if content['published'] else None
    entry = {}
    entry['published'] = ts
    entry['crawled'] = ts
    entry['updated'] = ts
    entry['origin'] = {'htmlUrl': content['url'], 'title': content['title']}
    entry['title'] = content['title']
    entry['author'] = content['author']
    entry['summary'] = {'content': content['excerpt']}
    entry['content'] = {'content': content['content']}
    entry['enclosure'] = [{'href': content['lead_image_url'], 'type': 'image/jpeg'}]
    entry['alternate'] = [{'href': content['url'], 'type': 'text/html'}]
    entry['tags'] = [{'id': args.feedly_tag_id, 'label': args.feedly_tag_label}]
    entry['createdBy'] = {'application': 'Instapaper', 'userAgent': 'import_instapaper_to_feedly'}

    headers = {'Authorization': 'OAuth ' + args.feedly_token}
    response = requests.post('https://cloud.feedly.com/v3/entries/', headers=headers, json=entry)
    # On 429 Feedly tells us (via X-RateLimit-Reset) how long to wait
    # before retrying; keep retrying until we get any other status.
    while response.status_code == 429:
        sleeptime = int(response.headers['X-RateLimit-Reset'])
        logging.warning('Hit Feedly rate limit: Sleeping %ss', sleeptime)
        time.sleep(sleeptime)
        response = requests.post('https://cloud.feedly.com/v3/entries/', headers=headers, json=entry)
    response.raise_for_status()
    return response.json()
def run_import(args):
    """Import every matching Instapaper record into Feedly, oldest first.

    Records are processed in reverse export order (Instapaper exports
    newest-first) so entries are created oldest-first. If Mercury fails
    to parse an article a minimal reference-only entry is created
    instead. Per-entry errors are logged and skipped; Ctrl-C stops the
    loop.

    :param args: parsed CLI args; reads instapaper_file,
        instapaper_folder and filter_string (plus everything the helper
        functions read)
    """
    exported = list(read_instapaper_export(args.instapaper_file))
    for url, title, selection, folder in reversed(exported):
        # Optional filters: restrict by Instapaper folder and/or URL substring.
        if args.instapaper_folder and folder != args.instapaper_folder:
            continue
        if args.filter_string and args.filter_string not in url:
            continue
        try:
            content = parse_article_content(url, args)
        except Exception:
            # Best-effort fallback: build a stub entry from the CSV fields only.
            logging.warning('Unable to parse article content for %s, creating reference only', url)
            content = {
                'url': url,
                'title': title,
                'excerpt': title or url,
                'content': title,
                'author': None,
                'published': None,
                'lead_image_url': None,
            }
        try:
            create_feedly_entry(content, args)
        except KeyboardInterrupt:
            break
        except Exception as e:
            logging.error('Error importing %s: %s', url, e)
        else:
            logging.info('Imported %s', url)
if __name__ == '__main__':
    # CLI entry point: collect tokens/IDs and the export file, then import.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--feedly-token', required=True, help='Feedly developer access token')
    arg_parser.add_argument('--feedly-tag-id', required=True, help='ID of Feedly tag/board, e.g. user/xxx/tag/Instapaper')
    arg_parser.add_argument('--feedly-tag-label', default='Instapaper', help='Label for Feedly tag/board')
    arg_parser.add_argument('--mercury-token', required=True, help='API Key for Mercury Web Parser')
    arg_parser.add_argument('--instapaper-folder', help='Import only those entries from the specified Instapaper folder')
    arg_parser.add_argument('--filter-string', help='Import only those entries which contain the specified string in the URL')
    arg_parser.add_argument('instapaper_file')
    cli_args = arg_parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    run_import(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment