Twitter reader plugin prototype for https://github.com/lemon24/reader/issues/271
"""
Prototype showing how a Twitter reader plugin could work, based on
https://github.com/lemon24/reader/issues/271#issuecomment-1111789547
FooRetriever and FakeParser are simplified versions of the actual thing.
Typing left as an exercise for the reader (won't be straightforward).
UpdaterWrapper is needed because we can't choose updaters
(like we do with parsers and mime types); we'd likely use feed.version.
Of note:
* The "enrich" bits are in updater just because it was easy to add them there.
It should be possible to tell:
* core/storage: "I need some extra data in feed for update"
* storage: "I need some extra entry data in entries for update"
* The foo updater shares some logic with reader._updater:
* "not modified" early exit
* no entries but had error exit
* ... but it does *not* share other logic:
* stale feeds (?)
* entry data + entry for update (old entry) merging
* fancy "should I update feed/entry" logic
"""

from contextlib import contextmanager
from dataclasses import dataclass

from reader import make_reader, Content
from reader._parser import RetrieveResult
from reader._types import FeedData, EntryData, FeedUpdateIntent, EntryUpdateIntent

TYPE = 'application/x.foo'

class FooRetriever:
    slow_to_read = False

    @contextmanager
    def __call__(self, url, http_etag=None, *args):
        print('retriever got http_etag', http_etag)
        # http_etag was "enriched" by UpdaterWrapper.process_old_feed()
        # into an (etag, recent_entries, api_key) tuple
        etag, recent_entries, api_key = http_etag
        etag = int(etag) if etag else None

        # fake some results
        if not etag:
            data = {1: [1]}
            new_etag = 1
        elif etag == 1:
            data = {8: [8]}
            # derived from recent_entries on purpose
            data.update({int(id): [int(id) + 1] for id in recent_entries})
            new_etag = 8
        else:
            data = {}
            new_etag = etag

        yield RetrieveResult(data, TYPE, new_etag)

    def validate_url(self, url):
        pass
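
# For illustration, the retriever can be exercised on its own with an
# already-"enriched" http_etag tuple (normally process_old_feed() builds it);
# hypothetical usage, not part of the prototype:
#
#     with FooRetriever()('foo:bar', (None, [], 'key')) as result:
#         ...  # yields a RetrieveResult with data {1: [1]} and new etag 1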

class FooParser:
    http_accept = TYPE

    def __call__(self, url, file, headers=None):
        print("parser got file", file)
        feed = FeedData(url, version='foo')
        entries = [
            EntryData(url, str(id), content=(Content(content, TYPE),))
            for id, content in file.items()
        ]
        return feed, entries

@dataclass
class UpdaterWrapper:
    original: ...
    reader: ...

    def process_old_feed(self, feed):
        if not feed.url.startswith('foo:'):
            return self.original.process_old_feed(feed)
        # "enrich" http_etag with the ids of recent entries,
        # so we can check them for new replies
        ids = [e.id for e in self.reader.get_entries(feed=feed)]
        api_key = self.reader.get_tag((), 'foo-api-key', 'key')
        feed = feed._replace(http_etag=(feed.http_etag, ids, api_key))
        return feed

    def make_update_intents(self, old_feed, now, global_now, parsed_feed, entry_pairs):
        if not old_feed.url.startswith('foo:'):
            return self.original.make_update_intents(
                old_feed, now, global_now, parsed_feed, entry_pairs
            )

        # copied from _updater ("Not modified.")
        if not parsed_feed:
            if not old_feed.last_updated:
                return FeedUpdateIntent(old_feed.url, now), ()
            if old_feed.last_exception:
                return FeedUpdateIntent(old_feed.url, old_feed.last_updated), ()
            return None, ()

        entries = []
        for new_entry, entry_for_update in entry_pairs:
            entry_new = not entry_for_update
            if entry_new:
                merged_entry = new_entry
            else:
                # "enrich" the entry for update,
                # so we can merge in new tweets
                old_entry = self.reader.get_entry((old_feed.url, new_entry.id))
                # merge in new tweets
                old_content = old_entry.content[0]
                new_content = new_entry.content[0]
                merged_content = new_content._replace(
                    value=old_content.value + new_content.value
                )
                merged_entry = new_entry._replace(content=(merged_content,))

            entry = EntryUpdateIntent(
                merged_entry,
                now,
                now if entry_new else None,
                global_now if entry_new else None,
            )
            entries.append(entry)

        feed = None
        if entries:
            feed = FeedUpdateIntent(
                old_feed.url,
                now,
                parsed_feed.feed,
                parsed_feed.http_etag,
                parsed_feed.http_last_modified,
            )

        # copied from _updater
        if not feed and old_feed.last_exception:
            feed = FeedUpdateIntent(old_feed.url, old_feed.last_updated)

        return feed, entries
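
# For illustration, the merge above just concatenates content values;
# a standalone check (hypothetical, not part of the prototype):
#
#     old = Content([1], TYPE)
#     new = Content([2], TYPE)
#     merged = new._replace(value=old.value + new.value)
#     assert merged.value == [1, 2]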

reader = make_reader(':memory:')
reader._parser.mount_retriever('foo:', FooRetriever())
reader._parser.mount_parser_by_mime_type(FooParser())
reader._updater = UpdaterWrapper(reader._updater, reader)

reader.add_feed('foo:bar')
reader.add_feed('https://death.andgravity.com/_feed/index.xml')

for i in range(3):
    print(f"--- update #{i+1}")
    reader.update_feeds()
    print("entry count after update:", reader.get_entry_counts().total)
    print("entries after update (foo:bar):")
    for e in reader.get_entries(feed='foo:bar'):
        print(f" {e.id}: {e.content[0].value}")
    print()
"""
output:
--- update #1
retriever got http_etag (None, [], 'key')
parser got file {1: [1]}
entry count after update: 22
entries after update (foo:bar):
1: [1]
--- update #2
retriever got http_etag ('1', ['1'], 'key')
parser got file {8: [8], 1: [2]}
entry count after update: 23
entries after update (foo:bar):
8: [8]
1: [1, 2]
--- update #3
retriever got http_etag ('8', ['8', '1'], 'key')
parser got file {}
entry count after update: 23
entries after update (foo:bar):
8: [8]
1: [1, 2]
"""