Created
February 13, 2020 16:00
-
-
Save slachterman-g/7089b3c08b156f63ac836f22d63c6467 to your computer and use it in GitHub Desktop.
SSE Client for Wikimedia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import json
import logging
import os
import sys
import time

from google.cloud import pubsub_v1
from sseclient import SSEClient as EventSource
# Server-sent-events endpoint that is read for recent-change events.
url = 'https://stream.wikimedia.org/v2/stream/recentchange'
# Fields copied from each change event into the published message.
keys = ['id', 'timestamp', 'user', 'title']


def build_payload(raw_data):
    """Return the UTF-8 JSON message for one SSE event body, or None.

    Args:
        raw_data: The ``data`` field of a server-sent event, expected to
            hold a JSON object.

    Returns:
        The encoded JSON object restricted to ``keys`` (a missing field is
        filled with ``0``), or ``None`` when ``raw_data`` is not valid JSON
        or does not decode to a JSON object.
    """
    try:
        change = json.loads(raw_data)
    except (TypeError, ValueError):
        return None
    # Valid JSON that is not an object (list, number, ...) has no fields to
    # pick from; treat it like any other unusable event.
    if not isinstance(change, dict):
        return None
    selected = {k: change.get(k, 0) for k in keys}
    return json.dumps(selected).encode('utf-8')


def main():
    """Forward 'message' events from ``url`` to a Pub/Sub topic.

    The project and topic are read from the ``PROJECT`` and ``TOPIC``
    environment variables.

    Raises:
        KeyError: If ``PROJECT`` or ``TOPIC`` is not set.
    """
    project_id = os.environ['PROJECT']
    topic_name = os.environ['TOPIC']
    publisher = pubsub_v1.PublisherClient()
    topic_path = publisher.topic_path(project_id, topic_name)

    # Publishes that have not settled yet, keyed by id(future) so that two
    # identical payloads do not overwrite each other.
    pending = {}

    def _on_done(future):
        # Drop the settled future so `pending` stays bounded on an endless
        # stream and the drain loop below can actually finish.
        pending.pop(id(future), None)
        error = future.exception()
        if error is not None:
            logging.error('Publish to %s failed: %s', topic_path, error)

    for event in EventSource(url):
        if event.event != 'message':
            continue
        payload = build_payload(event.data)
        if payload is None:
            logging.debug('Skipping event without a JSON object body: %r',
                          event.data)
            continue
        future = publisher.publish(topic_path, data=payload)
        # Register before attaching the callback: an already-finished
        # future runs its callback immediately, and the entry must exist
        # by then to be removed.
        pending[id(future)] = future
        future.add_done_callback(_on_done)

    # If the stream ever ends, wait for outstanding publishes to settle.
    while pending:
        time.sleep(5)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment