# fragglet/tweet_images.py

## tweet_images.py
"""Extracts media URLs from a tweet archive."""
import glob
import json

def load_tweets(filename):
	"""Return the JSON payload stored in one archive ``.js`` file.

	The file is expected to contain a JavaScript assignment of the form
	``<something> = <JSON>``. Everything up to and including the first
	``=`` is discarded and the remainder is decoded as JSON.

	Args:
		filename: Path of the ``.js`` file to read.

	Returns:
		The decoded JSON value (the main loop below iterates over it as a
		sequence of tweet dicts).

	Raises:
		ValueError: If the file contains no ``=``, or if the text after
			it is not valid JSON (``json.JSONDecodeError`` is a subclass
			of ``ValueError``).
	"""
	# JSON text is UTF-8; do not depend on the platform's default encoding.
	with open(filename, "r", encoding="utf-8") as f:
		# Split on the first "=" only, so "=" inside tweet text is preserved.
		_, payload = f.read().split("=", 1)
	return json.loads(payload)


def media_urls(tweets):
	"""Yield the ``media_url_https`` of every media entity in *tweets*.

	Tweets without ``entities`` or ``media`` keys, and media entries with a
	missing or empty ``media_url_https``, are skipped. URLs containing
	``pbs.twimg.com`` get ``:orig`` appended.

	Args:
		tweets: Iterable of tweet dicts.

	Yields:
		One URL string per media entry, in input order.
	"""
	for tweet in tweets:
		for media in tweet.get("entities", {}).get("media", []):
			url = media.get("media_url_https", "")
			if not url:
				continue
			if "pbs.twimg.com" in url:
				# NOTE(review): ":orig" presumably selects the original-size
				# image on Twitter's image host - confirm.
				url += ":orig"
			yield url


# Deliberately left at module level (no __main__ guard) so the script keeps
# printing URLs exactly as before, however it is executed.
for filename in glob.glob("tweets/*.js"):
	for url in media_urls(load_tweets(filename)):
		print(url)