Instantly share code, notes, and snippets.
Last active
October 23, 2023 15:01
-
Star
(0)
0
You must be signed in to star a gist -
Fork
(0)
0
You must be signed in to fork a gist
-
Save impiaaa/af2c9f69fe30aff83acd7f3183bcd2c9 to your computer and use it in GitHub Desktop.
Multi-instance Mastodon hashtag search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -- CONFIGURATION --
# Your home instance
myinstance = "octodon.social"
# Create an "application" in the "development" section in account settings, if
# you haven't already. Paste the client key, client secret, and access token
# here.
clientId = ""
clientSecret = ""
accessToken = ""
# Hashtags that you want to search (in quotes, separated by commas)
tags = {"gamedev", "landscape"}
# "Seed" instances. For each tag, it will start searching with these, and then
# branch out and also search any instances found in those previous searches.
# Includes your home instance by default. (in quotes, separated by commas)
instances = {"mastodon.art", "mastodon.gamedev.place"}
# If you've done a search previously, you can set this to True to search all
# instances previously discovered, for all tags.
exhaustive = False
# These domains will not be searched, and statuses from them will not be
# resolved. (in quotes, separated by commas)
blockedDomains = {"artalley.porn", "birdsite.link"}
# These domains will not be searched, but statuses from them found on other
# instances will be resolved. (in quotes, separated by commas)
# NOTE: this must be a set. An empty pair of braces ({}) is a dict in Python,
# so keep set() when the list is empty and use {"a.example", "b.example"}
# once you add entries.
dontSearch = set()
# Maximum amount of time in the past to restrict the search.
from datetime import timedelta
maxSearchTime = timedelta(days=8)
# -- END CONFIGURATION --
# Setup
import codecs
import json
import ssl
import urllib.error
from datetime import datetime, timezone
from urllib.parse import quote_plus, urlparse
from urllib.request import urlopen

from mastodon import Mastodon, MastodonAPIError
# API client for the home instance; the search loop uses it to ask the home
# instance to resolve (fetch) statuses found elsewhere.
m = Mastodon(api_base_url="https://"+myinstance,
             client_id=clientId,
             client_secret=clientSecret,
             access_token=accessToken)

# Status URLs resolved by earlier runs, stored one per line. A missing file
# simply means this is the first run.
try:
    with open("resolvedstatuses.txt") as resolvedFile:
        resolvedStatuses = set(resolvedFile.read().splitlines())
except FileNotFoundError:
    resolvedStatuses = set()

if exhaustive:
    # Seed the search with every domain a previous run resolved a status
    # from. Blank or malformed lines have no netloc and are skipped so that
    # an empty host name never ends up in the instance list.
    for line in resolvedStatuses:
        domain = urlparse(line).netloc
        if domain:
            instances.add(domain)

# Honour both exclusion lists for the seed set, including domains that were
# cached before they were added to the block list.
instances.difference_update(dontSearch)
instances.difference_update(blockedDomains)
# The home instance is always placed first in the search queue, so it must
# not also appear among the seeds. discard() rather than remove(): with the
# default configuration the home instance is not in the set at all.
instances.discard(myinstance)

# Oldest status date worth looking at. The search loop parses 'created_at'
# into a naive datetime; the Mastodon API reports those timestamps in UTC,
# so the cutoff is computed as naive UTC as well (not local time).
weekago = datetime.now(timezone.utc).replace(tzinfo=None) - maxSearchTime
# Main search: for every tag, walk the public tag timeline of each known
# instance (home instance first), page backwards until statuses are older
# than the cutoff, queue any newly seen instances, and ask the home instance
# to resolve every status it has not resolved before.

# Instances that failed to respond or returned unusable data during this run.
# They are not queued again when later statuses point at them.
failedInstances = set()

for tag in tags:
    print("Tag:", tag)
    instancesToSearch = [myinstance]+list(instances)
    # Everything that has ever been queued for this tag (searched or pending).
    queuedInstances = set(instancesToSearch)
    while instancesToSearch:
        instance = instancesToSearch.pop(0)
        print(" Instance:", instance)
        # ID of the oldest status seen so far; None requests the newest page.
        maxId = None
        while True:
            url = "https://{instance}/api/v1/timelines/tag/{tag}".format(instance=instance, tag=quote_plus(tag))
            if maxId is not None:
                url += "?max_id="+str(maxId)
            try:
                resp = urlopen(url, timeout=30)
            except Exception as e:
                # Best effort: any failure just drops this instance.
                print(" Failed to open:", e)
                failedInstances.add(instance)
                break
            try:
                # 'with' closes the HTTP response even if decoding fails.
                with resp:
                    # Use the charset from the Content-Type header when
                    # present; JSON is UTF-8 unless stated otherwise.
                    encoding = resp.info().get_content_charset("utf-8")
                    statuses = json.load(codecs.getreader(encoding)(resp))
                if not isinstance(statuses, list):
                    raise ValueError("expected a list of statuses")
            except Exception as e:
                print(" Error loading statuses:", e)
                failedInstances.add(instance)
                break
            print(" Got", len(statuses), "statuses")
            if not statuses:
                break
            for status in statuses:
                statusUrl = status.get('url')
                if not statusUrl:
                    # Without a URL there is nothing to resolve or record.
                    continue
                otherInstance = urlparse(statusUrl).netloc
                if otherInstance in blockedDomains:
                    continue
                if (otherInstance not in queuedInstances
                        and otherInstance not in dontSearch
                        and otherInstance not in failedInstances):
                    print(" Will also search", otherInstance)
                    instancesToSearch.append(otherInstance)
                    queuedInstances.add(otherInstance)
                if statusUrl in resolvedStatuses:
                    continue
                if otherInstance == myinstance or instance == myinstance:
                    # Seen on, or posted from, the home instance: no remote
                    # resolve is requested, so only remember it in memory.
                    resolvedStatuses.add(statusUrl)
                    continue
                try:
                    m.search_v1(statusUrl, resolve=True)
                except MastodonAPIError as e:
                    print(" ", e)
                    continue
                print(" Resolved", statusUrl)
                resolvedStatuses.add(statusUrl)
                # Append immediately so progress survives an interrupted run.
                with open("resolvedstatuses.txt", 'a') as resolvedFile:
                    resolvedFile.write(statusUrl+'\n')
            # Continue paging from the last status of this page, unless it is
            # already older than the cutoff.
            oldest = statuses[-1]
            maxId = oldest['id']
            if datetime.strptime(oldest['created_at'][:19], "%Y-%m-%dT%H:%M:%S") <= weekago:
                break
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment