Skip to content

Instantly share code, notes, and snippets.

@impiaaa
Last active October 23, 2023 15:01
Show Gist options
  • Save impiaaa/af2c9f69fe30aff83acd7f3183bcd2c9 to your computer and use it in GitHub Desktop.
Save impiaaa/af2c9f69fe30aff83acd7f3183bcd2c9 to your computer and use it in GitHub Desktop.
Multi-instance Mastodon hashtag search
#!/usr/bin/env python3
# -- CONFIGURATION --
# Your home instance
myinstance = "octodon.social"
# Create an "application" in the "development" section in account settings, if
# you haven't already. Paste the client key, client secret, and access token
# here.
clientId=""
clientSecret=""
accessToken=""
# Hashtags that you want to search (in quotes, separated by commas)
tags = {"gamedev", "landscape"}
# "Seed" instances. For each tag, it will start searching with these, and then
# branch out and also search any instances found in those previous searches.
# Includes your home instance by default. (in quotes, separated by commas)
instances = {"mastodon.art", "mastodon.gamedev.place"}
# If you've done a search previously, you can set this to True to search all
# instances previously discovered, for all tags.
exhaustive = False
# These domains will not be searched, and statuses from them will not be
# resolved. (in quotes, separated by commas)
blockedDomains = {"artalley.porn", "birdsite.link"}
# These domains will not be searched, but statuses from them found on other
# instances will be resolved. (in quotes, separated by commas)
# NOTE: this must be a set. An empty pair of braces ({}) is an empty *dict* in
# Python, so keep the set() call when the collection is empty and write
# {"a.example", "b.example"} once it has entries.
dontSearch = set()
# Maximum amount of time in the past to restrict the search.
from datetime import timedelta
maxSearchTime = timedelta(days=8)
# -- END CONFIGURATION --
# Setup
import codecs
import json
import ssl
import urllib.error
from collections import deque
from datetime import datetime, timezone
from urllib.parse import urlparse, quote_plus
from urllib.request import urlopen

from mastodon import Mastodon, MastodonAPIError
# API client bound to the home instance; the credentials come from the
# configuration section above.
m = Mastodon(
    api_base_url="https://{}".format(myinstance),
    client_id=clientId,
    client_secret=clientSecret,
    access_token=accessToken,
)
# Load the URLs of statuses resolved by earlier runs (one URL per line). A
# missing file simply means this is the first run.
try:
    with open("resolvedstatuses.txt") as resolvedFile:
        resolvedStatuses = set(resolvedFile.read().splitlines())
except FileNotFoundError:
    resolvedStatuses = set()

if exhaustive:
    # Every host that ever produced a resolved status becomes a seed.
    instances.update({urlparse(line).netloc for line in resolvedStatuses})

# Drop everything that must not be used as a seed: domains the user excluded
# from searching, blocked domains, and the empty host name that a blank or
# malformed line in resolvedstatuses.txt would produce.
instances.difference_update(dontSearch)
instances.difference_update(blockedDomains)
instances.discard("")
# The home instance is always searched first by the main loop, so it must not
# also appear among the seeds. discard() rather than remove(): with the default
# configuration the home instance is not in the set and remove() would raise
# KeyError.
instances.discard(myinstance)

# Cut-off for the search, as a naive datetime so it can be compared with the
# naive timestamps parsed from each status' created_at field.
# NOTE(review): computed from UTC on the assumption that the API reports
# created_at in UTC - confirm against the Mastodon API documentation.
weekago = datetime.now(timezone.utc).replace(tzinfo=None) - maxSearchTime
def _createdAt(status):
    """Return the created_at field of *status* as a naive datetime.

    Only the first 19 characters (YYYY-MM-DDTHH:MM:SS) are parsed; any
    fractional seconds and zone suffix are ignored.
    """
    return datetime.strptime(status['created_at'][:19], "%Y-%m-%dT%H:%M:%S")


def _decodeStatuses(resp):
    """Decode the JSON body of an HTTP response into a list of statuses.

    Args:
        resp: Open response object as returned by urlopen().

    Returns:
        The parsed list of status dicts (possibly empty).

    Raises:
        ValueError: If the body is not valid JSON, or is valid JSON but not a
            list (for example an error object).
        LookupError: If the charset declared by the server is unknown.
    """
    # Honour the charset from the Content-Type header when present; otherwise
    # fall back to UTF-8, the standard encoding for JSON.
    encoding = resp.info().get_content_charset() or "utf-8"
    statuses = json.loads(resp.read().decode(encoding, errors="replace"))
    if not isinstance(statuses, list):
        raise ValueError("expected a list of statuses, got {!r}".format(statuses))
    return statuses


# Instances that failed during this run. Kept separately from the user's
# dontSearch configuration so that this loop works no matter which container
# type dontSearch was configured with.
unreachableInstances = set()

for tag in tags:
    print("Tag:", tag)
    # Home instance first, then the seeds that have not failed so far.
    seeds = [seed for seed in instances
             if seed != myinstance and seed not in unreachableInstances]
    instancesToSearch = deque([myinstance] + seeds)
    # Everything that has ever been queued for this tag (searched or pending).
    knownInstances = set(instancesToSearch)
    while instancesToSearch:
        instance = instancesToSearch.popleft()
        print(" Instance:", instance)
        # Last status of the previous page; drives both the max_id pagination
        # and the "stop once we are past the cut-off" check.
        oldest = None
        while oldest is None or _createdAt(oldest) > weekago:
            url = "https://{instance}/api/v1/timelines/tag/{tag}".format(instance=instance, tag=quote_plus(tag))
            if oldest is not None:
                # str() guards against servers that return numeric IDs.
                url += "?max_id=" + quote_plus(str(oldest['id']))
            # Remote servers can fail in arbitrary ways; the broad catches are
            # deliberate so one bad instance never aborts the whole search.
            try:
                resp = urlopen(url, timeout=30)
            except Exception as e:
                print(" Failed to open:", e)
                unreachableInstances.add(instance)
                break
            try:
                with resp:
                    statuses = _decodeStatuses(resp)
            except Exception as e:
                print(" Error loading statuses:", e)
                unreachableInstances.add(instance)
                break
            print(" Got", len(statuses), "statuses")
            if not statuses:
                break
            for status in statuses:
                statusUrl = status.get('url')
                if not statusUrl:
                    # Nothing to resolve and no host to discover.
                    continue
                otherInstance = urlparse(statusUrl).netloc
                if otherInstance in blockedDomains:
                    continue
                if (otherInstance not in knownInstances
                        and otherInstance not in dontSearch
                        and otherInstance not in unreachableInstances):
                    print(" Will also search", otherInstance)
                    knownInstances.add(otherInstance)
                    instancesToSearch.append(otherInstance)
                if statusUrl in resolvedStatuses:
                    continue
                if otherInstance == myinstance or instance == myinstance:
                    # Hosted on, or found via, the home instance: only
                    # remember it for this run, do not resolve or persist it.
                    resolvedStatuses.add(statusUrl)
                    continue
                try:
                    m.search_v1(statusUrl, resolve=True)
                except MastodonAPIError as e:
                    print(" ", e)
                    continue
                print(" Resolved", statusUrl)
                resolvedStatuses.add(statusUrl)
                # Append immediately so progress survives an interrupted run.
                with open("resolvedstatuses.txt", 'a') as resolvedFile:
                    resolvedFile.write(statusUrl + '\n')
            oldest = statuses[-1]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment