impiaaa/extrahashtags.py

## extrahashtags.py
#!/usr/bin/env python3

# -- CONFIGURATION --

# Your home instance (domain name only; "https://" is prepended by the script)
myinstance = "octodon.social"

# Create an "application" in the "development" section in account settings, if
# you haven't already. Paste the client key, client secret, and access token
# here.
clientId = ""
clientSecret = ""
accessToken = ""

# Hashtags that you want to search (in quotes, separated by commas)
tags = {"gamedev", "landscape"}

# "Seed" instances. For each tag, it will start searching with these, and then
# branch out and also search any instances found in those previous searches.
# Includes your home instance by default. (in quotes, separated by commas)
instances = {"mastodon.art", "mastodon.gamedev.place"}

# If you've done a search previously, you can set this to True to search all
# instances previously discovered, for all tags.
exhaustive = False

# These domains will not be searched, and statuses from them will not be
# resolved. (in quotes, separated by commas)
blockedDomains = {"artalley.porn", "birdsite.link"}

# These domains will not be searched, but statuses from them found on other
# instances will be resolved. (in quotes, separated by commas)
# NOTE: this has to be a set, because the search records instances that fail
# with .add(). An empty pair of braces, {}, is a dict in Python, so keep the
# set() spelling while the list is empty, e.g. set() or {"example.org"}.
dontSearch = set()

# Maximum amount of time in the past to restrict the search.
from datetime import timedelta
maxSearchTime = timedelta(days=8)

# -- END CONFIGURATION --

# Setup
import codecs, json, urllib.error
from datetime import datetime
from mastodon import Mastodon, MastodonAPIError
from urllib.parse import urlparse, quote_plus
from urllib.request import urlopen
import ssl

# Authenticated client for the home instance. It is used to ask the home
# instance to resolve the statuses that are found on other instances.
m = Mastodon(
    api_base_url="https://" + myinstance,
    client_id=clientId,
    client_secret=clientSecret,
    access_token=accessToken,
)

# URLs of the statuses resolved by earlier runs, one per line, so that they
# are not resolved a second time.
try:
    with open("resolvedstatuses.txt") as resolvedFile:
        resolvedStatuses = set(resolvedFile.read().splitlines())
except FileNotFoundError:
    # First run: nothing has been resolved yet.
    resolvedStatuses = set()

if exhaustive:
    # Also seed the search with every instance seen in a previous run.
    instances.update({urlparse(line).netloc for line in resolvedStatuses})
    instances.difference_update(dontSearch)

# The search always starts with the home instance, so it must not be queued a
# second time through the seed set. discard() is used instead of remove()
# because the home instance is normally not among the seeds, in which case
# remove() raises KeyError.
instances.discard(myinstance)

# Oldest creation time a status may have for the search to keep paging.
# Status timestamps are parsed as naive datetimes and the Mastodon API reports
# them in UTC, so the cutoff is derived from the current UTC time instead of
# local time.
from datetime import timezone
weekago = datetime.now(timezone.utc).replace(tzinfo=None) - maxSearchTime

# dontSearch may have been configured as {} (an empty dict rather than a set).
# Coerce it so that instances that fail can be recorded with .add() below.
dontSearch = set(dontSearch)

# For every tag, crawl the tag timeline of the home instance, of the seed
# instances and of every instance discovered along the way, and ask the home
# instance to resolve each status that it has not seen yet.
for tag in tags:
    print("Tag:", tag)
    # Queue of instances still to query for this tag. The home instance goes
    # first; seeds that are known not to be searchable are left out.
    instancesToSearch = [myinstance] + [
        seed for seed in instances if seed not in dontSearch
    ]
    searchedInstances = set()
    while instancesToSearch:
        instance = instancesToSearch.pop(0)
        print("  Instance:", instance)
        searchedInstances.add(instance)
        # Last status of the most recently fetched page (None before the
        # first request). Its id is the pagination cursor, and its creation
        # time decides whether another page is requested.
        status = None
        while status is None or datetime.strptime(
                status['created_at'][:19], "%Y-%m-%dT%H:%M:%S") > weekago:
            url = "https://{instance}/api/v1/timelines/tag/{tag}".format(
                instance=instance, tag=quote_plus(tag))
            if status is not None:
                # str(): tolerate servers that send numeric ids.
                url += "?max_id=" + str(status['id'])
            try:
                resp = urlopen(url, timeout=30)
            except Exception as e:
                print("  Failed to open:", e)
                dontSearch.add(instance)
                break
            # The context manager closes the connection whether or not the
            # body can be parsed.
            with resp:
                # Charset announced in Content-Type, or latin1 when the
                # header or its charset parameter is missing.
                encoding = resp.info().get_content_charset() or 'latin1'
                try:
                    statuses = json.load(codecs.getreader(encoding)(resp))
                except Exception as e:
                    print("  Error loading statuses:", e)
                    dontSearch.add(instance)
                    break
            print("  Got", len(statuses), "statuses")
            if not statuses:
                break
            for status in statuses:
                statusUrl = status['url']
                if not statusUrl:
                    # The API allows a status without a public URL; there is
                    # nothing to resolve in that case.
                    continue
                otherInstance = urlparse(statusUrl).netloc
                if otherInstance in blockedDomains:
                    continue
                if (otherInstance not in searchedInstances
                        and otherInstance not in instancesToSearch
                        and otherInstance not in dontSearch):
                    print("    Will also search", otherInstance)
                    instancesToSearch.append(otherInstance)
                if statusUrl in resolvedStatuses:
                    continue
                if otherInstance == myinstance or instance == myinstance:
                    # Posted on, or found on, the home instance: no request
                    # is needed, only remember it for the rest of this run.
                    resolvedStatuses.add(statusUrl)
                else:
                    try:
                        m.search_v1(statusUrl, resolve=True)
                    except MastodonAPIError as e:
                        print("   ", e)
                    else:
                        print("    Resolved", statusUrl)
                        resolvedStatuses.add(statusUrl)
                        # Persist immediately so that an interrupted run does
                        # not resolve the same status again.
                        with open("resolvedstatuses.txt", 'a') as resolvedFile:
                            resolvedFile.write(statusUrl + '\n')
	#!/usr/bin/env python3

	# -- CONFIGURATION --

	# Your home instance (domain name only; "https://" is prepended by the script)
	myinstance = "octodon.social"

	# Create an "application" in the "development" section in account settings, if
	# you haven't already. Paste the client key, client secret, and access token
	# here.
	clientId = ""
	clientSecret = ""
	accessToken = ""

	# Hashtags that you want to search (in quotes, separated by commas)
	tags = {"gamedev", "landscape"}

	# "Seed" instances. For each tag, it will start searching with these, and then
	# branch out and also search any instances found in those previous searches.
	# Includes your home instance by default. (in quotes, separated by commas)
	instances = {"mastodon.art", "mastodon.gamedev.place"}

	# If you've done a search previously, you can set this to True to search all
	# instances previously discovered, for all tags.
	exhaustive = False

	# These domains will not be searched, and statuses from them will not be
	# resolved. (in quotes, separated by commas)
	blockedDomains = {"artalley.porn", "birdsite.link"}

	# These domains will not be searched, but statuses from them found on other
	# instances will be resolved. (in quotes, separated by commas)
	# NOTE: this has to be a set, because the search records instances that fail
	# with .add(). An empty pair of braces, {}, is a dict in Python, so keep the
	# set() spelling while the list is empty, e.g. set() or {"example.org"}.
	dontSearch = set()

	# Maximum amount of time in the past to restrict the search.
	from datetime import timedelta
	maxSearchTime = timedelta(days=8)

	# -- END CONFIGURATION --

	# Setup
	import codecs, json, urllib.error
	from datetime import datetime
	from mastodon import Mastodon, MastodonAPIError
	from urllib.parse import urlparse, quote_plus
	from urllib.request import urlopen
	import ssl

	# Authenticated client for the home instance. It is used to ask the home
	# instance to resolve the statuses that are found on other instances.
	m = Mastodon(
	    api_base_url="https://" + myinstance,
	    client_id=clientId,
	    client_secret=clientSecret,
	    access_token=accessToken,
	)

	# URLs of the statuses resolved by earlier runs, one per line, so that they
	# are not resolved a second time.
	try:
	    with open("resolvedstatuses.txt") as resolvedFile:
	        resolvedStatuses = set(resolvedFile.read().splitlines())
	except FileNotFoundError:
	    # First run: nothing has been resolved yet.
	    resolvedStatuses = set()

	if exhaustive:
	    # Also seed the search with every instance seen in a previous run.
	    instances.update({urlparse(line).netloc for line in resolvedStatuses})
	    instances.difference_update(dontSearch)

	# The search always starts with the home instance, so it must not be queued a
	# second time through the seed set. discard() is used instead of remove()
	# because the home instance is normally not among the seeds, in which case
	# remove() raises KeyError.
	instances.discard(myinstance)

	# Oldest creation time a status may have for the search to keep paging.
	# Status timestamps are parsed as naive datetimes and the Mastodon API reports
	# them in UTC, so the cutoff is derived from the current UTC time instead of
	# local time.
	from datetime import timezone
	weekago = datetime.now(timezone.utc).replace(tzinfo=None) - maxSearchTime

	# dontSearch may have been configured as {} (an empty dict rather than a set).
	# Coerce it so that instances that fail can be recorded with .add() below.
	dontSearch = set(dontSearch)

	# For every tag, crawl the tag timeline of the home instance, of the seed
	# instances and of every instance discovered along the way, and ask the home
	# instance to resolve each status that it has not seen yet.
	for tag in tags:
	    print("Tag:", tag)
	    # Queue of instances still to query for this tag. The home instance goes
	    # first; seeds that are known not to be searchable are left out.
	    instancesToSearch = [myinstance] + [
	        seed for seed in instances if seed not in dontSearch
	    ]
	    searchedInstances = set()
	    while instancesToSearch:
	        instance = instancesToSearch.pop(0)
	        print(" Instance:", instance)
	        searchedInstances.add(instance)
	        # Last status of the most recently fetched page (None before the
	        # first request). Its id is the pagination cursor, and its creation
	        # time decides whether another page is requested.
	        status = None
	        while status is None or datetime.strptime(
	                status['created_at'][:19], "%Y-%m-%dT%H:%M:%S") > weekago:
	            url = "https://{instance}/api/v1/timelines/tag/{tag}".format(
	                instance=instance, tag=quote_plus(tag))
	            if status is not None:
	                # str(): tolerate servers that send numeric ids.
	                url += "?max_id=" + str(status['id'])
	            try:
	                resp = urlopen(url, timeout=30)
	            except Exception as e:
	                print(" Failed to open:", e)
	                dontSearch.add(instance)
	                break
	            # The context manager closes the connection whether or not the
	            # body can be parsed.
	            with resp:
	                # Charset announced in Content-Type, or latin1 when the
	                # header or its charset parameter is missing.
	                encoding = resp.info().get_content_charset() or 'latin1'
	                try:
	                    statuses = json.load(codecs.getreader(encoding)(resp))
	                except Exception as e:
	                    print(" Error loading statuses:", e)
	                    dontSearch.add(instance)
	                    break
	            print(" Got", len(statuses), "statuses")
	            if not statuses:
	                break
	            for status in statuses:
	                statusUrl = status['url']
	                if not statusUrl:
	                    # The API allows a status without a public URL; there is
	                    # nothing to resolve in that case.
	                    continue
	                otherInstance = urlparse(statusUrl).netloc
	                if otherInstance in blockedDomains:
	                    continue
	                if (otherInstance not in searchedInstances
	                        and otherInstance not in instancesToSearch
	                        and otherInstance not in dontSearch):
	                    print(" Will also search", otherInstance)
	                    instancesToSearch.append(otherInstance)
	                if statusUrl in resolvedStatuses:
	                    continue
	                if otherInstance == myinstance or instance == myinstance:
	                    # Posted on, or found on, the home instance: no request
	                    # is needed, only remember it for the rest of this run.
	                    resolvedStatuses.add(statusUrl)
	                else:
	                    try:
	                        m.search_v1(statusUrl, resolve=True)
	                    except MastodonAPIError as e:
	                        print(" ", e)
	                    else:
	                        print(" Resolved", statusUrl)
	                        resolvedStatuses.add(statusUrl)
	                        # Persist immediately so that an interrupted run does
	                        # not resolve the same status again.
	                        with open("resolvedstatuses.txt", 'a') as resolvedFile:
	                            resolvedFile.write(statusUrl + '\n')