Skip to content

Instantly share code, notes, and snippets.

@hakanai
Last active April 29, 2019 11:07
Show Gist options
  • Save hakanai/4335ef83a4895824b3a8ff3dc3f93232 to your computer and use it in GitHub Desktop.
Save hakanai/4335ef83a4895824b3a8ff3dc3f93232 to your computer and use it in GitHub Desktop.
Another Mastodon Toots Exporter in Python 3.
#!/usr/bin/env python3
#
# TootsExporter.py
# - Another Mastodon Toots Exporter in Python 3.
#
# Dependency:
# - pip(3) install "requests[socks]"
#
# License: MIT
import csv
import html
import json
import os
import re
import shutil
from os.path import isfile
from time import sleep

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
proxies = {
# 'http': 'socks5://127.0.0.1:1080',
# 'https': 'socks5://127.0.0.1:1080',
} # For the Great Firewall
def atomic_write(data, filename):
    """Serialize *data* as JSON into *filename* without leaving a partial file.

    The JSON is first written to ``filename + '.tmp'``, flushed and fsync'ed,
    and only then moved over *filename*, so an interrupted write cannot
    truncate an existing file.

    Args:
        data: Any object ``json.dump`` can serialize.
        filename: Destination path; an existing file is replaced.
    """
    temp_file = filename + '.tmp'
    # The context manager closes the handle even if json.dump() raises.
    with open(temp_file, 'w') as f:
        json.dump(data, f)
        f.flush()
        os.fsync(f.fileno())
    # os.replace() overwrites an existing destination on every platform,
    # whereas os.rename() fails on Windows when the destination exists.
    os.replace(temp_file, filename)
class _TimeoutSession(requests.Session):
    """``requests.Session`` that applies a default timeout to every request.

    Assigning ``session.timeout`` on a plain ``requests.Session`` has no
    effect, so the default is injected in ``request()`` instead.
    """

    def __init__(self, timeout):
        super().__init__()
        self.timeout = timeout

    def request(self, method, url, **kwargs):
        # An explicit ``timeout=`` passed by the caller still wins.
        kwargs.setdefault('timeout', self.timeout)
        return super().request(method, url, **kwargs)


# Retry policy shared by every adapter mounted on the session.
retries = Retry(total=6, backoff_factor=0.1, read=3, connect=3,
                status_forcelist=[500, 502, 503, 504])
# Shared HTTP session: 3 second default timeout, retries on HTTPS, and the
# optional proxies configured at the top of the script.
rq = _TimeoutSession(timeout=3)
rq.mount('https://', HTTPAdapter(max_retries=retries))
rq.proxies = proxies
# 0. Load local data
def load_saved_toots(path='Toots.json'):
    """Load previously exported toots from *path*.

    Args:
        path: JSON file holding a list of toot objects.

    Returns:
        A ``(toots, since_id)`` tuple. ``toots`` is the saved list (empty
        when *path* does not exist) and ``since_id`` is the id of its last
        entry as a string, or ``'0'`` when there is no saved toot.
    """
    if not isfile(path):
        return [], '0'
    print('Loading ' + path + '...')
    # Close the handle deterministically instead of leaking it.
    with open(path) as f:
        saved = json.load(f)
    print(str(len(saved)) + ' toots have been loaded.')
    # A saved but empty list must not be indexed with [-1] (IndexError);
    # treat it like "nothing exported yet".
    last_id = str(saved[-1]['id']) if saved else '0'
    return saved, last_id


toots_data, since_id = load_saved_toots()
# 1. Authentication
# Reuse the credentials saved by an earlier run when they are still accepted
# by the server; otherwise start from empty values so that the interactive
# login below fills them in. `login` records whether a usable token exists.
if isfile('Auth.json'):
    print('Loading Auth.json...')
    # Close the handle deterministically instead of leaking it.
    with open('Auth.json') as auth_file:
        auth = json.load(auth_file)
    client_id = auth['client_id']
    client_secret = auth['client_secret']
    access_token = auth['access_token']
    instance = auth['instance']
    headers = auth['headers']
    print('Checking access token...')
    test = rq.get(instance + '/api/v1/accounts/verify_credentials',
                  proxies=proxies, headers=headers, timeout=3).json()
    if test.get('error', False):
        print('Your access token has been outdated.')
        login = False
    else:
        print('Your access token is still valid!')
        login = True
        # Only a verified token is installed on the shared session.
        rq.mount(instance, HTTPAdapter(max_retries=retries))
        rq.headers.update(headers)
else:
    auth = {}
    client_id = ''
    client_secret = ''
    access_token = ''
    instance = ''
    headers = {}
    login = False
# Interactive login: runs only when no valid saved token was found. Missing
# pieces (instance, app registration) are obtained first, then an OAuth
# authorization code is exchanged for an access token and saved to Auth.json.
if not login:
    if not instance:
        # 1.1 Connect to instance
        print('Please enter the link to your Mastodon instance.')
        print('e.g. https://pawoo.net , then press Enter to continue.')
        # Keep only the host part of whatever was typed and force HTTPS.
        instance = 'https://' + input('Link: ').replace('http://', '')\
            .replace('https://', '').split('/')[0].strip()
        print('Connecting to ' + instance + ' ...')
        test = rq.get(instance + '/api/v1/instance').json()
        if test.get('title', False):
            print('Success.')
        else:
            print('Failed, exiting...')
            # Exit with a non-zero status so callers can detect the failure.
            raise SystemExit(1)
        rq.mount(instance, HTTPAdapter(max_retries=retries))
    if not client_id:
        # 1.2 Apply for a new app
        print('Applying for a new app...')
        payload = {'client_name': 'TootsExporter',
                   'redirect_uris': 'urn:ietf:wg:oauth:2.0:oob',
                   'scopes': 'read'}
        test = rq.post(instance + '/api/v1/apps', data=payload).json()
        # Error payloads use the lowercase key 'error'; checking 'Error'
        # never matched and let a failed registration crash on 'client_id'.
        if test.get('error', False):
            print('Failed, exiting...')
            raise SystemExit(1)
        else:
            print('Success.')
            client_id = test['client_id']
            client_secret = test['client_secret']
    # 1.3 Login with Authentication Code
    print('Now please open the link below in browser to authorize this app:')
    oauth_uri = instance + '/oauth/authorize' + \
        '?scope=read&response_type=code' + \
        '&redirect_uri=urn:ietf:wg:oauth:2.0:oob' + \
        '&client_id=' + client_id
    print(oauth_uri)
    print('After authentication, please copy the code in the web page')
    print('and paste below, then press Enter to continue.')
    auth_code = input('Authorization Code: ').strip()
    params = {'client_id': client_id,
              'client_secret': client_secret,
              'grant_type': 'authorization_code',
              'code': auth_code,
              'redirect_uri': 'urn:ietf:wg:oauth:2.0:oob'}
    # Send the credentials in the form body rather than the URL query so the
    # client secret and authorization code do not end up in request URLs.
    test = rq.post(instance + '/oauth/token', data=params).json()
    if test.get('access_token', False):
        print('Login successfully.')
    else:
        print('Failed, exiting...')
        raise SystemExit(1)
    access_token = test['access_token']
    headers = {'Authorization': 'Bearer ' + access_token}
    rq.headers.update(headers)
    # Persist everything needed to skip this whole section on the next run.
    auth = {'client_id': client_id,
            'client_secret': client_secret,
            'instance': instance,
            'access_token': access_token,
            'headers': headers}
    atomic_write(auth, 'Auth.json')
    print('Auth info has been saved to `Auth.json`.')
# 2. Get the target user
# The chosen username is cached in User.json; an empty username means
# "the account that owns the access token".
if isfile('User.json'):
    print('Loading User.json...')
    # Close the handle deterministically instead of leaking it.
    with open('User.json') as user_file:
        user_data = json.load(user_file)
    username = user_data['username']
else:
    print('\nNow please enter the username of the account you want to export,')
    print('e.g. @pixiv, then press Enter to continue.')
    print('If you want to export your toots, just press Enter.')
    print('If you want to export someone else, the amount of toots you can')
    print('export is always smaller than the total count.')
    username = input('Username: ').replace('@', '').strip()
    user_data = {'username': username}
    atomic_write(user_data, 'User.json')
    print('User info has been saved to `User.json`.')

# Resolve the username to the numeric account id used by the REST API.
if username == '':
    test = rq.get(instance + '/api/v1/accounts/verify_credentials').json()
    userid = str(test['id'])
else:
    # First try the numeric id embedded in the profile page's salmon link.
    # NOTE(review): newer Mastodon releases appear to no longer render this
    # link, hence the API fallback below - confirm against the target server.
    found = re.search(r'/api/salmon/(\d+)',
                      rq.get(instance + '/@' + username).text)
    if found:
        userid = found.group(1)
    else:
        try:
            lookup = rq.get(instance + '/api/v1/accounts/lookup',
                            params={'acct': username}).json()
        except ValueError:
            # The server answered with something that is not JSON.
            lookup = {}
        if not isinstance(lookup, dict) or not lookup.get('id'):
            print('Cannot find user `' + username + '` on ' + instance + '.')
            print('Delete `User.json` to enter another username.')
            raise SystemExit(1)
        userid = str(lookup['id'])
test = rq.get(instance + '/api/v1/accounts/' + userid).json()
print('Username: ' + test['username'])
print('ID: ' + str(test['id']))
print('Created at: ' + test['created_at'])
print('Toots:' + str(test['statuses_count']))
# 3. Export all toots
# Pages are requested 40 statuses at a time; each following page is limited
# with max_id set to the id of the last status of the previous page, and
# since_id keeps the result to statuses after the last one saved locally.
print('\nExporting toots, please wait...')
statuses_url = instance + '/api/v1/accounts/' + userid + '/statuses'
query = {'limit': 40, 'since_id': since_id}
toots = []
while True:
    page = rq.get(statuses_url, params=query).json()
    if not isinstance(page, list):
        # An error object (not a list of statuses) came back. Stop without
        # saving: storing a partial run would leave a gap that later runs,
        # which start from the last saved id, would never fill.
        print('Unexpected response from server: ' + str(page))
        raise SystemExit(1)
    if not page:
        break
    for status in page:
        # The account object is not kept in the export.
        status.pop('account', None)
        toots.append(status)
    print(str(len(toots)) + ' toots have been exported.')
    query['max_id'] = str(page[-1]['id'])
    sleep(1)
# `toots` is in the order received; the saved list gets it reversed.
toots_data.extend(reversed(toots))
print('Total: ' + str(len(toots_data)))
# 4. Save to local files
atomic_write(toots_data, 'Toots.json')


def _plain_text(content):
    """Turn a toot's HTML *content* into plain text for the CSV export.

    Paragraph ends and ``<br>`` tags become newlines, the remaining tags are
    removed, and HTML entities such as ``&amp;`` are decoded afterwards so
    that escaped text typed by the author is not mistaken for markup.
    """
    text = content.replace('</p>', '\n').replace('<br', '\n<br')
    text = re.sub(r'<.*?>', '', text)
    return html.unescape(text).strip()


# newline='' lets the csv module control line endings itself; without it
# every row gets an extra carriage return on Windows. errors='ignore' drops
# characters the platform's default encoding cannot represent.
with open('Toots.csv', 'w', newline='', errors='ignore') as toots_csv:
    csv_writer = csv.writer(toots_csv)
    csv_writer.writerow(['id', 'url', 'content', 'visibility', 'date', 'media'])
    for i in toots_data:
        csv_writer.writerow([
            i['id'],
            i['url'],
            _plain_text(i['content']),
            i['visibility'],
            i['created_at'],
            # One attachment URL per line; empty when there is no media.
            '\n'.join(j['url'] for j in i['media_attachments']),
        ])
print('Success. All toots have been exported to `Toots.json` and `Toots.csv`.')
# 5. Download media attachments
# all_media.json lists every attachment URL seen so far; the file for the
# URL at index i is saved as '<i + 1>.<extension>'. Only URLs added by this
# run (index >= count) are downloaded.
if isfile('all_media.json'):
    print('Importing from all_media.json...')
    # Close the handle deterministically instead of leaking it.
    with open('all_media.json') as media_file:
        all_media = json.load(media_file)
else:
    all_media = []
count = len(all_media)
for i in reversed(toots):
    for j in i['media_attachments']:
        all_media.append(j['url'])
print('Total: ' + str(len(all_media)) + ', ' +
      str(len(all_media) - count) + ' need to be downloaded.')
if len(all_media) - count > 0:
    atomic_write(all_media, 'all_media.json')
    # Enable retries for the media host, taken from the last URL.
    rq.mount('https://' + all_media[-1].replace('http://', '')
             .replace('https://', '').strip().split('/')[0],
             HTTPAdapter(max_retries=retries))
    failed = 0
    for i in range(count, len(all_media)):
        # Drop any '?query' suffix so it does not end up in the extension.
        extension = all_media[i].split('?')[0].split('.')[-1]
        filename = str(i + 1) + '.' + extension
        # The context manager releases the connection after each download.
        with rq.get(all_media[i], stream=True,
                    proxies=proxies, timeout=3) as fb:
            if not fb.ok:
                # Do not save an HTTP error page as if it were the media.
                print(filename + ' FAILED (HTTP ' + str(fb.status_code) + ')')
                failed += 1
                continue
            # Have urllib3 undo any Content-Encoding while streaming.
            fb.raw.decode_content = True
            with open(filename, 'wb') as f:
                shutil.copyfileobj(fb.raw, f)
        print(filename + ' OK')
        sleep(1)
    if failed:
        print(str(failed) + ' file(s) could not be downloaded.')
    else:
        print('Success.')

Q&A

1. Why?

Because as of the latest release of Mastodon (v2.1.0) there is still no way to export your toots and media files.

2. How to use it?

Just download it and open your terminal:

pip(3) install "requests[socks]"
python(3) TootsExporter.py

Windows users can run it either in the Linux subsystem on Windows 10 or with the Python release for Windows.

Note that the CSV file is written in UTF-8 when the script runs in the Linux subsystem on Windows 10, but in ANSI when it runs under the Python release for Windows. Excel can directly open a CSV file encoded in ANSI or Unicode, so you may need to convert the encoding manually with Notepad.exe. Also, since ANSI cannot encode emojis, all emojis in your toots may be lost.

For users in China, a reliable proxy may be necessary. If you keep getting SSL handshake errors, start Shadowsocks and remove the # sign in front of the two `socks5://` entries in the `proxies` dictionary near the top of the script (around lines 22 and 23).

3. Why do I need to copy the authentication code manually?

Well, here is the sad story: mastodon/documentation#485

And more info: https://stackoverflow.com/questions/17427707/whats-the-right-oauth-2-0-flow-for-a-mobile-app

4. What can I do without authentication in Mastodon API V1?

Well you can...

import requests

# 1. Get instance information
requests.get('https://pawoo.net' + '/api/v1/instance').json()

# 2. All about a specific status
# Get the status id by finding out link of the toot 'https://xxx.xx/web/statuses/:id'
requests.get('https://pawoo.net' + '/api/v1/statuses/' + toot_id).json()
requests.get('https://pawoo.net' + '/api/v1/statuses/' + toot_id + '/context').json()
requests.get('https://pawoo.net' + '/api/v1/statuses/' + toot_id + '/card').json()
requests.get('https://pawoo.net' + '/api/v1/statuses/' + toot_id + '/reblogged_by').json()
requests.get('https://pawoo.net' + '/api/v1/statuses/' + toot_id + '/favourited_by').json()

# 3. Retrieve public timeline
requests.get('https://pawoo.net' + '/api/v1/timelines/public').json()

# 4. Retrieve tag timeline
requests.get('https://pawoo.net' + '/api/v1/timelines/tag/' + hashtag).json()

# 5. Fetch custom emojis (Useless)
requests.get('https://pawoo.net' + '/api/v1/custom_emojis').json()

Now since you have the access token in Auth.json, you can play around with the full API. Good luck!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment