Skip to content

Instantly share code, notes, and snippets.

@Craftplacer
Created January 23, 2022 13:07
Show Gist options
  • Save Craftplacer/20d8095350544a5058daa74fde56f02e to your computer and use it in GitHub Desktop.
Save Craftplacer/20d8095350544a5058daa74fde56f02e to your computer and use it in GitHub Desktop.
Mastodon-compatible archiver

Mastodon-compatible archiver

A small C# program that dumps/archives an account's statuses (and their media attachments) to your disk.

Usage

program (full path of folder where data should be stored) [-D]

-D causes the program to also delete each post from the server after it has been archived.

Create 3 necessary files for operation in the working directory:

  • token: Authentication token (can be grabbed by peeking into frontend network requests)
  • instance: The instance's bare domain name (e.g. example.social), without "https://" or any path
  • account: Account ID you want to archive
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Threading.Tasks;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
// ---- Start-up / configuration ----------------------------------------------
// args[0] is the folder every status is written to (see GetDirectory). Validate it up
// front so a missing argument produces a usage message instead of an
// IndexOutOfRangeException in the middle of the first page.
if (args.Length == 0 || args[0] == "-D")
{
    Console.Error.WriteLine("Usage: program (full path of folder where data should be stored) [-D]");
    return;
}

// The three configuration files live in the working directory; stray line endings and
// surrounding whitespace are stripped so they can be pasted in carelessly.
string token = File.ReadAllText("token").ReplaceLineEndings(string.Empty).Trim();
string instance = File.ReadAllText("instance").ReplaceLineEndings(string.Empty).Trim();
string account = File.ReadAllText("account").ReplaceLineEndings(string.Empty).Trim();

// "-D" switches from archive-only to archive-and-delete.
bool shouldDelete = args.Contains("-D");

// One shared client for every API call; all relative URLs resolve against /api/v1/.
using var http = new HttpClient();
http.BaseAddress = new Uri($"https://{instance}/api/v1/");
http.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", token);

// Optional resume point: the ID used as max_id for the first page request.
// Cleaned the same way as the other files so a trailing newline cannot end up in the
// query string; an empty file counts as "no resume point".
string? lastId = null;
if (File.Exists("last-id"))
{
    string storedId = File.ReadAllText("last-id").ReplaceLineEndings(string.Empty).Trim();
    lastId = storedId.Length == 0 ? null : storedId;
}

// Progress counters shared with the local functions below.
int page = 0;
ulong count = 0;
int remoteCount = 0;

// Tuning knobs: retry budgets and how many statuses are processed in parallel.
const int maxDeleteAttempts = 3;
const int maxMediaDownloadAttempts = 3;
const int threadCount = 3;

// Elapsed ticks per processed status; feeds the average/ETA shown by PrintTimes.
List<long> ticks = new();

if (shouldDelete)
{
    Console.WriteLine("Running in archive & delete mode!");
}
else
{
    Console.WriteLine("Running in archive mode");
}
// Main paging loop: fetch one page of the account's statuses, process every status on
// it with up to `threadCount` workers, then continue with the page after the last ID seen.
while (true)
{
    // Refresh the server-side total before printing, so the statistics for this page
    // are based on a current value (and not on 0 for the very first page).
    remoteCount = await GetRemoteStatusCount();

    Console.WriteLine();
    LogColored($"Page #{++page} ({count} statuses so far)", ConsoleColor.White);
    PrintTimes();
    Console.WriteLine();

    // max_id asks the server for the page following the last status already handled.
    string param = lastId is null ? string.Empty : $"?max_id={lastId}";
    string response = await http.GetStringAsync($"accounts/{account}/statuses{param}");
    JArray statuses = JArray.Parse(response);

    if (statuses.Count == 0)
    {
        LogColored("Reached the end -- quitting", ConsoleColor.Green);

        // The run completed, so drop the resume marker; otherwise the next run would
        // start at the end and find nothing. File.Delete is a no-op if it does not exist.
        File.Delete("last-id");
        break;
    }

    lastId = statuses[statuses.Count - 1]["id"]!.ToString();

    ParallelOptions options = new() { MaxDegreeOfParallelism = threadCount };
    await Parallel.ForEachAsync(statuses, options, async (status, _) => await ArchiveStatus(status));

    // Persist the resume point only after the whole page has been processed, so an
    // interrupted run restarts at the first page that was not finished.
    await File.WriteAllTextAsync("last-id", lastId);
}
// Asks the server for the account object and returns its "statuses_count" field.
async Task<int> GetRemoteStatusCount()
{
    var accountJson = await http.GetStringAsync($"accounts/{account}");
    JObject accountInfo = JObject.Parse(accountJson);
    JToken statusCount = accountInfo["statuses_count"]!;

    return (int)statusCount;
}
// Processes a single status: skips reblogs, saves the status (JSON + attachments) and,
// in delete mode, deletes it from the server. The time taken is recorded in `ticks`.
// Called concurrently by the Parallel.ForEachAsync workers of the main loop, so every
// write to shared state in here has to be synchronized.
async Task ArchiveStatus(JToken status)
{
    string id = (String)status["id"]!;

    // A reblog carries the boosted status in "reblog"; own statuses have null there.
    // A missing key is treated like null instead of throwing a NullReferenceException.
    if (status["reblog"] is { Type: not JTokenType.Null })
    {
        LogColored($"Skipping reblog {id}...", ConsoleColor.Magenta);
        return;
    }

    Stopwatch stopwatch = Stopwatch.StartNew();

    await SaveStatusAsync(status);

    if (shouldDelete)
    {
        // TryDelete advances `count` itself once the deletion succeeded.
        await TryDelete(id);
    }
    else
    {
        // Archive-only mode: count the status here, otherwise the "statuses so far"
        // figure and the ETA would never move.
        System.Threading.Interlocked.Increment(ref count);
    }

    stopwatch.Stop();

    // List<T> is not safe for concurrent writers.
    lock (ticks)
    {
        ticks.Add(stopwatch.Elapsed.Ticks);
    }
}
// Deletes the status with the given ID, making at most `maxDeleteAttempts` attempts.
//   * success            -> `count` is incremented and the method returns
//   * 429 Too Many Req.  -> waits 5 seconds, then retries
//   * 404 Not Found      -> treated as "already gone", returns without counting
//   * anything else      -> logged; retried after a short growing delay
// Never throws for a failed deletion; when the attempts run out this is logged and the
// status is left on the server.
async Task TryDelete(string id)
{
    for (var attempt = 0; attempt < maxDeleteAttempts; attempt++)
    {
        try
        {
            Console.WriteLine($"Deleting {id}...");

            // Dispose the response so its connection goes back to the pool right away.
            using var deleteResponse = await http.DeleteAsync($"statuses/{id}");
            deleteResponse.EnsureSuccessStatusCode();

            // Several workers delete in parallel, so the shared counter is bumped atomically.
            System.Threading.Interlocked.Increment(ref count);
            return;
        }
        catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.TooManyRequests)
        {
            LogColored("Waiting... (Too Many Requests)", ConsoleColor.Yellow);
            await Task.Delay(5000);
        }
        catch (HttpRequestException ex) when (ex.StatusCode == HttpStatusCode.NotFound)
        {
            LogColored($"Status {id} already disappeared", ConsoleColor.Yellow);
            return;
        }
        catch (Exception e)
        {
            LogColored($"Failed processing status {id}:\n{e}", ConsoleColor.Red);

            if (attempt >= (maxDeleteAttempts - 1))
            {
                // Last attempt: stop in the debugger, but only if one is attached.
                if (Debugger.IsAttached)
                {
                    Debugger.Break();
                }
            }
            else
            {
                // 50^(attempt / 2) milliseconds: 1 ms, then ~7 ms.
                int delay = (int)Math.Pow(50, attempt * .5d);
                await Task.Delay(delay);
            }
        }
    }

    // Only reached when every attempt failed; say so instead of giving up silently.
    LogColored($"Giving up on deleting status {id} after {maxDeleteAttempts} attempts", ConsoleColor.Red);
}
// Writes one line to the console in the given foreground color, then restores the
// default colors.
// Called from several parallel workers, so "set color -> write -> reset" is done under
// a lock; otherwise one worker could reset or change the color in the middle of
// another worker's message. Console.Out is used as the monitor.
// NOTE(review): Console.Out presumably is the same object the runtime's console writer
// synchronizes on, which would also keep plain Console.WriteLine calls out - confirm.
void LogColored(string message, ConsoleColor color)
{
    lock (Console.Out)
    {
        Console.ForegroundColor = color;
        try
        {
            Console.WriteLine(message);
        }
        finally
        {
            // Reset even if the write throws, so later output is not stuck in this color.
            Console.ResetColor();
        }
    }
}
// Prints throughput statistics: statuses per second, statuses left, estimated time
// remaining and the average time per status, based on the durations in `ticks`.
void PrintTimes()
{
    double avgTicks = ticks.Count != 0 ? ticks.Average() : 0;
    var avg = new TimeSpan(ticks: (long)avgTicks);

    // Clamp at zero: once `count` exceeds `remoteCount` an unsigned subtraction would
    // wrap around to a huge value, giving a nonsensical "left" figure and risking an
    // OverflowException in TimeSpan.Multiply below.
    ulong total = (ulong)Math.Max(remoteCount, 0);
    ulong left = total > count ? total - count : 0;

    // The work is spread over `threadCount` workers.
    var eta = avg.Multiply((ulong)(left / (double)threadCount));

    // No samples yet (or a zero average) means no meaningful rate; report 0 rather
    // than dividing by zero.
    double ss = avg > TimeSpan.Zero ? Math.Round(1 / avg.TotalSeconds, 2) : 0;

    Console.WriteLine($"{ss} s/s; {left} left; ETA: {eta}; Avg: {avg}");
}
// Stores one status on disk as "<id>.json" inside the folder returned by GetDirectory
// (created on demand), then downloads the status' media attachments.
async Task SaveStatusAsync(JToken status)
{
    var statusId = (String)status["id"]!;
    Console.WriteLine($"Saving {statusId}...");

    var targetFolder = GetDirectory(statusId);
    Directory.CreateDirectory(targetFolder);

    var targetFile = Path.Combine(targetFolder, statusId + ".json");
    await File.WriteAllTextAsync(targetFile, JsonConvert.SerializeObject(status));

    await SaveAttachmentsAsync(status);
}
// Downloads every media attachment of a status into "<status folder>/<status id>/",
// one file per attachment, named after the attachment ID. Each download is attempted
// up to `maxMediaDownloadAttempts` times. This is best effort: a failing attachment is
// logged and skipped, it never aborts the archiving of the status.
async Task SaveAttachmentsAsync(JToken status)
{
    string id = (String)status["id"]!;
    string statusDirectory = Path.Combine(GetDirectory(id), id);

    // Nothing to do (and no folder to create) when the list is missing or empty.
    if (status["media_attachments"] is not JArray { Count: > 0 } attachments)
    {
        return;
    }

    Directory.CreateDirectory(statusDirectory);

    foreach (var attachment in attachments)
    {
        string path = Path.Combine(statusDirectory, (String)attachment["id"]!);

        // "remote_url" can be null; fall back to "url" in that case. Passing a null URL
        // on to HttpClient would make it request BaseAddress itself and store that
        // response as the "attachment".
        string? url = (string?)attachment["remote_url"];
        if (string.IsNullOrEmpty(url))
        {
            url = (string?)attachment["url"];
        }

        if (string.IsNullOrEmpty(url))
        {
            LogColored($"Attachment {attachment["id"]} of status {id} has no download URL - skipping", ConsoleColor.Red);
            continue;
        }

        // NOTE(review): `http` sends the bearer token as a default header on every
        // request, including this one - if media is served from a different host the
        // token is disclosed to it. Consider a separate, unauthenticated client.
        bool downloaded = false;
        for (var i = 0; i < maxMediaDownloadAttempts && !downloaded; i++)
        {
            try
            {
                using var stream = await http.GetStreamAsync(url);
                await using var fileStream = new FileStream(path, FileMode.Create, FileAccess.Write);
                await stream.CopyToAsync(fileStream);
                downloaded = true;
            }
            catch (Exception e)
            {
                // Keep the reason in the log instead of discarding it.
                LogColored($"Fail downloading attachment ({url}) - Attempt #{i+1}: {e.Message}", ConsoleColor.Red);
            }
        }

        if (!downloaded)
        {
            LogColored($"Giving up on attachment ({url}) of status {id}", ConsoleColor.Red);
        }
    }
}
// Maps a status ID to its storage folder: <args[0]>/<chars 0-1 of id>/<chars 2-3 of id>.
// The two-level fan-out keeps any single folder from collecting every file.
// IDs shorter than four characters are left-padded with '0' for the purpose of picking
// the folder (e.g. "7" -> 00/07) instead of making Substring throw; IDs with four or
// more characters map exactly as before.
string GetDirectory(string id)
{
    string padded = id.PadLeft(4, '0');
    string part1 = padded.Substring(0, 2);
    string part2 = padded.Substring(2, 2);
    return Path.Combine(args[0], part1, part2);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment