Skip to content

Instantly share code, notes, and snippets.

@LongHairedHacker
Last active November 19, 2022 16:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save LongHairedHacker/360ea2005c5a53bb126d4097f9e801e7 to your computer and use it in GitHub Desktop.
Save LongHairedHacker/360ea2005c5a53bb126d4097f9e801e7 to your computer and use it in GitHub Desktop.
Expand all the t.co short links in a Twitter backup
#!/usr/bin/env python3
import os
from os import path
import sys
import re
import requests
import shutil
# Maps each short URL that has been looked up successfully to its result
# (the redirect target, or the short URL itself when no redirect was given),
# so the same link is only requested once per run.
url_cache = dict()


def resolve_url(url, timeout=10):
    """Return the redirect target of a short URL, or the URL itself.

    A single request is sent without following redirects.  If the response
    is an HTTP redirect carrying a ``Location`` header, that header value
    is cached and returned; any other response caches and returns ``url``
    unchanged.

    Args:
        url: The short link to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The expanded URL, or ``url`` when it could not be expanded.
    """
    if url in url_cache:
        return url_cache[url]

    try:
        response = requests.get(url, allow_redirects=False, timeout=timeout)
    except (UnicodeDecodeError, requests.RequestException) as err:
        # Best effort: one unreachable link must not abort the whole run.
        # The failure is deliberately not cached so a later occurrence of
        # the same link gets another chance.
        print("Could not resolve %s: %s" % (url, err), file=sys.stderr)
        return url

    # Accept every redirect status, not only 301, and tolerate a redirect
    # response that lacks a Location header.
    target = response.headers.get('Location')
    if response.status_code in (301, 302, 303, 307, 308) and target:
        url_cache[url] = target
    else:
        url_cache[url] = url
    return url_cache[url]
# Matches a single t.co short link.  At least one path character is required
# so the bare "https://t.co/" prefix is never treated as a link.
_TCO_URL_RE = re.compile(r"https?://t\.co/[a-zA-Z0-9]+")


def replace_links_in_file(filepath):
    """Rewrite ``filepath`` in place with every t.co link expanded.

    The file is streamed line by line into ``filepath + "_tmp"``, which
    replaces the original once the whole file has been written.  Every
    t.co link on a line is passed to ``resolve_url`` and substituted, so
    lines containing several links are fully expanded.

    Args:
        filepath: Path of the text file to rewrite.

    Raises:
        OSError: If the file cannot be read or the temporary file cannot
            be written.  The temporary file is removed before re-raising.
    """
    print("Processing %s" % (filepath))
    tmp_path = filepath + "_tmp"

    def expand(match):
        # Substitution callback: replaces exactly the matched span, so a
        # link that is a prefix of another link cannot corrupt it.
        url = match.group(0)
        new_url = resolve_url(url)
        print("%s -> %s" % (url, new_url))
        return new_url

    try:
        # newline="" keeps the original line endings untouched; UTF-8 is
        # set explicitly so the result does not depend on the platform
        # default encoding.
        with open(filepath, "r", encoding="utf-8", newline="") as infile, \
                open(tmp_path, "w", encoding="utf-8", newline="") as outfile:
            for line in infile:
                outfile.write(_TCO_URL_RE.sub(expand, line))
    except BaseException:
        # Do not leave a half-written temporary file behind (this also
        # covers Ctrl-C during a long run); the original file is intact.
        if path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    shutil.move(tmp_path, filepath)
    print("Done %s" % (filepath))
def main():
    """Expand t.co links in the data files of a backup directory.

    Expects exactly one command-line argument: the backup directory.
    Prints a usage line and exits with status 1 when the argument count is
    wrong, and exits with status 1 when the argument is not a directory.
    Data files that are absent from the backup are skipped with a warning
    so the remaining files are still processed.
    """
    if len(sys.argv) != 2:
        print("%s <backup directory>" % (sys.argv[0]))
        sys.exit(1)

    backup_dir = sys.argv[1]
    if not path.isdir(backup_dir):
        print("%s is not a directory" % (backup_dir), file=sys.stderr)
        sys.exit(1)

    data_files = (
        "data/direct-messages-group.js",
        "data/direct-messages.js",
        "data/profile.js",
        "data/like.js",
        "data/tweets.js",
    )
    for relative_path in data_files:
        filepath = path.join(backup_dir, relative_path)
        if not path.isfile(filepath):
            # A missing file must not stop the files listed after it.
            print("Skipping %s (not found)" % (filepath), file=sys.stderr)
            continue
        replace_links_in_file(filepath)
# Run the command-line entry point only when executed as a script, so the
# module can be imported without side effects.
if "__main__" == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment