Skip to content

Instantly share code, notes, and snippets.

@bepvte
Created April 8, 2024 13:43
Show Gist options
  • Save bepvte/5fd9979cb8b8fe576a6a94262fc1a862 to your computer and use it in GitHub Desktop.
Save bepvte/5fd9979cb8b8fe576a6a94262fc1a862 to your computer and use it in GitHub Desktop.
organize a folder of twitter likes by date posted?
#!/usr/bin/env python3
import re
from pathlib import Path
import shutil
from datetime import datetime
from os import walk, scandir
from pprint import pprint
import sqlite3
# Matches downloaded media names of the form
# ``twitter_@<handle>_<tweet id>_<digit>...``. Group 1 is the numeric tweet id,
# group 2 is the single digit that follows it (presumably the media index
# within the tweet -- confirm against the downloader's filename template).
idpat = re.compile(r"^twitter_@[\w-]+_(\d+)_(\d).*$", re.ASCII)


def path_to_date(p: str) -> datetime:
    """Derive a timestamp from the tweet id embedded in a file name.

    The id is decoded as a snowflake-style value: the bits above the low 22
    are a millisecond counter, which is shifted by a fixed epoch offset and
    converted to a datetime.

    Args:
        p: Bare file name (the pattern is anchored at the start, so a name
            with leading directory components will not match).

    Returns:
        A naive ``datetime`` in the local timezone.

    Raises:
        ValueError: If ``p`` does not match ``idpat``, or if the id is below
            the smallest value this decoding is applied to.
    """
    match = idpat.search(p)
    if match is None:
        raise ValueError(f"not a recognised twitter media file name: {p!r}")

    tweet_id = int(match[1])
    # Ids below this bound presumably predate the snowflake scheme and carry
    # no timestamp -- TODO confirm the exact cut-over value.
    if tweet_id < 29700859247:
        raise ValueError(
            f"tweet id {tweet_id} in {p!r} is too small to contain a timestamp"
        )

    # Epoch offset in milliseconds (presumably Twitter's snowflake epoch).
    offset = 1288834974657
    return datetime.fromtimestamp(((tweet_id >> 22) + offset) / 1000)
def mover(root: str = "./out") -> None:
    """Sort the loose files in *root* into ``YYYY-MM`` sub-folders.

    Each regular (non-directory) entry directly inside ``root`` is moved into
    ``root/<year>-<month>``, where the month is taken from the file's
    modification time interpreted in the local timezone. Existing
    sub-directories are left untouched, so the function can be re-run safely.

    Args:
        root: Directory to organise. Defaults to ``"./out"``.

    Raises:
        shutil.Error: If a file of the same name already exists in the
            destination folder (raised by ``shutil.move``).
    """
    # Snapshot the listing before touching anything: creating folders and
    # moving files while a scandir iterator is live gives unspecified
    # results, and the ``with`` block releases the directory handle promptly.
    with scandir(root) as entries:
        loose_files = [entry for entry in entries if not entry.is_dir()]

    made_folders = set()
    for item in loose_files:
        # The modification time is used as the date; path_to_date(item.name)
        # would derive it from the tweet id in the file name instead.
        date = datetime.fromtimestamp(item.stat().st_mtime)
        newfolder = Path(root, date.strftime("%Y-%m"))
        if newfolder not in made_folders:
            newfolder.mkdir(exist_ok=True)
            made_folders.add(newfolder)
        shutil.move(item.path, newfolder)
def sqliter():
    """Rewrite stored file paths in the rclip database after files were moved.

    Walks ``./out`` to learn which sub-folder each matching file now lives
    in, then updates every ``images`` row whose ``filepath`` is still directly
    under ``PREFIX`` so that it points into that sub-folder. All updates run
    in one explicit transaction; if anything fails before ``COMMIT`` the
    connection is closed without committing, so no partial rewrite is kept.

    File names (on disk or in the database) that do not match ``idpat`` are
    skipped rather than aborting the run.
    """
    PREFIX = Path("/mnt/c/Users/bepvte/twitterlikes/out")
    top = Path("./out")

    # Key: tweet id digits concatenated with the trailing digit, as an int.
    # Value: name of the folder that now contains the file.
    idtodir = {}
    for dirpath, _dirs, files in walk("./out"):
        here = Path(dirpath)
        if here == top:
            # Files still loose in ./out were not moved; mapping them to the
            # folder name "out" would corrupt their (already correct) paths.
            continue
        for file in files:
            match = idpat.search(file)
            if match is None:
                continue
            idtodir[int(match[1] + match[2])] = here.name

    con = sqlite3.connect(
        "/home/bep/.local/share/rclip/openclip.sqlite3", isolation_level=None
    )
    try:
        # Echo every executed statement to stdout.
        con.set_trace_callback(print)

        # Only rows directly under PREFIX are candidates; the NOT LIKE clause
        # excludes paths that already contain a "202?-??" folder component.
        rows = con.execute(
            "SELECT id, filepath FROM images"
            " WHERE filepath LIKE ? AND filepath NOT LIKE '%/202_-__/%'",
            (f"{PREFIX}/%",),
        )
        changes = {}
        for dbid, filepath in rows:
            match = idpat.search(Path(filepath).name)
            if match is None:
                continue
            folder = idtodir.get(int(match[1] + match[2]))
            if folder is not None:
                changes[dbid] = folder

        # isolation_level=None means autocommit, so the transaction is
        # opened and committed by hand to batch all updates together.
        cur = con.cursor()
        cur.execute("BEGIN")
        for dbid, folder in changes.items():
            newpath = PREFIX.joinpath(folder)
            cur.execute(
                "UPDATE images SET filepath = replace(filepath, ?, ?) WHERE id = ?",
                (str(PREFIX), newpath.as_posix(), dbid),
            )
        cur.execute("COMMIT")
    finally:
        con.close()
def archiver():
    """Insert a key for every matching file under ``./out`` into the archive DB.

    For each file whose name matches ``idpat`` a key of the form
    ``twitter<id>_0_<digit>`` is built (presumably the downloader's archive
    entry format -- confirm), de-duplicated, and inserted into the ``archive``
    table of ``archive.sqlite3`` inside one explicit transaction. A running
    total is printed after each batch of up to 100 keys.

    File names that do not match ``idpat`` are skipped. If an insert fails
    the connection is closed without committing, so nothing is kept.
    """
    from itertools import islice

    PREFIX = Path("c:/Users/bepvte/twitterlikes")

    ids = set()
    for _dirpath, _dirs, files in walk("./out"):
        for file in files:
            match = idpat.search(file)
            if match is None:
                continue
            ids.add(f"twitter{match[1]}_0_{match[2]}")

    con = sqlite3.connect(PREFIX.joinpath("./archive.sqlite3"), isolation_level=None)
    try:
        cur = con.cursor()
        # isolation_level=None means autocommit, so the transaction is
        # managed by hand to commit all batches at once.
        cur.execute("BEGIN")
        count = 0
        pending = iter(ids)
        # Batches only exist to give periodic progress output.
        while batch := list(islice(pending, 100)):
            cur.executemany(
                "INSERT INTO archive VALUES (?)", ((key,) for key in batch)
            )
            count += len(batch)
            print(count)
        cur.execute("COMMIT")
    finally:
        con.close()
# Script entry point: running (or importing) this file immediately sorts the
# loose files in ./out into their YYYY-MM folders.
mover()
# Optional follow-up step, disabled by default: uncomment to also insert the
# organised files' keys into archive.sqlite3.
# archiver()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment