Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
reverse-engineered bit-for-bit identical Discourse archives
import gzip
import shutil
# Recompress the SQL dump into a gzip stream, then patch two header bytes that
# Python's gzip module does not let the caller choose.
with open("dump.sql", "rb") as infile, open("dump.sql.gz", "wb") as outfile:
    gzip_outfile = gzip.GzipFile(
        "",  # empty file name in raw gzip header
        mode="wb",
        compresslevel=4,  # passed by Discourse to `pg_dump`
        fileobj=outfile,  # emit the gzip stream into the file opened above
        # NOTE(review): a fixed MTIME keeps the header independent of the wall
        # clock; 0 is assumed to match the archive being reproduced -- confirm.
        mtime=0,
    )
    # Closing the GzipFile flushes the trailer (CRC32 + size) before the
    # underlying file is closed by the outer `with`.
    with gzip_outfile:
        shutil.copyfileobj(infile, gzip_outfile)

with open("dump.sql.gz", "rb+") as outfile:  # poke bytes without truncating
    # Without this seek the write below would overwrite the gzip magic bytes.
    outfile.seek(8)  # index of (XFL, OS); see RFC 1952, section 2.3.1
    outfile.write(b"\x00\x03")  # XFL = tradeoff compression ratio; OS = unix
import tarfile
# Monkey patch for an upstream CPython `tarfile` issue (the link to the issue
# was lost from this comment).
# Set `tarinfo=HackedTarInfo` in `tarfile.open` if using an old CPython
class HackedTarInfo(tarfile.TarInfo):
    """`TarInfo` variant that blanks a byte range in every header it emits.

    The header is produced by the stock `TarInfo._create_header`, selected
    byte ranges are overwritten, and the header checksum is recomputed so the
    block stays valid.
    """

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return the tar header for *info* with the byte patches applied.

        The parameters are forwarded unchanged to
        `tarfile.TarInfo._create_header`; the result is a `bytes` object of
        the same length as the stock header.

        This must be a staticmethod like the method it overrides: `tarfile`
        invokes it as `self._create_header(info, ...)`, so a plain function
        would receive the instance as an extra first argument.
        """
        buf = tarfile.TarInfo._create_header(info, format, encoding, errors)

        # (offset, replacement bytes) pairs spliced over the stock header.
        # NOTE(review): 0x148..0x157 appears to cover the devmajor/devminor
        # fields, which older CPython fills with ASCII zeros -- confirm
        # against the upstream issue.
        modifications = [
            (0x148, b"\x00" * 16),
        ]
        for (start, splice) in modifications:
            buf = buf[:start] + splice + buf[start + len(splice) :]

        # The checksum covers the bytes just changed, so recompute it and
        # write it back into the chksum field of the final 512-byte block
        # (offsets -364..-358 from the end, i.e. 148..154 of that block).
        chksum = tarfile.calc_chksums(buf[-tarfile.BLOCKSIZE :])[0]
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf
# Wrap the compressed dump in a GNU-format tar archive with hand-set metadata.
# mode="x" creates the archive exclusively and fails if it already exists.
# On an old CPython, also pass `tarinfo=HackedTarInfo` to `tarfile.open`.
with tarfile.open(
    "XXXXXXXX.tar", mode="x", format=tarfile.GNU_FORMAT
) as outfile:
    # Build the member metadata from the file on disk, then override the
    # fields that depend on the machine the archive is built on.
    with open("dump.sql.gz", "rb") as infile:
        info = outfile.gettarinfo("dump.sql.gz", fileobj=infile)
    info.uid = 0o1753  # 1003 decimal
    info.gid = 0o0041  # 33 decimal
    info.mode = 0o644
    info.uname = "discourse"
    info.gname = "www-data"
    info.mtime = int("XXXXXXXXXX")  # fixme: install actual mtime

    # Reopen so the payload is read from the start of the file.
    with open("dump.sql.gz", "rb") as infile:
        outfile.addfile(info, fileobj=infile)
# Compress the tar archive at gzip's fastest level; -k keeps XXXXXXXX.tar.
gzip --fast -k XXXXXXXX.tar
# Overwrite 4 bytes at byte offset 4 of the .gz in place (conv=notrunc leaves
# the rest of the file intact). Offset 4 is the MTIME field of the gzip
# header; see RFC 1952, section 2.3.1.
printf '\xAA\xBB\xCC\xDD' | dd of=XXXXXXXX.tar.gz conv=notrunc bs=1 seek=4
# ^ replace bytes with actual mtime (Unix format)
# NOTE(review): RFC 1952 stores multi-byte header fields least-significant
# byte first -- confirm the byte order of the replacement value.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.