Skip to content

Instantly share code, notes, and snippets.

@wchargin
Created January 20, 2020 17:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wchargin/3906cf9a1f9c9d58909de1b59ce1da6c to your computer and use it in GitHub Desktop.
Save wchargin/3906cf9a1f9c9d58909de1b59ce1da6c to your computer and use it in GitHub Desktop.
reverse-engineered bit-for-bit identical Discourse archives
import gzip
import shutil
# Step 1: compress the SQL dump into a gzip stream with a fixed header.
# An empty file name keeps the FNAME field out of the gzip header, and
# mtime=0 pins the header's MTIME field.
with open("dump.sql", "rb") as sql_dump, open("dump.sql.gz", "wb") as gz_target:
    with gzip.GzipFile(
        filename="",  # empty file name in raw gzip header
        fileobj=gz_target,
        compresslevel=4,  # passed by Discourse to `pg_dump`
        mtime=0,
    ) as compressed:
        shutil.copyfileobj(sql_dump, compressed)

# Step 2: patch two header bytes in place. "rb+" opens the file for
# update without truncating it, so only the bytes written here change.
with open("dump.sql.gz", "rb+") as gz_header:
    # Bytes 8 and 9 of a gzip member are XFL and OS (RFC 1952, section 2.3.1).
    gz_header.seek(8)
    # XFL = 0x00 (neither "max compression" nor "fastest"); OS = 0x03 (Unix).
    gz_header.write(b"\x00\x03")
import tarfile
# Monkey patch for <https://bugs.python.org/issue18819>
# Set `tarinfo=HackedTarInfo` in `tarfile.open` if using an old CPython
class HackedTarInfo(tarfile.TarInfo):
    """`TarInfo` variant that NUL-fills unused device-number header fields.

    Workaround for <https://bugs.python.org/issue18819>: affected CPython
    versions write devmajor/devminor values into the header of every
    member, including members that are not devices. This subclass blanks
    those two fields for non-device members and then repairs the header
    checksum. Character and block device members are passed through
    untouched so that their real device numbers are preserved.
    """

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return the header block for `info` with unused device fields blanked.

        Args:
            info: Member metadata dict, as produced by `TarInfo.get_info()`.
            format: One of the `tarfile` format constants.
            encoding: Encoding used for the textual header fields.
            errors: Error handler used when encoding those fields.

        Returns:
            The header as `bytes`. For members other than character/block
            devices, the 16 bytes of devmajor + devminor are NUL and the
            checksum field is recomputed to match.
        """
        buf = tarfile.TarInfo._create_header(info, format, encoding, errors)

        # Device members legitimately carry major/minor numbers; keep them.
        if info.get("type") in (tarfile.CHRTYPE, tarfile.BLKTYPE):
            return buf

        modifications = [
            # devmajor (8 bytes) + devminor (8 bytes) start at offset 329,
            # immediately after the 32-byte gname field (offsets 297..328).
            (329, b"\x00" * 16),
        ]
        for start, splice in modifications:
            buf = buf[:start] + splice + buf[start + len(splice) :]

        # The header bytes changed, so the stored checksum must be redone.
        # The slice bounds address the chksum field relative to the end of
        # the 512-byte block: six octal digits plus a NUL at offsets 148..154.
        chksum = tarfile.calc_chksums(buf[-tarfile.BLOCKSIZE :])[0]
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf
# Wrap the compressed dump in a GNU-format tar archive whose single member
# carries fixed ownership, permissions and timestamp. mode="x" creates the
# archive exclusively, so an already existing file is an error.
with tarfile.open("XXXXXXXX.tar", mode="x", format=tarfile.GNU_FORMAT) as archive:
    # Seed the member metadata (name, size, ...) from the file on disk.
    with open("dump.sql.gz", "rb") as probe:
        member = archive.gettarinfo("dump.sql.gz", fileobj=probe)

    # Replace the local owner/permission/time values with the target ones.
    member.uid, member.gid = 0o1753, 0o0041
    member.mode = 0o644
    member.uname, member.gname = "discourse", "www-data"
    member.mtime = int("XXXXXXXXXX")  # fixme: install actual mtime

    # Diagnostic output: member type flag and what kind of file it is.
    print(member.type, member.ischr(), member.isblk(), member.isreg(), sep="\n")

    # Reopen the file so the payload is read from the start into the archive.
    with open("dump.sql.gz", "rb") as payload:
        archive.addfile(member, fileobj=payload)
# Compress the tarball at gzip's fastest level (-1), keeping the input .tar.
gzip -1 --keep XXXXXXXX.tar
# Overwrite the 4-byte MTIME field at offset 4 of the gzip header in place.
# Replace AA BB CC DD with the actual mtime (Unix format; RFC 1952 stores
# multi-byte fields least-significant byte first).
printf '\xAA\xBB\xCC\xDD' | dd of=XXXXXXXX.tar.gz bs=1 seek=4 conv=notrunc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment