Created
January 20, 2020 17:48
-
-
Save wchargin/3906cf9a1f9c9d58909de1b59ce1da6c to your computer and use it in GitHub Desktop.
reverse-engineered bit-for-bit identical Discourse archives
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gzip | |
import shutil | |
# Compress dump.sql into dump.sql.gz with a fixed gzip header (empty file
# name, zero MTIME) so that repeated runs produce the same bytes.
with open("dump.sql", "rb") as raw_dump, open("dump.sql.gz", "wb") as gz_target:
    with gzip.GzipFile(
        filename="",  # empty file name in raw gzip header
        fileobj=gz_target,
        compresslevel=4,  # passed by Discourse to `pg_dump`
        mtime=0,
    ) as compressor:
        shutil.copyfileobj(raw_dump, compressor)

# Patch two header bytes in place. "rb+" opens for update without
# truncating, so only the bytes written below change.
with open("dump.sql.gz", "rb+") as gz_target:
    gz_target.seek(8)  # index of (XFL, OS); see RFC 1952, section 2.3.1
    gz_target.write(bytes([0x00, 0x03]))  # XFL = tradeoff compression ratio; OS = unix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tarfile | |
# Monkey patch for <https://bugs.python.org/issue18819> | |
# Set `tarinfo=HackedTarInfo` in `tarfile.open` if using an old CPython | |
class HackedTarInfo(tarfile.TarInfo):
    """`TarInfo` variant that blanks the device-number fields of a header.

    Workaround for <https://bugs.python.org/issue18819>: for members that
    are not character or block devices, the 16 bytes holding `devmajor`
    and `devminor` are replaced with NULs and the header checksum is
    recomputed. Headers of real device members are returned untouched so
    their device numbers survive.
    """

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return the 512-byte header block for `info`.

        Takes the same arguments as `tarfile.TarInfo._create_header`,
        which builds the initial block: `info` is the member's field
        dict, `format` the tar format constant, and `encoding`/`errors`
        control how text fields are encoded.
        """
        buf = tarfile.TarInfo._create_header(info, format, encoding, errors)

        # Device members legitimately carry device numbers; leave them be.
        if info.get("type") in (tarfile.CHRTYPE, tarfile.BLKTYPE):
            return buf

        # Header layout: gname occupies bytes 297-328, devmajor 329-336 and
        # devminor 337-344. Starting at 329 (not 328) keeps the last byte
        # of a full-length 32-byte gname intact.
        device_fields_start = 329
        device_fields_length = 16
        device_fields_end = device_fields_start + device_fields_length
        buf = (
            buf[:device_fields_start]
            + b"\x00" * device_fields_length
            + buf[device_fields_end:]
        )

        # The checksum field starts at byte 148. Rewrite its first seven
        # bytes (six octal digits and a NUL) and keep the eighth as is.
        chksum_start = 148
        chksum = tarfile.calc_chksums(buf[-tarfile.BLOCKSIZE :])[0]
        chksum_bytes = bytes("%06o\0" % chksum, "ascii")
        buf = buf[:chksum_start] + chksum_bytes + buf[chksum_start + len(chksum_bytes) :]
        return buf
# Wrap dump.sql.gz in a new GNU-format tar archive. Mode "x" creates the
# archive exclusively and fails if XXXXXXXX.tar already exists.
with tarfile.open("XXXXXXXX.tar", mode="x", format=tarfile.GNU_FORMAT) as archive:
    # Start from the metadata of the file on disk ...
    with open("dump.sql.gz", "rb") as payload:
        member = archive.gettarinfo("dump.sql.gz", fileobj=payload)

    # ... then overwrite ownership, permissions and timestamp with fixed
    # values (presumably those of the original Discourse host; verify).
    member.uid, member.gid = 0o1753, 0o0041
    member.mode = 0o644
    member.uname, member.gname = "discourse", "www-data"
    member.mtime = int("XXXXXXXXXX")  # fixme: install actual mtime

    # Diagnostic output: member type and which kind of entry it is.
    for probe in (member.type, member.ischr(), member.isblk(), member.isreg()):
        print(probe)

    # Write the header followed by the file contents.
    with open("dump.sql.gz", "rb") as payload:
        archive.addfile(member, fileobj=payload)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compress the tar at the fastest level (-1 is the short form of --fast);
# -k keeps XXXXXXXX.tar next to the new XXXXXXXX.tar.gz.
gzip -1 -k XXXXXXXX.tar
# Overwrite the four bytes at offset 4 of the .gz in place; conv=notrunc
# stops dd from truncating the rest of the file.
# NOTE: RFC 1952 stores the header's multi-byte values least-significant
# byte first, so the mtime bytes go in little-endian order.
printf '\xAA\xBB\xCC\xDD' | dd of=XXXXXXXX.tar.gz bs=1 seek=4 conv=notrunc
#       ^ replace bytes with actual mtime (Unix format)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment