Skip to content

Instantly share code, notes, and snippets.

@sueszli
Last active April 19, 2024 13:44
Show Gist options
  • Save sueszli/c8bd7ec5d821e281be9cabcf2fa51fef to your computer and use it in GitHub Desktop.
Save sueszli/c8bd7ec5d821e281be9cabcf2fa51fef to your computer and use it in GitHub Desktop.
bypassing github storage service
import hashlib
import sys
import pathlib
import subprocess
"""
github commits are restricted to 25-50 MiB, varying based on the push method [^1].
to handle files beyond this limit, git lfs (large file storage) pointers are necessary, referencing an external lfs server [^2].
however, this method incurs a monthly cloud storage fee to github [^3].
this is a failed attempt at bypassing the file size limit by committing a large file in small chunks:
> remote: warning: File huge-ass-file.tar is 60.00 MB; this is larger than GitHub's recommended maximum file size of 50.00 MB
> remote: error: Trace: 2fa983a46f7b5205ea9bbef6e118069f7426f07618935e67ed6225df9647d617
> remote: error: See https://gh.io/lfs for more information.
> ...
> remote: error: File huge-ass-file.tar is 150.00 MB; this exceeds GitHub's file size limit of 100.00 MB
> remote: error: File huge-ass-file.tar is 200.00 MB; this exceeds GitHub's file size limit of 100.00 MB
[^1]: docs: https://docs.github.com/en/repositories/working-with-files/managing-large-files/about-large-files-on-github#file-size-limits
[^2]: nice comment: wokwokwok, 2021 on hackernews, https://news.ycombinator.com/item?id=27134972#:~:text=of%20such%20projects-,wokwokwok,-on%20May%2013
[^3]: https://docs.github.com/en/billing/managing-billing-for-git-large-file-storage/about-billing-for-git-large-file-storage
"""
def assert_matching_checksums(filepath1: pathlib.Path, filepath2: pathlib.Path) -> None:
    """Verify that two files have identical MD5 digests.

    Each file is hashed in fixed-size pieces, so memory use stays constant
    regardless of file size. MD5 serves here purely as a content fingerprint
    for the comparison.

    Args:
        filepath1: Path of the first file.
        filepath2: Path of the second file.

    Raises:
        AssertionError: If the two digests differ. Raised explicitly (rather
            than via an ``assert`` statement) so the check is not stripped
            when Python runs with ``-O``.
        OSError: If either file cannot be opened or read.
    """
    piece_size = 1024 * 1024  # hash 1 MiB at a time

    def md5_of(filepath) -> str:
        # Stream the file through the hash instead of loading it whole.
        digest = hashlib.md5()
        with open(filepath, "rb") as stream:
            for piece in iter(lambda: stream.read(piece_size), b""):
                digest.update(piece)
        return digest.hexdigest()

    print("verifying checksums...")
    checksum1 = md5_of(filepath1)
    checksum2 = md5_of(filepath2)
    if checksum1 != checksum2:
        raise AssertionError(f"checksums do not match: {checksum1} != {checksum2}")
    print(f"checksums match: {checksum1} == {checksum2}")
def assert_matching_filesizes(filepath1: pathlib.Path, filepath2: pathlib.Path) -> None:
    """Verify that two files have the same size in bytes.

    Args:
        filepath1: Path of the first file.
        filepath2: Path of the second file.

    Raises:
        AssertionError: If the sizes differ. Raised explicitly (rather than
            via an ``assert`` statement) so the check is not stripped when
            Python runs with ``-O``.
        OSError: If either file cannot be stat'ed.
    """
    print("verifying file sizes...")
    # Coerce to Path so plain string paths are accepted as well.
    filesize1 = pathlib.Path(filepath1).stat().st_size
    filesize2 = pathlib.Path(filepath2).stat().st_size
    if filesize1 != filesize2:
        raise AssertionError(f"file sizes do not match: {filesize1} != {filesize2}")
    print(f"file sizes match: {filesize1} == {filesize2}")
if __name__ == "__main__":
    # Usage: run from the root of the target git repository, passing the path
    # of the large file as the only argument. The file is copied into the
    # current directory chunk by chunk; after every appended chunk the copy is
    # added, committed and pushed.
    if len(sys.argv) < 2:
        raise SystemExit(f"usage: {sys.argv[0]} <path-to-large-file>")

    file = pathlib.Path(sys.argv[1])
    # The copy is written to the current directory under the source's basename.
    target = pathlib.Path(file.name)

    # Explicit checks instead of `assert`, so they still run under `python -O`.
    if not pathlib.Path(".git").exists():
        raise SystemExit("put this script inside the git directory you want to copy the file to")
    if not file.exists():
        raise SystemExit(f"file does not exist: {file}")
    if not file.is_file():
        raise SystemExit(f"not a file: {file}")
    # The source's directory must not contain a `.git` entry. Because the
    # current directory does contain one, this also rejects a source located
    # in the current directory, which would otherwise be truncated below.
    if (file.parent / ".git").exists():
        raise SystemExit(f"{file} should not be in a .git directory")

    filesize = file.stat().st_size
    print(f"{file.name} size: {filesize}")

    print("copying and committing chunks to github...")
    chunk_size = 30 * 1024 * 1024
    # Ceiling division: an exact multiple of chunk_size must not count an
    # extra, empty chunk.
    num_chunks = -(-filesize // chunk_size)

    # Start from an empty copy so that re-runs do not append to stale data.
    with open(target, "wb"):
        pass

    # Open the source once and read it sequentially.
    with open(file, "rb") as src:
        for i in range(1, num_chunks + 1):
            chunk = src.read(chunk_size)
            if not chunk:
                # Only reachable if the source shrank after its size was read.
                print(f"no more chunks to read at iteration {i}")
                break

            # append to file in this directory
            with open(target, "ab") as dst:
                dst.write(chunk)

            # push to github; check=True aborts with CalledProcessError as
            # soon as any git step fails instead of continuing silently
            git_steps = (
                ["git", "add", file.name],
                ["git", "commit", "-m", f"git lfs exploit auto commit: {file.name} - {i}/{num_chunks}"],
                ["git", "push"],
            )
            for step in git_steps:
                subprocess.run(step, check=True)

            print(f"\033[92mprogress: {i}/{num_chunks} \033[0m")

    assert_matching_checksums(file, target)
    assert_matching_filesizes(file, target)
    print(f"finished! {file.name} pushed to github")
@sueszli
Copy link
Author

sueszli commented Apr 19, 2024

update: the script above worked just fine and it was surprisingly easy to set up

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment