sueszli/git-lfs-exploit.py

## git-lfs-exploit.py
import hashlib
import sys
import pathlib
import subprocess


"""
github commits are restricted to 25-50 MiB, varying based on the push method [^1].
to handle files beyond this limit, git lfs (large file storage) pointers are necessary, referencing an external lfs server [^2].
however, this method incurs a monthly cloud storage fee to github [^3].

this is a failed attempt at bypassing the file size limit by committing a large file in small chunks:

> remote: warning: File huge-ass-file.tar is 60.00 MB; this is larger than GitHub's recommended maximum file size of 50.00 MB
> remote: error: Trace: 2fa983a46f7b5205ea9bbef6e118069f7426f07618935e67ed6225df9647d617
> remote: error: See https://gh.io/lfs for more information.
> ...
> remote: error: File huge-ass-file.tar is 150.00 MB; this exceeds GitHub's file size limit of 100.00 MB
> remote: error: File huge-ass-file.tar is 200.00 MB; this exceeds GitHub's file size limit of 100.00 MB

[^1]: docs: https://docs.github.com/en/repositories/working-with-files/managing-large-files/about-large-files-on-github#file-size-limits
[^2]: nice comment: wokwokwok, 2021 on hackernews, https://news.ycombinator.com/item?id=27134972#:~:text=of%20such%20projects-,wokwokwok,-on%20May%2013
[^3]: https://docs.github.com/en/billing/managing-billing-for-git-large-file-storage/about-billing-for-git-large-file-storage
"""


def assert_matching_checksums(filepath1: pathlib.Path, filepath2: pathlib.Path) -> None:
    print(f"verifying checksums...")
    checksum1 = hashlib.md5(pathlib.Path(filepath1).read_bytes()).hexdigest()
    checksum2 = hashlib.md5(pathlib.Path(filepath2).read_bytes()).hexdigest()
    assert checksum1 == checksum2, f"checksums do not match: {checksum1} != {checksum2}"
    print(f"checksums match: {checksum1} == {checksum2}")


def assert_matching_filesizes(filepath1: pathlib.Path, filepath2: pathlib.Path) -> None:
    print(f"verifying file sizes...")
    filesize1 = filepath1.stat().st_size
    filesize2 = filepath2.stat().st_size
    assert filesize1 == filesize2, f"file sizes do not match: {filesize1} != {filesize2}"
    print(f"file sizes match: {filesize1} == {filesize2}")


if __name__ == "__main__":
    file = pathlib.Path(sys.argv[1])

    assert pathlib.Path(".git").exists(), "put this script inside the git directory you want to copy the file to"
    assert file.exists(), f"file does not exist: {file}"
    assert file.is_file(), f"not a file: {file}"
    assert not any([sibling.name == ".git" for sibling in list(file.parent.glob("*"))]), f"{file} should not be in a .git directory"
    filesize = file.stat().st_size
    print(f"{file.name} size: {file.stat().st_size}")

    print(f"copying and committing chunks to github...")
    chunk_size = 30 * 1024 * 1024
    num_chunks = (file.stat().st_size // chunk_size) + 1

    with open(file.name, "wb") as f:
        pass

    for i in range(num_chunks):
        with open(file, "rb") as f:
            # read
            f.seek(i * chunk_size)
            chunk = f.read(chunk_size)
            if not chunk:
                print(f"no more chunks to read at iteration {i}")
                break

            # append to file in this directory
            with open(file.name, "ab") as g:
                g.write(chunk)

            # push to github
            subprocess.run(["git", "add", file.name])
            subprocess.run(["git", "commit", "-m", f"git lfs exploit auto commit: {file.name} - {i}/{num_chunks}"])
            subprocess.run(["git", "push"])
            print(f"\033[92mprogress: {i}/{num_chunks} \033[0m")

    assert_matching_checksums(file, pathlib.Path(file.name))
    assert_matching_filesizes(file, pathlib.Path(file.name))
    print(f"finished! {file.name} pushed to github")
	import hashlib
	import sys
	import pathlib
	import subprocess


	"""
	github commits are restricted to 25-50 MiB, varying based on the push method [^1].
	to handle files beyond this limit, git lfs (large file storage) pointers are necessary, referencing an external lfs server [^2].
	however, this method incurs a monthly cloud storage fee to github [^3].

	this is a failed attempt at bypassing the file size limit by committing a large file in small chunks:

	> remote: warning: File huge-ass-file.tar is 60.00 MB; this is larger than GitHub's recommended maximum file size of 50.00 MB
	> remote: error: Trace: 2fa983a46f7b5205ea9bbef6e118069f7426f07618935e67ed6225df9647d617
	> remote: error: See https://gh.io/lfs for more information.
	> ...
	> remote: error: File huge-ass-file.tar is 150.00 MB; this exceeds GitHub's file size limit of 100.00 MB
	> remote: error: File huge-ass-file.tar is 200.00 MB; this exceeds GitHub's file size limit of 100.00 MB

	[^1]: docs: https://docs.github.com/en/repositories/working-with-files/managing-large-files/about-large-files-on-github#file-size-limits
	[^2]: nice comment: wokwokwok, 2021 on hackernews, https://news.ycombinator.com/item?id=27134972#:~:text=of%20such%20projects-,wokwokwok,-on%20May%2013
	[^3]: https://docs.github.com/en/billing/managing-billing-for-git-large-file-storage/about-billing-for-git-large-file-storage
	"""


	def assert_matching_checksums(filepath1: pathlib.Path, filepath2: pathlib.Path) -> None:
	print(f"verifying checksums...")
	checksum1 = hashlib.md5(pathlib.Path(filepath1).read_bytes()).hexdigest()
	checksum2 = hashlib.md5(pathlib.Path(filepath2).read_bytes()).hexdigest()
	assert checksum1 == checksum2, f"checksums do not match: {checksum1} != {checksum2}"
	print(f"checksums match: {checksum1} == {checksum2}")


	def assert_matching_filesizes(filepath1: pathlib.Path, filepath2: pathlib.Path) -> None:
	print(f"verifying file sizes...")
	filesize1 = filepath1.stat().st_size
	filesize2 = filepath2.stat().st_size
	assert filesize1 == filesize2, f"file sizes do not match: {filesize1} != {filesize2}"
	print(f"file sizes match: {filesize1} == {filesize2}")


	if __name__ == "__main__":
	file = pathlib.Path(sys.argv[1])

	assert pathlib.Path(".git").exists(), "put this script inside the git directory you want to copy the file to"
	assert file.exists(), f"file does not exist: {file}"
	assert file.is_file(), f"not a file: {file}"
	assert not any([sibling.name == ".git" for sibling in list(file.parent.glob("*"))]), f"{file} should not be in a .git directory"
	filesize = file.stat().st_size
	print(f"{file.name} size: {file.stat().st_size}")

	print(f"copying and committing chunks to github...")
	chunk_size = 30 * 1024 * 1024
	num_chunks = (file.stat().st_size // chunk_size) + 1

	with open(file.name, "wb") as f:
	pass

	for i in range(num_chunks):
	with open(file, "rb") as f:
	# read
	f.seek(i * chunk_size)
	chunk = f.read(chunk_size)
	if not chunk:
	print(f"no more chunks to read at iteration {i}")
	break

	# append to file in this directory
	with open(file.name, "ab") as g:
	g.write(chunk)

	# push to github
	subprocess.run(["git", "add", file.name])
	subprocess.run(["git", "commit", "-m", f"git lfs exploit auto commit: {file.name} - {i}/{num_chunks}"])
	subprocess.run(["git", "push"])
	print(f"\033[92mprogress: {i}/{num_chunks} \033[0m")

	assert_matching_checksums(file, pathlib.Path(file.name))
	assert_matching_filesizes(file, pathlib.Path(file.name))
	print(f"finished! {file.name} pushed to github")