Skip to content

Instantly share code, notes, and snippets.

@patbeagan1
Last active April 30, 2021 21:19
Show Gist options
  • Save patbeagan1/ee4b609aac4d332a9eddb7f19a6676ca to your computer and use it in GitHub Desktop.
Save patbeagan1/ee4b609aac4d332a9eddb7f19a6676ca to your computer and use it in GitHub Desktop.
Generates a set of chunks for a given file. The chunks are named in such a way that each chunk is able to find the hash of the next one. This means that the file is circular: given any chunk, the rest of the file can be recovered.
#!python3
from hashlib import sha256
import sys
import os
import random
import difflib
# Directory (with trailing slash) that Writer puts the chunk files in.
prefix = "out/"
# File extension appended to every chunk file name.
suffix = ".plasmid"
# Placeholder name of the first chunk; Writer renames that chunk once the
# hash produced for the last chunk is known.
origin = b'0'
# Name (as bytes) under which the next chunk will be written; Writer
# replaces it with each newly computed hash.
prev = origin
# Number of payload bytes read from the input file per chunk.
num_bytes = 48
def hashme(it: bytes) -> str:
    """Return the SHA-256 digest of *it* as a 64-character hex string.

    Args:
        it: The bytes to hash.

    Returns:
        The lowercase hexadecimal digest.
    """
    return sha256(it).hexdigest()
def to_bytes(it: str) -> bytes:
    """Encode the text *it* as UTF-8 bytes.

    Args:
        it: The string to encode.

    Returns:
        The UTF-8 encoding of *it*.
    """
    return it.encode("utf-8")
class Writer:
    """Split a file into a circular chain of hash-named chunk files.

    Every chunk file holds up to ``num_bytes`` bytes of payload followed by
    the 64-character hex SHA-256 digest that names the next chunk. The
    final chunk (the only one whose payload is shorter than ``num_bytes``,
    possibly empty) names the first chunk, which closes the ring.
    """

    def process_data(self, filename, piece):
        """Write *piece* as the next chunk and advance the chain.

        The chunk is stored under the name held in the module-level
        ``prev``; afterwards ``prev`` is replaced by the digest computed
        here, which is also appended to the chunk as the link to the next
        one.

        Args:
            filename: Path of the file being split; mixed into the digest.
            piece: Payload bytes for this chunk.
        """
        global prev
        out_file = prefix + prev.decode("utf-8") + suffix
        # Fold the previous link into the digest. Hashing only
        # filename + piece gives identical pieces identical names, so a
        # file with repeated content would overwrite its own chunks.
        lhash = hashme(prev + to_bytes(filename) + piece).encode("utf-8")
        with open(out_file, "wb") as out:
            out.write(piece + lhash)
        # Advance only after the chunk is safely on disk.
        prev = lhash

    def write_plasmid(self, filename):
        """Write *filename* as a ring of chunk files under ``prefix``.

        Args:
            filename: Path of the file to split.

        Raises:
            OSError: If the input cannot be read or a chunk cannot be
                written.
        """
        global prev
        with open(filename, 'rb') as f:
            os.makedirs(prefix, exist_ok=True)
            # Start every run at the origin; otherwise a second call in the
            # same process would continue the previous file's chain and the
            # final rename of the origin chunk would fail.
            prev = origin
            while True:
                piece = f.read(num_bytes)
                self.process_data(filename, piece)
                # A short (possibly empty) read marks the last chunk.
                if len(piece) < num_bytes:
                    break
        # Close the ring: the first chunk takes the name the last chunk
        # points to. os.replace overwrites a leftover file from an earlier
        # run on every platform, where os.rename fails on Windows.
        os.replace(prefix + origin.decode() + suffix,
                   prefix + prev.decode() + suffix)
class Reader:
    """Reassemble a file by walking its ring of chunk files."""

    def __init__(self):
        # Payload collected from the starting chunk up to and including the
        # short chunk that ends the original file.
        self.content_bottom = b''
        # Payload collected after wrapping around to the start of the file.
        self.content_top = b''
        # True until the short (final) chunk has been seen.
        self.is_content_bottom = True
        # Paths of the chunk files already consumed.
        self.visited = set()

    def read_plasmid(self, filename_outer):
        """Return the original file content, starting from any chunk.

        Each chunk ends with a 64-byte hex digest naming the next chunk.
        Chunks are followed until one repeats or is missing. The chunk that
        is smaller than ``num_bytes + 64`` bytes marks the end of the
        original file; everything read after it belongs in front of what
        was read before it.

        Args:
            filename_outer: Path of any chunk file of the ring. The other
                chunks are looked up in the same directory.

        Returns:
            The reassembled bytes. If a chunk is missing, a message is
            printed and the content gathered so far is returned.
        """
        # Reset so that one instance can be used for several reads.
        self.content_bottom = b''
        self.content_top = b''
        self.is_content_bottom = True
        self.visited = set()

        # Resolve every chunk against the directory of the starting chunk
        # rather than the current working directory.
        base_dir = os.path.dirname(filename_outer)
        current = os.path.join(base_dir, os.path.basename(filename_outer))

        bottom_parts = []
        top_parts = []
        # Iterate instead of recursing: one call frame per chunk would hit
        # the interpreter's recursion limit after about a thousand chunks.
        while current not in self.visited:
            try:
                with open(current, "rb") as f:
                    filecontent = f.read()
            except FileNotFoundError:
                print("Pieces were missing!")
                break
            content = filecontent[:-64]
            lhash = filecontent[-64:]
            if self.is_content_bottom:
                bottom_parts.append(content)
                if len(filecontent) < (num_bytes + 64):
                    self.is_content_bottom = False
            else:
                top_parts.append(content)
            self.visited.add(current)
            current = os.path.join(base_dir, lhash.decode() + suffix)

        self.content_bottom = b''.join(bottom_parts)
        self.content_top = b''.join(top_parts)
        return self.content_top + self.content_bottom
def parse(command, filename, compare_to=None):
    """Run one of the command-line actions.

    Args:
        command: ``'r'`` reassemble and print the ring that chunk
            *filename* belongs to; ``'w'`` split *filename* into chunks;
            ``'c'`` reassemble from chunk *filename*, compare with another
            file and write ``diff.html``; ``'rw'`` split *filename*, read
            it back and print whether the round trip is exact.
        filename: A chunk file for ``'r'`` and ``'c'``, the file to split
            for ``'w'`` and ``'rw'``.
        compare_to: File to compare against for ``'c'``. Defaults to
            ``sys.argv[3]``.

    Returns:
        The reassembled bytes for ``'r'``; ``None`` otherwise.

    Raises:
        NotImplementedError: If *command* is not one of the above.
    """
    if command == 'r':
        print(filename)
        content = Reader().read_plasmid(filename)
        # Display only: undecodable bytes must not prevent the return.
        print(content.decode(errors="replace"))
        return content
    elif command == 'w':
        Writer().write_plasmid(filename)
    elif command == 'c':
        if compare_to is None:
            compare_to = sys.argv[3]
        content = Reader().read_plasmid(filename)
        with open(compare_to, "rb") as f:
            expected = f.read()
        # Compare raw bytes so newline translation cannot skew the result.
        print(content == expected)
        # HtmlDiff expects lists of lines; passing whole strings would
        # diff them character by character.
        fromlines = content.decode(errors="replace").splitlines()
        tolines = expected.decode(errors="replace").splitlines()
        s = difflib.HtmlDiff().make_file(fromlines=fromlines, tolines=tolines)
        with open("diff.html", "w", encoding="utf-8") as d:
            d.write(s)
    elif command == 'rw':
        parse('w', filename)
        # After writing, the module-level ``prev`` holds the name the first
        # chunk was given. Read from inside the chunk directory so that the
        # chunk names resolve however the reader locates them.
        first_chunk = prev.decode() + suffix
        cwd = os.getcwd()
        os.chdir(prefix)
        try:
            content = parse('r', first_chunk)
        finally:
            os.chdir(cwd)
        with open(filename, "rb") as f:
            print(content == f.read())
    else:
        raise NotImplementedError(f"unknown command: {command!r}")
if __name__ == "__main__":
    # Fail with a usage line instead of an IndexError traceback when the
    # command or the file name is missing.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <r|w|c|rw> <filename> [compare-file]")
    command = sys.argv[1]
    filename = sys.argv[2]
    parse(command, filename)
@patbeagan1
Copy link
Author

Todo - put the hash of the next file in the original, so that we can check to make sure it has not been modified.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment