Last active
April 30, 2021 21:19
-
-
Save patbeagan1/ee4b609aac4d332a9eddb7f19a6676ca to your computer and use it in GitHub Desktop.
Generates a set of chunks for a given file. The chunks are named in such a way that each chunk contains the hash that names the next one. This means that the file is circular: given any chunk, the rest of the file can be recovered.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!python3 | |
import difflib
import os
import random
import sys
from hashlib import sha256
# Directory (including the trailing slash) that chunk files are written into.
prefix = "out/"
# File extension appended to every chunk file name.
suffix = ".plasmid"
# Placeholder link name: the first chunk is written under this name and is
# renamed once the final link hash is known.
origin = b'0'
# Name (as bytes) under which the next chunk will be written; Writer updates it.
prev = origin
# Number of payload bytes stored in each chunk.
num_bytes = 48
def hashme(it: bytes) -> str:
    """Return the SHA-256 digest of ``it`` as a lowercase hex string."""
    return sha256(it).hexdigest()
def to_bytes(it: str) -> bytes:
    """Encode the text ``it`` as UTF-8 bytes."""
    return it.encode("utf-8")
class Writer:
    """Split a file into chunk files that form a circular linked chain.

    Each chunk file holds up to ``num_bytes`` bytes of payload followed by a
    hex SHA-256 link. The link is the name (without ``prefix``/``suffix``) of
    the file that holds the next chunk. The first chunk is renamed to the
    final link so that the last chunk points back at the first one.
    """

    def process_data(self, filename, piece):
        """Write one chunk under the current link name and advance the chain.

        Args:
            filename: Path of the file being split; it is mixed into the hash.
            piece: Payload bytes for this chunk (may be empty).
        """
        global prev
        out_file = prefix + prev.decode("utf-8") + suffix
        # Chain the previous link into the hash. Hashing only filename + piece
        # gives identical chunks identical links, so a repeated chunk would
        # overwrite an earlier chunk file and corrupt the ring.
        lhash = hashme(to_bytes(filename) + prev + piece).encode("utf-8")
        with open(out_file, "wb") as out:
            out.write(piece + lhash)
        prev = lhash

    def write_plasmid(self, filename):
        """Split ``filename`` into chunk files under ``prefix``.

        Args:
            filename: Path of the file to split.

        Raises:
            OSError: If the input cannot be read or a chunk cannot be written.
        """
        global prev
        # Always start from the origin link so a second call in the same
        # process does not continue from the previous run's last hash.
        prev = origin
        os.makedirs(prefix, exist_ok=True)
        with open(filename, 'rb') as f:
            while True:
                piece = f.read(num_bytes)
                self.process_data(filename, piece)
                # A short (possibly empty) read marks the final chunk.
                if len(piece) < num_bytes:
                    break
        # Close the ring: the first chunk takes the name the last chunk links
        # to. os.replace overwrites a stale file from an earlier run on every
        # platform, where os.rename would fail on Windows.
        os.replace(prefix + origin.decode() + suffix,
                   prefix + prev.decode() + suffix)
class Reader:
    """Reassemble a file by walking the ring of chunk files from any chunk.

    Payload read before (and including) the short final chunk is collected in
    ``content_bottom``; payload read after it, which is the start of the
    original file, is collected in ``content_top``.
    """

    def __init__(self):
        self.content_bottom = b''
        self.content_top = b''
        self.is_content_bottom = True
        # Bare names of the chunk files that have already been consumed.
        self.visited = set()

    def read_plasmid(self, filename_outer):
        """Follow the chunk links starting at ``filename_outer``.

        Args:
            filename_outer: Path of any chunk file in the ring.

        Returns:
            The reassembled bytes. If a chunk file is missing, a message is
            printed and whatever was collected so far is returned.
        """
        hash_len = 64  # length of the hex SHA-256 link that ends every chunk
        # Later chunks are looked up next to the starting chunk, so the ring
        # can be read from any working directory.
        directory, name = os.path.split(filename_outer)
        # Iterate instead of recursing: one stack frame per chunk would hit
        # the recursion limit on files of only a few tens of kilobytes.
        while name not in self.visited:
            try:
                with open(os.path.join(directory, name), "rb") as f:
                    filecontent = f.read()
            except FileNotFoundError:
                print("Pieces were missing!")
                break
            content = filecontent[0:-hash_len]
            lhash = filecontent[-hash_len:]
            if self.is_content_bottom:
                self.content_bottom += content
                # A chunk shorter than a full payload plus link is the last
                # chunk of the original file; the next chunk is the first.
                if len(filecontent) < (num_bytes + hash_len):
                    self.is_content_bottom = False
            else:
                self.content_top += content
            self.visited.add(name)
            name = lhash.decode() + suffix
        return self.content_top + self.content_bottom
def parse(command, filename):
    """Run one command of the command-line interface.

    Args:
        command: One of
            ``'r'``  - reassemble the ring starting at chunk ``filename``,
                       print it and return the bytes;
            ``'w'``  - split ``filename`` into chunk files;
            ``'c'``  - reassemble from chunk ``filename``, compare with the
                       file named by ``sys.argv[3]`` and write ``diff.html``;
            ``'rw'`` - split ``filename``, read it back and print whether the
                       round trip reproduced the file.
        filename: Chunk file (``'r'``, ``'c'``) or source file (``'w'``,
            ``'rw'``).

    Returns:
        The reassembled bytes for ``'r'``; ``None`` otherwise.

    Raises:
        NotImplementedError: If ``command`` is not recognised.
    """
    if command == 'r':
        print(filename)
        content = Reader().read_plasmid(filename)
        print(content.decode())
        return content
    elif command == 'w':
        Writer().write_plasmid(filename)
    elif command == 'c':
        content = Reader().read_plasmid(filename)
        # Read as UTF-8 without newline translation so the text matches the
        # UTF-8 decoded chunk payload exactly.
        with open(sys.argv[3], encoding="utf-8", newline="") as f:
            tolines = f.read()
        fromlines = content.decode()
        print(fromlines == tolines)
        # HtmlDiff expects lists of lines; passing whole strings makes it
        # diff one character per table row.
        s = difflib.HtmlDiff().make_file(fromlines=fromlines.splitlines(),
                                         tolines=tolines.splitlines())
        with open("diff.html", "w", encoding="utf-8") as d:
            d.write(s)
    elif command == 'rw':
        parse('w', filename)
        # After writing, the ring's entry chunk is named after the last link
        # (held in the module-level ``prev``) and lives in ``prefix``. Read
        # from inside that directory so chunk names resolve, instead of
        # treating the source file itself as a chunk.
        entry = prev.decode() + suffix
        cwd = os.getcwd()
        os.chdir(prefix)
        try:
            content = parse('r', entry)
        finally:
            os.chdir(cwd)
        # Compare raw bytes to avoid encoding and newline-translation
        # mismatches.
        with open(filename, "rb") as f:
            print(content == f.read())
    else:
        raise NotImplementedError(f"unknown command: {command!r}")
if __name__ == "__main__":
    # Expected invocation: <script> <command> <filename> [file-to-compare]
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} {{r|w|c|rw}} <filename> [file-to-compare]")
    command = sys.argv[1]
    filename = sys.argv[2]
    parse(command, filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Todo - put the hash of the next file in the original, so that we can check to make sure it has not been modified.