Skip to content

Instantly share code, notes, and snippets.

@hjwp
Created November 6, 2020 12:00
Show Gist options
  • Save hjwp/da89d0672854245bf2ae914909232fe9 to your computer and use it in GitHub Desktop.
Save hjwp/da89d0672854245bf2ae914909232fe9 to your computer and use it in GitHub Desktop.
import hashlib
import os
import shutil
from pathlib import Path
def hash_file(path, chunk_size=65536):
    """Return the SHA-1 hex digest of the contents of the file at *path*.

    The file is opened in binary mode and fed to the hasher in chunks, so
    the whole file is never held in memory at once.

    Args:
        path: Location of the file, as a ``str`` or ``pathlib.Path``.
        chunk_size: Number of bytes to read per iteration. The default of
            64 KiB avoids the very large number of tiny reads that results
            from reading one hash block (64 bytes for SHA-1) at a time.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hasher = hashlib.sha1()
    with Path(path).open("rb") as file:
        # read() returns b"" at end of file, which ends the loop.
        while buf := file.read(chunk_size):
            hasher.update(buf)
    return hasher.hexdigest()
def sync(source, dest):
    """Make the files under *dest* match the files under *source*.

    Files are matched by content hash (via ``hash_file``), not by name:

    * a file in *dest* whose content is not present in *source* is deleted;
    * a file in *dest* whose content is present in *source* under a
      different relative path is moved to that path;
    * a file in *source* whose content was not found anywhere in *dest* is
      copied across.

    Because the lookup table is keyed by content hash, several source files
    with identical content collapse into a single entry (the last one
    walked wins).

    Args:
        source: Root folder to read from (``str`` or ``pathlib.Path``).
        dest: Root folder to update in place (``str`` or ``pathlib.Path``).
    """
    source_root = Path(source)
    dest_root = Path(dest)

    # Walk the source folder and build a dict of content hash -> path
    # relative to the source root, so files in subfolders can be located
    # again later and mirrored at the same place in the destination.
    source_hashes = {}
    for folder, _, files in os.walk(source_root):
        for fn in files:
            src_path = Path(folder) / fn
            source_hashes[hash_file(src_path)] = src_path.relative_to(source_root)

    seen = set()  # Hashes of every file we've found in the target

    # Walk the target folder, deleting or relocating files as needed.
    for folder, _, files in os.walk(dest_root):
        for fn in files:
            dest_path = Path(folder) / fn
            dest_hash = hash_file(dest_path)
            seen.add(dest_hash)

            # A file in target that's not in source: delete it.
            if dest_hash not in source_hashes:
                dest_path.unlink()
                continue

            # A file in target that has a different path in source: move it
            # to the matching path under the target root.
            wanted_path = dest_root / source_hashes[dest_hash]
            if dest_path != wanted_path:
                wanted_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(dest_path, wanted_path)

    # Every file that appears in source but not in target gets copied over.
    for src_hash, rel_path in source_hashes.items():
        if src_hash not in seen:
            wanted_path = dest_root / rel_path
            wanted_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(source_root / rel_path, wanted_path)
import tempfile
from pathlib import Path
import shutil
from sync import sync
# integration tests
def test_when_a_file_exists_in_the_source_but_not_the_destination():
    """A file that exists only in the source is copied to the destination."""
    # The context managers create both folders before the body runs and
    # remove them afterwards, even if an assertion fails, so cleanup never
    # touches a name that was not assigned.
    with tempfile.TemporaryDirectory() as source, tempfile.TemporaryDirectory() as dest:
        content = "I am a very useful file"
        (Path(source) / 'my-file').write_text(content)

        sync(source, dest)

        expected_path = Path(dest) / 'my-file'
        assert expected_path.exists()
        assert expected_path.read_text() == content
def test_when_a_file_has_been_renamed_in_the_source():
    """A destination file whose content matches a renamed source file is
    moved to the source's filename rather than left under the old name."""
    # The context managers create both folders before the body runs and
    # remove them afterwards, even if an assertion fails, so cleanup never
    # touches a name that was not assigned.
    with tempfile.TemporaryDirectory() as source, tempfile.TemporaryDirectory() as dest:
        content = "I am a file that was renamed"
        source_path = Path(source) / 'source-filename'
        old_dest_path = Path(dest) / 'dest-filename'
        expected_dest_path = Path(dest) / 'source-filename'
        # Same content on both sides, different names.
        source_path.write_text(content)
        old_dest_path.write_text(content)

        sync(source, dest)

        assert not old_dest_path.exists()
        assert expected_dest_path.read_text() == content
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment