Last active
March 4, 2019 07:39
-
-
Save iliakonnov/63cfa5b2d3abd387c8442819b7e8aba1 to your computer and use it in GitHub Desktop.
A simple and fast Python file copier with resume support and a progress bar.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sys import argv | |
from hashlib import md5 | |
import time | |
import shutil | |
import os | |
import sqlite3 | |
import progressbar # https://pypi.org/project/progressbar2/3.18.1/ | |
def _rate_mib(byte_count, seconds):
    """Return throughput in MiB/s, or ``inf`` when no time has elapsed."""
    if seconds == 0:
        return float('inf')
    return (byte_count / 1024 / 1024) / seconds


def _flush_copied(cursor, conn, pending):
    """Persist the queued ``(hash, path)`` rows as copied and empty the queue."""
    if pending:
        cursor.executemany("UPDATE files SET status='C', hash=? WHERE path=?", pending)
        del pending[:]
    conn.commit()


def _matches_existing(fsrc, to_path, chunk_size):
    """Compare the open source file with the file already at ``to_path``.

    Returns ``(match, source_md5, read_speed)``.  ``source_md5`` covers the
    whole source only when ``match`` is True; on a mismatch the comparison
    stops early and the hash is incomplete.
    """
    source_hash = md5()
    compared = 0
    match = True
    with open(to_path, 'rb') as fdst:
        with progressbar.ProgressBar(max_value=progressbar.UnknownLength) as bar:
            start = time.time()
            bar.update(compared)
            while True:
                src_buf = fsrc.read(chunk_size)
                dst_buf = fdst.read(chunk_size)
                if not src_buf and not dst_buf:
                    break
                compared += len(src_buf)
                bar.update(compared)
                # Comparing the raw chunks is exact and avoids finalising
                # two digests on every iteration.
                if src_buf != dst_buf:
                    match = False
                    break
                source_hash.update(src_buf)
            elapsed = time.time() - start
    return match, source_hash, _rate_mib(compared, elapsed)


def _copy_stream(fsrc, to_path, chunk_size):
    """Write the whole of ``fsrc`` to ``to_path``, hashing while copying.

    Returns ``(source_md5, bytes_written, write_speed)``.
    """
    source_hash = md5()
    written = 0
    with open(to_path, 'wb') as fdst:
        with progressbar.ProgressBar(max_value=progressbar.UnknownLength) as bar:
            start = time.time()
            bar.update(written)
            # The comparison pass may have consumed part of the source.
            fsrc.seek(0)
            while True:
                buf = fsrc.read(chunk_size)
                if not buf:
                    break
                source_hash.update(buf)
                fdst.write(buf)
                # Count the bytes actually read, not the chunk size, so the
                # last (short) chunk is not over-counted.
                written += len(buf)
                bar.update(written)
            elapsed = time.time() - start
    return source_hash, written, _rate_mib(written, elapsed)


def _hash_file(path, chunk_size, expected_size):
    """Read ``path`` back and return ``(md5, hash_speed)``."""
    file_hash = md5()
    hashed = 0
    with open(path, 'rb') as fh:
        with progressbar.ProgressBar(max_value=expected_size) as bar:
            start = time.time()
            bar.update(hashed)
            while True:
                buf = fh.read(chunk_size)
                if not buf:
                    break
                file_hash.update(buf)
                hashed += len(buf)
                bar.update(hashed)
            elapsed = time.time() - start
    return file_hash, _rate_mib(hashed, elapsed)


def download(fromDir, toDir, copyId):
    """Copy every file under ``fromDir`` into ``toDir``, resumably.

    Progress is tracked in the SQLite database ``copyId + '.db'`` in a table
    ``files(path, status, hash)`` where status is 'N' (new), 'C' (copied) or
    'E' (error).  On the first run the source tree is scanned and the table is
    filled; on later runs only the rows whose status is not 'C' are processed.

    For each file: if the destination already exists it is compared with the
    source and skipped when identical; otherwise the source is copied, the
    destination is read back and its MD5 is checked against the source's, and
    file metadata is copied with ``shutil.copystat``.

    Args:
        fromDir: source directory.
        toDir: destination directory.
        copyId: path prefix of the progress database (``.db`` is appended).

    Returns:
        True if no file raised an error during this pass, False otherwise.
    """
    save_period = 1024 * 1024 * 100  # flush progress after ~100 MiB copied
    chunk_size = 4 * 1024 * 1024  # 4 MiB chunk
    result = True
    pending = []  # (hexdigest, path) rows not yet written to the database

    conn = sqlite3.connect(copyId + '.db')
    try:
        c = conn.cursor()
        # Check for the table rather than the file: an empty or leftover
        # database file would otherwise make the SELECT below fail.
        has_table = c.execute(
            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='files'"
        ).fetchone() is not None
        if has_table:
            files = [row[0] for row in c.execute("SELECT path FROM files WHERE status != 'C'")]
            copiedCount = c.execute("SELECT COUNT(*) FROM files WHERE status == 'C'").fetchone()[0]
        else:
            print('Searching for files...')
            files = []
            last_print = time.time()
            for dirpath, _dirnames, filenames in os.walk(fromDir):
                for filename in filenames:
                    rel = os.path.relpath(os.path.join(dirpath, filename), fromDir)
                    files.append(rel)
                    # Report scan progress at most every two seconds.
                    if time.time() - last_print > 2:
                        print(len(files), rel)
                        last_print = time.time()
            print('Filling database with {} files'.format(len(files)))
            c.execute('CREATE TABLE files (path TEXT UNIQUE, status CHAR(1), hash TEXT)')
            c.executemany("INSERT INTO files VALUES (?, 'N', NULL)", [(p,) for p in files])
            c.execute('CREATE UNIQUE INDEX path_idx ON files(path)')
            conn.commit()
            copiedCount = 0
        conn.execute('PRAGMA synchronous=OFF')
        # conn.execute('PRAGMA journal_mode=OFF')  # Faster, but unsafe

        copied_bytes = 0
        filesCount = len(files)
        totalFiles = filesCount + copiedCount
        for rel_path in files:
            from_path = os.path.join(fromDir, rel_path)
            to_path = os.path.join(toDir, rel_path)
            speed = -1
            try:
                os.makedirs(os.path.split(to_path)[0], exist_ok=True)
                exists = os.path.exists(to_path)
                if exists:
                    print('Overwriting {fr} -> {to}...'.format(fr=from_path, to=to_path))
                else:
                    print('Copying {fr} -> {to}...'.format(fr=from_path, to=to_path))
                if os.path.isdir(from_path):
                    print('Skipping dir!')
                    continue

                copied_size = 0
                # -inf marks a phase that did not run for this file.
                read_speed = float('-inf')
                hash_speed = float('-inf')
                match = False
                with open(from_path, 'rb') as fsrc:
                    if exists:
                        match, source_hash, read_speed = _matches_existing(
                            fsrc, to_path, chunk_size)
                    if match:
                        print('Files matches!')
                        speed = float('-inf')
                    else:
                        source_hash, copied_size, speed = _copy_stream(
                            fsrc, to_path, chunk_size)
                if not match:
                    # Read the destination back to verify what was written.
                    dest_hash, hash_speed = _hash_file(to_path, chunk_size, copied_size)
                    if source_hash.digest() != dest_hash.digest():
                        raise Exception("Hash mismatch!")
                shutil.copystat(from_path, to_path)
            except Exception as e:
                # Record the failure and carry on with the remaining files.
                result = False
                c.execute("UPDATE files SET status='E' WHERE path=?", (rel_path,))
                conn.commit()
                print(e)
                print('ERROR: {e}\n'.format(e=str(e)))
                # raise
            else:
                copied_bytes += copied_size
                pending.append((source_hash.hexdigest(), rel_path))
                copiedCount += 1
                filesCount -= 1
                if copied_bytes > save_period:
                    copied_bytes = 0
                    _flush_copied(c, conn, pending)
                print('Done. {s:.3f} Mb/s write; {hs:.3f} Mb/s hash check; {rs:.3f} Mb/s read; ({c}/{t}: {f})\n'.format(
                    s=speed,
                    hs=hash_speed,
                    rs=read_speed,
                    c=copiedCount,
                    t=totalFiles,
                    f=filesCount
                ))
        _flush_copied(c, conn, pending)
        print('All copied!')
        return result
    finally:
        # Also reached on KeyboardInterrupt or an unexpected error: keep the
        # progress made so far and always release the connection.
        try:
            _flush_copied(conn.cursor(), conn, pending)
        finally:
            conn.close()
if __name__ == "__main__":
    # Expect exactly three arguments after the script name.
    if len(argv) != 4:
        print('Use: download.py fromDir toDir copyId')
    else:
        source_dir, target_dir, job_id = argv[1:4]
        # Repeat the pass until one completes without any per-file error;
        # each pass only revisits files not yet marked as copied.
        while not download(source_dir, target_dir, job_id):
            pass
        print('No errors!')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment