@minhoryang
Created March 21, 2017 13:24

hashfs_gist.py (the archiving half): store each file under directory/ into a content-addressed HashFS at hashed/directory/, record its name, hash, and timestamps in hashed/directory.csv, then remove the original.

import csv
import glob
import os

from hashfs import HashFS


def getTime(filename):
    """Return a file's (st_atime, st_mtime) pair."""
    time = os.stat(filename)
    return (time.st_atime, time.st_mtime)


def recoveryTime(orig, newer):
    """Copy orig's access/modification times onto newer."""
    os.utime(newer, getTime(orig))


def genRecord(orig, newer):
    """Build one CSV row; genRecord(None, None) is reused just for the fieldnames."""
    origTime = getTime(orig) if orig else None
    return {
        'Name': os.path.basename(orig) if orig else None,
        'isDuplicated': newer.is_duplicate if newer else None,
        'Hash': newer.id if newer else None,
        'st_atime': origTime[0] if origTime else None,
        'st_mtime': origTime[1] if origTime else None,
    }


def openDB(db):
    db = open(db, 'a', newline='')  # newline='' as the csv module recommends
    db_cursor = csv.DictWriter(db, fieldnames=genRecord(None, None))
    if not db.tell():  # fresh file: write the header once
        db_cursor.writeheader()
    return (db, db_cursor)


def __main__(directory, prefix='hashed/'):
    if not os.path.isdir(prefix):
        os.mkdir(prefix)
    base_dir = os.path.join(prefix, directory)
    fs = HashFS(base_dir)
    db, db_cursor = openDB(base_dir + '.csv')
    for target_file in glob.glob(os.path.join(directory, '*')):  # FIXME: glob order is filesystem-dependent; sort for determinism?
        stored_file = fs.put(target_file)  # TODO: HashFS monkey-patching for gzip compression? (see the sketch after this file)
        recoveryTime(target_file, stored_file.abspath)
        db_cursor.writerow(genRecord(target_file, stored_file))
        os.remove(target_file)  # TODO: make the delete optional?
    db.close()


if __name__ == '__main__':
    import sys
    for argv in sys.argv[1:]:
        __main__(argv)
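
One way to prototype the gzip TODO above without touching HashFS internals is to compress into memory before storing. A minimal sketch, assuming hashfs's put(file, extension=None) signature; put_gzipped() is a hypothetical helper, and the store would then hold (and hash) the compressed bytes, so the restore side would need a matching gunzip step.

import gzip
import io


def put_gzipped(fs, filename):
    # Hypothetical helper (not in the gist): gzip the file in memory, then
    # store the compressed stream. mtime=0 keeps gzip output deterministic
    # (Python 3.8+), so identical files still dedupe to the same hash.
    with open(filename, 'rb') as f:
        compressed = io.BytesIO(gzip.compress(f.read(), mtime=0))
    return fs.put(compressed, extension='.gz')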

hashfs_gist_fs.py (the restoring half): rebuild directory/ from the HashFS store, symlinking each recorded name back to its stored blob and reapplying the recorded timestamps.

import csv
import os
import shutil  # kept for the copy-based restore options in the TODO below

from hashfs import HashFS


def getTime(target_file):
    """Read the recorded (st_atime, st_mtime) pair back out of a CSV row."""
    return (float(target_file['st_atime']), float(target_file['st_mtime']))


def recoveryTime(orig, newer):
    """Apply the recorded times to the restored entry (the symlink itself)."""
    os.utime(newer, getTime(orig), follow_symlinks=False)


def openDB(db):
    db = open(db, 'r', newline='')
    return (db, csv.DictReader(db))


def getRecords(db_cursor):
    """Group the CSV rows by hash: {Hash: [record, ...]}."""
    records = {}
    for stored_record in db_cursor:
        Hash = stored_record.pop('Hash')
        records.setdefault(Hash, []).append(stored_record)
    return records


def __main__(directory, prefix='hashed/'):
    base_dir = os.path.join(prefix, directory)
    if not os.path.isdir(prefix) or not os.path.isdir(base_dir) or not os.path.isfile(base_dir + '.csv'):
        raise Exception('Not Hashed Yet')
    if not os.path.isdir(directory):
        os.mkdir(directory)
    fs = HashFS(base_dir)
    db, db_cursor = openDB(base_dir + '.csv')
    for hash, records in getRecords(db_cursor).items():
        stored_file = fs.get(hash)  # HashAddress of the stored blob
        for target_file in records:
            target_filename = os.path.join(directory, target_file['Name'])
            if not os.path.isfile(target_filename):
                os.symlink(stored_file.abspath, target_filename)
                # TODO: optional restore policies (see the sketch after this file):
                #   1. all copies? (original layout)
                #   2. one copy, rest symlinked to it?
                #   3. all symlinks?
            recoveryTime(target_file, target_filename)  # TODO: optionally take times from stored_file?
            # TODO: optional break?
    db.close()


if __name__ == '__main__':
    import sys
    for argv in sys.argv[1:]:
        __main__(argv)
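
The restore-policy TODO above sketches three options. A hedged sketch of option 2 (one real copy per hash, the remaining duplicate names symlinked to it); restore_one_hash() is a hypothetical helper, not the gist's API.

import os
import shutil


def restore_one_hash(stored_abspath, target_filenames):
    # Hypothetical helper (not in the gist): materialize one real copy per
    # hash, then point every other recorded name at that copy via a symlink.
    first = target_filenames[0]
    if not os.path.isfile(first):
        shutil.copy2(stored_abspath, first)  # copy2 also preserves the stored times
    for dup in target_filenames[1:]:
        if not os.path.lexists(dup):
            os.symlink(first, dup)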
@minhoryang (Author) commented:
TODO: hashfs_gist.py -> hashfs_gist_fs.py -> hashfs_gist.py feature
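
Read as a round trip, this seems to mean: archive with hashfs_gist.py, restore with hashfs_gist_fs.py, then archive again and expect every put to come back with is_duplicate set. A sketch under that assumption, using a hypothetical photos/ directory and assuming the two files above are saved under those names next to this script.

import hashfs_gist
import hashfs_gist_fs

hashfs_gist.__main__('photos')     # 1. originals -> hashed/photos/ + hashed/photos.csv
hashfs_gist_fs.__main__('photos')  # 2. recorded names restored as symlinks
hashfs_gist.__main__('photos')     # 3. re-archive; every put should dedupe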
