Created
December 8, 2020 17:11
-
-
Save Steven24K/931480f4f2d6f81a0fae9c49337ac6c9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import os | |
from os import listdir | |
from os.path import isfile, join | |
from glob import glob | |
from pathlib import Path | |
import shutil | |
import datetime | |
import functools | |
class MyImage:
    """Record describing one file found while scanning the input directory.

    Attributes:
        FileName: Base name of the file, without any directory part.
        fileSize: Size of the file as reported by the scan (``st_size``).
        AbsoultePath: Path to the file as produced by the directory scan.
            The misspelling is kept on purpose: the copy step of this
            script reads the attribute under exactly this name.
    """

    def __init__(self, filename, fileSize, path):
        self.FileName = filename
        self.fileSize = fileSize
        self.AbsoultePath = path

    def __repr__(self):
        # The script prints the list of scanned files; without a repr that
        # output would only show default object addresses.
        return '{}(filename={!r}, fileSize={!r}, path={!r})'.format(
            type(self).__name__, self.FileName, self.fileSize, self.AbsoultePath)
# Directory that is scanned for files, and directory matched files are copied to.
INPUT_DIRECTORY = './input'
OUTPUT_DIRECTORY = './output'

# CSV file whose rows are compared against the scanned files.
CSV_file = 'file_managed.csv'

# When true, matches are additionally filtered on the CSV file size column.
use_file_size = True

# Result accumulators filled in by the rest of the script.
files_not_found = []
matched_files = []
print('Started reading files from {} :: input directory'.format(INPUT_DIRECTORY))

# Walk INPUT_DIRECTORY recursively and pair every file name with its joined path.
scanned_paths = (
    (name, os.path.join(folder, name))
    for folder, _subdirs, names in os.walk(INPUT_DIRECTORY)
    for name in names
)

# One MyImage record per file: name, size on disk, and the path it was found at.
input_files = [
    MyImage(name, os.path.getsize(full_path), full_path)
    for name, full_path in scanned_paths
]

print(input_files)
print('INPUT FILES IN ' + INPUT_DIRECTORY + ' :' + str(len(input_files)))
# Match every CSV row against the scanned input files by file name.
# Column 4 of each row holds the file name.

# Index the scanned files by name once instead of rebuilding the name list
# for every CSV row. setdefault keeps the first file seen for a name, which
# is the same file list.index() would have selected.
files_by_name = {}
for image in input_files:
    files_by_name.setdefault(image.FileName, image)

# The with-block guarantees the CSV handle is closed, also when a row fails.
with open(CSV_file, newline='') as csvFile:
    filereader = csv.reader(csvFile, delimiter=',', quotechar='"')
    for row in filereader:
        if len(row) <= 4:
            # Blank or truncated line: there is no file name column to match.
            continue
        filename = row[4]
        if filename in files_by_name:
            matched_files.append((row, files_by_name[filename]))
        else:
            files_not_found.append(row)
# Confirm each name match by file size. Column 7 of the CSV row holds the
# expected size; entries of matched_files are (csv_row, MyImage) pairs.
if use_file_size:
    size_checked = []
    for csv_row, image in matched_files:
        try:
            expected_size = int(csv_row[7])
        except (IndexError, ValueError):
            # No usable size in the CSV row, so the match cannot be confirmed.
            confirmed = None
        else:
            # Compare against files carrying the SAME name only (the current
            # match first), not against the sizes of unrelated files. The
            # same name can occur in several scanned sub-directories, so the
            # copy whose size agrees is the one that gets paired with the row.
            same_name = [image] + [
                other for other in input_files
                if other.FileName == image.FileName
            ]
            confirmed = next(
                (candidate for candidate in same_name
                 if candidate.fileSize == expected_size),
                None,
            )

        if confirmed is None:
            # Store the bare CSV row, like every other files_not_found entry,
            # so later code can index its columns.
            files_not_found.append(csv_row)
        else:
            size_checked.append((csv_row, confirmed))

    # Replace the contents in one step; removing items from a list while
    # looping over it silently skips entries.
    matched_files[:] = size_checked
# Copy every matched file into OUTPUT_DIRECTORY, recreating the directory
# part of the CSV uri (column 5) below it. Column 4 is the file name.
uri_prefix = 'public://'
for csv_row, image in matched_files:
    filename = csv_row[4]
    uri = csv_row[5]

    # Drop the scheme, then keep only the directory part of the uri. Cutting
    # at the last '/' (instead of deleting every occurrence of the file name)
    # leaves directory names that happen to contain the file name intact.
    relative = uri[len(uri_prefix):] if uri.startswith(uri_prefix) else uri
    # Strip stray slashes so os.path.join cannot discard OUTPUT_DIRECTORY.
    dir_structure = relative.rpartition('/')[0].strip('/')

    path = os.path.join(OUTPUT_DIRECTORY, dir_structure)
    # exist_ok=True: an existing directory is the normal case after the first
    # file, while real failures (e.g. permissions) are no longer swallowed.
    os.makedirs(path, exist_ok=True)

    destination = os.path.join(path, filename)
    source = image.AbsoultePath
    if not os.path.exists(destination):
        shutil.copy(source, destination)
    else:
        # Never overwrite a file that is already present in the output.
        print(destination + ' does allready exists')
# Write the uri (column 5) of every unmatched CSV row to a timestamped log.
os.makedirs('logs', exist_ok=True)
log_template = '{},"{}","{}","{}","{}","{}","{}","{}","{}","{}"'
# Header line left by the original author, presumably the CSV column order:
# "fid","uuid","langcode","uid","filename","uri","filemime","filesize","status","created","changed"
log_name = 'logs/log_{}_unmatched_files.txt'.format(
    datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S'))

unmatched_uris = []
for entry in files_not_found:
    # Tolerate (row, file) pairs as well as bare CSV rows, and rows that are
    # too short to have a uri column, so one odd entry cannot abort the log.
    csv_row = entry[0] if isinstance(entry, tuple) else entry
    unmatched_uris.append(csv_row[5] if len(csv_row) > 5 else '')

try:
    # The with-block closes (and thereby flushes) the log file in all cases.
    with open(log_name, 'w') as logs:
        logs.write(str(unmatched_uris))
except OSError as e:
    print('Error: {}'.format(e))
# Final report: number of matched entries, unmatched entries, and their sum.
found_count = len(matched_files)
missing_count = len(files_not_found)
summary = """
SUMMARY
Found files {}
Not found files {}
Total files {}
"""
print(summary.format(found_count, missing_count, found_count + missing_count))
print('Done.')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment