Skip to content

Instantly share code, notes, and snippets.

@KR1470R
Last active August 31, 2023 08:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save KR1470R/1729c935ea8206c08e1be17abd48737d to your computer and use it in GitHub Desktop.
Save KR1470R/1729c935ea8206c08e1be17abd48737d to your computer and use it in GitHub Desktop.
CV Type file corrector
###########################################################################################
#################################### Requirements #####################################
# Python version 3.10 or newer (not tested on older versions)
# pip install python-magic
#################################### How does it work #####################################
# Traverses through files in specified directory, checking the metadata of each file:
# - if file type is not pdf/docx/doc/txt - REMOVES it,
# - if the file extension differs from the type in its metadata (pdf/docx/doc/txt) - RENAMES it.
#################################### How to Run #####################################
# 1. save the script to your system.
# 2. python cv-files-corrector.py /path/to/folder/with/cv
# 3. wait for the "Done!" message.
###########################################################################################
import magic
import os
import sys
# Command-line handling: the first positional argument is the directory that
# holds the resumes. Exit with status 1 when it is missing or is not an
# existing directory, so the script fails with a clear message instead of a
# traceback from os.listdir() further down.
args = sys.argv
if len(args) <= 1:
    print("Directory with resumes should be specified!")
    sys.exit(1)
dir_path = args[1]
if not os.path.isdir(dir_path):
    print(f"{dir_path} is not a directory!")
    sys.exit(1)
def should_be_removed(file_mime_type, path):
    """Delete *path* if its MIME type matches one of the removable types.

    Args:
        file_mime_type: MIME type string of the file.
        path: Location of the file on disk.

    Returns:
        True when a removable type occurs as a substring of
        *file_mime_type* (the file is deleted in that case), otherwise False.
    """
    # NOTE(review): `types_to_remove` is not defined anywhere in the visible
    # file, so calling this function as-is raises NameError - confirm where
    # that collection is supposed to come from.
    is_removable = any(marker in file_mime_type for marker in types_to_remove)
    if is_removable:
        os.remove(path)
    return is_removable
def get_file_type(path):
    """Map the MIME type detected for *path* to a resume file extension.

    Args:
        path: Location of the file to inspect.

    Returns:
        "pdf", "doc", "docx" or "txt" for a recognised MIME type, or None
        for anything else. In the None case a message is printed; this
        function itself does not delete the file.
    """
    detected_mime = magic.Magic(mime=True).from_file(path)

    # Checked in order; the first marker found inside the MIME string wins.
    known_markers = (
        ("pdf", "pdf"),
        ("msword", "doc"),
        ("vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
        ("text/plain", "txt"),
    )
    for marker, extension in known_markers:
        if marker in detected_mime:
            return extension

    print(f"Removing unsupportable file {path} with type {detected_mime}")
    return None
# Walk the target directory once. Every regular file is either deleted
# (no extension, or a content type get_file_type() does not recognise) or
# renamed so that its extension matches the detected content type.
files = os.listdir(dir_path)
for file in files:
    full_path = os.path.join(dir_path, file)

    # Sub-directories and other non-regular entries are left untouched:
    # os.remove() would raise on them.
    if not os.path.isfile(full_path):
        continue

    # splitext() splits on the LAST dot only, so "john.doe.cv.pdf" keeps its
    # whole stem instead of being truncated to "john".
    filename, extension = os.path.splitext(file)
    if not extension:
        print(f"Removing unsupported file {file}")
        os.remove(full_path)
        continue

    file_type = get_file_type(full_path)
    if file_type is None:
        # get_file_type() has already printed the "Removing ..." message;
        # perform the deletion it announces.
        os.remove(full_path)
        continue

    # Compare case-insensitively so "CV.PDF" counts as already correct.
    if extension[1:].lower() == file_type:
        continue

    new_file_path = os.path.join(dir_path, f"{filename}.{file_type}")
    # os.rename() may silently replace an existing file; never overwrite
    # another resume that already owns the target name.
    if os.path.exists(new_file_path):
        print(f"Skipping {full_path}: {new_file_path} already exists")
        continue

    os.rename(full_path, new_file_path)
    print(f"Renamed {full_path} to {new_file_path}")
print("Done!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment