Skip to content

Instantly share code, notes, and snippets.

@don1138
Last active November 7, 2023 01:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save don1138/7dcf338f27614c01bf0f6bcc3bbbc74e to your computer and use it in GitHub Desktop.
Save don1138/7dcf338f27614c01bf0f6bcc3bbbc74e to your computer and use it in GitHub Desktop.
Extract metadata from image files
# Extract Metadata v1.3.0
# This script walks a directory and all of its subdirectories, reads the EXIF metadata of every image it finds (.png, .jpg, .jpeg, .gif), and saves that metadata to a text file next to the image, named after the image with a .txt extension.
import os
import shlex
from PIL import Image, ExifTags
# Ask the user for the parent directory path and strip surrounding whitespace
PARENT_DIRECTORY_input = input("Enter the parent directory path: ").strip()
# Turn what a terminal paste or drag-and-drop usually produces (shell-escaped
# spaces, wrapping quotes) into a plain filesystem path.
if os.name == "nt":
    # Backslashes are path separators on Windows; POSIX-style shlex parsing
    # treats them as escape characters and drops them, so only remove a
    # matching pair of wrapping quotes here.
    if (
        len(PARENT_DIRECTORY_input) >= 2
        and PARENT_DIRECTORY_input[0] == PARENT_DIRECTORY_input[-1]
        and PARENT_DIRECTORY_input[0] in "\"'"
    ):
        PARENT_DIRECTORY_components = [PARENT_DIRECTORY_input[1:-1]]
    else:
        PARENT_DIRECTORY_components = [PARENT_DIRECTORY_input]
else:
    try:
        PARENT_DIRECTORY_components = shlex.split(PARENT_DIRECTORY_input)
    except ValueError:
        # shlex rejects unbalanced quotes (e.g. an apostrophe in a folder
        # name); fall back to the text exactly as typed instead of crashing.
        PARENT_DIRECTORY_components = [PARENT_DIRECTORY_input]
PARENT_DIRECTORY = ' '.join(PARENT_DIRECTORY_components)
# Metadata tag names that are excluded from the output
EXCLUDE_TAGS = ["ExifOffset"]
# Run totals for written and skipped files; both start at zero
files_written = files_skipped = 0
def extract_metadata(image_path):
    """Return the EXIF metadata of an image as a ``{tag name: value}`` dict.

    Numeric EXIF tag ids are translated to names through ``ExifTags.TAGS``;
    ids without a known name are kept unchanged as the key.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A dict of the image's EXIF entries. An empty dict is returned when
        the image carries no EXIF data or when reading it fails; in the
        failure case the error is printed rather than raised.
    """
    try:
        # The context manager closes the underlying file as soon as the EXIF
        # data has been read, so walking a large tree does not pile up open
        # file handles.
        with Image.open(image_path) as image:
            # NOTE(review): _getexif is a private Pillow helper; it is kept
            # where available so existing output does not change.
            read_exif = getattr(image, "_getexif", None)
            if read_exif is not None:
                exif_data = read_exif()
            else:
                # Not every Pillow image class provides _getexif; use the
                # public API there instead of failing with AttributeError.
                exif_data = dict(image.getexif())
    except Exception as e:
        # Deliberately broad: one unreadable image must not abort the run.
        print(f"Error extracting metadata from {image_path}: {e}")
        return {}

    if not exif_data:
        return {}
    return {ExifTags.TAGS.get(tag, tag): value for tag, value in exif_data.items()}
def save_metadata_to_txt(image_path, metadata):
    """Write an image's metadata to a ``.txt`` file next to the image.

    The output path is the image path with its extension replaced by
    ``.txt``. If that file already exists, ``-1``, ``-2``, ... is appended to
    the base name until an unused name is found. Each entry is written as
    ``key: value`` on its own line; keys listed in ``EXCLUDE_TAGS`` are
    omitted, and every occurrence of ``"UserComment: UNICODE"`` is removed
    from the text.

    Updates the module-level ``files_written`` / ``files_skipped`` counters.

    Args:
        image_path: Path of the source image.
        metadata: Mapping of tag name to value; when empty, nothing is
            written and the image is counted as skipped.
    """
    global files_written, files_skipped  # Module-level run totals

    if not metadata:
        print(f"× No data found in {image_path} -- skipped")
        files_skipped += 1
        return

    # Build the whole text in memory so the file is written exactly once,
    # rather than written, read back and rewritten.
    lines = []
    for key, value in metadata.items():
        if key in EXCLUDE_TAGS:
            continue
        if isinstance(value, bytes):
            try:
                value = value.decode("utf-8")
            except UnicodeDecodeError:
                value = value.decode("latin-1")  # latin-1 accepts any byte
            value = value.replace("\x00", "")  # Remove null characters
        lines.append(f"{key}: {value}\n")
    content = "".join(lines)
    # Normalise CR / CRLF inside values to LF, matching what a text-mode
    # read-back of the file would have produced.
    content = content.replace("\r\n", "\n").replace("\r", "\n")
    content = content.replace("UserComment: UNICODE", "")

    # Pick the first output name that is not already taken
    base_name = os.path.splitext(image_path)[0]
    txt_path = base_name + ".txt"
    count = 1
    while os.path.exists(txt_path):
        txt_path = f"{base_name}-{count}.txt"
        count += 1

    with open(txt_path, "w", encoding="utf-8") as txt_file:
        txt_file.write(content)

    print(f"Metadata saved to {txt_path}")
    files_written += 1
def main():
    """Extract and save metadata for every image under ``PARENT_DIRECTORY``.

    Walks the directory tree, and for each file whose name ends in .png,
    .jpg, .jpeg or .gif (case-insensitive) and does not start with a dot,
    extracts its metadata and saves it to a text file. Finishes by printing
    how many files were written and how many were skipped.
    """
    if not os.path.isdir(PARENT_DIRECTORY):
        # os.walk yields nothing for a missing path, which would otherwise
        # look like a successful run that simply found no images.
        print(f"× Not a directory: {PARENT_DIRECTORY}")
        return

    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif')
    for subdir, _, files in os.walk(PARENT_DIRECTORY):
        for file in files:
            # Names starting with '._' also start with '.', so a single
            # check covers both kinds of hidden file.
            if file.startswith('.'):
                continue
            if not file.lower().endswith(image_suffixes):
                continue
            image_path = os.path.join(subdir, file)
            metadata = extract_metadata(image_path)
            save_metadata_to_txt(image_path, metadata)

    # Print out the counts of written and skipped files
    print(f"\nFiles written: {files_written}")
    print(f"Files skipped: {files_skipped}\n")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment