Skip to content

Instantly share code, notes, and snippets.

@giuseppe998e
Created July 16, 2022 14:03
Show Gist options
  • Save giuseppe998e/9671be639311a5251e493064dada707e to your computer and use it in GitHub Desktop.
Save giuseppe998e/9671be639311a5251e493064dada707e to your computer and use it in GitHub Desktop.
A Python3 script that asynchronously renames files (media) in a directory
#!/usr/bin/env python3
# Copyright 2022 Giuseppe Eletto <giuseppe@eletto.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# REQUIRES: pip install aiofiles exif
from asyncio import run as asyncrun
from hashlib import md5
from aiofiles import os as asyncos
from aiofiles import open as asyncopen
from exif import Image
from pathlib import Path
from random import randint
import re
# Settings
# Root directory that main() walks recursively ("." = current working directory).
MEDIA_DIR = "."
# Constants
# Matches a date (year 1000-2099, month 01-12, day 01-31) optionally followed,
# after at least one separator/word character, by a time (HH, MM, SS).
# Groups: 1 = whole date, 2 = year, 3 = month, 4 = day,
#         5 = whole time, 6 = hour, 7 = minute, 8 = second.
# The month and day alternatives are restricted to real calendar ranges so
# that digit runs such as "20221516" are not mistaken for a date.
REX_STR = (
    r"(((?:1[0-9]|20)\d{2})[-_.:]?(0[1-9]|1[0-2])[-_.:]?(0[1-9]|[12][0-9]|3[01]))"
    r"(?:[-_\s\w]+?(([01][0-9]|2[0-3])[-_.:]?([0-5][0-9])[-_.:]?([0-5][0-9])))?"
)


# Functions
def get_regex_filename(string):
    """Build a ``media_YYYYMMDD_HHMMSS`` name from a date found in *string*.

    Args:
        string: text to search (a file stem or an EXIF date-time value).

    Returns:
        The new base name (without extension), or ``None`` when *string*
        contains no date. When a date is found but no time follows it, the
        hour is ``"00"`` and minute/second are drawn at random (0-59 each).

    Raises:
        TypeError: if *string* is not a ``str`` (propagated from ``re.search``).
    """
    match = re.search(REX_STR, string)
    if match is None:
        return None

    year, month, day = match.group(2, 3, 4)
    if match.group(5) is not None:
        hour, minute, second = match.group(6, 7, 8)
    else:
        # No time in the input: midnight hour plus a random minute and second.
        hour = "00"
        minute = f"{randint(0, 59):02d}"
        second = f"{randint(0, 59):02d}"
    return f"media_{year}{month}{day}_{hour}{minute}{second}"
def get_exif_filename(exif):
    """Derive a new file name from the first date-time tag present in *exif*.

    The tags are queried through ``exif.get`` in this order:
    ``datetime_original``, ``datetime_digitized``, ``datetime``. The first
    value that is not ``None`` is handed to ``get_regex_filename``; if every
    tag is ``None``, ``None`` itself is handed over.

    Returns:
        Whatever ``get_regex_filename`` returns for the selected value.
    """
    date_time_exif = None
    for tag in ("datetime_original", "datetime_digitized", "datetime"):
        date_time_exif = exif.get(tag)
        if date_time_exif is not None:
            break
    return get_regex_filename(date_time_exif)
# Async Functions
async def calc_md5(path):
    """Compute the MD5 of the file at *path*, reading it piece by piece.

    Returns:
        The ``hashlib`` MD5 object (callers take ``.hexdigest()`` themselves).
    """
    block_size = 8 * 1024  # bytes requested per read (8 KiB)
    async with asyncopen(path, "rb") as stream:
        digest = md5(await stream.read(block_size))
        while True:
            piece = await stream.read(block_size)
            if not piece:
                break
            digest.update(piece)
        return digest
    return None  # Not expected to run: a failed open raises instead.
async def rename_using_metadata(path, exif):
    """Rename *path* after the timestamp found in its EXIF metadata.

    Args:
        path: ``pathlib.Path`` of the file to rename.
        exif: metadata object forwarded to ``get_exif_filename``.

    Raises:
        ValueError: if the metadata yields no usable timestamp, so the caller
            can fall back to another naming strategy.
    """
    new_name = get_exif_filename(exif)
    if new_name is None:
        # Without this guard the file would be renamed to "None<suffix>".
        raise ValueError(f"no usable EXIF timestamp for {path}")

    # On Unix os.rename silently replaces an existing destination; pick a
    # free "<name>_<n>" variant instead of destroying another file.
    target = path.with_name(f"{new_name}{path.suffix}")
    attempt = 1
    while target != path and target.exists():
        target = path.with_name(f"{new_name}_{attempt}{path.suffix}")
        attempt += 1
    await asyncos.rename(path, target)
async def rename_using_filename(path, bytes):
    """Rename *path* from a date in its current name, else from its MD5.

    Args:
        path: ``pathlib.Path`` of the file to rename.
        bytes: the file's content if the caller already read it, or ``None``
            to have the hash computed from disk via ``calc_md5``.

    The new name is ``get_regex_filename(path.stem)`` when that finds a date,
    otherwise ``media_<md5 hex digest>``. The original suffix is kept.
    """
    new_name = get_regex_filename(path.stem)
    if new_name is None:
        md5_hash = md5(bytes) if bytes is not None else await calc_md5(path)
        new_name = f"media_{md5_hash.hexdigest()}"

    # On Unix os.rename silently replaces an existing destination; pick a
    # free "<name>_<n>" variant instead of destroying another file.
    target = path.with_name(f"{new_name}{path.suffix}")
    attempt = 1
    while target != path and target.exists():
        target = path.with_name(f"{new_name}_{attempt}{path.suffix}")
        attempt += 1
    await asyncos.rename(path, target)
async def main():
    """Rename every regular file found recursively under ``MEDIA_DIR``.

    Files matching ``*.jp*g`` or ``*.png`` are named from their EXIF
    timestamp, falling back to ``rename_using_filename`` if that fails; any
    error for such a file is printed and the walk continues. All other files
    go straight to ``rename_using_filename``.
    """
    # Snapshot the listing before renaming anything, so the walk is not
    # affected by entries changing underneath it.
    paths = [p for p in Path(MEDIA_DIR).glob("**/*") if not p.is_dir()]

    for path in paths:
        if not (path.match("*.jp*g") or path.match("*.png")):
            await rename_using_filename(path, None)
            continue

        try:
            # Read and close the file first; some platforms (notably Windows)
            # refuse to rename a file that is still open.
            async with asyncopen(path, mode="rb") as img:
                data = await img.read()
            try:
                exif = Image(data)
                await rename_using_metadata(path, exif)
            except Exception:
                # No/invalid EXIF data: use the generic naming strategy.
                await rename_using_filename(path, data)
        except Exception as e:
            # Best effort per file. Only Exception is caught, so Ctrl-C and
            # task cancellation still stop the run.
            print(e)
# App entry point: run the async main() coroutine only when executed as a script.
if __name__ == "__main__":
    asyncrun(main())
@giuseppe998e
Copy link
Author

giuseppe998e commented Jul 16, 2022

How it works

For "JPEG" and "PNG"

  1. Tries to read the exif data: datetime_original -> datetime_digitized -> datetime
  2. Uses regex to retrieve year, month, day and, if present, also hour, minute and second. Otherwise a random minute and second after midnight of that day is used.
  3. Renames the file to media_{year}{month}{day}_{hour}{minute}{second}.ext

N.B. If no exif data are present, the "generic" approach visible below is used.

For files other than "JPEG" or "PNG"

  1. Reads the current filename
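  2. Uses a regex to retrieve year, month, day and, if present, also hour, minute and second (with the same random minute/second fallback as above), then renames the file to media_{year}{month}{day}_{hour}{minute}{second}.ext.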

N.B. If no date is found in the filename, the MD5 hash of the file is computed and the file is renamed to media_{md5}.ext.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment