Skip to content

Instantly share code, notes, and snippets.

@pgolding
Last active May 2, 2018 21:02
Show Gist options
  • Save pgolding/709fc889bd85daa5be88fbf6d2d422da to your computer and use it in GitHub Desktop.
Save pgolding/709fc889bd85daa5be88fbf6d2d422da to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"deletable": true,
"editable": true
},
"cell_type": "markdown",
"source": "# Check Image Meta Data (e.g. from EXIF)\n\nThis script pulls the EXIF data from JPG/JPEG images. For now, we skip PDF/PNG images, although they do contain metadata.\n\nRequires:\n\n>`$ pip install exifread`\n\nSet `mount` to point to the mount point.\n\nSet `path_to_checks` to point at the sub-folder containing the checks.\n\nThe final path is the join of these two, i.e. `mount/path_to_checks`.\n\nBy default we keep only the most useful EXIF data fields (`cols`). To grab all EXIF data, set `limit_to_cols=False` when calling the `stream_exif` routine."
},
{
"metadata": {
"trusted": true,
"deletable": true,
"collapsed": false,
"editable": true
},
"cell_type": "code",
"source": [
"import os\n",
"import json\n",
"import sys\n",
"import exifread\n",
"import pandas\n",
"\n",
"# Control the checkpoint size - number of images to process between CSV saves\n",
"check_point = 100\n",
"\n",
"# EXIF tags that we want to extract and use as cols in our csv output\n",
"cols = [\"EXIF ISOSpeedRatings\", \"EXIF Flash\", \"Image YResolution\", \"EXIF FNumber\",\n",
"        \"Image Orientation\", \"Image Model\", \"Image XResolution\", \"EXIF BrightnessValue\",\n",
"        \"EXIF ExifImageWidth\", \"EXIF ExifImageLength\", \"EXIF LensModel\", \"EXIF SubjectArea\",\n",
"        \"Image Make\"]\n",
"\n",
"# File extensions (lower-case, including the dot) that are treated as JPEG images\n",
"jpeg_extensions = (\".jpg\", \".jpeg\")\n",
"\n",
"# Empty placeholder frame; save() builds its own frame from the collected records\n",
"df = pandas.DataFrame()\n",
"\n",
"\n",
"def save(data, csv_file):\n",
"    \"\"\"Write the collected EXIF records to csv_file, indexed by file name.\n",
"\n",
"    data     -- list of dicts, each carrying a 'file' key plus the EXIF tags\n",
"    csv_file -- output path; the file is rewritten in full on every call\n",
"    \"\"\"\n",
"    frame = pandas.DataFrame(data).set_index('file')\n",
"    frame.to_csv(csv_file)\n",
"\n",
"\n",
"# Set limit_to_cols to False if you want to collect ALL the EXIF fields (will slow things down)\n",
"def stream_exif(path, limit_to_cols=True, csv_file='Exif_data_checks.csv'):\n",
"    \"\"\"Walk path recursively, read the EXIF tags of every JPEG and save them to csv_file.\n",
"\n",
"    path          -- root folder to search\n",
"    limit_to_cols -- keep only the tags listed in cols when True, every tag otherwise\n",
"    csv_file      -- CSV output; rewritten every check_point images and once at the end\n",
"\n",
"    Errors are printed rather than raised; an image that cannot be read is skipped.\n",
"    \"\"\"\n",
"    print(\"Looking for jpg images in {}\".format(path))\n",
"    exif_stream = []\n",
"    file_count = 0\n",
"    try:\n",
"        # Walk the folder passed in as `path`, not the global `folder`\n",
"        for subdir, dirs, files in os.walk(path):\n",
"            # Filter once; sorting makes the row order of the CSV deterministic\n",
"            filelist = sorted(f for f in files\n",
"                              if os.path.splitext(f)[1].lower() in jpeg_extensions)\n",
"            if not filelist:\n",
"                print(\"No jpg images in folder {}\".format(subdir))\n",
"                continue\n",
"            print(\"Found {} jpg images in folder {}\".format(len(filelist), subdir))\n",
"            for name in filelist:\n",
"                image_path = os.path.join(subdir, name)\n",
"                try:\n",
"                    with open(image_path, 'rb') as f:\n",
"                        tags = exifread.process_file(f, details=False)\n",
"                except OSError as err:\n",
"                    # One unreadable image should not abort the whole run\n",
"                    print(\"Skipping {}: {}\".format(image_path, err))\n",
"                    continue\n",
"                if limit_to_cols:\n",
"                    vals = {tag: field for (tag, field) in tags.items() if tag in cols}\n",
"                else:\n",
"                    vals = dict(tags)\n",
"                vals[\"file\"] = name\n",
"                exif_stream.append(vals)\n",
"                file_count += 1\n",
"                # Checkpoint: persist everything collected so far\n",
"                if file_count % check_point == 0:\n",
"                    save(exif_stream, csv_file)\n",
"        # Final save picks up the records gathered since the last checkpoint\n",
"        if exif_stream:\n",
"            save(exif_stream, csv_file)\n",
"        print(\"Checks processed: {}\".format(file_count))\n",
"    except OSError as err:\n",
"        print(\"OS error: {0}\".format(err))\n",
"    except KeyError as err:\n",
"        print(\"KeyError: {} - check that the folder is correct\".format(err))\n",
"    except Exception as err:\n",
"        print(err)\n",
"\n",
"\n",
"# change this to the folder mount point\n",
"mount = os.getcwd()\n",
"# change this to the checks folder\n",
"path_to_checks = 'sample_checks_prosper'\n",
"folder = os.path.join(mount, path_to_checks)\n",
"# iterate over the files and stream the exif data\n",
"stream_exif(folder, csv_file='all_checks.csv')\n"
],
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"language_info": {
"pygments_lexer": "ipython3",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"nbconvert_exporter": "python",
"name": "python",
"version": "3.5.4",
"file_extension": ".py",
"mimetype": "text/x-python"
},
"kernelspec": {
"name": "conda-env-ocr-py",
"display_name": "Python [conda env:ocr]",
"language": "python"
},
"gist_id": "709fc889bd85daa5be88fbf6d2d422da"
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment