Skip to content

Instantly share code, notes, and snippets.

@nbingham1
Created January 4, 2017 20:30
Show Gist options
  • Save nbingham1/ef46278f056f5785f5c0687156be5506 to your computer and use it in GitHub Desktop.
Save nbingham1/ef46278f056f5785f5c0687156be5506 to your computer and use it in GitHub Desktop.
Re-inject EXIF metadata into photos in a Facebook archive
#!/usr/bin/python
from HTMLParser import HTMLParser
from PIL import Image
import piexif
import datetime
import sys
from fractions import Fraction
def rational(s, max_denominator=10000000):
    """Convert a number to a (numerator, denominator) pair.

    s may be anything fractions.Fraction accepts: an int, a float, or a
    string such as '2.8' or '1/60'.

    max_denominator caps the denominator of the result; the closest
    fraction within that cap is returned. The default matches the value
    this helper has always used.

    Raises ValueError if s is a string that Fraction cannot parse.
    """
    f = Fraction(s).limit_denominator(max_denominator)
    return (f.numerator, f.denominator)
def dms(s):
    """Convert decimal degrees to a (degrees, minutes, seconds) triple.

    s is a number or numeric string. Each element of the returned triple
    is a (numerator, denominator) pair produced by rational().

    Only the magnitude of the coordinate is encoded. EXIF GPS latitude and
    longitude values are unsigned; the hemisphere belongs in the matching
    GPSLatitudeRef / GPSLongitudeRef tag, which the caller must set from
    the sign of s. (Previously a negative input was encoded as
    ``360 - degrees`` with the minutes and seconds left unchanged, which
    is out of range for latitudes and is not the same angle for
    longitudes.)

    Raises ValueError if s cannot be converted to float.
    """
    dd = abs(float(s))
    # Work in arc-seconds so both divmod steps split off whole units.
    minutes, seconds = divmod(dd * 3600, 60)
    degrees, minutes = divmod(minutes, 60)
    return (rational(degrees),
            rational(minutes),
            rational(seconds))
def exif_date(s):
    """Format a Unix timestamp as a 'YYYY:MM:DD HH:MM:SS' string.

    s is converted with int(), so it may be an int or a numeric string.
    The timestamp is interpreted by datetime.fromtimestamp, i.e. in the
    local timezone of the machine running the script.
    """
    moment = datetime.datetime.fromtimestamp(int(s))
    return moment.strftime('%Y:%m:%d %H:%M:%S')
class ArchiveParser(HTMLParser):
    """Parse an archive index page and write its photo metadata as EXIF.

    The parser looks for an element with class "contents". Inside it,
    every element with class "block" is treated as one photo entry: the
    src of an <img> names the image file, and each <th>/<td> pair supplies
    one metadata key and value. When the block's <div> closes, the
    collected values are written into the image file at that path.

    Image paths are used exactly as they appear in the src attribute, so
    they are resolved relative to the current working directory.
    """

    def __init__(self):
        # Explicit base-class call; works on both Python 2 and Python 3.
        HTMLParser.__init__(self)
        self.contents = False      # True while inside the "contents" element
        self.div = 0               # <div> depth counted from the current block
        self.path = ''             # src of the last <img> seen in the block
        self.key = ''              # text of the last <th> seen
        self.isKey = False         # True while inside a <th>
        self.isValue = False       # True while inside a <td>
        self.exif_data = dict()    # key -> value collected for the block

    def handle_starttag(self, tag, attrs):
        """Track entry into the contents section, blocks, images and cells."""
        attr = dict(attrs)
        if not self.contents:
            if attr.get('class') == 'contents':
                self.contents = True
                # Start every contents section from a clean depth; without
                # this a second section would begin at -1 (the value left
                # behind when the previous section closed) and would end
                # prematurely on its first non-block <div>.
                self.div = 0
            return

        if attr.get('class') == 'block':
            self.div = 0
            self.path = ''
        if tag == 'div':
            self.div += 1
        if self.div > 0:
            if tag == 'img':
                if 'src' in attr:
                    self.path = attr['src']
            elif tag == 'th':
                self.isKey = True
            elif tag == 'td':
                self.isValue = True

    def handle_endtag(self, tag):
        """Close cells, and flush the collected metadata when a block ends."""
        if not self.contents:
            return
        if tag == 'div':
            self.div -= 1
            if self.div == -1:
                # The "contents" element itself has just been closed.
                self.contents = False
            elif self.div == 0:
                if self.path:
                    self._write_exif()
                # Reset per-block state whether or not an image was found,
                # so values never leak into the next block.
                self._reset_block()
        elif tag == 'th':
            self.isKey = False
        elif tag == 'td':
            self.isValue = False

    def handle_data(self, data):
        """Record text as a key (inside <th>) or as its value (inside <td>)."""
        if self.isKey:
            self.key = data
        elif self.isValue and self.key:
            self.exif_data[self.key] = data

    def _reset_block(self):
        """Forget everything collected for the block that just closed."""
        self.path = ''
        self.key = ''
        self.isKey = False
        self.isValue = False
        self.exif_data = dict()

    def _write_exif(self):
        """Merge self.exif_data into the EXIF of the image at self.path."""
        img = Image.open(self.path)
        try:
            if 'exif' in img.info:
                exif_dict = piexif.load(img.info["exif"])
            else:
                exif_dict = {'0th': dict(), 'Exif': dict(), 'GPS': dict()}
            # .items() instead of .iteritems() so this runs on Python 3 too.
            for k, v in self.exif_data.items():
                self._apply_field(exif_dict, k, v)
            exif_bytes = piexif.dump(exif_dict)
            # NOTE(review): saving through PIL re-encodes the image;
            # piexif.insert may avoid that for JPEG files - confirm before
            # switching.
            img.save(self.path, exif=exif_bytes)
        finally:
            # Release the file handle even if loading or dumping fails.
            img.close()

    def _apply_field(self, exif_dict, k, v):
        """Store one archive key/value pair under the matching EXIF tag."""
        if k == 'Taken':
            exif_dict['Exif'][piexif.ExifIFD.DateTimeOriginal] = exif_date(v)
        elif k == 'Camera Make':
            exif_dict['0th'][piexif.ImageIFD.Make] = v
        elif k == 'Camera Model':
            exif_dict['0th'][piexif.ImageIFD.Model] = v
        elif k == 'Orientation':
            exif_dict['0th'][piexif.ImageIFD.Orientation] = int(v)
        elif k == 'Exposure':
            exif_dict['Exif'][piexif.ExifIFD.ExposureTime] = rational(v)
        elif k == 'F-Stop':
            exif_dict['Exif'][piexif.ExifIFD.FNumber] = rational(v)
        elif k == 'ISO Speed':
            exif_dict['Exif'][piexif.ExifIFD.ISOSpeed] = int(v)
        elif k == 'Focal Length':
            exif_dict['Exif'][piexif.ExifIFD.FocalLength] = rational(v)
        elif k == 'Latitude':
            self._set_coordinate(exif_dict['GPS'], v,
                                 piexif.GPSIFD.GPSLatitude,
                                 piexif.GPSIFD.GPSLatitudeRef, 'N', 'S')
        elif k == 'Longitude':
            self._set_coordinate(exif_dict['GPS'], v,
                                 piexif.GPSIFD.GPSLongitude,
                                 piexif.GPSIFD.GPSLongitudeRef, 'E', 'W')
        elif k == 'Modified':
            exif_dict['0th'][piexif.ImageIFD.DateTime] = exif_date(v)
        else:
            # Parenthesised single argument prints identically on 2 and 3.
            print('Unhandled Key: ' + k + ': ' + v)

    def _set_coordinate(self, gps, v, tag, ref_tag, positive_ref,
                        negative_ref):
        """Write one GPS coordinate as an unsigned value plus hemisphere ref.

        The sign of v selects the reference letter; only the absolute
        value is handed to dms(), so the result does not depend on how
        dms() treats negative input.
        """
        coordinate = float(v)
        gps[ref_tag] = positive_ref if coordinate >= 0 else negative_ref
        gps[tag] = dms(abs(coordinate))
# Script entry: when a path to a facebook 'index.htm' file is given on the
# command line, parse it so the exif data is re-injected into its photos.
if sys.argv[1:]:
    parser = ArchiveParser()
    index_path = sys.argv[1]
    with open(index_path, 'r') as index_file:
        markup = index_file.read()
    parser.feed(markup)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment