Created
January 4, 2017 20:30
-
-
Save nbingham1/ef46278f056f5785f5c0687156be5506 to your computer and use it in GitHub Desktop.
Re-inject EXIF metadata into photos in a Facebook archive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
import datetime
import sys
from fractions import Fraction

try:
    from HTMLParser import HTMLParser  # Python 2
except ImportError:
    from html.parser import HTMLParser  # Python 3

import piexif
from PIL import Image
def rational(s):
    """Return *s* as a ``(numerator, denominator)`` pair of integers.

    *s* may be anything ``fractions.Fraction`` accepts: a number or a
    string such as ``"2.8"`` or ``"1/250"``.  The value is approximated by
    the closest fraction whose denominator does not exceed 10,000,000.

    Raises:
        ValueError: if *s* is a string that is not a valid number.
        ZeroDivisionError: if *s* is a string fraction with a zero
            denominator (for example ``"1/0"``).
    """
    f = Fraction(s).limit_denominator(10000000)
    return (f.numerator, f.denominator)
def dms(s):
    """Convert decimal degrees to ``(degrees, minutes, seconds)`` rationals.

    *s* is a number or numeric string holding a coordinate in decimal
    degrees.  Each of the three returned components is a
    ``(numerator, denominator)`` pair produced by ``rational``.

    The result always describes the magnitude of the coordinate.  EXIF
    stores GPS coordinates as unsigned values and records the hemisphere
    separately (GPSLatitudeRef / GPSLongitudeRef), so the sign of *s* must
    be written to the matching Ref tag by the caller.  The previous
    behaviour of replacing the degrees with ``360 - degrees`` for negative
    input (while leaving minutes and seconds untouched) produced a value
    that did not correspond to the original coordinate.

    Raises:
        ValueError: if *s* cannot be converted to ``float``.
    """
    magnitude = abs(float(s))
    # Work in arc-seconds, then peel off minutes and degrees.
    minutes, seconds = divmod(magnitude * 3600, 60)
    degrees, minutes = divmod(minutes, 60)
    return (rational(degrees),
            rational(minutes),
            rational(seconds))
def exif_date(s):
    """Format a Unix timestamp as an EXIF date string.

    *s* is an integer, or a string containing one, giving seconds since the
    epoch.  It is converted with ``datetime.fromtimestamp`` (i.e. to the
    local time zone of the machine running the script) and rendered as
    ``YYYY:MM:DD HH:MM:SS``.

    Raises:
        ValueError: if *s* cannot be converted to ``int``.
    """
    return datetime.datetime.fromtimestamp(
        int(s)).strftime('%Y:%m:%d %H:%M:%S')
class ArchiveParser(HTMLParser):
    """Parse a Facebook archive page and re-inject EXIF data into its photos.

    The parser waits for an element whose class is ``contents``.  Inside
    it, every element whose class is ``block`` describes one photo: the
    ``src`` of an ``<img>`` gives the image path and each ``<th>``/``<td>``
    pair gives one metadata key and value.  When the block's outermost
    ``<div>`` closes, the collected metadata is written into the image file
    in place.

    Image paths are opened exactly as they appear in ``src``, so relative
    paths are resolved against the current working directory.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.contents = False    # True while inside the 'contents' element
        self.div = 0             # <div> depth; reset to 0 at each block start
        self.path = ''           # src of the image in the current block
        self.key = ''            # text of the most recent <th>
        self.isKey = False       # currently inside a <th>
        self.isValue = False     # currently inside a <td>
        self.exif_data = dict()  # metadata collected for the current block

    def handle_starttag(self, tag, attrs):
        """Track entry into the contents element, blocks, images and cells."""
        attr = dict(attrs)
        if not self.contents:
            if attr.get('class') == 'contents':
                self.contents = True
            return

        if attr.get('class') == 'block':
            # A new photo block starts: restart the depth count and forget
            # the previous image path.
            self.div = 0
            self.path = ''
        if tag == 'div':
            self.div += 1
        if self.div > 0:
            if tag == 'img':
                if 'src' in attr:
                    self.path = attr['src']
            elif tag == 'th':
                self.isKey = True
            elif tag == 'td':
                self.isValue = True

    def handle_endtag(self, tag):
        """Track closing tags and write the EXIF data when a block ends."""
        if not self.contents:
            return

        if tag == 'div':
            self.div -= 1
            if self.div == -1:
                # The 'contents' element itself has been closed.
                self.contents = False
            elif self.div == 0:
                # The outermost <div> of a block has been closed.
                if self.path:
                    self._write_exif()
                # Always reset, so metadata from a block without an image
                # can never leak into the next block.
                self._reset_block()
        elif tag == 'th':
            self.isKey = False
        elif tag == 'td':
            self.isValue = False

    def handle_data(self, data):
        """Record header text as the current key and cell text as its value."""
        if self.isKey:
            self.key = data
        elif self.isValue and self.key:
            self.exif_data[self.key] = data

    def _reset_block(self):
        """Clear all per-block state."""
        self.path = ''
        self.key = ''
        self.isKey = False
        self.isValue = False
        self.exif_data = dict()

    def _write_exif(self):
        """Merge the collected metadata into the image at ``self.path``."""
        img = Image.open(self.path)
        if 'exif' in img.info:
            exif_dict = piexif.load(img.info["exif"])
        else:
            exif_dict = {'0th': dict(), 'Exif': dict(), 'GPS': dict()}

        for k, v in self.exif_data.items():
            try:
                self._apply_field(exif_dict, k, v)
            except (ValueError, ZeroDivisionError, OverflowError) as err:
                # One malformed value must not abort the whole archive.
                print('Skipping ' + k + ': ' + v + ' (' + str(err) + ')')

        exif_bytes = piexif.dump(exif_dict)
        img.save(self.path, exif=exif_bytes)

    def _apply_field(self, exif_dict, k, v):
        """Store one archive key/value pair under the matching EXIF tag."""
        if k == 'Taken':
            exif_dict['Exif'][piexif.ExifIFD.DateTimeOriginal] = exif_date(v)
        elif k == 'Camera Make':
            exif_dict['0th'][piexif.ImageIFD.Make] = v
        elif k == 'Camera Model':
            exif_dict['0th'][piexif.ImageIFD.Model] = v
        elif k == 'Orientation':
            exif_dict['0th'][piexif.ImageIFD.Orientation] = int(v)
        elif k == 'Exposure':
            exif_dict['Exif'][piexif.ExifIFD.ExposureTime] = rational(v)
        elif k == 'F-Stop':
            exif_dict['Exif'][piexif.ExifIFD.FNumber] = rational(v)
        elif k == 'ISO Speed':
            # NOTE(review): many readers display ISOSpeedRatings rather than
            # ISOSpeed -- confirm which tag is intended here.
            exif_dict['Exif'][piexif.ExifIFD.ISOSpeed] = int(v)
        elif k == 'Focal Length':
            exif_dict['Exif'][piexif.ExifIFD.FocalLength] = rational(v)
        elif k == 'Latitude':
            self._set_coordinate(exif_dict['GPS'], v,
                                 piexif.GPSIFD.GPSLatitude,
                                 piexif.GPSIFD.GPSLatitudeRef, 'N', 'S')
        elif k == 'Longitude':
            self._set_coordinate(exif_dict['GPS'], v,
                                 piexif.GPSIFD.GPSLongitude,
                                 piexif.GPSIFD.GPSLongitudeRef, 'E', 'W')
        elif k == 'Modified':
            exif_dict['0th'][piexif.ImageIFD.DateTime] = exif_date(v)
        else:
            print('Unhandled Key: ' + k + ': ' + v)

    @staticmethod
    def _set_coordinate(gps, v, value_tag, ref_tag, positive_ref, negative_ref):
        """Write a signed decimal coordinate as EXIF magnitude plus Ref tag."""
        coordinate = float(v)
        # EXIF keeps the magnitude unsigned; the sign goes in the Ref tag.
        components = dms(abs(coordinate))
        gps[ref_tag] = positive_ref if coordinate >= 0 else negative_ref
        gps[value_tag] = components
# Parse a Facebook 'index.htm' file (given as the first command-line
# argument) and re-inject the EXIF data into the photos it references.
# Without an argument the script does nothing.
if len(sys.argv) > 1:
    parser = ArchiveParser()
    with open(sys.argv[1], 'r') as fp:
        content = fp.read()
    parser.feed(content)
    # Force processing of any data still buffered at end of input.
    parser.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment