Instantly share code, notes, and snippets.

@AndiH /README.md
Last active Aug 29, 2015

Embed
What would you like to do?
Graph generation for a bunch of pictures

Here are Python scripts that gather, analyze, and plot metadata from a collection of pictures. I wrote them for our scout camp, where five photographers took 1700 pictures in the summer of 2014.

You find a write-up in German here: http://static.andreasherten.de/2014/09/06/campsite-pictures.html

The scripts are a bit messy. I'm sorry for that.

Files

  • picStats.py gathers information about the pictures and saves it into a JSON file
  • createPictureGraphs.py reads in this information, regroups it (per day, per hour of day, per photographer, per tagged person), and uses pyplot (with prettyplotlib) to plot it
  • compareYears.py looks at the JSON files of three different years and displays comparison graphs.
import json
import numpy as np
import prettyplotlib as ppl
import matplotlib.pyplot as plt
import datetime
from collections import defaultdict
import operator
def saveFigures(figure, filename, formats=('png', 'pdf', 'svg')):
    """Save ``figure`` once for every requested file format.

    figure   -- object offering ``savefig(path, bbox_inches=...)``
    filename -- output path without extension
    formats  -- file extensions to write; the default reproduces the
                original png/pdf/svg triple
    """
    for extension in formats:
        figure.savefig(filename + '.' + extension, bbox_inches='tight')
def parseDate(datestring):
    """Turn a 'YYYY-MM-DD HH:MM:SS' string into a datetime.datetime."""
    timestampFormat = "%Y-%m-%d %H:%M:%S"
    return datetime.datetime.strptime(datestring, timestampFormat)
# Load the picture metadata of every camp year from its JSON file.
with open('data-2014.json', 'r') as jsonFile:
    data2014 = json.load(jsonFile)

# The 2012 metadata is spread over two files; merge them into one mapping.
# Entries of the second file win if a key occurs in both.
with open('data-2012-01.json', 'r') as jsonFile:
    data201201 = json.load(jsonFile)
with open('data-2012-02.json', 'r') as jsonFile:
    data201202 = json.load(jsonFile)
data2012 = dict(data201201)
data2012.update(data201202)

with open('data-2010.json', 'r') as jsonFile:
    data2010 = json.load(jsonFile)
def extractDates(data, dates):
    """Count pictures per normalized camp day.

    data  -- mapping whose values carry a 'YYYY-MM-DD HH:MM:SS' timestamp
             string at index 0
    dates -- mapping from day of the month to normalized camp day

    Returns a defaultdict(int) mapping normalized camp day to the number
    of pictures taken on that day. Only the day of the month is looked at;
    a day that is missing from ``dates`` raises KeyError.
    """
    calendarDays = [parseDate(data[name][0]).day for name in data]
    picturesPerDay = defaultdict(int)
    for calendarDay in calendarDays:
        picturesPerDay[dates[calendarDay]] += 1
    return picturesPerDay
# Each mapping translates the day of the month on which a picture was taken
# into a normalized camp day ("Tag"), so that the three camps share one axis.
dates2014 = {day: day - 1 for day in range(1, 18)}  # 1..17 -> Tag 0..16
points2014 = extractDates(data2014, dates2014)
points2014[17] = 0
print(points2014)
dates2012 = {day: day - 2 for day in range(3, 20)}  # 3..19 -> Tag 1..17
points2012 = extractDates(data2012, dates2012)
points2012[0] = 0
print(points2012)
dates2010 = {day: day - 6 for day in range(7, 23)}  # 7..22 -> Tag 1..16
points2010 = extractDates(data2010, dates2010)
points2010[0] = 0
points2010[17] = 0
print(points2010)

NUMBER_OF_DAYS = 18
normalizedDays = np.arange(0, NUMBER_OF_DAYS)
w = 0.3


def countsPerDay(points):
    """Return the picture counts for Tag 0..17 as a list in day order.

    Reading the counts by explicit day (instead of ``points.values()``)
    keeps the bars aligned with the x axis regardless of dict iteration
    order, and a day without any picture yields 0 instead of silently
    shortening the list.
    """
    return [points.get(day, 0) for day in range(NUMBER_OF_DAYS)]


def inPercent(counts):
    """Scale ``counts`` to percentages of their sum (all 0.0 if the sum is 0)."""
    total = sum(counts)
    if total == 0:
        return [0.0 for _ in counts]
    return [float(count) / total * 100 for count in counts]


# (legend label, horizontal offset of the bar group, counts per day)
yearSeries = [
    ("2014", -w, countsPerDay(points2014)),
    ("2012", 0, countsPerDay(points2012)),
    ("2010", w, countsPerDay(points2010)),
]


def plotYearComparison(heightsOf, ylabel):
    """Draw one grouped bar chart with a bar per year and camp day.

    heightsOf -- callable turning a list of counts into the bar heights
    ylabel    -- label of the y axis

    Returns the created (figure, axes) pair.
    """
    figure = plt.figure(figsize=(14, 8))
    axes = figure.add_subplot(111)
    for colorIndex, (label, offset, counts) in enumerate(yearSeries):
        ppl.bar(axes, normalizedDays + offset, heightsOf(counts), width=w, label=label, color=ppl.colors.set2[colorIndex])
    axes.set_xlim(0 - w, NUMBER_OF_DAYS + w)
    axes.set_xticks(normalizedDays)
    axes.set_xticklabels(["Tag " + str(i) for i in range(0, NUMBER_OF_DAYS)])
    axes.set_ylabel(ylabel)
    axes.set_xlabel("Tag des Sommerlagers")
    ppl.legend(axes, loc='upper right')
    return figure, axes


# NOTE(review): the file names say "2014-2012-2012" although the third year
# plotted is 2010; left unchanged because other material may link to them.
fig2, ax2 = plotYearComparison(list, "Anzahl Fotos pro Tag")
saveFigures(fig2, '2014-2012-2012-bilder_pro_tag')
fig3, ax3 = plotYearComparison(inPercent, "Fotos pro Tag in Prozent vom jeweiligen Sommerlager")
saveFigures(fig3, '2014-2012-2012-bilder_pro_tag-normalisiert')
# plt.show()
import json
import numpy as np
import prettyplotlib as ppl
import matplotlib.pyplot as plt
import datetime
from collections import defaultdict
import operator
def saveFigures(figure, filename, formats=('png', 'pdf', 'svg')):
    """Save ``figure`` once for every requested file format.

    figure   -- object offering ``savefig(path, bbox_inches=...)``
    filename -- output path without extension
    formats  -- file extensions to write; the default reproduces the
                original png/pdf/svg triple
    """
    for extension in formats:
        figure.savefig(filename + '.' + extension, bbox_inches='tight')
def parseDate(datestring):
    """Turn a 'YYYY-MM-DD HH:MM:SS' string into a datetime.datetime."""
    timestampFormat = "%Y-%m-%d %H:%M:%S"
    return datetime.datetime.strptime(datestring, timestampFormat)
# Load the 2014 metadata. Every value is indexed below as
# [timestamp string, keyword entries, name tags].
with open('data-2014.json', 'r') as jsonFile:
    data = json.load(jsonFile)
filenames = list(data)
times = [parseDate(data[name][0]) for name in filenames]
# Keyword entries containing 'von' (German for "by", e.g. 'von Andreas')
# identify the photographer.
creators = [
    str(keyword)
    for name in filenames
    for keyword in data[name][1]
    if 'von' in str(keyword)
]
#########################
# Number of Pictures per Day
#########################
# Histogram over the day of the month on which each picture was taken.
daysOfMonth = np.asarray([moment.day for moment in times])
dayLabels = ["Tag " + str(number) for number in range(0, 17)]
fig = plt.figure(figsize=(14, 8))
ax = fig.add_subplot(111)
ppl.hist(ax, daysOfMonth, bins=17, color=ppl.colors.set2[0])
# Calendar days 1..17 are labelled as camp days "Tag 0".."Tag 16".
ax.set_xticks(np.arange(1, 18))
ax.set_xticklabels(dayLabels)
ax.set_xlabel("Tag des Sommerlagers")
ax.set_ylabel("Anzahl Fotos pro Tag")
ax.set_xlim(1, 17)
saveFigures(fig, '2014-anzahl_fotos_pro_tag')
#########################
# Number of Pictures per Day per Photographer
#########################
date_andreas, date_tobias, date_susanne, date_peter, date_matthias = [], [], [], [], []
# Photographer tags in the order they are checked; the first tag found in a
# keyword entry wins, exactly like an if/elif chain.
datesByPhotographerTag = [
    ('von Andreas', date_andreas),
    ('von Tobias', date_tobias),
    ('von Susanne', date_susanne),
    ('von Peter', date_peter),
    ('von Matthias', date_matthias),
]
for f in data:
    # The timestamp belongs to the picture, not to the keyword entry, so it
    # is parsed once per picture instead of once per entry.
    currentDatetime = parseDate(data[f][0])
    for rawentry in data[f][1]:
        entry = str(rawentry)
        for photographerTag, collectedDates in datesByPhotographerTag:
            if photographerTag in entry:
                collectedDates.append(currentDatetime)
                break
def extractDays(a, b, c, d, e):
    """Reduce five sequences of dates to their day-of-month values.

    Each argument is an iterable of objects with a ``day`` attribute.
    Returns a list of five NumPy arrays, one per argument and in argument
    order, holding the ``day`` of every element.
    """
    return [
        np.asarray([moment.day for moment in moments])
        for moments in (a, b, c, d, e)
    ]
# Kept as a plain list of arrays: the photographers took different numbers
# of pictures, and wrapping unequal-length arrays in np.asarray builds a
# ragged array, which NumPy >= 1.24 rejects with a ValueError.
convertedDays = extractDays(date_andreas, date_tobias, date_susanne, date_peter, date_matthias)
photographerLabels = ['von Andreas', 'von Tobias', 'von Susanne', 'von Peter', 'von Matthias']
fig2 = plt.figure(figsize=(14, 8))
ax2 = fig2.add_subplot(111)
# One stacked histogram layer per photographer, in the order of the labels.
ppl.hist(ax2, list(convertedDays), bins=17, stacked=True, label=photographerLabels, color=ppl.colors.set2[0:5])
legend = ax2.legend(loc='upper right')
# Calendar days 1..17 are labelled as camp days "Tag 0".."Tag 16".
ax2.set_xticks(np.arange(1, 18))
ax2.set_xticklabels(["Tag " + str(i) for i in range(0, 17)])
ax2.set_ylabel("Anzahl Fotos pro Tag")
ax2.set_xlabel("Tag des Sommerlagers")
ax2.set_xlim(1, 17)
saveFigures(fig2, '2014-anzahl_fotos_pro_tag_pro_fotograf')
#########################
# Number Pictures per Hour of Day
#########################
def toSeconds(d):
    """Return the time of day of ``d`` as seconds since midnight.

    ``d`` needs ``hour``, ``minute`` and ``second`` attributes.
    """
    secondsPerMinute = 60
    secondsPerHour = 60 * secondsPerMinute
    return d.hour * secondsPerHour + d.minute * secondsPerMinute + d.second
timesInSeconds = np.asarray([toSeconds(day) for day in times])
fig3 = plt.figure(figsize=(14, 8))
ax3 = fig3.add_subplot(111)
# One bin per hour of the day: 24 bins over 0 h .. 24 h. The former
# 23 bins over 0 h .. 23 h dropped every picture taken from 23:00 on,
# because histogram values outside `range` are ignored.
ppl.hist(ax3, timesInSeconds, bins=24, range=[0, 24 * 60 * 60], color=ppl.colors.set2[0])
# One tick per full hour, labelled "0 Uhr" .. "23 Uhr".
ax3.set_xticks(np.arange(0, 24 * 60 * 60, 60 * 60))
ax3.set_xticklabels([str(i) + " Uhr" for i in range(0, 24)])
ax3.set_ylabel("Anzahl Fotos pro Uhrzeit")
ax3.set_xlabel("Uhrzeit")
ax3.set_xlim(0, 24 * 60 * 60)
fig3.autofmt_xdate()
saveFigures(fig3, '2014-anzahl_fotos_pro_uhrzeit')
# plt.show()
#########################
# Number Pictures per Hour of Day per Photographer
#########################
def extractSeconds(a, b, c, d, e):
    """Convert five sequences of dates to seconds since midnight.

    Every element of every argument is passed through ``toSeconds``.
    Returns a list of five NumPy arrays, one per argument and in argument
    order.
    """
    return [
        np.asarray([toSeconds(moment) for moment in moments])
        for moments in (a, b, c, d, e)
    ]
# Kept as a plain list of arrays: the photographers took different numbers
# of pictures, and wrapping unequal-length arrays in np.asarray builds a
# ragged array, which NumPy >= 1.24 rejects with a ValueError.
convertedSeconds = extractSeconds(date_andreas, date_tobias, date_susanne, date_peter, date_matthias)
photographerLabels = ['von Andreas', 'von Tobias', 'von Susanne', 'von Peter', 'von Matthias']
fig4 = plt.figure(figsize=(14, 8))
ax4 = fig4.add_subplot(111)
# One bin per hour of the day: 24 bins over 0 h .. 24 h. The former
# 23 bins over 0 h .. 23 h dropped every picture taken from 23:00 on,
# because histogram values outside `range` are ignored.
ppl.hist(ax4, list(convertedSeconds), bins=24, range=[0, 24 * 60 * 60], stacked=True, label=photographerLabels, color=ppl.colors.set2[0:5])
legend = ax4.legend(loc='upper right')
# One tick per full hour, labelled "0 Uhr" .. "23 Uhr".
ax4.set_xticks(np.arange(0, 24 * 60 * 60, 60 * 60))
ax4.set_xticklabels([str(i) + " Uhr" for i in range(0, 24)])
ax4.set_ylabel("Anzahl Fotos pro Uhrzeit")
ax4.set_xlabel("Uhrzeit")
ax4.set_xlim(0, 24 * 60 * 60)
fig4.autofmt_xdate()
saveFigures(fig4, '2014-anzahl_fotos_pro_uhrzeit_pro_fotograf')
#########################
# Number of Faces Found per Person
#########################
# allNames: person name -> timestamps of all pictures the person is tagged in
# namesAndPhotographers: person name -> photographer tag of each such picture
allNames = defaultdict(list)
namesAndPhotographers = defaultdict(list)
for entry in data:
    nameTags = data[entry][2]
    if nameTags is None:
        continue
    # json.load returns JSON arrays as lists; any other value (a single
    # name stored as a bare string) is treated as one name.
    names = nameTags if isinstance(nameTags, list) else [nameTags]
    # Timestamp and photographer belong to the picture, so they are
    # determined once per picture instead of once per tagged name.
    takenAt = parseDate(data[entry][0])
    photographer = None
    for rawentry in data[entry][1]:
        if 'von' in str(rawentry):
            photographer = str(rawentry)  # the last 'von' entry wins
    for name in names:
        allNames[name].append(takenAt)
        namesAndPhotographers[name].append(photographer)

# Number of tagged pictures per person, most frequent person first.
heights = dict((name, len(foundAt)) for name, foundAt in allNames.items())
sortedHeights = sorted(heights.items(), key=operator.itemgetter(1), reverse=True)
numberOfBars = len(sortedHeights)
fig5 = plt.figure(figsize=(14, 8))
# fig5.subplots_adjust(bottom=0.28)
ax5 = fig5.add_subplot(111)
# fig5.tight_layout()
ppl.bar(ax5, np.arange(numberOfBars), [nameanzahl[1] for nameanzahl in sortedHeights], color=ppl.colors.set2[0])
ax5.set_xticks(np.arange(0.2, numberOfBars + 0.2))
# Only the first word of each name is used as tick label.
ax5.set_xticklabels([nameanzahl[0].split()[0] for nameanzahl in sortedHeights], rotation=90)
ax5.set_xlim(0, numberOfBars)
ax5.set_ylabel("Anzahl der automatisch gefundenen Gesichter")
ax5.set_xlabel("Personennamen")
saveFigures(fig5, '2014-anzahl_personen_gefunden')
# plt.autoscale()
# plt.show()
#########################
# Number of Face being Found per Photographer, Top 7
#########################
def extractSecondsForPersons(a, b, c, d, e, f, g):
    """Convert seven sequences of dates to seconds since midnight.

    Every element of every argument is passed through ``toSeconds``.
    Returns a list of seven NumPy arrays, one per argument and in argument
    order.
    """
    return [
        np.asarray([toSeconds(moment) for moment in moments])
        for moments in (a, b, c, d, e, f, g)
    ]
shownPersons = ['Peter Dick', 'Mark Hermann', 'Jonas Kox', 'Benedikt Dassen', 'Daniel Samer', 'Bastian Soiron', 'Luise Kessler']
# Kept as a plain list of arrays: the persons appear on different numbers of
# pictures, and wrapping unequal-length arrays in np.asarray builds a ragged
# array, which NumPy >= 1.24 rejects with a ValueError.
convertedSecondsForPersons = extractSecondsForPersons(*[allNames[person] for person in shownPersons])
fig6 = plt.figure(figsize=(14, 8))
ax6 = fig6.add_subplot(111)
# One bin per hour of the day: 24 bins over 0 h .. 24 h. The former
# 23 bins over 0 h .. 23 h dropped every picture taken from 23:00 on,
# because histogram values outside `range` are ignored.
# The legend shows only the first word of each name.
ppl.hist(ax6, list(convertedSecondsForPersons), bins=24, range=[0, 24 * 60 * 60], stacked=True, label=[person.split()[0] for person in shownPersons], color=ppl.colors.set2[0:7])
legend = ax6.legend(loc='upper left')
# One tick per full hour, labelled "0 Uhr" .. "23 Uhr".
ax6.set_xticks(np.arange(0, 24 * 60 * 60, 60 * 60))
ax6.set_xticklabels([str(i) + " Uhr" for i in range(0, 24)])
ax6.set_ylabel("Anzahl der automatisch gefundenen Gesichter pro Uhrzeit")
ax6.set_xlabel("Uhrzeit")
ax6.set_xlim(0, 24 * 60 * 60)
fig6.autofmt_xdate()
saveFigures(fig6, '2014-anzahl_personen_gefunden_top7_pro_uhrzeit')
plt.show()
import pyexif
from os import listdir
from os.path import isfile, join
import json
# Collect the paths of all JPEG files in the "#merged" directory.
# endswith() takes a tuple of suffixes, so the isfile() check now applies to
# both spellings of the extension. Before, `A and B or C` let any directory
# entry ending in "JPG" through without the isfile() check.
pictureFiles = [
    join("#merged/", f)
    for f in listdir("#merged")
    if f.endswith(("jpg", "JPG")) and isfile(join("#merged/", f))
]

# Maps picture path -> [timestamp string, keywords, tagged person names].
allTheInfo = dict()
for pictureFile in pictureFiles:
    photo = pyexif.ExifEditor(pictureFile)
    keywords = photo.getKeywords()
    # Named so that it does not shadow the stdlib module name `datetime`.
    originalDateTime = photo.getOriginalDateTime()
    persons = photo.getTag("RegionName")
    allTheInfo[pictureFile] = [originalDateTime.strftime("%Y-%m-%d %H:%M:%S"), keywords, persons]

# json.dump writes text, so the file is opened in text mode.
with open('data.json', 'w') as f:
    json.dump(allTheInfo, f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment