Skip to content

Instantly share code, notes, and snippets.

@jpf
Created June 23, 2019 23:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpf/9cdc33632d83b3ce7e0361a1d732a55b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import datetime
import os
import shelve
import requests
class CustomVisionResult:
    """Wrap a decoded prediction payload and expose per-tag probabilities.

    The payload's ``predictions`` list is flattened into ``probabilities``
    (tag name -> probability). Each tag can also be read as an attribute,
    e.g. ``result.krazy``.
    """

    def __init__(self, payload):
        """Build the result from a decoded prediction response.

        Args:
            payload: Mapping with the keys ``id``, ``project``, ``iteration``,
                ``created`` and ``predictions``; every entry of
                ``predictions`` needs ``tagName`` and ``probability``.

        Raises:
            KeyError: If any of those keys is missing from ``payload``.
        """
        self._id = payload['id']
        self._project = payload['project']
        self._iteration = payload['iteration']
        self._created = payload['created']
        # If a tag name appears more than once, the last entry wins.
        self.probabilities = {
            prediction['tagName']: prediction['probability']
            for prediction in payload['predictions']
        }

    def __getattr__(self, key):
        """Return the probability stored for the tag named ``key``.

        Python only calls this when normal attribute lookup has failed.

        Raises:
            AttributeError: If no tag with that name is present. Raising
                AttributeError (not KeyError) keeps ``hasattr`` and
                ``getattr(obj, name, default)`` working as expected.
        """
        # Go through __dict__: reading self.probabilities here would re-enter
        # __getattr__ endlessly when the attribute has not been set yet
        # (for example on an instance created without running __init__).
        probabilities = self.__dict__.get('probabilities', {})
        try:
            return probabilities[key]
        except KeyError:
            raise AttributeError(
                '{!r} has no attribute or tag {!r}'.format(type(self).__name__, key))
class CustomVision:
    """Small client that classifies image URLs with a Custom Vision project.

    Results are memoised in a caller-supplied mapping (for example a
    ``shelve`` shelf) keyed by the image URL, so each URL is sent to the
    service at most once.
    """

    def __init__(self, prediction_key=None, prediction_endpoint=None,
                 project_id=None, iteration=None, cache=None):
        """Configure the HTTP session and the result cache.

        Args:
            prediction_key: Value sent in the ``Prediction-Key`` header.
            prediction_endpoint: Base URL of the prediction API. It is
                concatenated directly with ``project_id``, so it must end
                with ``/``.
            project_id: Project identifier appended to the endpoint.
            iteration: Iteration name placed in the request path.
            cache: Mutable mapping used to store raw responses by URL. A
                fresh dict is created when omitted.
        """
        self.base_url = prediction_endpoint + project_id
        self.iteration = iteration
        # Compare against None instead of using truthiness: an empty shelf or
        # dict is falsy but is still the cache the caller wants us to fill.
        self.cache = {} if cache is None else cache
        self.session = requests.Session()
        self.session.headers.update({'Prediction-Key': prediction_key})

    def classify(self, thing):
        """Classify the image at URL ``thing`` and return its result.

        The original also carried a permanently disabled branch that posted
        to the ``/image`` upload endpoint; it was unreachable and is omitted.

        Args:
            thing: URL of the image to classify; also used as the cache key.

        Returns:
            CustomVisionResult: Wrapper around the cached or fresh payload.

        Raises:
            requests.HTTPError: If the service answers with an error status.
                Failing here keeps error bodies out of the cache.
        """
        url = '{}/classify/iterations/{}/url'.format(self.base_url, self.iteration)
        if thing in self.cache:
            result = self.cache[thing]
        else:
            print('Classifying: {}'.format(thing))
            response = self.session.post(url, json={'Url': thing})
            response.raise_for_status()
            result = response.json()
            if result:
                self.cache[thing] = result
        return CustomVisionResult(result)
# Directory that receives the downloaded thumbnails.
thumbnail_dir = 'thumbnails'
# Create the directory and tolerate "already exists" instead of checking
# first: the check-then-create form can fail if the directory appears between
# the two calls. Any other failure (e.g. the path is a regular file) is
# re-raised.
try:
    os.mkdir(thumbnail_dir)
except OSError:
    if not os.path.isdir(thumbnail_dir):
        raise
# Plain-HTTP requests made through `session` are routed via this local proxy.
# NOTE(review): presumably a local caching proxy -- confirm it is running
# before starting the script.
proxies = {'http': 'http://localhost:3030'}
session = requests.Session()
session.proxies = proxies
# JSON description of the newspaper title whose issues are scanned.
start_url = 'http://chroniclingamerica.loc.gov/lccn/sn84026749.json'
def loc_url_to_thumbnail(url):
    """Return the thumbnail image URL that belongs to a page's JSON URL.

    Every ``.json`` occurrence is dropped from ``url`` and the fixed
    image-region path is appended to what remains.
    """
    thumbnail_suffix = '/image_92x120_from_0,0_to_5698,6998.jpg'
    page_url = url.replace('.json', '')
    return page_url + thumbnail_suffix
def loc_url_to_filename(url):
    """Turn a page's JSON URL into a flat ``.jpeg`` file name.

    ``.json`` and the URL scheme are removed, every ``/`` becomes ``-`` and
    ``.jpeg`` is appended.

    Args:
        url: Page URL, using either ``http://`` or ``https://``.

    Returns:
        str: File name without any directory component.
    """
    stem = url.replace('.json', '')
    # Strip https:// as well as http://; otherwise an https URL would leave
    # "https:" (with a colon) at the start of the file name.
    for scheme in ('https://', 'http://'):
        stem = stem.replace(scheme, '')
    return stem.replace('/', '-') + '.jpeg'
def save_as_filename(url, filename):
    """Download ``url`` into ``filename`` unless that file already exists.

    Args:
        url: Address fetched through the module-level ``session``.
        filename: Destination path; an existing file is left untouched.

    Returns:
        None. A failed download is reported on stdout and nothing is written,
        so a later run can retry it.
    """
    if os.path.isfile(filename):
        return
    print('Saving: {}'.format(filename))
    response = session.get(url)
    # Without this check an HTTP error page would be stored as if it were the
    # image, and the existence check above would stop it ever being replaced.
    if not response.ok:
        print('Skipping {}: HTTP {}'.format(url, response.status_code))
        return
    with open(filename, 'wb') as output:
        output.write(response.content)
# Crawl the newspaper's Sunday issues from start_date onwards, classify every
# page thumbnail and keep the ones the model tags as "krazy".
iteration = 'Iteration6'
# TODO: Move this to CustomVision class
cache_file = 'customvision-{}.cache'.format(iteration)
cache = shelve.open(cache_file, writeback=True)
try:
    cs = CustomVision(prediction_key='<YOUR PREDICTION KEY HERE>',
                      prediction_endpoint='https://westus2.api.cognitive.microsoft.com/customvision/v3.0/Prediction/',
                      project_id='<YOUR PROJECT ID HERE>',
                      iteration=iteration,
                      cache=cache)
    start_date = '1916-04-23'
    newspaper = session.get(start_url).json()
    for issue in newspaper['issues']:
        # Dates are 'YYYY-MM-DD' strings, so plain string comparison orders
        # them chronologically.
        if issue['date_issued'] < start_date:
            continue
        issue_date = datetime.datetime.strptime(issue['date_issued'], '%Y-%m-%d')
        # weekday() does not depend on the locale (unlike strftime('%A'));
        # Monday is 0, so 6 is Sunday.
        if issue_date.weekday() != 6:
            continue
        pages = session.get(issue['url']).json()['pages']
        for page in pages:
            url = page['url']
            filename = os.path.join(thumbnail_dir, loc_url_to_filename(url))
            thumbnail = loc_url_to_thumbnail(url)
            try:
                prediction = cs.classify(thumbnail)
            except Exception as e:
                # Best effort: report the page that failed and move on.
                print('Failed to classify: {}'.format(thumbnail))
                print(str(e))
                continue
            # Note: This assumes that the tag you're looking for is named "krazy"
            if prediction.krazy < 0.7:
                continue
            save_as_filename(thumbnail, filename)
        # Flush after each issue so an interrupted run keeps its results.
        cache.sync()
finally:
    # Always close the shelf so pending writeback entries reach the disk.
    cache.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment