Skip to content

Instantly share code, notes, and snippets.

@keithweaver
Last active May 17, 2017 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save keithweaver/bf460c5ab749fa829ac00ac9c0a6ec92 to your computer and use it in GitHub Desktop.
Save keithweaver/bf460c5ab749fa829ac00ac9c0a6ec92 to your computer and use it in GitHub Desktop.
Upload a whole folder of images to the Haar Cascade Market via API
# This file will find keywords in the file name to assign tags for the
# Cascade Market. Add keywords to the image file name in the following
# format "..._keyword_keyword2.jpg". This is explained more below.
#
#
# Tips:
# - Keep it consistent. I made the mistake of labeling a mix of longboard and
# longboarder to represent the same thing. As a result, when I train I have
# to make sure the other is not in the negative set or it will affect my
# cascade.
# - Typos - Look at your file names with fresh eyes
# - Don't use my exact naming convention. I should have used _ to separate
# keywords and - to stand for a space inside a keyword. A file could then be
# named light-blue-car_green-car, which would have made things easier.
# - Tag as much as possible - worst case scenario, you do not train based on
# that tag. It gives you better results and a better understanding of what's
# in the images.
# Imports for getting the list of files.
from os import listdir
from os.path import isfile, join
# Imports for getting file info
import os
# Server Call Related
import json
import requests
# Your Cascade Market API key; it is sent with every upload request.
API_KEY = 'YOUR_API_KEY'

# Name of this script. When uploading from the current directory the script
# itself shows up in the file listing and must not be uploaded. The image
# extension check would skip it as well; this is an additional safeguard.
THIS_PYTHON_FILE_NAME = 'upload-files.py'

# File extensions that are treated as images.
IMAGE_EXTENSIONS = [
    '.jpg',
    '.png',
]

# Where the images currently live. The script was simply placed inside the
# images folder, hence the current directory.
DIRECTORY_WITH_IMAGES = './'

# Tags applied to every uploaded image. '05-17-2017' is the date of this
# upload so the images can be linked back to it. If you upload often, use a
# date and time instead.
KEYWORDS_FOR_ALL = [
    'driving-cascade',
    'rover-images',
    '05-17-2017',
]

# Every keyword that may appear in an image file name. Files are expected to
# be named "file#_" followed by the things visible in the image, for example
# "file12_stop_sign_black_car_parked_car.jpg".
#
# Order matters: when a specific keyword contains a more general one, list the
# specific keyword first. With ['light_blue_car', 'blue_car'] the file
# 'file_light_blue_car.jpg' is tagged with light_blue_car only.
KEYWORDS = [
    'stop_sign',
    'black_car',
    'person',
    'parked_car',
    'longboarder',
    'longboard',
    'crosswalk',
    'white_car',
    'light_blue_car',
    'blue_car',
    'speed_sign',
    'no_parking',
    'green_car',
    'beige_car',
    'red_car',
    'silver_car',
    'trafficlight',
    'pot_holes',
    'car',
]

# Cascade Market endpoint that receives the image uploads.
URL = 'https://keithweaver.ca/rest/cascades/v1/upload/image'
# Collect the name of every entry in DIRECTORY_WITH_IMAGES that is a file;
# sub-directories are left out.
allFiles = [
    entry
    for entry in listdir(DIRECTORY_WITH_IMAGES)
    if isfile(join(DIRECTORY_WITH_IMAGES, entry))
]
# allFiles is now a list of file-name strings.
# General pseudocode for what's happening:
# Loop through each file
#   Grab name of file
#   Determine tags
#     Remove extension
#     Look for keywords
#     Remove keyword from file name
#     Add KEYWORDS_FOR_ALL
#   Stringify the list of tags
#   Upload to Cascade Market
# Every upload that fails is recorded here as
# {'fileName': ..., 'reason': ...} so the failures can be reported once the
# loop has finished.
filesWithServerIssues = []


def _tags_for_file(fileStem):
    """Return the list of tags to send for one image.

    fileStem is the image file name without its extension. Each entry of
    KEYWORDS found in it becomes a tag, followed by every entry of
    KEYWORDS_FOR_ALL that is not already present.
    """
    tags = []
    remaining = fileStem
    for keyword in KEYWORDS:
        if keyword in remaining and keyword not in tags:
            tags.append(keyword)
            # Remove every occurrence (no count limit) so that a broader
            # keyword later in KEYWORDS, such as blue_car, is not found
            # inside a more specific one, such as light_blue_car, that was
            # already consumed.
            remaining = remaining.replace(keyword, '')
    for keyword in KEYWORDS_FOR_ALL:
        if keyword not in tags:
            tags.append(keyword)
    return tags


def _describe_failure(result):
    """Return None if the server accepted the upload.

    Otherwise return a (console message, reason) tuple. result is the
    response object returned by requests.post.
    """
    try:
        response = result.json()
    except ValueError:
        # The body was not JSON; fall through and report the status code
        # instead of aborting the whole run.
        response = None
    if not isinstance(response, dict):
        response = {}

    if result.status_code == 200:
        # '== True' is kept on purpose so the accepted values stay the same
        # as before.
        if response.get('success') == True:
            return None
        consoleMessage = 'Error with params on server call.'
    else:
        consoleMessage = 'Error with server call.'

    reason = response.get('message')
    if reason is None:
        reason = 'HTTP ' + str(result.status_code) + ' response without a message'
    return consoleMessage, reason


for fileName in allFiles:
    print('Preparing ... ' + fileName)

    if fileName == THIS_PYTHON_FILE_NAME:
        print('Error: [' + fileName + '] is the current script file.')
        continue

    fileStem, fileExtension = os.path.splitext(fileName)
    if fileExtension not in IMAGE_EXTENSIONS:
        print('Error: [' + fileName + '] does not have a supported file extension.')
        continue

    tagsForUpload = _tags_for_file(fileStem)
    values = {
        'apikey': API_KEY,
        # Compact separators give the same '["a","b"]' layout the script has
        # always sent, while json takes care of escaping.
        'tags': json.dumps(tagsForUpload, separators=(',', ':')),
    }

    filePath = join(DIRECTORY_WITH_IMAGES, fileName)
    try:
        # The with-block closes the image as soon as the request is done.
        with open(filePath, 'rb') as imageFile:
            result = requests.post(URL, files={'file': imageFile}, data=values)
    except requests.exceptions.RequestException as err:
        # A network failure on one image must not stop the remaining uploads.
        failure = ('Error with server call.', str(err))
    else:
        failure = _describe_failure(result)

    if failure is None:
        print('Added ' + fileName)
    else:
        consoleMessage, reason = failure
        print(consoleMessage)
        filesWithServerIssues.append({'fileName': fileName, 'reason': reason})
# Write the collected upload failures to a JSON report. Nothing is written
# when every upload succeeded.
if filesWithServerIssues:
    errorReport = {'issues': filesWithServerIssues}
    errorReportPath = './error-upload.json'
    with open(errorReportPath, 'w') as reportFile:
        json.dump(errorReport, reportFile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment