|
############################################################################## |
|
# Copyright (c) 2015-2016 BigML, Inc |
|
# |
|
# Permission is hereby granted, free of charge, to any person obtaining |
|
# a copy of this software and associated documentation files (the |
|
# "Software"), to deal in the Software without restriction, including |
|
# without limitation the rights to use, copy, modify, merge, publish, |
|
# distribute, sublicense, and/or sell copies of the Software, and to |
|
# permit persons to whom the Software is furnished to do so, subject to |
|
# the following conditions: |
|
# |
|
# The above copyright notice and this permission notice shall be |
|
# included in all copies or substantial portions of the Software. |
|
# |
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
|
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
############################################################################## |
|
|
|
import csv |
|
import sys |
|
import StringIO |
|
import argparse |
|
|
|
from bigml.api import BigML |
|
from bigml.model import Model |
|
from bigml.fields import Fields |
|
|
|
|
|
|
|
#!/usr/bin/env python |
|
# -*- coding: utf-8 -*-
|
|
|
"""Application Options: options published so that the user can change them |
|
in the command line |
|
|
|
""" |
|
|
|
# One-line description of the script, used as the parser's description.
SUMMARY = ("Example of script which predicts locally. It receives data from"
           " stdin and writes predictions to stdout")

# Command-line options keyed by flag. Each value holds the keyword arguments
# that are passed to `parser.add_argument` for that flag.
OPTIONS = {
    # model ID to be used
    '--model': {
        # A real boolean: argparse only checks truthiness, so the string
        # 'True' worked by accident (and 'False' would have meant required).
        'required': True,
        'action': 'store',
        'dest': 'model_id',
        'help': "model/id"},

    # delimiter in the file
    '--delimiter': {
        'action': 'store',
        'dest': 'delimiter',
        'default': ',',
        'help': "CSV delimiter"},

    # Fields to be added to the prediction
    '--prediction-fields': {
        'action': 'store',
        'dest': 'prediction_fields',
        'help': ("Comma-separated list of input fields"
                 " (predictors) to be added to the prediction.")},
}
|
|
|
|
|
def parser_add_options(parser, options):
    """Registers every option described in `options` on `parser`.

    `options` maps each flag (e.g. '--model') to the keyword arguments
    handed to `parser.add_argument`. Flags are added in sorted order.

    """
    for flag in sorted(options):
        parser.add_argument(flag, **options[flag])
|
|
|
|
|
def create_parser(user_options):
    """Builds the argument parser for the script.

    The parser uses SUMMARY as its description and accepts every option
    found in `user_options`, a mapping of flag to `add_argument` keyword
    arguments. The configured parser is returned; nothing is parsed here.

    """
    parser_settings = {"description": SUMMARY, "epilog": "BigML, Inc"}
    cli_parser = argparse.ArgumentParser(**parser_settings)
    parser_add_options(cli_parser, user_options)
    return cli_parser
|
|
|
|
|
def _output_fieldnames(prediction_fields, categories):
    """Returns the output header: the echoed input fields followed by a
    confidence and a probability column for every category.

    """
    fieldnames = list(prediction_fields)
    for category in categories:
        fieldnames.extend(['%s confidence' % category,
                           '%s probability' % category])
    return fieldnames


def _output_row(input_data, predictions, prediction_fields, categories):
    """Builds the output row for a single input row.

    Echoes the requested input fields ("-" when the input lacks one), adds
    the confidence and probability of each returned prediction and fills
    with 0 the categories that got no prediction.

    """
    row = {}
    for field in prediction_fields:
        row[field] = input_data.get(field, "-")
    for prediction in predictions:
        category = prediction["prediction"]
        row["%s confidence" % category] = prediction['confidence']
        row["%s probability" % category] = prediction['probability']
    for category in categories:
        # categories absent from the predictions still need their columns
        if ("%s confidence" % category) not in row:
            row["%s confidence" % category] = 0
            row["%s probability" % category] = 0
    return row


def main(args=None):
    """Reads CSV rows from stdin, predicts locally and writes CSV to stdout.

    `args` is the list of command-line arguments to parse. When omitted,
    `sys.argv[1:]` is read at call time (a default of `sys.argv[1:]` in the
    signature would be frozen when the module is imported).

    The first input line is taken as the header. The output has one column
    per field given in --prediction-fields plus a "<category> confidence"
    and a "<category> probability" column per objective field category.

    """
    if args is None:
        args = sys.argv[1:]

    # If credentials are properly set in environment variables, there's no
    # need to explicitly create the api object. Otherwise, use next code to
    # set them:
    #   api = BigML("username", "api-key")
    #   local_model = Model('model/53c83a8f48d9b6322800007d', api=api)

    # transforming the parsed args object to a dictionary
    context = vars(create_parser(OPTIONS).parse_args(args))

    # Use the user-given local model
    local_model = Model(context['model_id'])

    # Read the CSV as a dictionary assuming first line has headers. Rows are
    # streamed straight from stdin instead of first copying the whole input
    # into an in-memory buffer.
    reader = csv.DictReader(sys.stdin, delimiter=context['delimiter'])

    # List of fields to be added to the prediction
    prediction_fields = []
    if context['prediction_fields']:
        prediction_fields = context['prediction_fields'].split(",")

    # retrieving the classes available for the prediction
    objective_field = local_model.fields[local_model.objective_id]
    categories = [element[0] for element
                  in objective_field['summary']['categories']]

    # We will write to stdout, but can write to any file-like object
    writer = csv.DictWriter(
        sys.stdout,
        fieldnames=_output_fieldnames(prediction_fields, categories))
    writer.writeheader()

    # Settings for predictions format
    # NOTE(review): presumably makes predict return one entry per category,
    # each with its confidence -- confirm against the bigml bindings in use.
    kwargs = {"add_confidence": True, "multiple": "all"}

    # Predictions
    for input_data in reader:
        predictions = local_model.predict(input_data, **kwargs)
        writer.writerow(_output_row(input_data, predictions,
                                    prediction_fields, categories))
|
|
|
|
|
# Run the prediction only when the file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()