Using HTM for time-series predictions using multiple inputs
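A minimal end-to-end NuPIC sketch: swarm over a multi-column CSV to find model parameters, then run the resulting HTM model over the same data to get multi-step predictions and anomaly scores. It assumes NuPIC and pandas are installed and that the input CSV uses NuPIC's three-row header (field names, field types, flags).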
import pandas as pd
from nupic.data.inference_shifter import InferenceShifter
from nupic.frameworks.opf.modelfactory import ModelFactory  # nupic.frameworks.opf.model_factory in NuPIC >= 1.0
from nupic.swarming import permutations_runner
input_file_name = "/tmp/input.csv"
input_file_index = 3
# Read the CSV up front so we can size the stream and enumerate its columns.
df = pd.read_csv(input_file_name, sep=',', error_bad_lines=False)
input_file_num_rows = len(df)
output_column = "col2"
swarm_size = "medium"  # one of "small", "medium", or "large"
cpu_count = 4  # change this to the number of CPU cores on your system
SWARM_CONFIG = {
    "includedFields": [],  # populated below from the CSV's columns
    "streamDef": {
        "info": output_column,
        "version": 1,
        "streams": [
            {
                "info": output_column,
                "source": "file://" + input_file_name,
                "columns": ["*"],
                "last_record": input_file_num_rows
            }
        ]
    },
    "inferenceType": "TemporalAnomaly",
    "inferenceArgs": {
        "predictionSteps": [1, 2, 3, 4, 5],
        "predictedField": output_column
    },
    "iterationCount": -1,  # -1 means swarm over every record in the stream
    "swarmSize": swarm_size
}
# Re-apply a few parameters to the swarm's configuration. These assignments
# duplicate values already set in the dict literal above; I don't remember
# now why I did it this way, but it saved me time on something. :-)
SWARM_CONFIG["streamDef"]["info"] = output_column
SWARM_CONFIG["streamDef"]["streams"][0]["info"] = output_column
SWARM_CONFIG["streamDef"]["streams"][0]["source"] = "file://" + input_file_name
SWARM_CONFIG["streamDef"]["streams"][0]["last_record"] = input_file_num_rows
SWARM_CONFIG["inferenceArgs"]["predictedField"] = output_column
# Add every CSV column as an includedField in SWARM_CONFIG. With NuPIC's
# three-row CSV header (names, then field types, then flags), the first
# data row pandas reads holds each column's field type.
for i in range(len(df.columns)):
    SWARM_CONFIG["includedFields"].append({
        "fieldName": df.columns[i],
        "fieldType": df.iloc[0, i]
    })
# Rebuild the model by swarming over the data. runWithConfig blocks until
# the swarm finishes and returns the best model parameters it found.
model_params = permutations_runner.runWithConfig(
    SWARM_CONFIG, {'maxWorkers': cpu_count, 'overwrite': True})
# Create a new model from those parameters and enable inferential predictions.
model = ModelFactory.create(model_params)
model.enableInference({"predictedField": output_column})
shifter = InferenceShifter()
# Accumulate actual and predicted values so we can compute statistics later.
actual_values = []
predicted_values = []
results = None
# Make the predictions and do something with the results. The first CSV
# column is assumed to hold the timestamp; the two NuPIC header rows that
# pandas reads as data (field types and flags) are skipped.
timestamp_field = df.columns[0]
for _, row_data in df.iloc[2:].iterrows():
    results = model.run({
        timestamp_field: pd.to_datetime(row_data[timestamp_field]).to_pydatetime(),
        output_column: float(row_data[output_column])
    })
    # Shift inferences so each prediction lines up with the row it predicts.
    last_result = shifter.shift(results)
    anomaly_score = last_result.inferences["anomalyScore"]
    predicted_value = last_result.inferences['multiStepBestPredictions'][1]
    last_inferences = last_result.inferences['multiStepPredictions'][1]
    actual_values.append(float(row_data[output_column]))
    predicted_values.append(predicted_value)
# ... insert code here to calculate and store statistics using the predicted values
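# A minimal sketch of one such statistic: mean absolute error over the rows
# where a prediction exists (the shifter yields None until the model has
# produced its first prediction). MAE is illustrative, not the only option.
paired = [(a, p) for a, p in zip(actual_values, predicted_values) if p is not None]
if paired:
    mae = sum(abs(a - p) for a, p in paired) / len(paired)
    print("Mean absolute error over %d predictions: %f" % (len(paired), mae))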