Using HTM for time-series predictions using multiple inputs
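A minimal end-to-end NuPIC sketch: swarm over a multi-column CSV to find model parameters, then run the resulting HTM model over the same data to get multi-step predictions and anomaly scores. It assumes NuPIC and pandas are installed and that the input CSV uses NuPIC's three-row header (field names, field types, flags).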
import pandas as pd
from nupic.data.inference_shifter import InferenceShifter
from nupic.frameworks.opf.modelfactory import ModelFactory  # nupic.frameworks.opf.model_factory in NuPIC >= 1.0
from nupic.swarming import permutations_runner
input_file_name = "/tmp/input.csv"
input_file_index = 3
# Read the CSV up front so we can size the stream and enumerate its columns.
df = pd.read_csv(input_file_name, sep=',', error_bad_lines=False)
input_file_num_rows = len(df)
output_column = "col2"
swarm_size = "medium"  # one of "small", "medium", or "large"
cpu_count = 4  # change this to the number of CPU cores on your system
SWARM_CONFIG = {
    "includedFields": [],  # populated below from the CSV's columns
    "streamDef": {
        "info": output_column,
        "version": 1,
        "streams": [
            {
                "info": output_column,
                "source": "file://" + input_file_name,
                "columns": ["*"],
                "last_record": input_file_num_rows
            }
        ]
    },
    "inferenceType": "TemporalAnomaly",
    "inferenceArgs": {
        "predictionSteps": [1, 2, 3, 4, 5],
        "predictedField": output_column
    },
    "iterationCount": -1,  # -1 means swarm over every record in the stream
    "swarmSize": swarm_size
}
# Re-apply a few parameters to the swarm's configuration. These assignments
# duplicate values already set in the dict literal above; I don't remember
# now why I did it this way, but it saved me time on something. :-)
SWARM_CONFIG["streamDef"]["info"] = output_column
SWARM_CONFIG["streamDef"]["streams"][0]["info"] = output_column
SWARM_CONFIG["streamDef"]["streams"][0]["source"] = "file://" + input_file_name
SWARM_CONFIG["streamDef"]["streams"][0]["last_record"] = input_file_num_rows
SWARM_CONFIG["inferenceArgs"]["predictedField"] = output_column
# Add every CSV column as an includedField in SWARM_CONFIG. With NuPIC's
# three-row CSV header (names, then field types, then flags), the first
# data row pandas reads holds each column's field type.
for i in range(len(df.columns)):
    SWARM_CONFIG["includedFields"].append({
        "fieldName": df.columns[i],
        "fieldType": df.iloc[0, i]
    })
# Rebuild the model by swarming over the data. runWithConfig blocks until
# the swarm finishes and returns the best model parameters it found.
model_params = permutations_runner.runWithConfig(
    SWARM_CONFIG, {'maxWorkers': cpu_count, 'overwrite': True})
# Create a new model from those parameters and enable inferential predictions.
model = ModelFactory.create(model_params)
model.enableInference({"predictedField": output_column})
shifter = InferenceShifter()
# Accumulate actual and predicted values so we can compute statistics later.
actual_values = []
predicted_values = []
results = None
# Make the predictions and do something with the results. The first CSV
# column is assumed to hold the timestamp; the two NuPIC header rows that
# pandas reads as data (field types and flags) are skipped.
timestamp_field = df.columns[0]
for _, row_data in df.iloc[2:].iterrows():
    results = model.run({
        timestamp_field: pd.to_datetime(row_data[timestamp_field]).to_pydatetime(),
        output_column: float(row_data[output_column])
    })
    # Shift inferences so each prediction lines up with the row it predicts.
    last_result = shifter.shift(results)
    anomaly_score = last_result.inferences["anomalyScore"]
    predicted_value = last_result.inferences['multiStepBestPredictions'][1]
    last_inferences = last_result.inferences['multiStepPredictions'][1]
    actual_values.append(float(row_data[output_column]))
    predicted_values.append(predicted_value)
# ... insert code here to calculate and store statistics using the predicted values
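# A minimal sketch of one such statistic: mean absolute error over the rows
# where a prediction exists (the shifter yields None until the model has
# produced its first prediction). MAE is illustrative, not the only option.
paired = [(a, p) for a, p in zip(actual_values, predicted_values) if p is not None]
if paired:
    mae = sum(abs(a - p) for a, p in paired) / len(paired)
    print("Mean absolute error over %d predictions: %f" % (len(paired), mae))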