Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
LSTM and anomaly detection of web domain query activity gathered from OpenDNS
from __future__ import print_function
"""
Using OpenDNS domain query activity, we retrieve 5 days
of queries/hour to a domain for 240+ domains (stored
in dns.json). We predict the number of queries in
the next hour using an LSTM recurrent neural network.
An ad hoc anomaly detection step is outlined in the final
for loop.

Refer to: (the reference link is missing from this copy)
"""
import json
import random
from sys import stdout

import numpy as np
from pybrain.datasets import SequentialDataSet
from pybrain.structure.modules import LSTMLayer
from pybrain.supervised import RPropMinusTrainer
from pybrain.tools.customxml import NetworkWriter
from pybrain.tools.shortcuts import buildNetwork
# Get queries/hr data: one time series per entry, read from the 'ts' key.
# The [:-2] slice drops the last two values of every series.
with open('dns.json', 'r') as f:
    samples = [entry['ts'][:-2] for entry in json.load(f)]

# Shuffle to partition test/train. The comment announcing this step was
# present, but the shuffle itself was missing, so the split simply followed
# the order of the file.
random.shuffle(samples)

# Set train & test data: the first 50 series are used for training and
# everything from index 200 onward for testing.
train_data, test_data = samples[:50], samples[200:]
# Initialize ds for rnn: 1 input value (the observation) and 1 target value
# (the next observation).
ds = SequentialDataSet(1, 1)

# Add each timeseries (ts) as its own sequence.
for ts in train_data:
    # Mark a sequence boundary first; without it the series of all domains
    # are chained into one long sequence and the last value of one domain
    # is treated as the predecessor of the first value of the next.
    ds.newSequence()
    # Add obsv and next: pair every value with its successor.
    for t_1, t_2 in zip(ts, ts[1:]):
        ds.addSample(t_1, t_2)
# Recurrent network in a 1-5-1 layout: one input, five hidden units built
# from LSTMLayer, one output, and no bias on the output layer.
rnn = buildNetwork(
    1, 5, 1,
    hiddenclass=LSTMLayer,
    outputbias=False,
    recurrent=True,
)
# Trainer bound to the network and the training dataset.
trainer = RPropMinusTrainer(rnn, dataset=ds)
# Predefine iterations: epochs & cycles.
# NOTE(review): EPOCHS_PER_CYCLE and EPOCHS were used below without ever
# being defined (NameError). 5 epochs per cycle is an assumed value --
# confirm or tune it.
EPOCHS_PER_CYCLE = 5
CYCLES = 100
EPOCHS = EPOCHS_PER_CYCLE * CYCLES

# Training loop
for i in xrange(CYCLES):
    # Actually train before measuring; previously the loop only evaluated
    # the untrained network CYCLES times.
    trainer.trainEpochs(EPOCHS_PER_CYCLE)
    error = trainer.testOnData()
    epoch = (i + 1) * EPOCHS_PER_CYCLE
    # "\r" plus end="" rewrites the same console line on every cycle.
    print("\r Epoch: {}/{} Error: {}".format(epoch, EPOCHS, error), end="")
    # The line is never terminated, so flush to make the progress visible.
    stdout.flush()

# Save model
NetworkWriter.writeToFile(rnn, 'rnn3.xml')
# Ad hoc test: walk every held-out series in windows of 5 observations and
# pause after each prediction so the output can be inspected.
for test in test_data:
    for i in xrange(0, len(test) - 6, 5):
        # Get 5 obs (indices i .. i + 4); the 6th value, at index i + 5, is
        # the one we wish to predict. The previous index i + 6 skipped it.
        obs, nxt = test[i:i + 5], test[i + 5]
        # Predict all: feed the observations one by one; each activation
        # yields the network's estimate for the following value.
        prds = [rnn.activate(o) for o in obs]
        # Get 6th prediction: the output produced after the last observation.
        prd = prds.pop()[0]
        # Test if prd is anomalous: more than 1 plus two standard deviations
        # above the mean of the observed window.
        anm = prd > (1 + np.mean(obs) + 2 * np.std(obs))
        # Get previous 5 obs,prd error rate: the remaining 4 predictions
        # are compared against the observations they were meant to predict.
        mse = ((np.array(obs[1:]) - np.concatenate(prds)) ** 2).mean()
        # A stray end="" keyword used to be passed to format(), where it
        # had no effect; it is dropped, so the printed output is unchanged.
        print("\nSaw: {}\nNext/Prediction: {} / {}\nIs Anomaly: {}\nPrior MSE: {}".format(
            obs, round(nxt, 3), round(prd, 3), anm, mse))
        raw_input("[PRESS ENTER] for next prediction...\n")
    print("[NEXT DOMAIN] ... \n")

This comment has been minimized.

Show comment Hide comment

vinayakumarr Mar 26, 2017

Where do we get the dns.json data?

Where do we get the dns.json data?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment