Skip to content

Instantly share code, notes, and snippets.

@cestella
Last active June 6, 2017 07:26
Show Gist options
  • Save cestella/8dd83031b8898a732b6a5a60fce1b616 to your computer and use it in GitHub Desktop.
Save cestella/8dd83031b8898a732b6a5a60fce1b616 to your computer and use it in GitHub Desktop.
import sklearn
import numpy as np
import math
import pickle
import collections
class DGA:
    """Score domain names with a set of pre-trained, pickled model artifacts.

    The constructor loads five pickle files from the current working
    directory and stores them in ``self.model`` under the keys ``clf``,
    ``alexa_vc``, ``alexa_counts``, ``dict_vc`` and ``dict_counts``.
    """

    # (key in ``self.model``, pickle file it is loaded from)
    _MODEL_FILES = (
        ('clf', './dga_model_random_forest.model'),
        ('alexa_vc', './dga_model_alexa_vectorizor.model'),
        ('alexa_counts', './dga_model_alexa_counts.model'),
        ('dict_vc', './dga_model_dict_vectorizor.model'),
        ('dict_counts', './dga_model_dict_counts.model'),
    )

    def __init__(self):
        """Load every artifact listed in ``_MODEL_FILES`` into ``self.model``.

        Raises:
            IOError / OSError: if one of the model files cannot be opened.
        """
        self.model = {
            key: self._load_pickle(path) for key, path in self._MODEL_FILES
        }

    @staticmethod
    def _load_pickle(path):
        """Unpickle and return the object stored at ``path``.

        The file is opened in a ``with`` block so the handle is closed
        deterministically instead of being left to the garbage collector.
        """
        # SECURITY NOTE: unpickling can execute arbitrary code. These files
        # must come from a trusted source; never point this at user uploads.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def evaluate_domain(self, domain):
        """Return the classifier's prediction for a single ``domain`` string.

        The feature row is: length of the domain, its character entropy, and
        the product of each stored count vector with the transposed output
        of the matching vectorizer for this one domain.
        """
        alexa_match = self.model['alexa_counts'] * self.model['alexa_vc'].transform([domain]).T
        dict_match = self.model['dict_counts'] * self.model['dict_vc'].transform([domain]).T
        # Assemble feature matrix (for just one domain)
        # NOTE(review): the two *_match values are passed through exactly as
        # computed (not unwrapped to plain floats) - confirm the classifier
        # in use accepts that shape.
        X = [len(domain), self.entropy(domain), alexa_match, dict_match]
        y_pred = self.model['clf'].predict([X])[0]
        return y_pred

    def entropy(self, s):
        """Return the Shannon entropy, in bits, of the characters in ``s``.

        An empty string yields 0 because there are no symbols to sum over.
        """
        counts = collections.Counter(s)
        total = float(len(s))  # float so the division below is never integer division
        return -sum(
            count / total * math.log(count / total, 2)
            for count in counts.values()
        )
import json
import model
from flask import Flask
from flask import request,jsonify
import socket
# Flask application object; the @app.route handler below registers on it and
# the __main__ section starts it with app.run().
app = Flask(__name__)
@app.route("/apply", methods=['GET'])
def predict():
    """Handle ``GET /apply?host=<name>`` and return the model verdict as JSON.

    Returns a JSON map with one field, ``is_malicious``, holding whatever
    ``model.evaluate_domain`` returns for the host. If the ``host`` query
    parameter is absent, responds with HTTP 400 and an ``error`` field
    instead of letting ``None`` reach the model and fail with a 500.

    NOTE: ``model`` must be a DGA instance at request time; the script's
    ``__main__`` section rebinds the name from the imported module to one.
    """
    # We expect one argument, the hostname without TLD.
    h = request.args.get('host')
    if h is None:
        return jsonify({'error': "missing required query parameter 'host'"}), 400
    # We will return a JSON map with one field, 'is_malicious' which will be
    # 'legit' or 'dga', the two possible outputs of our model.
    r = {'is_malicious': model.evaluate_domain(h)}
    return jsonify(r)
if __name__ == "__main__":
    # Create my model object that I want to expose.
    # NOTE: this deliberately rebinds the module-level name `model` from the
    # imported module to a DGA instance; predict() looks it up at request time.
    model = model.DGA()
    # In order to register with model as a service, we need to bind to a port
    # and inform the discovery service of the endpoint. Therefore,
    # we will bind to a port and close the socket to reserve it.
    # Probe on the same address the server will listen on, so the port the
    # OS hands out is free there and not merely free on the loopback interface.
    listen_host = "0.0.0.0"
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind((listen_host, 0))
        port = sock.getsockname()[1]
    finally:
        # Always release the probe socket, even if bind() fails.
        sock.close()
    # There is still a short window between closing the probe socket and
    # app.run() re-binding the port in which another process could take it.
    with open("endpoint.dat", "w") as text_file:
        # To inform the discovery service, we need to write a file with a simple
        # JSON Map indicating the full URL that we've bound to.
        text_file.write("{\"url\" : \"http://0.0.0.0:%d\"}" % port)
    # Make sure flask uses the port we reserved
    app.run(threaded=True, host=listen_host, port=port)
#!/bin/bash
# Append the current directory to PYTHONPATH, then run rest.py with the
# Anaconda interpreter.
export PYTHONPATH="${PYTHONPATH}:."
/opt/anaconda/bin/python rest.py
@cestella
Copy link
Author

cestella commented Feb 21, 2017

The model pickle files referenced by model.py's constructor are the ones output from https://github.com/ClickSecurity/data_hacking/blob/master/dga_detection/dga_model_gen.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment