Skip to content

Instantly share code, notes, and snippets.

@drwahl
Last active February 6, 2017 19:01
Show Gist options
  • Save drwahl/3971e133a002cdc4bb4013314f42c78f to your computer and use it in GitHub Desktop.
Save drwahl/3971e133a002cdc4bb4013314f42c78f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# This script attempts to detect fraudulent ("fraudy") DHC accounts using
# machine learning: a decision tree trained on a list of labelled tenants.
from os import environ
from datetime import datetime
from sklearn import tree
from keystoneclient.v2_0 import client as keyclient
from keystoneauth1.identity import v2
from keystoneauth1 import session
from novaclient import client as novaclient
def parse_tenant(tenant_id):
    """
    Parse tenant_id and return an array in the same format as FEATURES.

    Lists the servers that belong to ``tenant_id`` through the Nova API
    (with the ``all_tenants`` search option set) and reduces them to the
    feature vector ``[vm_count, avg_creation_diff]``:

    * ``vm_count`` -- number of servers returned for the tenant.
    * ``avg_creation_diff`` -- whatever
      ``get_avg_time_between_creations`` returns for those servers.

    Credentials are read from the ``OS_USERNAME``, ``OS_PASSWORD``,
    ``OS_TENANT_NAME`` and ``OS_AUTH_URL`` environment variables; a
    missing variable raises ``KeyError``.
    """
    nclient = novaclient.Client(
        2.0,
        username=environ['OS_USERNAME'],
        password=environ['OS_PASSWORD'],
        # tenant_name takes a tenant *name*; main() authenticates with the
        # same variable, so both clients now use identical credentials.
        tenant_name=environ['OS_TENANT_NAME'],
        auth_url=environ['OS_AUTH_URL']
    )
    search_opts = {
        'all_tenants': 1,
        'tenant_id': tenant_id,
    }
    # NOTE: the interactive ipdb breakpoint that used to sit here was
    # removed; it stopped the script on every single tenant.
    tenant_vms = nclient.servers.list(search_opts=search_opts)
    vm_count = len(tenant_vms)
    avg_creation_diff = get_avg_time_between_creations(tenant_vms)
    return [vm_count, avg_creation_diff]
def get_avg_time_between_creations(vms):
    """Return average time (in seconds) between VM creations.

    Each item in ``vms`` must expose a ``created`` attribute holding a
    timestamp string formatted as ``%Y-%m-%dT%H:%M:%SZ``.  The VMs may be
    given in any order.

    The result is a float: the mean gap between consecutive creation
    times, i.e. (latest - earliest) / (number of VMs - 1).  With fewer
    than two VMs there is no gap to measure, so 0.0 is returned instead
    of raising.

    Raises ``ValueError`` if a timestamp does not match the expected
    format.
    """
    create_times = [
        datetime.strptime(vm.created, "%Y-%m-%dT%H:%M:%SZ") for vm in vms
    ]
    if len(create_times) < 2:
        return 0.0
    # The consecutive gaps of the sorted timestamps add up to
    # (latest - earliest), so sorting and pairwise differences are not
    # needed to obtain their mean.
    span = max(create_times) - min(create_times)
    return span.total_seconds() / (len(create_times) - 1)
def train(features, labels):
    """Fit a decision-tree classifier on the given samples.

    ``features`` and ``labels`` are handed straight to
    ``DecisionTreeClassifier.fit``; the fitted classifier is returned.
    """
    classifier = tree.DecisionTreeClassifier()
    fitted = classifier.fit(features, labels)
    return fitted
def verify_tenants(tenants, model):
    """Use machine learning to detect if tenant is fraudy.

    ``tenants`` is an iterable of ``[identifier, features]`` entries,
    where ``features`` is one sample in the layout the model was trained
    on.  ``model`` is any object with a ``predict`` method that accepts a
    list of samples and returns one prediction per sample.

    A prediction of 0 reports the tenant as fraudulent, 1 is treated as
    clean (nothing is printed), and anything else is reported as
    unexpected.  Nothing is returned; results are printed.
    """
    for tenant in tenants:
        identifier, features = tenant[0], tenant[1]
        # predict() works on a batch of samples, so wrap the single
        # feature vector in a list and take the one prediction back out.
        prediction = model.predict([features])[0]
        try:
            # Labels may come back as strings or numpy scalars depending
            # on how the model was trained; compare them as plain ints.
            status = int(prediction)
        except (TypeError, ValueError):
            status = prediction
        if status == 0:
            print("%s is a Fraudy McFraudface!" % identifier)
        elif status != 1:
            print("Something weird happened with tenant %s" % identifier)
def main():
    """The main loop.

    1. Authenticate against Keystone with the ``OS_USERNAME``,
       ``OS_PASSWORD``, ``OS_TENANT_NAME`` and ``OS_AUTH_URL``
       environment variables.
    2. Read ``model_tenants.txt`` from the current directory; each
       non-blank line holds ``<tenant_id> <label>`` separated by
       whitespace, with an integer label (0 = fraudulent, 1 = clean, as
       checked by ``verify_tenants``).
    3. Train the classifier on those tenants.
    4. Classify every tenant Keystone lists and print the findings.

    Raises ``KeyError`` for a missing environment variable and
    ``ValueError`` for a malformed line in the model file.
    """
    auth = v2.Password(
        username=environ['OS_USERNAME'],
        password=environ['OS_PASSWORD'],
        tenant_name=environ['OS_TENANT_NAME'],
        auth_url=environ['OS_AUTH_URL']
    )
    sess = session.Session(auth=auth)
    kclient = keyclient.Client(session=sess)

    features = []
    labels = []
    # read input tenants; the context manager closes the file even if
    # parsing a tenant fails part-way through
    with open('model_tenants.txt', 'r') as model_tenants:
        for line in model_tenants:
            if not line.strip():
                # tolerate blank / trailing empty lines
                continue
            tenant_id, label = line.split()
            features.append(parse_tenant(tenant_id))
            # verify_tenants compares predictions against the ints 0 and
            # 1, so the training labels must be ints, not strings
            labels.append(int(label))

    # train ML
    model = train(features, labels)

    # now iterate over all tenants, checking for fraudy accounts
    tenants = []
    for tenant in kclient.tenants.list():
        # parse_tenant expects the tenant id, not the Tenant object
        tenants.append([tenant.id, parse_tenant(tenant.id)])
    verify_tenants(tenants, model)
# Run the detector only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment