Last active
February 6, 2017 19:01
-
-
Save drwahl/3971e133a002cdc4bb4013314f42c78f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# This script will attempt to detect fraudy DHC accounts using | |
# machine learning. | |
from os import environ | |
from datetime import datetime | |
from sklearn import tree | |
from keystoneclient.v2_0 import client as keyclient | |
from keystoneauth1.identity import v2 | |
from keystoneauth1 import session | |
from novaclient import client as novaclient | |
def parse_tenant(tenant_id):
    """Build the feature vector for a single tenant.

    Lists every server owned by ``tenant_id`` through the Nova API and
    derives the features from that list.

    Args:
        tenant_id: ID of the tenant whose servers should be inspected.

    Returns:
        ``[vm_count, avg_creation_diff]`` where ``vm_count`` is the number of
        servers found and ``avg_creation_diff`` is whatever
        ``get_avg_time_between_creations`` returns for them.

    Raises:
        KeyError: if one of the ``OS_*`` environment variables is not set.
    """
    # NOTE(review): main() authenticates with OS_TENANT_NAME, while this
    # passes OS_TENANT_ID as tenant_name -- confirm which one is intended.
    nclient = novaclient.Client(
        2.0,
        username=environ['OS_USERNAME'],
        password=environ['OS_PASSWORD'],
        tenant_name=environ['OS_TENANT_ID'],
        auth_url=environ['OS_AUTH_URL']
    )
    # all_tenants lets the query look outside the authenticated tenant;
    # tenant_id then narrows the result to the tenant under inspection.
    search_opts = {'all_tenants': 1,
                   'tenant_id': tenant_id}
    tenant_vms = nclient.servers.list(search_opts=search_opts)
    vm_count = len(tenant_vms)
    avg_creation_diff = get_avg_time_between_creations(tenant_vms)
    return [vm_count, avg_creation_diff]
def get_avg_time_between_creations(vms):
    """Return the average time (in seconds) between consecutive VM creations.

    Args:
        vms: iterable of objects with a ``created`` attribute formatted as
            ``%Y-%m-%dT%H:%M:%SZ``. Order does not matter.

    Returns:
        The mean gap between successive creation times as a float number of
        seconds, or ``0.0`` when there are fewer than two VMs (no gap exists).

    Raises:
        ValueError: if a ``created`` value does not match the expected format.
    """
    created = sorted(
        datetime.strptime(vm.created, "%Y-%m-%dT%H:%M:%SZ") for vm in vms
    )
    if len(created) < 2:
        return 0.0
    # Consecutive gaps of a sorted sequence telescope to (last - first), so
    # their mean is the total span divided by the number of gaps.
    span = (created[-1] - created[0]).total_seconds()
    return span / (len(created) - 1)
def train(features, labels):
    """Fit a decision-tree classifier on the labelled feature vectors.

    ``features`` and ``labels`` are handed to the classifier's ``fit``
    unchanged; the fitted classifier is returned.
    """
    return tree.DecisionTreeClassifier().fit(features, labels)
def verify_tenants(tenants, model):
    """Print a message for every tenant the model does not classify as OK.

    Args:
        tenants: iterable of ``[tenant, features]`` pairs; ``features`` is a
            single feature vector and ``tenant`` is what gets printed.
        model: fitted classifier exposing ``predict``.

    A predicted label of ``0`` is reported as fraud, ``1`` is accepted
    silently, and anything else is reported as unexpected.
    """
    for tenant in tenants:
        # predict() works on a batch of samples, so wrap the single feature
        # vector in a list and unwrap the single prediction that comes back.
        prediction = model.predict([tenant[1]])[0]
        try:
            # Labels may come back as strings or numpy scalars depending on
            # what the model was trained with; normalise before comparing.
            status = int(prediction)
        except (TypeError, ValueError):
            status = None
        if status == 0:
            print("%s is a Fraudy McFraudface!" % tenant[0])
        elif status != 1:
            print("Something weird happened with tenant %s" % tenant[0])
def main():
    """Train on the labelled tenants, then check every tenant for fraud.

    Reads ``model_tenants.txt`` from the current directory, one
    ``<tenant_id> <label>`` pair per line, builds a feature vector for each
    listed tenant, trains the model, and finally runs the model over every
    tenant returned by Keystone.

    Raises:
        KeyError: if one of the ``OS_*`` environment variables is not set.
        ValueError: if a non-blank line of the input file is not exactly two
            whitespace-separated fields, or its label is not an integer.
    """
    auth = v2.Password(
        username=environ['OS_USERNAME'],
        password=environ['OS_PASSWORD'],
        tenant_name=environ['OS_TENANT_NAME'],
        auth_url=environ['OS_AUTH_URL']
    )
    sess = session.Session(auth=auth)
    kclient = keyclient.Client(session=sess)

    features = []
    labels = []
    # read input tenants; the context manager closes the file even when
    # parsing or the Nova lookup raises part-way through
    with open('model_tenants.txt', 'r') as model_tenants:
        for line in model_tenants:
            if not line.strip():
                # tolerate blank lines (e.g. a trailing newline)
                continue
            tenant_id, label = line.split()
            features.append(parse_tenant(tenant_id))
            # verify_tenants compares predictions with the integers 0 and 1,
            # so the labels must be trained as integers, not strings
            labels.append(int(label))

    # train ML
    model = train(features, labels)

    # now iterate over all tenants, checking for fraudy accounts;
    # parse_tenant expects a tenant ID, not the Tenant object itself
    tenants = [
        [tenant, parse_tenant(tenant.id)]
        for tenant in kclient.tenants.list()
    ]
    verify_tenants(tenants, model)
if __name__ == "__main__":
    # Run the detection only when executed as a script, not when imported.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment