Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/bin/env python
#
# Usage:
# ./dump.py
# - It stops after `period` seconds or once the captured log exceeds `max_size`.
# - The log is sent to an S3 bucket.
# - Notes
# > The EC2 instance needs an instance profile (s3 put).
# > You need to edit the INFRA S3 bucket policy.
#
# Packages
# yum install python2-boto3 pcapy python2-impacket python-netaddr python-netifaces pytz
#
# general log format
# - ${timestamp}\t${src_port}\t${dst_addr}\t${query}
#----- example -----
#171026 11:14:00\t34567\t10.1.2.3\tselect 1 from db1
#\t\t34567\t10.1.2.3\tselect 2 from db1
#171026 11:14:01\t45678\t10.1.2.4\tselect 3 from db2
#-------------------
#
# query packet
# * There are 4 bytes of binary data before the MySQL query packet.
#
# SQL changed
# * Delete \n and \t to make it one line
import boto3
import os
import sys
import re
import bz2
import json
import urllib2
import socket
import time
from datetime import datetime
from pytz import timezone
import pcapy
from impacket.ImpactDecoder import *
import netifaces
from netaddr import *
#
# Functions
# - linux iface: eth0
# windows iface: \\Device\\NPF_{5D21FB4F-D9BF-47AC-AD3E-CE2E172F27A0}
# iname: {5D21FB4F-D9BF-47AC-AD3E-CE2E172F27A0}
def get_private_interface():
    """Return the first pcap device that has a private IPv4 address.

    Walks the devices reported by pcapy.findalldevs(). When a device name
    contains a "{...}" part after some prefix (the Windows NPF form), only
    the "{...}" part is used to look the adapter up in netifaces, while the
    full pcap device name is what gets returned.

    Returns:
        The pcap device name, or None when no device has a private IPv4
        address.
    """
    for iface in pcapy.findalldevs():
        # Plain names such as eth0 do not match and are used unchanged.
        guid = re.search(r'.+({.+})', iface)
        iname = guid.group(1) if guid else iface
        try:
            addresses = netifaces.ifaddresses(iname)
        except ValueError:
            # netifaces rejects names it does not know (pcap may list
            # devices that are not real interfaces); skip them instead of
            # aborting the whole scan.
            continue
        for ipv4 in addresses.get(netifaces.AF_INET, []):
            addr = ipv4.get('addr')
            # An entry without an address cannot be classified; ignore it.
            if addr and IPAddress(addr).is_private():
                return iface
    return None
def tags2dict(tags):
    """Convert a list of {'Key': ..., 'Value': ...} entries into a dict.

    Args:
        tags: iterable of mappings, each holding a 'Key' and a 'Value' item.

    Returns:
        A dict mapping every 'Key' to its 'Value'. When a key occurs more
        than once, the last occurrence wins.

    Raises:
        KeyError: if an entry lacks 'Key' or 'Value'.
    """
    # A comprehension avoids shadowing the builtin `dict` with a local name.
    return {tag['Key']: tag['Value'] for tag in tags}
#
# config
#
# Start time in Asia/Tokyo; the upload step derives the date directory and
# the file name from it.
now = datetime.now(tz=timezone('Asia/Tokyo'))

# Capture limits: stop after `period` seconds, or once the accumulated log
# is longer than `max_size` characters.
period = 60
max_size = 100 * 1024 * 1024

# Arguments for pcapy.open_live().
dev = get_private_interface()
snaplen = 1024 * 1024
promiscious = False
timeout = 100

# Destination of the compressed log.
log_region = 'ap-northeast-1'
log_bucket = 'gedow-general-log-test'
log_prefix = 'generallog'
#
# AWS CLI
#
# Credentials and region are taken from the environment when exported.
key_id = os.environ.get('AWS_ACCESS_KEY_ID')
secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
region = os.environ.get('AWS_REGION')
on_ec2 = False
instance_id = None
meta_data_url = 'http://169.254.169.254/latest/dynamic/instance-identity/document/'
if region is None:
    # No region exported: read region and instance id from the instance
    # identity document. Any failure (unreachable endpoint, bad JSON,
    # missing key) is reported as a missing AWS_REGION.
    try:
        meta_data = json.loads(urllib2.urlopen(meta_data_url, timeout=1).read())
        region = meta_data['region']
        instance_id = meta_data['instanceId']
        on_ec2 = True
    except Exception:
        raise Exception("Export AWS_REGION.")
# on_ec2 is always a bool, so it must be tested with `not`; comparing it
# against None can never be true and would leave this guard dead.
if not on_ec2 and (key_id is None or secret_key is None):
    raise Exception("Export AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.")
# CLI
ec2 = boto3.client('ec2', region_name=region)
s3 = boto3.resource('s3', region_name=log_region)
# Tag
service = role = None
if on_ec2:
    # Derive service/role from the tags attached to this instance.
    res = ec2.describe_tags(
        Filters=[{'Name': 'resource-id', 'Values': [instance_id]}]
    )['Tags']
    tags = tags2dict(res)
    if not ('service' in tags and ('role' in tags or 'roles' in tags)):
        raise Exception("Not found service and role/roles tags.")
    service = tags['service']
    # A 'role' tag takes precedence; otherwise use the first entry of the
    # comma-separated 'roles' tag.
    role = tags['role'] if 'role' in tags else tags['roles'].split(",")[0]
else:
    # Fixed values when no instance metadata was read.
    service = "test"
    role = "dev"
#
# packet dump
#
p = pcapy.open_live(dev, snaplen, promiscious, timeout)
p.setfilter("tcp dst port 3306")
start_utime = int(time.time())
# One second behind, so the very first row always carries a timestamp.
before_utime = start_utime - 1
# Loop invariants: build the decoder and compile the pattern only once.
decoder = EthDecoder()
sql_pattern = re.compile(r'^[^a-z]*([a-z].+)$', re.DOTALL | re.IGNORECASE)
# Rows are collected in a list and joined once at the end; appending to a
# string per packet re-copies the whole log each time.
rows = []
log_len = 0
while True:
    current_utime = int(time.time())
    # Stop once the capture period has elapsed.
    if current_utime - start_utime > period:
        break
    try:
        (header, payload) = p.next()
    except pcapy.PcapError:
        continue
    # No packet was delivered (presumably a read timeout); poll again.
    if not header:
        continue
    eth = decoder.decode(payload)
    ip = eth.child()
    tcp = ip.child()
    # Skip the 4 binary bytes in front of the query, then everything up to
    # the first letter; payloads without any letter are ignored.
    search = sql_pattern.search(tcp.get_data_as_string()[4:])
    if not search:
        continue
    # Drop newlines and tabs so each query occupies a single log line.
    sql = re.sub(r'[\n\t]', '', search.group(1))
    dst = ip.get_ip_dst()
    sport = tcp.get_th_sport()
    # Only the first row of each second carries the timestamp; later rows
    # of the same second get a bare tab in its place.
    timestamp = "\t"
    if current_utime != before_utime:
        timestamp = datetime.fromtimestamp(current_utime).strftime("%y%m%d %H:%M:%S")
    row = "%s\t%s\t%s\t%s\n" % (timestamp, str(sport).rjust(5), dst, sql)
    rows.append(row)
    log_len += len(row)
    # Stop once the accumulated log exceeds the size limit.
    if log_len > max_size:
        break
    before_utime = current_utime
# The upload step expects the whole log as a single string.
log = ''.join(rows)
#
# compress & upload
#
# Object key layout: <prefix>/<service>/<role>/<YYYYMMDD>/<YYYYMMDD_HHMM>-<host>.log.bz2
hostname = socket.gethostname()
date = now.strftime("%Y%m%d")
file_name = now.strftime("%Y%m%d_%H%M") + "-" + hostname + ".log.bz2"
obj_key = "%s/%s/%s/%s/%s" % (log_prefix, service, role, date, file_name)
# Compress the whole log in memory and store it as a single S3 object.
compressed = bz2.compress(log)
s3.Bucket(log_bucket).Object(obj_key).put(
    Body=compressed,
    ACL="bucket-owner-full-control",
)
# A single parenthesised expression prints the same text under Python 2.
print("Save sql to s3://%s/%s" % (log_bucket, obj_key))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment