Skip to content

Instantly share code, notes, and snippets.

@HeartSaVioR
Created October 8, 2016 23:04
Show Gist options
  • Save HeartSaVioR/79fd4e461604fabecf535ffece47e6c2 to your computer and use it in GitHub Desktop.
Save HeartSaVioR/79fd4e461604fabecf535ffece47e6c2 to your computer and use it in GitHub Desktop.
kiritbasu/Fake-Apache-Log-Generator modified for storm-sql-example
#!/usr/bin/python
import time
import datetime
import pytz
import numpy
import random
import gzip
import zipfile
import sys
import argparse
from faker import Faker
from random import randrange
#todo:
# allow writing different patterns (Common Log, Apache Error log etc)
# log rotation
class switch(object):
def __init__(self, value):
self.value = value
self.fall = False
def __iter__(self):
"""Return the match method once, then stop"""
yield self.match
raise StopIteration
def match(self, *args):
"""Indicate whether or not to enter a case suite"""
if self.fall or not args:
return True
elif self.value in args: # changed for v1.5, see below
self.fall = True
return True
else:
return False
parser = argparse.ArgumentParser(__file__, description="Fake Apache Log Generator")
parser.add_argument("--output", "-o", dest='output_type', help="Write to a Log file, a gzip file or to STDOUT", choices=['LOG','GZ','CONSOLE'] )
parser.add_argument("--num", "-n", dest='num_lines', help="Number of lines to generate (0 for infinite)", type=int, default=1)
parser.add_argument("--prefix", "-p", dest='file_prefix', help="Prefix the output file name", type=str)
args = parser.parse_args()
log_lines = args.num_lines
file_prefix = args.file_prefix
output_type = args.output_type
faker = Faker()
timestr = time.strftime("%Y%m%d-%H%M%S")
otime = datetime.datetime.now()
outFileName = 'access_log_'+timestr+'.log' if not file_prefix else file_prefix+'_access_log_'+timestr+'.log'
for case in switch(output_type):
if case('LOG'):
f = open(outFileName,'w')
break
if case('GZ'):
f = gzip.open(outFileName+'.gz','w')
break
if case('CONSOLE'): pass
if case():
f = sys.stdout
response=["200","404","500","301"]
verb=["GET","POST","DELETE","PUT"]
resources=["/list","/wp-content","/wp-admin","/explore","/search/tag/list","/app/main/posts","/posts/posts/explore","/apps/cart.jsp?appID="]
ualist = [faker.firefox, faker.chrome, faker.safari, faker.internet_explorer, faker.opera]
flag = True
while (flag):
increment = datetime.timedelta(seconds=random.randint(30,300))
otime += increment
ip = faker.ipv4()
dt = otime.strftime('%d/%b/%Y:%H:%M:%S')
tz = datetime.datetime.now(pytz.timezone('US/Pacific')).strftime('%z')
vrb = numpy.random.choice(verb,p=[0.6,0.1,0.1,0.2])
uri = random.choice(resources)
if uri.find("apps")>0:
uri += `random.randint(1000,10000)`
resp = numpy.random.choice(response,p=[0.9,0.04,0.02,0.04])
byt = int(random.gauss(5000,50))
referer = faker.uri()
useragent = numpy.random.choice(ualist,p=[0.5,0.3,0.1,0.05,0.05] )()
f.write('%s - - [%s %s] "%s %s HTTP/1.0" %s %s "%s" "%s"\n' % (ip,dt,tz,vrb,uri,resp,byt,referer,useragent))
log_lines = log_lines - 1
flag = False if log_lines == 0 else True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment