@hillar · Created April 27, 2016 17:52
Load & parse gzip'ed Apache logs and pump them into Elasticsearch
# Python 2 script.
import sys
# Force UTF-8 as the default codec so stray non-ASCII bytes in log lines
# don't raise UnicodeDecodeError (a common Python 2 workaround).
reload(sys)
sys.setdefaultencoding("utf-8")

from os import listdir
from os.path import isfile, join
import gzip

# Third-party dependencies: pip install apache-log-parser elasticsearch
import apache_log_parser
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
def unzipundparse(pathname, filenames, parser):
    """Stream parsed log entries as Elasticsearch bulk actions."""
    line_parser = apache_log_parser.make_parser(parser)
    for filename in filenames:
        path = pathname + "/" + filename
        # gzip.open lets us read the compressed log line by line
        # without unpacking it to disk first.
        with gzip.open(path, 'r') as f:
            counter = 0
            for line in f:
                counter = counter + 1
                if counter > 300: break  # test cap: only the first 300 lines per file
                data = line_parser(line)
                data['path'] = path  # remember which file the entry came from
                # Wrap the parsed fields in a bulk-API action.
                doc = {
                    '_op_type': 'create',
                    '_index': 'apache-test',
                    '_type': 'apache',
                    '_source': data
                }
                yield doc
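
# For reference, one parsed line yields a flat dict. A sketch (the exact
# field names come from apache_log_parser and may vary by version, so
# treat them as assumptions, not the library's guaranteed output):
#
#   line_parser('1.2.3.4 - - [27/Apr/2016:17:52:00 +0000] '
#               '"GET /index.html HTTP/1.1" 200 512 "-" "curl/7.47.0"')
#   # => {'remote_host': '1.2.3.4', 'status': '200',
#   #     'request_method': 'GET', 'request_url': '/index.html',
#   #     'request_header_user_agent': 'curl/7.47.0', ...}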
if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "directory missing ;("
        sys.exit()
    mypath = sys.argv[1]
    # Take only plain files from the given directory (no subdirectories).
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
    es = Elasticsearch()  # default: localhost:9200
    # Apache "combined" log format.
    parser = "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
    # bulk() consumes the generator and returns a (successes, errors) tuple.
    stats = bulk(es, unzipundparse(mypath, onlyfiles, parser))
    print 'saved :', stats
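
Run it with the log directory as the only argument, e.g. `python loadlogs.py /var/log/apache2/` (the filename `loadlogs.py` is just a placeholder for whatever you saved the gist as). To confirm the documents actually landed, you can ask the cluster for a count; a minimal sketch, assuming the same local Elasticsearch and the `apache-test` index used above:

from elasticsearch import Elasticsearch

es = Elasticsearch()
# The _count API reports how many documents the index now holds.
print es.count(index='apache-test')['count']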