Skip to content

Instantly share code, notes, and snippets.

@luohao-brian
Created November 3, 2018 02:49
Show Gist options
  • Save luohao-brian/a9577ee2bab754441e82ab3d116bcc3c to your computer and use it in GitHub Desktop.
Save luohao-brian/a9577ee2bab754441e82ab3d116bcc3c to your computer and use it in GitHub Desktop.
nginx_access_log_to_mysql.py
#log_format main '$remote_addr - $remote_user [$time_local] "$request" '
# '$status $body_bytes_sent "$http_referer" '
# '"$http_user_agent" "$http_x_forwarded_for"';
import time
import sys
import os
import MySQLdb
def parse_logfile(logfile):
fi = open(logfile, 'r')
records = []
for line in fi:
r = line.split()
if len(r) < 12:
print '[WARN] ignore bad line: %s' % line
continue
ip = r[0]
host = r[1]
ts = time.strptime(r[3][1:], "%d/%b/%Y:%H:%M:%S")
request = r[5][1:] + ' ' + r[6] + ' ' + r[7][:-1]
status = int(r[8])
try:
resp_len = int(r[9])
except:
resp_len = 0
refer = r[10][1:-1]
agent = ' '.join(r[11:])[1:-1]
rec = [ip, host, ts, request, status, resp_len, refer, agent]
records.append(rec)
fi.close()
return records
db_name = 'httpd_log'
db_user = 'root'
db_passwd = 'root'
db_host = 'localhost'
access_log_schema = """
access_log(
ip VARCHAR(64),
host VARCHAR(255),
ts TIMESTAMP,
request VARCHAR(1024),
status INT,
resp_len INT,
refer VARCHAR(1024),
agent VARCHAR(1024)
);
"""
def compose_batch_insert_sql(records, start, end):
sql = 'INSERT INTO access_log (ip, host, ts, request, status, resp_len, refer, agent) VALUES '
while start < end -1:
sql += '("%s", "%s", "%s", "%s", %d, %d, "%s", "%s"),' % (
records[start][0],
records[start][1],
time.strftime("%Y-%m-%d %H:%M:%S", records[start][2]),
records[start][3],
records[start][4],
records[start][5],
records[start][6],
records[start][7]
)
start += 1
sql += '("%s", "%s", "%s", "%s", %d, %d, "%s", "%s");' % (
records[end-1][0],
records[end-1][1],
time.strftime("%Y-%m-%d %H:%M:%S", records[end-1][2]),
records[end-1][3],
records[end-1][4],
records[end-1][5],
records[end-1][6],
records[end-1][7]
)
#print sql
return sql
def write_mysql_db(records):
db = MySQLdb.connect(host=db_host, user=db_user, passwd=db_passwd)
cur = db.cursor()
cur.execute('CREATE DATABASE IF NOT EXISTS %s;' % db_name)
cur.execute('USE %s;' % db_name)
cur.execute('CREATE TABLE IF NOT EXISTS %s' % access_log_schema)
batch_size = 1000
n_batches = len(records)/batch_size
if n_batches > 1:
for x in range(n_batches):
print "Write data batch no. %s" % (x+1)
sql = compose_batch_insert_sql(records, x*batch_size, (x+1)*batch_size)
cur.execute(sql)
db.commit()
print "Write data batch no. %s" % (n_batches+1)
sql = compose_batch_insert_sql(
records,
n_batches*batch_size,
n_batches*batch_size + len(records)%batch_size
)
cur.execute(sql)
db.commit()
if __name__ == '__main__':
if len(sys.argv) !=2 or not os.path.exists(sys.argv[1]):
print "Usage: write_access_log.py access.log"
sys.exit(-1)
print "Use log file %s" % sys.argv[1]
records = parse_logfile(sys.argv[1])
write_mysql_db(records)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment