Skip to content

Instantly share code, notes, and snippets.

@HeRoMo
Created August 10, 2020 10:21
Show Gist options
  • Save HeRoMo/7758b141123ae38075b0150791b06de5 to your computer and use it in GitHub Desktop.
Save HeRoMo/7758b141123ae38075b0150791b06de5 to your computer and use it in GitHub Desktop.
from cloudfront_log_parser import parse # https://github.com/heynemann/cloudfront-log-parser
import sqlite3
import glob
import urllib.parse
def create_table(cursor):
    """Create the ``cloudfront_logs`` table if it does not already exist.

    The statement uses ``create table if not exists``, so calling this
    function repeatedly against the same database is harmless.

    Args:
        cursor: A DB-API cursor (e.g. ``sqlite3.Cursor``) used to execute
            the DDL statement.
    """
    # Named ``ddl`` (not ``create_table``) so the local does not shadow
    # this function's own name.
    ddl = '''
        create table if not exists cloudfront_logs (
            timestamp NUMERIC,
            http_method TEXT,
            path TEXT,
            querystring TEXT,
            status_code NUMERIC,
            referrer TEXT,
            user_agent TEXT,
            browser_family TEXT,
            is_bot NUMERIC
        );
    '''
    cursor.execute(ddl)
def insert_logs(cursor, filename):
    """Parse one log file and insert its entries into ``cloudfront_logs``.

    The whole file is read into memory, handed to ``parse`` (from
    ``cloudfront_log_parser``), and one row is inserted per parsed entry.
    No commit is issued here; the caller owns the transaction.

    Args:
        cursor: A DB-API cursor (e.g. ``sqlite3.Cursor``) on a database
            that already contains the ``cloudfront_logs`` table.
        filename: Path of the log file to load.
    """
    # Context manager guarantees the file handle is released even if
    # reading fails.
    with open(filename) as f:
        log_lines = f.read()

    # Values are bound through ``?`` placeholders instead of being spliced
    # into the SQL text: paths, query strings, referrers and user agents
    # come from remote clients and may contain quotes, which would break
    # the statement or allow SQL injection.
    insert_sql = """
        insert into cloudfront_logs values (?, ?, ?, ?, ?, ?, ?, ?, ?);
    """
    rows = (
        (
            # ``str(...)`` keeps the stored text identical to what the
            # previous string-formatted statement wrote for these columns.
            str(log.timestamp),
            str(log.http_method),
            str(log.path),
            str(log.querystring),
            str(log.status_code),
            str(log.referrer),
            urllib.parse.unquote(log.user_agent),
            str(log.browser_family),
            # Previously emitted as a bare SQL token; bound directly here.
            # NOTE(review): presumably a bool (stored as 1/0) - confirm
            # against the parser's output.
            log.is_bot,
        )
        for log in parse(log_lines)
    )
    cursor.executemany(insert_sql, rows)
# main
# Load every file under ./data (in sorted name order) into log.sqlite3.
conn = sqlite3.connect('log.sqlite3')
try:
    c = conn.cursor()
    create_table(c)
    log_files = glob.glob('./data/*')
    log_files.sort()
    for file in log_files:
        print(file)
        insert_logs(c, file)
    # Single commit after all files: a failure part-way through leaves the
    # database without any of this run's rows.
    conn.commit()
finally:
    # Always release the connection, even when a file fails to load.
    conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment