#!/usr/bin/env python3
"""Parse Tableau Desktop log files and forward them to Loggly in bulk.

Reads Tableau's JSON-lines log files, converts each message's local
timestamp to UTC ISO-8601, optionally filters to the most recent ``H``
hours, and POSTs the messages to Loggly's bulk endpoint in chunks of
``S`` messages, pausing one second between chunks.
"""
import datetime
import json
import logging
import os
import sys
import time

import pytz
import requests

#
# Config parameters
#
H = 24                 # Period for data to be parsed, in hours. If 0, will include all.
S = 5000               # Size of chunks for bulk sending of log data to Loggly.
OS = 'mac'             # OS where Tableau Desktop runs - mac or windows.
USERNAME = 'your_name'  # Your username in your OS as it appears in your path.
FILENAME = ['log.txt', 'log_1.txt']  # Files with log data (a smarter version will look into their dates).
TOKEN = "046601bd-xxxx-xxxx-xxxx-36a977ecda02"  # Loggly customer token.
TAG = 'tableau'        # Loggly tag applied to the bulk upload.

# Time zone of the computer where Tableau resides.
local = pytz.timezone("America/Los_Angeles")


def transform_timestamp(timestamp):
    """Convert a Tableau log timestamp to a UTC ISO-8601 string plus its age.

    Args:
        timestamp: Naive local-time string, e.g. ``2016-01-02T03:04:05.678``.

    Returns:
        Tuple ``(iso_utc, age_hours)`` where ``iso_utc`` is the timestamp
        converted from the configured local zone to UTC in ISO-8601 form,
        and ``age_hours`` is how long ago it was, in hours (float).
        NOTE: downstream stores this under the key ``tdelta_sec`` for
        historical reasons, but the unit really is hours.

    Note:
        If the timestamp is too far in the past, Loggly will not parse it
        as a time field and reports: "json timestamp rejected because it
        is too far in the past. type: TimestampBelowFloor".

    Raises:
        pytz.exceptions.AmbiguousTimeError / NonExistentTimeError:
            ``is_dst=None`` makes pytz refuse ambiguous DST-transition
            times instead of guessing silently.
    """
    naive = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%f")
    tdelta = datetime.datetime.now() - naive
    local_dt = local.localize(naive, is_dst=None)
    utc_dt = local_dt.astimezone(pytz.utc)
    # total_seconds() keeps sub-second precision, unlike days*24 + seconds/3600.
    return utc_dt.isoformat(), tdelta.total_seconds() / 3600.0


def set_path():
    """Return the Tableau log directory for the configured OS.

    Exits with status 1 if ``OS`` is not 'mac' or 'windows'.
    """
    if OS == 'mac':
        return "/Users/%s/Documents/My Tableau Repository/Logs" % USERNAME
    elif OS == 'windows':
        return "C:/Users/%s/Documents/My Tableau Repository/Logs" % USERNAME
    else:
        print("Verify your OS setting")
        sys.exit(1)


def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]


level = logging.INFO
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(name)s] %(message)s', level=level)
logger = logging.getLogger("Tableau parser for Loggly")


def _parse_log_file(filename):
    """Read one Tableau log file and return its messages as dicts, newest first.

    Each message gains a ``user`` field, a UTC ``timestamp``, and a
    ``tdelta_sec`` age (in hours — see transform_timestamp). Values of
    ``"-"`` are rewritten to ``"None"`` so Loggly can interpret them.
    """
    # Reverse so the newest log lines come first.
    with open(filename) as fh:
        lines = list(reversed(fh.readlines()))
    logger.info("Transforming log data to a list of dictionaries")
    messages = [json.loads(line) for line in lines]
    logger.info("Adding user and timestamp field with ISO-8601 format")
    for msg in messages:
        msg['user'] = USERNAME
        msg['timestamp'], msg['tdelta_sec'] = transform_timestamp(msg['ts'])
        # Replace "-" in the value with "None" in order to be interpretable by Loggly.
        for k, v in msg.items():
            if v == "-":
                msg[k] = "None"
    return messages


def _send_to_loggly(messages):
    """POST messages to Loggly's bulk endpoint in chunks of ``S``."""
    logger.info("Transforming dictionary to a list of strings")
    as_text = [json.dumps(m) for m in messages]
    logger.info("Splitting messages into chunks of size %d each", S)
    logger.info("Sending log data to Loggly in chunks")
    url = "https://logs-01.loggly.com/bulk/%s/tag/%s/" % (TOKEN, TAG)
    for chunk in chunks(as_text, S):
        # Timeout prevents a hung connection from stalling the script forever.
        r = requests.post(url, '\n'.join(chunk), timeout=30)
        logger.info("Loggly response: %s", r.status_code)
        time.sleep(1)


def main():
    """Parse every configured log file and ship the recent messages."""
    path = set_path()
    logger.info("Setting path to log file as %s", path)
    os.chdir(path)
    logger.info("Reading log data as list of strings")
    for f in FILENAME:
        logger.info("Parsing file %s", f)
        messages = _parse_log_file(f)
        logger.info("Found %d log messages", len(messages))
        if H > 0:
            logger.info("Filtering result set to last %d hours", H)
            filtered = [m for m in messages if m['tdelta_sec'] < H]
        else:
            filtered = messages
        count = len(filtered)
        logger.info("Writing %d log messages", count)
        if count > 0:
            # Messages are newest-first, so index 0 is the latest entry.
            logger.info("Output messages begin at %s UTC and end at %s UTC",
                        filtered[-1]['timestamp'], filtered[0]['timestamp'])
            _send_to_loggly(filtered)


if __name__ == "__main__":
    main()