Skip to content

Instantly share code, notes, and snippets.

View shiumachi's full-sized avatar

Sho Shimauchi shiumachi

View GitHub Profile
import datetime
def convert_to_datetime(date_string):
""" input: %Y-%m-%d %H:%M:%S,%f
example: 2014-01-05 22:20:50,307
return: datetime object
"""
date_format = "%Y-%m-%d %H:%M:%S,%f"
def print_arr_right_aligned(arr):
    """Print every string in *arr* right-aligned to the longest entry.

    Each entry is written on its own line, left-padded with spaces to the
    width of the longest entry and followed by a colon and a single space.

    Example for ['a', 'ab', 'abc'] (trailing space after each colon):

        '  a: '
        ' ab: '
        'abc: '

    Args:
        arr: iterable of strings. May be empty, in which case nothing is
            printed. One-shot iterables (generators) are supported.

    Returns:
        None. The output goes to standard output.
    """
    # Materialize once: the input is walked twice (width computation, then
    # printing), which would silently exhaust a generator.
    items = list(arr)
    if not items:
        # max() raises ValueError on an empty sequence; nothing to print.
        return
    width = max(map(len, items))
    for item in items:
        print("{0:>{1}}: ".format(item, width))
# sample of ast module
# reference:
# http://docs.python.jp/2.7/library/ast.html
# http://stackoverflow.com/questions/1515357/simple-example-of-how-to-use-ast-nodevisitor
import ast
import sys
import logging
from convert_to_datetime import convert_to_datetime
def add_date_to_log(line):
""" add YYMMDDhh to the beginning of the log.
Argument:
line (hadoop log line)
"""
arr = line.rstrip().split()
date_string = ' '.join(arr[0:2])
# -*- coding: utf-8 -*-
import hypchat
import ConfigParser
import time
import random
import re
import sqlite3
# Load settings
import argparse
import random
from datetime import date, timedelta
from random import shuffle
# Command-line option settings for the retail data generator.
# NOTE: the "no" in --no-file / --no-line abbreviates "number of" (see the
# help strings); these are integer counts, not negation flags. argparse
# exposes them on the parsed namespace as `no_file` and `no_line`.
parser = argparse.ArgumentParser(description='retail data generator')
parser.add_argument('--no-file', type=int, default=1, help='number of files. default is 1.')
parser.add_argument('--no-line', type=int, default=10000, help='number of lines. default is 10000.')
# -*- coding: utf-8 -*-
"""
Copyright 2015 Sho Shimauchi
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
@shiumachi
shiumachi / datagen.py
Created November 28, 2018 06:37
data generator for Hive / Impala demo
import argparse
import random
usage = """\
%(prog)s [options]
"""
def init_parser():
from kafka import KafkaConsumer
from kafka.client import KafkaClient
import kudu
from kudu.client import Partitioning
import argparse
def init_argumentparser():
parser = argparse.ArgumentParser()
parser.add_argument('--kudu_master_address', default='', type=str, required=True)
parser.add_argument('--kudu_master_port', default='7051', type=str)
#!/bin/bash
# utility functions
# wait_seconds N
# sleep N seconds
#
function wait_seconds()
{
func_name="wait_seconds"
if ! expr "$1" : '[0-9]*' > /dev/null ;