Skip to content

Instantly share code, notes, and snippets.

Avatar

Sho Shimauchi shiumachi

View GitHub Profile
View retail_data_gen.py
import argparse
import random
from datetime import date, timedelta
from random import shuffle
# option settings
# Command-line interface for the retail data generator.
# NOTE(review): `parser` is consumed by code beyond this excerpt (parse_args
# presumably follows) — do not rename.
parser = argparse.ArgumentParser(description='retail data generator')
# --no-file: how many output files to generate (default 1).
parser.add_argument('--no-file', type=int, default=1, help='number of files. default is 1.')
# --no-line: how many lines to write per file (default 10000).
parser.add_argument('--no-line', type=int, default=10000, help='number of lines. default is 10000.')
View hive_create_table_with_many_partitions.py
# -*- coding: utf-8 -*-
"""
Copyright 2015 Sho Shimauchi
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
@shiumachi
shiumachi / datagen.py
Created Nov 28, 2018
data generator for Hive / Impala demo
View datagen.py
import argparse
import random
# argparse usage template; %(prog)s is substituted with the program name by
# argparse when this string is passed as the `usage` argument.
usage = """\
%(prog)s [options]
"""
def init_parser():
View kafka-kudu-demo.py
from kafka import KafkaConsumer
from kafka.client import KafkaClient
import kudu
from kudu.client import Partitioning
import argparse
def init_argumentparser():
parser = argparse.ArgumentParser()
parser.add_argument('--kudu_master_address', default='', type=str, required=True)
parser.add_argument('--kudu_master_port', default='7051', type=str)
View wait_seconds.sh
#!/bin/bash
# utility functions
# wait_seconds N
# sleep N seconds
#
function wait_seconds()
{
func_name="wait_seconds"
if ! expr "$1" : '[0-9]*' > /dev/null ;
View hadoop-logaggr-timeline.sh
#!/bin/bash
aggregate_min=30 # default value
usage()
{
echo "hadoop-logaggr-timeline.sh [-h] [-t N] file" >&2
echo " -t N[min]: must be integer (default:30) " >&2
echo " aggregates logs each N minutes." >&2
echo " -h: help (this message)" >&2
View hadoop-logaggr.sh
#!/bin/bash
# usage: show the command-line synopsis on stderr, then terminate the
# script successfully (help output is not an error condition here).
usage()
{
    {
        echo "hadoop-logaggr.sh [-h] file"
        echo " -h: help (this message)"
    } >&2
    exit 0
}
# Parse the command line with getopt, accepting only -h; the normalized
# option string is captured in TEMP for the (unseen) option loop below.
# NOTE(review): legacy backtick substitution and unquoted $* — presumably
# intentional for old-style getopt; arguments with spaces are not preserved.
TEMP=`getopt h $*`
View mapreduce-test.sh
#!/bin/sh
# Path constants for running the bundled Hadoop MapReduce example jobs.
# Requires HADOOP_MAPRED_HOME to be set in the environment.
TMP_DIR=/tmp
HADOOP_BIN_DIR=${HADOOP_MAPRED_HOME}/bin
HADOOP_COMMAND=${HADOOP_BIN_DIR}/hadoop
# Base command for launching a jar: "hadoop jar"
HADOOP_JAR_COMMAND="${HADOOP_COMMAND} jar"
MAPRED_SHARE_HOME=${HADOOP_MAPRED_HOME}/share/hadoop/mapreduce
# Glob deliberately matches the versioned examples jar name
# (e.g. hadoop-mapreduce-examples-*.jar).
HADOOP_EXAMPLE_JAR=${MAPRED_SHARE_HOME}/hadoop*examples*.jar
# Full prefix for running one of the example programs.
HADOOP_EXAMPLE_COMMAND="${HADOOP_JAR_COMMAND} ${HADOOP_EXAMPLE_JAR}"
View init-hdfs.sh
#!/bin/sh
# Shared helpers (defined in util.sh, not shown here).
source util.sh
# Path constants for starting/stopping HDFS.
# Requires HADOOP_HOME to be set and start-dfs.sh/stop-dfs.sh on PATH.
TMP_DIR=/tmp
HADOOP_BIN_DIR=${HADOOP_HOME}/bin
HADOOP_COMMAND=${HADOOP_BIN_DIR}/hadoop
# Resolve the DFS control scripts via PATH lookup.
HADOOP_START_HDFS_COMMAND=`which start-dfs.sh`
HADOOP_STOP_HDFS_COMMAND=`which stop-dfs.sh`
View hbase-log-checker.sh
#!/bin/bash
# usage: print a brief help summary for this script on stderr.
usage(){
    printf "%s:\n" "usage" >&2
    printf "%s file \n" $(basename $0) >&2
    printf " %-15s: help (this message)" "-h" >&2
}
# Parse the command line with getopt, accepting only -h; the normalized
# option string is captured in TEMP for the (unseen) option loop below.
# NOTE(review): legacy backtick substitution and unquoted $* — presumably
# intentional for old-style getopt; arguments with spaces are not preserved.
TEMP=`getopt h $*`
You can’t perform that action at this time.