Skip to content

Instantly share code, notes, and snippets.

View shiumachi's full-sized avatar

Sho Shimauchi shiumachi

View GitHub Profile
#!/bin/bash
usage()
{
	# Print the help text for hadoop-logaggr.sh on stderr, then exit
	# successfully (this path is reached via the -h flag).
	printf '%s\n' "hadoop-logaggr.sh [-h] file" >&2
	printf '%s\n' " -h: help (this message)" >&2
	exit 0
}
TEMP=`getopt h $*`
#!/bin/sh
# Paths derived from HADOOP_MAPRED_HOME (MRv2 directory layout).
TMP_DIR=/tmp
HADOOP_BIN_DIR="${HADOOP_MAPRED_HOME}/bin"
HADOOP_COMMAND="${HADOOP_BIN_DIR}/hadoop"
# Base command line for running a jar via hadoop.
HADOOP_JAR_COMMAND="${HADOOP_COMMAND} jar"
MAPRED_SHARE_HOME="${HADOOP_MAPRED_HOME}/share/hadoop/mapreduce"
# Stored as a literal glob; it expands when used unquoted at invocation time.
HADOOP_EXAMPLE_JAR="${MAPRED_SHARE_HOME}/hadoop*examples*.jar"
HADOOP_EXAMPLE_COMMAND="${HADOOP_JAR_COMMAND} ${HADOOP_EXAMPLE_JAR}"
#!/bin/sh
# '.' is the POSIX spelling of bash's 'source' — this file declares #!/bin/sh,
# where 'source' is not guaranteed to exist. The explicit ./ prefix loads the
# sibling file from the current directory rather than searching PATH.
. ./util.sh
TMP_DIR=/tmp
HADOOP_BIN_DIR="${HADOOP_HOME}/bin"
HADOOP_COMMAND="${HADOOP_BIN_DIR}/hadoop"
# 'command -v' is the POSIX replacement for the non-standard 'which';
# $(...) replaces the legacy backtick substitution. Empty when not on PATH.
HADOOP_START_HDFS_COMMAND=$(command -v start-dfs.sh)
HADOOP_STOP_HDFS_COMMAND=$(command -v stop-dfs.sh)
#!/bin/bash
usage(){
	# Print the usage summary on stderr.
	# Fixes: the last line was missing its trailing newline; the script name
	# came from an unquoted backtick `basename $0` (breaks on paths with
	# spaces); redirection style is now uniformly '>&2'.
	printf "%s:\n" "usage" >&2
	printf "%s file \n" "$(basename -- "$0")" >&2
	printf " %-15s: help (this message)\n" "-h" >&2
}
TEMP=`getopt h $*`
#!/bin/bash
usage()
{
	# Emit the one-line usage summary for zip_logs.sh on stderr and exit
	# successfully.
	printf '%s\n' "usage: zip_logs.sh [directory] [-prod]" >&2
	exit 0
}
TEMP=`getopt :h $*`
@shiumachi
shiumachi / alternatives-hadoop.sh
Created November 28, 2018 06:20
alternatives like script for hadoop
#!/bin/sh
# Root under which all managed symlinks live.
HOME_LIB_DIR="${HOME}/lib"
# symlink list — one alternatives-style link per Hadoop-ecosystem component.
HADOOP_SYMLINK="${HOME_LIB_DIR}/hadoop"
HBASE_SYMLINK="${HOME_LIB_DIR}/hbase"
ZOOKEEPER_SYMLINK="${HOME_LIB_DIR}/zookeeper"
HIVE_SYMLINK="${HOME_LIB_DIR}/hive"
PIG_SYMLINK="${HOME_LIB_DIR}/pig"
@shiumachi
shiumachi / bloomfilter.py
Created November 26, 2018 07:59
Bloomfilter sample
#!/usr/bin/python
import hashlib
# Bloom-filter demo parameters.
# startKey/endKey are slice bounds into the hex MD5 digest (see check_bl:
# hexdigest()[startKey:endKey]); 2:6 yields a 4-hex-digit bucket.
startKey = 2
endKey = 6
# NOTE(review): inferred from names — inputNum looks like the number of
# inserted keys and testNum the number of membership probes; confirm
# against the full gist.
inputNum = 1000
testNum = 100000
def check_bl(bloom, a):
aa = hashlib.md5(a).hexdigest()[startKey:endKey]
@shiumachi
shiumachi / myargparse.py
Created November 26, 2018 07:58
argparse sample
#!/usr/bin/python
# -*- coding: utf-8 -*-
import argparse
import sys
class MyArgParse(object):
def __init__(self):
pass
def sum(self):
@shiumachi
shiumachi / bootstrap-master.sh
Created April 27, 2018 07:31
Kafka Kudu Demo (WIP)
#!/bin/sh
# Bootstrap script for the demo's master/edge node.
# logging stdout/stderr
# Trace every command (-x) and append all subsequent script output to a
# persistent log so first-boot failures can be diagnosed afterwards.
# NOTE(review): assumes the script runs as root (writes under /root) — confirm.
set -x
exec >> /root/bootstrap-master-init.log 2>&1
date
# Master node identifier
# Marker flag file; presumably checked by companion bootstrap scripts to
# recognize the edge node — verify against the rest of the demo.
touch /root/kafka-kudu-demo_edge-node.flag
@shiumachi
shiumachi / csv_to_parquet.py
Last active December 28, 2018 06:22
日付単位に分けられた複数のCSVファイルを月単位のParquetファイルに変換する
# This script compacts daily based csv files to monthly based parquet file.
# The CSV files should be named like "YYYY-MM-DD.csv" format.
#
# このスクリプトは日付毎のcsvファイルを月毎のparquetファイルに変換します。
# CSVファイルの名前は"YYYY-MM-DD.csv"の形式にしてください。
#
import pandas as pd
import numpy as np
import pyarrow as pa