Skip to content

Instantly share code, notes, and snippets.

View okomestudio's full-sized avatar

Taro Sato okomestudio

View GitHub Profile
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
"""S3DistCp
Run S3DistCp via boto. Currently only a limited set of S3DistCp
options useful for preparing AWS/EMR input data are exposed.
For details of S3DistCp options, see Distributed Copy Using S3DistCp:
http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/UsingEMR_s3distcp.html
@okomestudio
okomestudio / decorators.py
Created April 3, 2015 23:50
check_value decorator -- catches and logs an exception happening within a decorated function or method
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
import functools
import inspect
from itertools import izip_longest
import json
import logging
import os
import sys
from traceback import format_tb
#!/usr/bin/bash
#
# For configuring EC2 Debian/Jessie instance.
#
sudo apt-get update
sudo apt-get -y upgrade
################
# general system
@okomestudio
okomestudio / findstr.sh
Last active August 29, 2015 14:04
Search for files containing a specified string.
#!/bin/bash
##############################################################################
# findstr
#
# Search for files containing a specified string. Show the file path,
# line number, and the line itself containing a specified string.
#
# EXAMPLE:
#
# To search for all the files with the file extension .html containing
@okomestudio
okomestudio / ab_test.py
Created May 12, 2014 19:19
An example of A/B test using the chi-squared test for independence.
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
"""An example of A/B test using the chi-squared test for independence."""
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency
def main():
data = pd.io.parsers.read_csv('n10000.csv')
@okomestudio
okomestudio / chisq_distribution.py
Created May 12, 2014 18:26
Chi-squared distribution
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import chi2
def main():
colors = 'bgrcmyk'
n = 40000
@okomestudio
okomestudio / ca.py
Created May 7, 2014 22:20
An example of correspondence analysis.
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import svd
class CA(object):
"""Simple correspondence analysis.
@okomestudio
okomestudio / pca_kmeans_biplot.py
Last active August 29, 2015 14:00
This is an example of using PCA and biplot. The observations are colored by k-means clustering.
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
"""Biplot example using pcasvd from statsmodels and matplotlib.
This is an example of how a biplot (like that in R) can be produced
using pcasvd and matplotlib. Additionally, this example does k-means
clustering and colors the observations by the cluster they belong to.
"""
import matplotlib.pyplot as plt