Chris Kenwright (thekensta)

@thekensta
thekensta / datecolumn_to_datepart.py
Last active August 29, 2015 14:22
Extract date components from a Date column in a pandas DataFrame
# Extracting date components from a Date column in Pandas using IPython
# Converting to DatetimeIndex is 100x faster than using DataFrame.apply()
import pandas as pd
dates = pd.DataFrame({"Date": pd.date_range(start="1970-01-01", end="2037-12-31")})
print(dates.head())
# Date
# 0 1970-01-01
# 1 1970-01-02
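The preview stops before the extraction itself; a minimal sketch of the DatetimeIndex approach the comment describes (the Year/Month/Day column names are illustrative):
# Convert once to a DatetimeIndex, then read components off it (the fast path)
idx = pd.DatetimeIndex(dates["Date"])
dates["Year"] = idx.year
dates["Month"] = idx.month
dates["Day"] = idx.day
# The slow alternative applies row by row: dates["Date"].apply(lambda d: d.year)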
@thekensta
thekensta / AutoArima.R
Last active August 29, 2015 14:25
Auto ARIMA from a data.frame, embedding the forecast in the actuals
##
## Wraps the forecast package's auto.arima(..) and forecast(..) calls
## Embeds the forecast into the data.frame
##
## Allows passing an EndDate so that the forecast can start mid-actuals
## (helps with visualization and explanation)
##
## Usage:
## Forecast.df <- AutoArimaForecast(Monthly.df, # DataFrame with
## H = 6, # Predict 6 months forward
@thekensta
thekensta / numpy_date_arithmetic.py
Last active November 25, 2015 12:37
Numpy and Pandas date arithmetic
import pandas as pd
import numpy as np
#
td = pd.DataFrame({'Date': pd.date_range('2014-01-01', '2015-12-31')})
td['Timedelta'] = td['Date'].max() - td['Date']
td.dtypes
# Date datetime64[ns]
# Timedelta timedelta64[ns]
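A short sketch of the unit arithmetic this pairing enables, assuming the frame above; dividing by a unit timedelta64 is the numpy idiom for converting a duration to a number:
# Convert the timedelta column to a float count of days
td['Days'] = td['Timedelta'] / np.timedelta64(1, 'D')
# Pandas equivalent for whole days: td['Timedelta'].dt.days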
@thekensta
thekensta / rpy2_from_ipython.py
Last active August 26, 2015 17:07
Passing parameters between IPython and Rpy2
# Quick summary of how to access R from IPython
# Useful link, but the summary is somewhat buried:
# http://rpy.sourceforge.net/rpy2/doc-2.4/html/interactive.html
import numpy as np
%load_ext rpy2.ipython
# %R [-i INPUT] [-o OUTPUT] [-n] [-w WIDTH] [-h HEIGHT] [-p POINTSIZE]
# [-b BG] [--noisolation] [-u {px,in,cm,mm}] [-r RES] [code [code ...]]
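A minimal sketch of the -i/-o flags in use, assuming the extension is loaded as above (variable names are illustrative):
x = np.arange(10)
y = 2 * x + np.random.randn(10)
# Push x and y into R, fit a linear model, pull the coefficients back
%R -i x,y -o coefs coefs <- coef(lm(y ~ x))
print(coefs)  # numpy array holding intercept and slope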
@thekensta
thekensta / grouping_sets_and_rollup.sql
Last active August 27, 2015 16:33
Grouping and Rollup SQL aggregation
-- Reference
-- https://technet.microsoft.com/en-us/library/bb522495(v=sql.105).aspx
-- TODO: add more detail; this is a syntax reference for me
Select fname, food, sum(total)
From (values
        ('Bob', 'Pies', 3),
        ('Charlie', 'Pies', 1)
     ) As t(fname, food, total)
Group By Grouping Sets ((fname), (food), ());
@thekensta
thekensta / least_squares.py
Last active November 16, 2015 00:07
Summary of least squares in Python
# Quick reminder of least squares calculations in python
import numpy as np
def least_sq_numpy(x, y):
    """Calculate y = mx + c from x, y returning m, c using numpy."""
    A = np.vstack([x, np.ones(x.size)]).T
    fit = np.linalg.lstsq(A, y)
    return fit[0]
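A quick usage sketch; the slope 3.0 and intercept 1.5 are made-up test values:
x = np.linspace(0, 10, 50)
y = 3.0 * x + 1.5 + np.random.normal(scale=0.5, size=x.size)
m, c = least_sq_numpy(x, y)
print(m, c)  # should recover roughly 3.0 and 1.5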
@thekensta
thekensta / svd_image_compression.py
Created September 21, 2015 12:07
SVD Image Compression
# IPython code using SVD to extract components of an image
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cmap
import numpy as np
from scipy import ndimage
# Any image file here; this one is colour so convert to greyscale
DOG_IMAGE_FILE = "dog2.jpg"
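The preview cuts off before the SVD itself; a minimal sketch of a rank-k reconstruction, assuming the file loads as a 2-D greyscale array (k=50 is an illustrative choice):
img = ndimage.imread(DOG_IMAGE_FILE, flatten=True)  # flatten=True converts to greyscale
U, s, Vt = np.linalg.svd(img, full_matrices=False)
k = 50  # number of singular values to keep
approx = np.dot(U[:, :k] * s[:k], Vt[:k, :])  # rank-k approximation of the image
plt.imshow(approx, cmap=cmap.gray)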
@thekensta
thekensta / numpy_basic_ops.py
Last active September 23, 2015 14:50
Numpy Basic Operations Cheat Sheet
# Storing basic operations here, as I tend to forget them!
# Fill as required (or fill as forgotten?? :-)
import numpy as np

# Repeat and Tile
# Repeat copies by element and flattens
# Tile copies sequences and preserves shape
a = np.array([1, 2, 3])
print(np.tile(a, 2))
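A sketch completing the contrast the comments describe; the 2-D example is illustrative:
print(np.repeat(a, 2))  # [1 1 2 2 3 3] - element-wise copies, compare with tile above
b = np.array([[1, 2], [3, 4]])
print(np.repeat(b, 2))     # [1 1 2 2 3 3 4 4] - flattens when no axis is given
print(np.tile(b, (2, 1)))  # [[1 2] [3 4] [1 2] [3 4]] - shape preserved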
@thekensta
thekensta / spark_s3a_instructions.sh
Created October 14, 2015 20:27
Set up Apache Spark 1.5+ with Hadoop 2.6+ and s3a
# For a local environment
# Install hadoop and apache-spark via homebrew
# Apache Spark conf file
# libexec/conf/spark-defaults.conf
# Make the AWS jars available to Spark
spark.executor.extraClassPath /usr/local/Cellar/hadoop/2.7.1/libexec/share/hadoop/tools/lib/aws-java-sdk-1.7.4.jar:/usr/local/Cellar/hadoop/2.7.1/libexec/share/hadoop/tools/lib/hadoop-aws-2.7.1.jar
spark.driver.extraClassPath /usr/local/Cellar/hadoop/2.7.1/libexec/share/hadoop/tools/lib/aws-java-sdk-1.7.4.jar:/usr/local/Cellar/hadoop/2.7.1/libexec/share/hadoop/tools/lib/hadoop-aws-2.7.1.jar
# Add file
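A hedged smoke test, assuming a pyspark shell (which predefines sc), AWS credentials already configured, and a placeholder bucket name:
# Reading anything via the s3a:// scheme confirms the jars are on the classpath
rdd = sc.textFile("s3a://your-bucket/some-file.txt")  # placeholder path
print(rdd.count())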
@thekensta
thekensta / hello_pyspark.py
Created October 27, 2015 16:04
Spark Shell Script
# submit with spark-submit hello_pyspark.py
# Spark 1.5.1
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext
conf = SparkConf().setAppName("showMeTheSchema").setMaster("local")
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)
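The preview ends at the context setup; a hypothetical continuation matching the app name (rows and column names are made up):
df = sqlContext.createDataFrame([(1, "a"), (2, "b")], ["id", "letter"])
df.printSchema()  # root |-- id: long ... |-- letter: string
sc.stop()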