This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(forecast)

# mdeaths: monthly deaths from lung diseases in the UK (dataset built into R)
fit <- auto.arima(mdeaths)

# customize the confidence intervals via `level`; h = 3 periods (months) ahead
forecast(fit, level = c(80, 95, 99), h = 3)
# Example output:
#          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95    Lo 99    Hi 99
# Jan 1980       1822.863 1564.192 2081.534 1427.259 2218.467 1302.952 2342.774
# Feb 1980       1923.190 1635.530 2210.851 1483.251 2363.130 1345.012 2501.368
# Mar 1980       1789.153 1495.048 2083.258 1339.359 2238.947 1198.023 2380.283
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(qcc)

# series of values with mean 10 plus a little random noise
# NOTE(review): no set.seed() here, so results differ between runs
x <- rep(10, 100) + rnorm(100)
# a test series with mean 11
new.x <- rep(11, 15) + rnorm(15)
# qcc will flag the new points as beyond the control limits
qcc(x, newdata = new.x, type = "xbar.one")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(reshape2)

# generate a unique id for each row; this lets us go back to wide format later
iris$id <- seq_len(nrow(iris))
# melt to long format, keeping id and Species as identifier columns
iris.lng <- melt(iris, id = c("id", "Species"))
head(iris.lng)
# Example output:
#   id Species     variable value
# 1  1  setosa Sepal.Length   5.1
# 2  2  setosa Sepal.Length   4.9
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(randomForest)

# download Titanic survivors data (tab-separated, with a header row)
data <- read.table("http://math.ucdenver.edu/RTutorial/titanic.txt",
                   header = TRUE, sep = "\t")
# recode Survived as a yes/no factor so randomForest does classification
data$Survived <- as.factor(ifelse(data$Survived == 1, "yes", "no"))
# split into a ~75% training set and the remainder as a test set
# NOTE(review): no set.seed(), so the split is not reproducible
idx <- runif(nrow(data)) <= 0.75
data.train <- data[idx, ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RPostgreSQL)

drv <- dbDriver("PostgreSQL")
# NOTE(review): avoid hard-coding credentials in scripts; prefer
# environment variables or a .pgpass file
db <- dbConnect(drv, dbname = "ncaa",
                user = "YOUR USER NAME", password = "YOUR PASSWORD")
# query text; executed against the connection further down
q <- "SELECT
  *
FROM
  game_scores;"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd
import pylab as pl

# baseball batting data hosted behind a shortened URL
baseball = pd.read_csv("http://bit.ly/144sh7t")
# group by year and get a summary of each numeric column
baseball.groupby(["year"]).describe()
# for each year, get the mean of each column
# (modern pandas prefers .mean() / .agg("mean") over passing np.mean)
baseball.groupby(["year"]).aggregate(np.mean)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# `pandas.core.reshape` was a private module path removed from pandas long ago;
# melt and pivot are public at the top level of the package
from pandas import melt, pivot

# assumes `pd`, `np`, and the sklearn `iris` bunch are in scope
# (loaded in an earlier snippet) — TODO confirm
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# unique row id so we can pivot back to wide format later
df['id'] = np.arange(0, len(df))
# pd.Factor was removed from pandas; Categorical.from_codes is the replacement
df['species'] = pd.Categorical.from_codes(iris.target, categories=iris.target_names)
df_lng = melt(df, id_vars=['id', 'species'])
df_lng.head()
df_lng.tail()
# back to wide
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
from datetime import datetime

# generate some fake tick data with 1 million observations;
# calling datetime.now() per row yields near-duplicate, non-decreasing timestamps
n = 1000000
df = pd.DataFrame({
    "timestamp": [datetime.now() for _ in range(n)],
    "value": np.random.uniform(-1, 1, n)
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from sklearn.datasets import load_iris

# load the iris dataset and wrap the numeric measurements in a DataFrame
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# the first four columns are the numeric measurements
cols = df.columns[0:4]
# divide each numeric column by 2; apply returns a new frame, df is unchanged
df[cols].apply(lambda x: x / 2).head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dateutil.parser import parse
import pandas as pd

# monthly slaughter records since 1921
df = pd.read_csv("http://bit.ly/119792b")
# parse the date strings (pd.to_datetime would also work, and is vectorized)
df["date"] = df["date"].apply(parse)
# sort the data frame by date
# (DataFrame.sort() was removed in pandas 0.20; sort_values is the replacement)
df = df.sort_values(["date"])
# create an index