Keiichi Kuroyanagi Keiku

## OrderedDict_sample.py
from collections import OrderedDict

# Sample mapping: keys ascend alphabetically while the values descend.
d = dict(A=3, B=2, C=1)

# Order the (key, value) pairs by key, then read the values back in that order.
OrderedDict(sorted(d.items(), key=lambda pair: pair[0])).values()
# Out[1]: odict_values([3, 2, 1])
# Same again, but ordering the pairs by value instead of by key.
OrderedDict(sorted(d.items(), key=lambda pair: pair[1])).values()
# Out[2]: odict_values([1, 2, 3])

## extract_onehot_vector.py
# numpy was used below without being imported, which raised NameError.
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Two samples, three categorical (string) features each.
X_str = np.array([['a', 'dog', 'red'], ['b', 'cat', 'green']])
# transform to integer: the encoder is fitted on the flattened array, so the
# integer codes are shared across columns; the result is then reshaped back
# to the original 2-D layout.
X_int = LabelEncoder().fit_transform(X_str.ravel()).reshape(*X_str.shape)
# transform to binary: one-hot encode each integer column; fit_transform
# returns a sparse matrix, so convert it to a dense array for printing.
X_bin = OneHotEncoder().fit_transform(X_int).toarray()

print(X_bin)
# [[ 1.  0.  0.  1.  0.  1.]
#  [ 0.  1.  1.  0.  1.  0.]]

## extract_tfidf_vector.py
# Toy corpus: four short documents used to demonstrate TF-IDF extraction.
text = [
    'This is a string',
    'This is another string',
    'TFIDF computation calculation',
    'TfIDF is the product of TF and IDF',
]

from sklearn.feature_extraction.text import TfidfVectorizer
# stop_words='english' drops common English words before counting;
# norm=None leaves the tf-idf weights unnormalised.
vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', norm=None)

X = vectorizer.fit_transform(text)
# get_feature_names() was removed in scikit-learn 1.2. Prefer its replacement
# (converted to a plain list so the value looks the same as before) and fall
# back to the old method on releases that predate get_feature_names_out().
X_vovab = (vectorizer.get_feature_names_out().tolist()
           if hasattr(vectorizer, 'get_feature_names_out')
           else vectorizer.get_feature_names())
# Out[1]: ['calculation', 'computation', 'idf', 'product', 'string', 'tf', 'tfidf']
# Dense (documents x vocabulary) view of the sparse tf-idf matrix.
X_mat = X.todense()
# Out[2]:

## Modeling_GermanCredit.r
# Install the packages
pkgs <- c("dplyr", "rpart", "rpart.plot", "rattle", "mlr", "evtree")
install.packages(pkgs, quiet = TRUE)

# Load the packages
# (rpart and rpart.plot are installed above but not attached in these lines)
library("dplyr")
library("rattle")
library("mlr")
library("evtree")

## dplyr_se.r
library(dplyr)
library(lazyeval)

df <- data_frame(group = c(1, 2, 2, 3, 3, 3))

g <- "group"

df %>%
  group_by_(g) %>%
  summarise_(

## impute.py
import pandas as pd

# Two string columns; column B is missing (None) in rows 0 and 2.
df = pd.DataFrame(
    data={
        'A': ['A1', 'A2', 'A3'],
        'B': [None, 'B2', None],
    },
)
df
# Out[51]:
#     A     B
# 0  A1  None
# 1  A2    B2
# 2  A3  None

## misc.r
# Session display options: a large scipen penalty discourages scientific
# notation; the dplyr.* options are set to Inf (presumably to lift the print
# limits on columns and rows -- confirm against the installed dplyr/tibble).
options(scipen = 100, dplyr.width = Inf, dplyr.print_max = Inf)

# Negated %in%: TRUE where an element of the left operand is absent from the right.
`%nin%` <- Negate(`%in%`)

# Elements of x that also occur in y.
keep_vecs <- function(x, y) {
  x[x %in% y]
}

# Elements of x that do not occur in y.
drop_vecs <- function(x, y) {
  x[!(x %in% y)]
}

# Keep only the columns of .data named in x.
keep_vars <- function(.data, x) {
  dplyr::select_(.data, .dots = x)
}

# Drop the columns of .data named in x.
drop_vars <- function(.data, x) {
  dplyr::select(.data, -one_of(x))
}

# Intersection of any number of vectors, folded pairwise from left to right.
intersect_all <- function(...) {
  Reduce(intersect, list(...))
}

# Union of any number of vectors, folded pairwise from left to right.
# NOTE(review): dplyr also exports a function called union_all; defining this
# after library(dplyr) masks it -- confirm that is intended.
union_all <- function(...) {
  Reduce(union, list(...))
}

## dplyr_examples.r
library(dplyr)

# Convert the built-in iris data set to a tibble.
iris_df <- iris %>% as_data_frame()

# Rename every column to its upper-case form (new name = old name pairs built
# with setNames), then show the first two rows.
iris_df %>%
  rename_(.dots = setNames(names(.), toupper(names(.)))) %>%
  head(n = 2)
# A tibble: 2 × 5
# SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
# <dbl>       <dbl>        <dbl>       <dbl>  <fctr>
# 1          5.1         3.5          1.4         0.2  setosa
# 2          4.9         3.0          1.4         0.2  setosa

## tidyr_reshape.r
library("dplyr")
library("tidyr")
library("data.table")

# Sample data: six rows in which IDs 1-3 cycle twice while BMI, sbp and nendo
# alternate between two values each.
# (nendo is presumably the Japanese word for fiscal year -- confirm.)
smp <- data_frame(
  ID    = rep(1:3, times = 2),
  BMI   = rep(c(21, 26), times = 3),
  sbp   = rep(c(150, 120), times = 3),
  nendo = rep(2008:2009, times = 3)
)

## extract_subset.r
# Three numeric vectors to combine.
a <- c(1, 3, 5, 7, 9)
b <- c(3, 6, 8, 9, 10)
# The variable is named c, but c(...) calls still resolve to the base function.
c <- c(2, 3, 4, 5, 7, 9)

# Intersection of any number of vectors, folded pairwise from left to right.
intersect_all <- function(...) {
  Reduce(intersect, list(...))
}

# Union of any number of vectors, folded pairwise from left to right.
union_all <- function(...) {
  Reduce(union, list(...))
}

intersect_all(a, b, c)
# [1] 3 9
union_all(a, b, c)
# [1]  1  3  5  7  9  6  8 10  2  4
	from collections import OrderedDict

	# Sample mapping: keys ascend alphabetically while the values descend.
	d = dict(A=3, B=2, C=1)

	# Order the (key, value) pairs by key, then read the values back in that order.
	OrderedDict(sorted(d.items(), key=lambda pair: pair[0])).values()
	# Out[1]: odict_values([3, 2, 1])
	# Same again, but ordering the pairs by value instead of by key.
	OrderedDict(sorted(d.items(), key=lambda pair: pair[1])).values()
	# Out[2]: odict_values([1, 2, 3])
	# numpy was used below without being imported, which raised NameError.
	import numpy as np
	from sklearn.preprocessing import LabelEncoder, OneHotEncoder

	# Two samples, three categorical (string) features each.
	X_str = np.array([['a', 'dog', 'red'], ['b', 'cat', 'green']])
	# transform to integer: the encoder is fitted on the flattened array, so the
	# integer codes are shared across columns; the result is then reshaped back
	# to the original 2-D layout.
	X_int = LabelEncoder().fit_transform(X_str.ravel()).reshape(*X_str.shape)
	# transform to binary: one-hot encode each integer column; fit_transform
	# returns a sparse matrix, so convert it to a dense array for printing.
	X_bin = OneHotEncoder().fit_transform(X_int).toarray()

	print(X_bin)
	# [[ 1. 0. 0. 1. 0. 1.]
	#  [ 0. 1. 1. 0. 1. 0.]]
	# Toy corpus: four short documents used to demonstrate TF-IDF extraction.
	text = [
	    'This is a string',
	    'This is another string',
	    'TFIDF computation calculation',
	    'TfIDF is the product of TF and IDF',
	]

	from sklearn.feature_extraction.text import TfidfVectorizer
	# stop_words='english' drops common English words before counting;
	# norm=None leaves the tf-idf weights unnormalised.
	vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', norm=None)

	X = vectorizer.fit_transform(text)
	# get_feature_names() was removed in scikit-learn 1.2. Prefer its replacement
	# (converted to a plain list so the value looks the same as before) and fall
	# back to the old method on releases that predate get_feature_names_out().
	X_vovab = (vectorizer.get_feature_names_out().tolist()
	           if hasattr(vectorizer, 'get_feature_names_out')
	           else vectorizer.get_feature_names())
	# Out[1]: ['calculation', 'computation', 'idf', 'product', 'string', 'tf', 'tfidf']
	# Dense (documents x vocabulary) view of the sparse tf-idf matrix.
	X_mat = X.todense()
	# Out[2]:
	# Install the packages
	pkgs <- c("dplyr", "rpart", "rpart.plot", "rattle", "mlr", "evtree")
	install.packages(pkgs, quiet = TRUE)

	# Load the packages
	# (rpart and rpart.plot are installed above but not attached in these lines)
	library("dplyr")
	library("rattle")
	library("mlr")
	library("evtree")
	library(dplyr)
	library(lazyeval)

	df <- data_frame(group = c(1, 2, 2, 3, 3, 3))

	g <- "group"

	df %>%
	group_by_(g) %>%
	summarise_(
	import pandas as pd

	# Two string columns; column B is missing (None) in rows 0 and 2.
	df = pd.DataFrame(
	    data={
	        'A': ['A1', 'A2', 'A3'],
	        'B': [None, 'B2', None],
	    },
	)
	df
	# Out[51]:
	#     A     B
	# 0  A1  None
	# 1  A2    B2
	# 2  A3  None
	# Session display options: a large scipen penalty discourages scientific
	# notation; the dplyr.* options are set to Inf (presumably to lift the print
	# limits on columns and rows -- confirm against the installed dplyr/tibble).
	options(scipen = 100, dplyr.width = Inf, dplyr.print_max = Inf)

	# Negated %in%: TRUE where an element of the left operand is absent from the right.
	`%nin%` <- Negate(`%in%`)

	# Elements of x that also occur in y.
	keep_vecs <- function(x, y) {
	  x[x %in% y]
	}

	# Elements of x that do not occur in y.
	drop_vecs <- function(x, y) {
	  x[!(x %in% y)]
	}

	# Keep only the columns of .data named in x.
	keep_vars <- function(.data, x) {
	  dplyr::select_(.data, .dots = x)
	}

	# Drop the columns of .data named in x.
	drop_vars <- function(.data, x) {
	  dplyr::select(.data, -one_of(x))
	}

	# Intersection of any number of vectors, folded pairwise from left to right.
	intersect_all <- function(...) {
	  Reduce(intersect, list(...))
	}

	# Union of any number of vectors, folded pairwise from left to right.
	# NOTE(review): dplyr also exports a function called union_all; defining this
	# after library(dplyr) masks it -- confirm that is intended.
	union_all <- function(...) {
	  Reduce(union, list(...))
	}
	library(dplyr)

	# Convert the built-in iris data set to a tibble.
	iris_df <- iris %>% as_data_frame()

	# Rename every column to its upper-case form (new name = old name pairs built
	# with setNames), then show the first two rows.
	iris_df %>%
	  rename_(.dots = setNames(names(.), toupper(names(.)))) %>%
	  head(n = 2)
	# A tibble: 2 × 5
	# SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH SPECIES
	# <dbl> <dbl> <dbl> <dbl> <fctr>
	# 1 5.1 3.5 1.4 0.2 setosa
	# 2 4.9 3.0 1.4 0.2 setosa
	library("dplyr")
	library("tidyr")
	library("data.table")

	# Sample data: six rows in which IDs 1-3 cycle twice while BMI, sbp and nendo
	# alternate between two values each.
	# (nendo is presumably the Japanese word for fiscal year -- confirm.)
	smp <- data_frame(
	  ID    = rep(1:3, times = 2),
	  BMI   = rep(c(21, 26), times = 3),
	  sbp   = rep(c(150, 120), times = 3),
	  nendo = rep(2008:2009, times = 3)
	)
	# Three numeric vectors to combine.
	a <- c(1, 3, 5, 7, 9)
	b <- c(3, 6, 8, 9, 10)
	# The variable is named c, but c(...) calls still resolve to the base function.
	c <- c(2, 3, 4, 5, 7, 9)

	# Intersection of any number of vectors, folded pairwise from left to right.
	intersect_all <- function(...) {
	  Reduce(intersect, list(...))
	}

	# Union of any number of vectors, folded pairwise from left to right.
	union_all <- function(...) {
	  Reduce(union, list(...))
	}

	intersect_all(a, b, c)
	# [1] 3 9
	union_all(a, b, c)
	# [1]  1  3  5  7  9  6  8 10  2  4