View get_wordnet_synonyms.py
from itertools import chain
from nltk.corpus import wordnet
# nltk.download('wordnet')  # run once if the WordNet corpus is not installed yet
synsets = wordnet.synsets('change')
lemmas = set(chain.from_iterable(s.lemma_names() for s in synsets))
lemmas
# Out[31]:
# {'alter',
# 'alteration',
# 'change',
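A possible follow-up, sketched here as an assumption rather than part of the original file (the names verb_synsets and verb_lemmas are mine): the lookup can be restricted to one part of speech to narrow the synonym set.
# restrict the lookup to verb senses of 'change'
verb_synsets = wordnet.synsets('change', pos=wordnet.VERB)
verb_lemmas = set(chain.from_iterable(s.lemma_names() for s in verb_synsets))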
View stack_sparse_matrix.py
import numpy as np
from scipy import sparse
import pandas as pd
df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
df2 = pd.DataFrame({"C": [5, 6]})
X1 = sparse.csr_matrix(df1.values)
X1_dense = X1.todense()
# Out[28]:
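The file name suggests stacking the two frames as sparse matrices, which the listing stops short of; a minimal sketch of that step, assuming a column-wise combination was the intent (X2 and X are names I introduce):
# convert the second frame and stack the two sparse matrices column-wise
X2 = sparse.csr_matrix(df2.values)
X = sparse.hstack([X1, X2])
X.todense()
# matrix([[1, 3, 5],
#         [2, 4, 6]])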
View list_operations.py
import numpy as np
# Python
list(map(lambda x: x + 1, range(1, 6, 1)))
# Out[1]: [2, 3, 4, 5, 6]
# Numpy
list(np.array(range(1, 6, 1)) + 1)
# Out[2]: [2, 3, 4, 5, 6]
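For completeness, the same increment as a plain list comprehension; this variant is not in the original file but follows directly from the two above.
# List comprehension
[x + 1 for x in range(1, 6, 1)]
# Out[3]: [2, 3, 4, 5, 6]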
View tmux.sh
# show prefix
tmux show-options -g prefix
# new session (unnamed, or named with -s)
tmux
tmux new -s work
# check sessions
tmux ls
View OrderedDict_sample.py
from collections import OrderedDict
d = {'A': 3,
'B': 2,
'C': 1}
# sort by key
OrderedDict(sorted(d.items(), key=lambda x: x[0])).values()
# Out[1]: odict_values([3, 2, 1])
# sort by value
OrderedDict(sorted(d.items(), key=lambda x: x[1])).values()
# Out[2]: odict_values([1, 2, 3])
View extract_onehot_vector.py
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
X_str = np.array([['a', 'dog', 'red'], ['b', 'cat', 'green']])
# transform to integer
X_int = LabelEncoder().fit_transform(X_str.ravel()).reshape(*X_str.shape)
# transform to binary
X_bin = OneHotEncoder().fit_transform(X_int).toarray()
print(X_bin)
# [[ 1. 0. 0. 1. 0. 1.]
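Recent scikit-learn versions can one-hot encode the string matrix directly, without the LabelEncoder round-trip; a minimal sketch under that assumption (the name X_bin_direct is mine):
# OneHotEncoder accepts string categories directly in current scikit-learn
X_bin_direct = OneHotEncoder().fit_transform(X_str).toarray()
print(X_bin_direct)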
View extract_tfidf_vector.py
from sklearn.feature_extraction.text import TfidfVectorizer
text = ['This is a string', 'This is another string', 'TFIDF computation calculation', 'TfIDF is the product of TF and IDF']
vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', norm=None)
X = vectorizer.fit_transform(text)
X_vocab = vectorizer.get_feature_names()  # get_feature_names_out() on scikit-learn >= 1.0
# Out[1]: ['calculation', 'computation', 'idf', 'product', 'string', 'tf', 'tfidf']
X_mat = X.todense()
# Out[2]:
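To make the Out[2] matrix easier to read, the weights can be labelled with the vocabulary; a small sketch reusing the variables above (the pandas import and the name df_tfidf are my additions):
import pandas as pd
# rows = documents, columns = vocabulary terms, cells = unnormalised tf-idf weights
df_tfidf = pd.DataFrame(X.toarray(), columns=X_vocab)
df_tfidf.round(2)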
View Modeling_GermanCredit.r
# install the packages
pkgs <- c("dplyr", "rpart", "rpart.plot", "rattle", "mlr", "evtree")
install.packages(pkgs, quiet = TRUE)
# load the packages
library("dplyr")
library("rattle")
library("mlr")
library("evtree")
View dplyr_se.r
library(dplyr)
library(lazyeval)
df <- data_frame(group = c(1, 2, 2, 3, 3, 3))
g <- "group"
df %>%
group_by_(g) %>%
summarise_(
View impute.py
import pandas as pd
df = pd.DataFrame({'A':['A1', 'A2', 'A3'], 'B':[None, 'B2', None]})
df
# Out[51]:
#     A     B
# 0  A1  None
# 1  A2    B2
# 2  A3  None
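The file name points at imputation, but the listing stops before the missing values are filled; a minimal sketch of one way to finish it with pandas (the name df_filled and the fill value 'B1' are made-up placeholders):
# replace the missing entries in column B with a constant placeholder
df_filled = df.fillna({'B': 'B1'})
df_filled
#     A   B
# 0  A1  B1
# 1  A2  B2
# 2  A3  B1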