Skip to content

Instantly share code, notes, and snippets.

View Keiku's full-sized avatar
🐢
Slowly but surely.

Keiichi Kuroyanagi Keiku

🐢
Slowly but surely.
View GitHub Profile
@Keiku
Keiku / get_image_paths.py
Created June 18, 2020 03:17
Get image paths.
import pathlib
# Recursively collect every file under ./images whose extension
# (compared case-insensitively) is one of the listed image extensions.
image_dir = pathlib.Path('images').resolve()
exts = ['.jpg', '.png']
image_paths = list(
    filter(lambda entry: entry.suffix.lower() in exts, image_dir.rglob('*'))
)
# Variant: keep only "<parent directory>/<file name>" for each match,
# rendered as a forward-slash string.
image_paths = [
    pathlib.Path(*entry.parts[-2:]).as_posix()
    for entry in image_dir.rglob('*')
    if entry.suffix.lower() in exts
]
@Keiku
Keiku / reset_seaborn_settings.py
Created June 9, 2020 03:50
Reset the seaborn setting once set.
# Reset the seaborn setting once it has been set. This can be run in the middle of a notebook.
# Reference: python seaborn to reset back to the matplotlib - Stack Overflow https://stackoverflow.com/questions/26899310/python-seaborn-to-reset-back-to-the-matplotlib
# Either of the following may be used.
# Option 1, in matplotlib: overwrite the live rcParams with the values stored in mpl.rcParamsDefault.
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)
# Option 2, in seaborn (NOTE(review): the seaborn variant is cut off in this listing)
@Keiku
Keiku / read_copytext.py
Created January 19, 2018 10:25
Read copy text to pandas DataFrame.
import pandas as pd
from io import StringIO
def read_copytext(text):
    """Load copied text into a one-column DataFrame and comma-separate its fields.

    The text is parsed with ``pd.read_table`` (first line is the header), the
    single resulting column is renamed to ``col1``, and every run of
    whitespace inside each value is replaced by a single comma.

    Args:
        text: The copied text. It must parse into exactly one column (i.e.
            contain no tab characters), otherwise assigning the single
            column name raises ``ValueError``.
    """
    text1 = StringIO(text)
    df = pd.read_table(text1)
    df.columns = ["col1"]
    # Raw string avoids the invalid "\s" escape; regex=True is required on
    # pandas >= 2.0, where str.replace treats the pattern literally by default.
    df["col1"] = df["col1"].str.replace(r"\s+", ",", regex=True)
    # NOTE(review): the remainder of this function is not visible in this
    # listing; nothing is returned here -- confirm against the full gist.
@Keiku
Keiku / split_KFold.py
Last active May 2, 2017 07:10
Split K-fold validation dataset.
import string
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, StratifiedKFold
# Toy inputs for the K-fold split example.
n_fold = 5
column = "pred"
# Ten binary labels: five 1s followed by five 0s.
y_train = np.array([1] * 5 + [0] * 5)
# Ten samples with two uniformly random features each.
X_train = np.random.random(size=(10, 2))
@Keiku
Keiku / get_wordnet_synonyms.py
Created April 28, 2017 07:04
Extract the synonyms by using wordnet.
from itertools import chain
from nltk.corpus import wordnet
# Look up every WordNet synset for the word, then gather the distinct
# lemma names across all of them.
synonyms = wordnet.synsets('change')
lemmas = {name for synset in synonyms for name in synset.lemma_names()}
lemmas
# Out[31]:
# {'alter',
# 'alteration',
# 'change',
@Keiku
Keiku / stack_sparse_matrix.py
Created April 28, 2017 02:18
Stack the sparse matrices.
import numpy as np
import scipy as sp
import pandas as pd
# Two small frames with the same number of rows.
df1 = pd.DataFrame(dict(A=[1, 2], B=[3, 4]))
df2 = pd.DataFrame(dict(C=[5, 6]))
# Convert the first frame's values to a CSR sparse matrix, then back to dense
# to inspect it.
X1 = sp.sparse.csr_matrix(df1.to_numpy())
X1_dense = X1.todense()
# Out[28]:
@Keiku
Keiku / list_operations.py
Created April 18, 2017 07:43
list operations.
import numpy as np  # was aliased as `pd`, which left `np` below undefined
# Add 1 to every element of 1..5, first in plain Python, then with NumPy.
# Python
list(map(lambda x: x + 1, range(1, 6, 1)))
# Out[1]: [2, 3, 4, 5, 6]
# Numpy
list(np.array(range(1, 6, 1)) + 1)
# Out[2]: [2, 3, 4, 5, 6]
@Keiku
Keiku / OrderedDict_sample.py
Last active April 13, 2017 03:35
Get keys/values from sorted OrderedDict.
from collections import OrderedDict
d = dict(A=3, B=2, C=1)
# Values listed in ascending key order.
OrderedDict((key, d[key]) for key in sorted(d)).values()
# Out[1]: odict_values([3, 2, 1])
# Values listed in ascending value order.
OrderedDict(sorted(d.items(), key=lambda pair: pair[1])).values()
# Out[2]: odict_values([1, 2, 3])
@Keiku
Keiku / extract_onehot_vector.py
Created April 12, 2017 06:30
Extract the one-hot encoding vector.
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np  # `np` is used below but was never imported in this snippet
# Categorical string features: one row per sample, one column per feature.
X_str = np.array([['a', 'dog', 'red'], ['b', 'cat', 'green']])
# transform to integer: encode all strings with one shared LabelEncoder, then
# restore the original 2-D shape
X_int = LabelEncoder().fit_transform(X_str.ravel()).reshape(*X_str.shape)
# transform to binary: one-hot encode each integer column and densify
X_bin = OneHotEncoder().fit_transform(X_int).toarray()
print(X_bin)
# [[ 1. 0. 0. 1. 0. 1.]
@Keiku
Keiku / extract_tfidf_vector.py
Last active April 11, 2017 07:40
Extract the tf-idf vector.
# Small corpus used to demonstrate tf-idf extraction.
text = ['This is a string', 'This is another string', 'TFIDF computation calculation', 'TfIDF is the product of TF and IDF']
from sklearn.feature_extraction.text import TfidfVectorizer
# norm=None keeps the raw tf-idf weights instead of normalising each row.
vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', norm=None)
X = vectorizer.fit_transform(text)
# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back only on releases that predate get_feature_names_out().
if hasattr(vectorizer, 'get_feature_names_out'):
    X_vocab = vectorizer.get_feature_names_out().tolist()
else:
    X_vocab = vectorizer.get_feature_names()
X_vovab = X_vocab  # original (misspelled) name kept so existing references still work
# Out[1]: ['calculation', 'computation', 'idf', 'product', 'string', 'tf', 'tfidf']
X_mat = X.todense()
# Out[2]: