Keiichi Kuroyanagi Keiku

## read_column_containing_list.py
# Reference: python - Reading csv containing a list in Pandas - Stack Overflow https://stackoverflow.com/questions/20799593/reading-csv-containing-a-list-in-pandas

# Copy this text.
"""
HK,"[u'5328.1', u'5329.3', '2013-12-27 13:58:57.973614']"
HK,"[u'5328.1', u'5329.3', '2013-12-27 13:58:59.237387']"
HK,"[u'5328.1', u'5329.3', '2013-12-27 13:59:00.346325']"
"""

import ast

## concat_strings_in_all_combinations.py
import itertools
# Join each (letter, digit) pairing, in the order itertools.product yields them.
[f'{x}{y}' for x, y in itertools.product(['a', 'b'], ['1', '2'])]
# ['a1', 'a2', 'b1', 'b2']

## remove_s3_files_before_specified_last_update_time.sh
# Remove S3 objects whose last-modified timestamp is earlier than the given cutoff.
# Reference: amazon s3 - aws cli s3 bucket remove object with date condition - Stack Overflow https://stackoverflow.com/questions/51375531/aws-cli-s3-bucket-remove-object-with-date-condition
# `aws s3 ls` prints "<date> <time> <size> <key>", so the date ($1) and the time ($2) are
# joined before the string comparison; comparing $1 alone ignores the time of day.
# With --recursive the printed key is relative to the bucket (it already contains the "to/"
# prefix), so it is appended to the bucket root instead of to the listed prefix.
# NOTE: `print $4` only keeps the first whitespace-separated part of a key, so keys that contain spaces are not handled.
aws s3 ls --recursive s3://path/to/ | awk '($1 " " $2) < "2020-06-25 12:00:00" {print $4}' | xargs -n1 -t -I 'KEY' aws s3 rm s3://path/'KEY'

## get_image_paths.py
import pathlib

# Recursively collect every entry under the image directory whose
# lower-cased suffix is one of the extensions in `exts`.
exts = ['.jpg', '.png']
image_dir = pathlib.Path('images').resolve()
image_paths = list(filter(lambda p: p.suffix.lower() in exts, image_dir.rglob('*')))

# Same listing, but each entry is reduced to its last two path components
# ("<parent directory>/<file name>") and rendered as a POSIX-style string.
image_paths = [
    pathlib.Path(*found.parts[-2:]).as_posix()
    for found in image_dir.rglob('*')
    if found.suffix.lower() in exts
]

## reset_seaborn_settings.py
# Reset the seaborn setting once set. It can be used in the middle of a notebook.
# Reference: python seaborn to reset back to the matplotlib - Stack Overflow https://stackoverflow.com/questions/26899310/python-seaborn-to-reset-back-to-the-matplotlib

# Either of the following may be used

# in matplotlib
import matplotlib as mpl
# Overwrite the current rc settings with the values stored in matplotlib's
# built-in defaults (mpl.rcParamsDefault).
mpl.rcParams.update(mpl.rcParamsDefault)

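# in seaborn
# NOTE: the snippet is cut off here. seaborn provides `sns.reset_orig()`
# (after `import seaborn as sns`) for restoring the original rc settings.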

## read_copytext.py
import pandas as pd
from io import StringIO


def read_copytext(text):
    """Load copied text into a one-column DataFrame with whitespace runs turned into commas.

    Args:
        text: The copied text. ``pd.read_table`` consumes its first line as
            the header, and every remaining line must parse to a single
            (tab-free) field.

    Returns:
        A DataFrame with one column, ``"col1"``, in which each run of
        whitespace has been replaced by a single comma.

    Raises:
        ValueError: If the text parses into more than one column, because
            the single column name cannot be assigned.
    """
    df = pd.read_table(StringIO(text))
    df.columns = ["col1"]

    # The pattern is a regular expression: use a raw string so "\s" is not an
    # invalid string escape, and pass regex=True because recent pandas
    # versions treat the pattern as a literal by default.
    df["col1"] = df["col1"].str.replace(r"\s+", ",", regex=True)
    # Hand the result back; without a return the frame was discarded.
    return df

## split_KFold.py
import string
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, StratifiedKFold

# Toy training data: ten rows of two random features each, with labels
# that are 1 for the first five rows and 0 for the last five.
X_train = np.random.random(size=(10, 2))
y_train = np.array([1] * 5 + [0] * 5)

# Settings for the rest of the snippet (not shown here); presumably the
# prediction column name and the number of folds.
column, n_fold = "pred", 5

## get_wordnet_synonyms.py
from itertools import chain
from nltk.corpus import wordnet

# Fetch the WordNet synsets for "change", then gather the distinct lemma
# names found across all of them.
synonyms = wordnet.synsets('change')
lemmas = {name for synset in synonyms for name in synset.lemma_names()}
lemmas
# Out[31]:
# {'alter',
#  'alteration',
#  'change',

## stack_sparse_matrix.py
import numpy as np
import scipy as sp
# `import scipy` alone does not guarantee that the `sparse` submodule is
# loaded on older SciPy releases, so import it explicitly.
import scipy.sparse
import pandas as pd

# Two small example frames.
df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
df2 = pd.DataFrame({"C": [5, 6]})

# Convert the first frame's values to a CSR sparse matrix, then back to a
# dense matrix for inspection.
X1 = sp.sparse.csr_matrix(df1.values)
X1_dense = X1.todense()
# Out[28]:

## list_operations.py
# numpy must be bound to `np`, the name used below; aliasing it as `pd`
# left `np` undefined for this snippet and shadowed the usual pandas alias.
import numpy as np

# Python: add 1 to each element with map
list(map(lambda x: x + 1, range(1, 6, 1)))
# Out[1]: [2, 3, 4, 5, 6]

# Numpy: add 1 to every element at once with array arithmetic
list(np.array(range(1, 6, 1)) + 1)
# Out[2]: [2, 3, 4, 5, 6]
	# Reference: python - Reading csv containing a list in Pandas - Stack Overflow https://stackoverflow.com/questions/20799593/reading-csv-containing-a-list-in-pandas

	# Copy this text.
	"""
	HK,"[u'5328.1', u'5329.3', '2013-12-27 13:58:57.973614']"
	HK,"[u'5328.1', u'5329.3', '2013-12-27 13:58:59.237387']"
	HK,"[u'5328.1', u'5329.3', '2013-12-27 13:59:00.346325']"
	"""

	import ast
	import itertools
	# Join each (letter, digit) pairing, in the order itertools.product yields them.
	[f'{x}{y}' for x, y in itertools.product(['a', 'b'], ['1', '2'])]
	# ['a1', 'a2', 'b1', 'b2']
	# Remove S3 objects whose last-modified timestamp is earlier than the given cutoff.
	# Reference: amazon s3 - aws cli s3 bucket remove object with date condition - Stack Overflow https://stackoverflow.com/questions/51375531/aws-cli-s3-bucket-remove-object-with-date-condition
	# `aws s3 ls` prints "<date> <time> <size> <key>", so the date ($1) and the time ($2) are
	# joined before the string comparison; comparing $1 alone ignores the time of day.
	# With --recursive the printed key is relative to the bucket (it already contains the "to/"
	# prefix), so it is appended to the bucket root instead of to the listed prefix.
	# The pipes must be plain `|`; a backslash-escaped `\|` is passed to the command as a literal argument.
	# NOTE: `print $4` only keeps the first whitespace-separated part of a key, so keys that contain spaces are not handled.
	aws s3 ls --recursive s3://path/to/ | awk '($1 " " $2) < "2020-06-25 12:00:00" {print $4}' | xargs -n1 -t -I 'KEY' aws s3 rm s3://path/'KEY'
	import pathlib

	# Recursively collect every entry under the image directory whose
	# lower-cased suffix is one of the extensions in `exts`.
	image_dir = pathlib.Path('images').resolve()
	exts = ['.jpg', '.png']
	image_paths = [path for path in image_dir.rglob('*') if path.suffix.lower() in exts]

	# Same listing, but each entry is reduced to its last two path components
	# ("<parent directory>/<file name>") as a POSIX-style string.
	# The parts tuple must be unpacked with `*`, and the glob pattern must be
	# '*': Path() does not accept a tuple and rglob('') is an invalid pattern.
	image_paths = [pathlib.Path(*path.parts[-2:]).as_posix() for path in image_dir.rglob('*') if path.suffix.lower() in exts]
	# Reset the seaborn setting once set. It can be used in the middle of a notebook.
	# Reference: python seaborn to reset back to the matplotlib - Stack Overflow https://stackoverflow.com/questions/26899310/python-seaborn-to-reset-back-to-the-matplotlib

	# Either of the following may be used

	# in matplotlib
	import matplotlib as mpl
	# Overwrite the current rc settings with the values stored in matplotlib's
	# built-in defaults (mpl.rcParamsDefault).
	mpl.rcParams.update(mpl.rcParamsDefault)

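	# in seaborn
	# NOTE: the snippet is cut off here. seaborn provides `sns.reset_orig()`
	# (after `import seaborn as sns`) for restoring the original rc settings.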
	import pandas as pd
	from io import StringIO


	def read_copytext(text):
		"""Load copied text into a one-column DataFrame with whitespace runs turned into commas.

		Args:
			text: The copied text. ``pd.read_table`` consumes its first line as
				the header, and every remaining line must parse to a single
				(tab-free) field.

		Returns:
			A DataFrame with one column, ``"col1"``, in which each run of
			whitespace has been replaced by a single comma.

		Raises:
			ValueError: If the text parses into more than one column, because
				the single column name cannot be assigned.
		"""
		df = pd.read_table(StringIO(text))
		df.columns = ["col1"]

		# The pattern is a regular expression: use a raw string so "\s" is not an
		# invalid string escape, and pass regex=True because recent pandas
		# versions treat the pattern as a literal by default.
		df["col1"] = df["col1"].str.replace(r"\s+", ",", regex=True)
		# Hand the result back; without a return the frame was discarded.
		return df
	import string
	import numpy as np
	import pandas as pd
	from sklearn.model_selection import KFold, StratifiedKFold

	# Toy training data: ten rows of two random features each, with labels
	# that are 1 for the first five rows and 0 for the last five.
	X_train = np.random.random(size=(10, 2))
	y_train = np.array([1] * 5 + [0] * 5)

	# Settings for the rest of the snippet (not shown here); presumably the
	# prediction column name and the number of folds.
	column, n_fold = "pred", 5
	from itertools import chain
	from nltk.corpus import wordnet

	# Fetch the WordNet synsets for "change", then gather the distinct lemma
	# names found across all of them.
	synonyms = wordnet.synsets('change')
	lemmas = {name for synset in synonyms for name in synset.lemma_names()}
	lemmas
	# Out[31]:
	# {'alter',
	# 'alteration',
	# 'change',
	import numpy as np
	import scipy as sp
	# `import scipy` alone does not guarantee that the `sparse` submodule is
	# loaded on older SciPy releases, so import it explicitly.
	import scipy.sparse
	import pandas as pd

	# Two small example frames.
	df1 = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
	df2 = pd.DataFrame({"C": [5, 6]})

	# Convert the first frame's values to a CSR sparse matrix, then back to a
	# dense matrix for inspection.
	X1 = sp.sparse.csr_matrix(df1.values)
	X1_dense = X1.todense()
	# Out[28]:
	# numpy must be bound to `np`, the name used below; aliasing it as `pd`
	# left `np` undefined for this snippet and shadowed the usual pandas alias.
	import numpy as np

	# Python: add 1 to each element with map
	list(map(lambda x: x + 1, range(1, 6, 1)))
	# Out[1]: [2, 3, 4, 5, 6]

	# Numpy: add 1 to every element at once with array arithmetic
	list(np.array(range(1, 6, 1)) + 1)
	# Out[2]: [2, 3, 4, 5, 6]