fumiakiy/sk2.py

## sk2.py
import os
import re

# import sklearn
import numpy as np
# from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

# Class labels. A sample's target value is the index of its label in this list.
LABELS = ['a', 'b', 'c', 'd', 'e', 'h', 'o', 'y']

# Read every "<label>_<digit>.txt" file under `path`, parse one integer per
# line, and group the resulting samples by label.
path = './normalized/'
data = {label: [] for label in LABELS}
# Sorted so that the sample order -- and therefore the seeded train/test split
# below -- does not depend on the order in which the filesystem lists files.
for f in sorted(os.listdir(path)):
    m = re.match(r'(\w)_\d\.txt', f)
    if m is None:
        continue
    char = m.group(1)
    if char not in data:
        # \w also matches characters that are not labels; skip such files
        # instead of failing with a KeyError.
        continue
    datum = []
    # Pass the directory and file name as separate components so the result
    # is correct whether or not `path` ends with a separator.
    with open(os.path.join(path, f), 'r') as lines:
        for line in lines:
            value = line.strip()
            if value:  # tolerate blank lines, e.g. a trailing empty line
                datum.append(int(value))
    data[char].append(datum)

# Flatten the per-label samples into parallel lists: samples[i] is one feature
# vector and target[i] is the index of its label in LABELS.
target = []
samples = []
# Walk LABELS (not the dict) so samples are emitted in a fixed label order.
for label_index, char in enumerate(LABELS):
    for datum in data[char]:
        target.append(label_index)
        samples.append(datum)

print(len(samples))
# Alternative classifier: clf = SVC() (needs the commented-out SVC import).
clf = GaussianNB()
X = np.array(samples)
y = np.array(target)
# Hold out 10% of the samples; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=0)

clf.fit(X_train, y_train)
# NOTE: this is the accuracy on the training split, not on the held-out split.
print(clf.score(X_train, y_train))

# Predicted label indices for the held-out samples, then the expected ones.
print(clf.predict(X_test))
print(y_test)
	import os
	import re

	# import sklearn
	import numpy as np
	# from sklearn.svm import SVC
	from sklearn.naive_bayes import GaussianNB
	from sklearn.model_selection import train_test_split

	# Class labels. A sample's target value is the index of its label in this list.
	LABELS = ['a', 'b', 'c', 'd', 'e', 'h', 'o', 'y']

	# Read every "<label>_<digit>.txt" file under `path`, parse one integer per
	# line, and group the resulting samples by label.
	path = './normalized/'
	data = {label: [] for label in LABELS}
	# Sorted so that the sample order -- and therefore the seeded train/test split
	# below -- does not depend on the order in which the filesystem lists files.
	for f in sorted(os.listdir(path)):
		m = re.match(r'(\w)_\d\.txt', f)
		if m is None:
			continue
		char = m.group(1)
		if char not in data:
			# \w also matches characters that are not labels; skip such files
			# instead of failing with a KeyError.
			continue
		datum = []
		# Pass the directory and file name as separate components so the result
		# is correct whether or not `path` ends with a separator.
		with open(os.path.join(path, f), 'r') as lines:
			for line in lines:
				value = line.strip()
				if value:  # tolerate blank lines, e.g. a trailing empty line
					datum.append(int(value))
		data[char].append(datum)

	# Flatten the per-label samples into parallel lists: samples[i] is one feature
	# vector and target[i] is the index of its label in LABELS.
	target = []
	samples = []
	# Walk LABELS (not the dict) so samples are emitted in a fixed label order.
	for label_index, char in enumerate(LABELS):
		for datum in data[char]:
			target.append(label_index)
			samples.append(datum)

	print(len(samples))
	# Alternative classifier: clf = SVC() (needs the commented-out SVC import).
	clf = GaussianNB()
	X = np.array(samples)
	y = np.array(target)
	# Hold out 10% of the samples; random_state fixes the shuffle.
	X_train, X_test, y_train, y_test = train_test_split(
		X, y, test_size=0.1, random_state=0)

	clf.fit(X_train, y_train)
	# NOTE: this is the accuracy on the training split, not on the held-out split.
	print(clf.score(X_train, y_train))

	# Predicted label indices for the held-out samples, then the expected ones.
	print(clf.predict(X_test))
	print(y_test)