Skip to content

Instantly share code, notes, and snippets.

@fumiakiy
Created June 14, 2019 19:44
Show Gist options
  • Save fumiakiy/b8018b441d51639d3690ad003bebce0a to your computer and use it in GitHub Desktop.
Save fumiakiy/b8018b441d51639d3690ad003bebce0a to your computer and use it in GitHub Desktop.
import os
import re
# import sklearn
import numpy as np
# from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
# Class labels; a label's position in this list is its integer class id.
LABELS = ['a', 'b', 'c', 'd', 'e', 'h', 'o', 'y']

# Directory that is scanned for sample files.
path = './normalized/'

# Sample files are named "<label>_<digit>.txt". The pattern is anchored at the
# end so that names with trailing text (e.g. "a_1.txt.bak") are not picked up,
# and compiled once instead of on every loop iteration.
_SAMPLE_NAME = re.compile(r'(\w)_\d\.txt$')

# Maps each label to the list of samples read for it. Every sample is the list
# of integers found one per line in its file.
data = {label: [] for label in LABELS}

# os.listdir() returns names in arbitrary order; sorting makes the order in
# which samples are collected reproducible from run to run.
for f in sorted(os.listdir(path)):
    m = _SAMPLE_NAME.match(f)
    if m is None:
        continue
    char = m.group(1)
    if char not in data:
        # \w also matches characters that are not labels (e.g. "z_1.txt");
        # skip such files like any other non-sample file instead of raising
        # KeyError.
        continue
    # Pass directory and file name as separate components so the join does
    # not depend on `path` ending with a separator.
    with open(os.path.join(path, f), 'r') as lines:
        datum = [int(line.rstrip()) for line in lines]
    data[char].append(datum)
# Flatten the per-label groups in `data` into two parallel lists:
# samples[i] is one sample and target[i] is the position of its label
# within LABELS.
labelled = [
    (LABELS.index(letter), sample)
    for letter, group in data.items()
    for sample in group
]
target = [class_id for class_id, _ in labelled]
samples = [sample for _, sample in labelled]
# Report how many samples were collected in total.
# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the print statement is a syntax error on Python 3.
print(len(samples))

# clf = SVC()
clf = GaussianNB()

# Convert to arrays and actually use them for the split, so that y_test is
# printed in the same array format as the predictions it is compared with.
# NOTE(review): assumes every sample has the same number of values, so that
# X is a regular 2-D array -- confirm against the data files.
X = np.array(samples)
y = np.array(target)

# Hold out 10% of the samples; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=0)

clf.fit(X_train, y_train)

# NOTE: this score is computed on the training split, not on the held-out
# samples.
print(clf.score(X_train, y_train))

# Predicted vs. actual class ids for the held-out samples.
print(clf.predict(X_test))
print(y_test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment