""" | |
Identify users by mouse click timings.

Train a POHMM for each user on a single sample, then test on the remaining samples.
Uses the clicks from task 3 (Star Bubbles) of the HCI dataset:
https://bitbucket.org/vmonaco/dataset-four-hci-tasks/

Example run:

    $ python hci_clicks_example.py data/task3.click.csv
    Accuracy (88 samples): 0.375
    Training clicks/sample:
    count     19.000000
    mean      80.526316
    std       46.797873
    min        4.000000
    25%       43.000000
    50%       84.000000
    75%      107.000000
    max      160.000000
""" | |
import sys | |
import numpy as np | |
import pandas as pd | |
from pohmm import Pohmm, PohmmClassifier | |
def features(x):
    """Derive per-click timing features from one sample of click events.

    Parameters
    ----------
    x : pandas.DataFrame
        Click events of a single sample, in chronological order, with the
        columns ``timepress``, ``timerelease`` and ``button``.

    Returns
    -------
    pandas.DataFrame
        Frame sharing the index of ``x`` with the columns:

        * ``tau`` -- difference between consecutive ``timepress`` values,
        * ``duration`` -- ``timerelease - timepress``,
        * ``button`` -- copied unchanged from ``x``.
    """
    press = x['timepress']

    # The first click has no predecessor, so its interval is undefined; it is
    # filled with the median of the remaining intervals of this sample.
    tau = press.diff()
    tau = tau.fillna(tau.median())

    duration = x['timerelease'] - press

    # Zero-valued timings are replaced by the sample median (presumably because
    # the features are modelled with lognormal emissions, which need strictly
    # positive values). ``mask`` returns a new Series and upcasts as needed, so
    # a fractional median can safely replace zeros in integer-typed data.
    tau = tau.mask(tau == 0, tau.median())
    duration = duration.mask(duration == 0, duration.median())

    return pd.DataFrame(
        {'tau': tau, 'duration': duration, 'button': x['button']},
        index=x.index,
    )
def pohmm_factory():
    """Create a new, unfitted ``Pohmm`` with the settings used by this example.

    The model is configured with two hidden states and lognormal emissions for
    the ``tau`` and ``duration`` features, using ``'freq'`` smoothing. A fresh
    instance is built on every call.
    """
    return Pohmm(
        n_hidden_states=2,
        init_spread=2,
        thresh=1e-6,
        max_iter=1000,
        emissions=[('tau', 'lognormal'), ('duration', 'lognormal')],
        smoothing='freq',
    )
def split_first_session(df):
    """Split a two-level indexed frame into training and test rows per user.

    Rows are grouped by index level 0 (used as the class label, i.e. the
    user). Within each group, the rows whose level-1 value equals the first
    level-1 value encountered form the training set; all rows belonging to the
    remaining level-1 values form the test set.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a two-level MultiIndex.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(train, test)``, both keeping the two-level index of ``df``.
    """
    def first_only(group):
        sessions = group.index.get_level_values(1)
        return group[sessions == sessions.unique()[0]]

    def all_but_first(group):
        sessions = group.index.get_level_values(1)
        return group[sessions.isin(sessions.unique()[1:])]

    # group_keys=False keeps the index exactly as returned by the callbacks,
    # independent of how the installed pandas version treats group keys, so no
    # follow-up reset_index is needed.
    by_user = df.groupby(level=0, group_keys=False)
    return by_user.apply(first_only), by_user.apply(all_but_first)


def main(fname):
    """Train one POHMM per user and report the identification accuracy.

    Parameters
    ----------
    fname : str
        Path of the clicks CSV file; its first two columns are used as the
        index (level 0 is the label, level 1 identifies the sample).
    """
    df_raw = pd.read_csv(fname, index_col=[0, 1])

    # Compute the features separately for every sample so that press-to-press
    # intervals never span two samples.
    df = df_raw.groupby(level=[0, 1], group_keys=False).apply(features)

    train, test = split_first_session(df)
    if test.empty:
        # Without this guard the zip(*...) unpacking below fails obscurely.
        sys.exit('No test samples: no user has more than one sample.')

    cl = PohmmClassifier(pohmm_factory)

    # Each group key is a (label, sample id) pair; only the label is needed.
    train_user_session, train_samples = zip(*train.groupby(level=[0, 1]))
    train_labels, _ = zip(*train_user_session)
    cl.fit_df(train_labels, train_samples, pstate_col='button')

    test_user_session, test_samples = zip(*test.groupby(level=[0, 1]))
    test_labels, _ = zip(*test_user_session)

    predict_labels = [
        cl.predict_df(sample, pstate_col='button')[0]
        for sample in test_samples
    ]

    acc = np.mean(np.array(predict_labels) == np.array(test_labels))
    print('Accuracy (%d samples):' % len(test_labels), acc)
    print('Training clicks/sample:\n', train.groupby(level=[0, 1]).size().describe())


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('Usage: $ python hci_clicks_example.py <clicks_file.csv>')
        sys.exit(1)

    main(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment