Skip to content

Instantly share code, notes, and snippets.

@vmonaco
Last active January 30, 2017 00:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save vmonaco/209647bc6438b1d045d738156179367f to your computer and use it in GitHub Desktop.
Save vmonaco/209647bc6438b1d045d738156179367f to your computer and use it in GitHub Desktop.
"""
Identify users by mouse click timings.
Train a POHMM for each user on one sample, and test using the remaining samples.
Using the clicks from task 3 (Star Bubbles) in the HCI dataset:
https://bitbucket.org/vmonaco/dataset-four-hci-tasks/
$ python hci_clicks_example.py data/task3.click.csv
Accuracy (88 samples): 0.375
Training clicks/sample:
count 19.000000
mean 80.526316
std 46.797873
min 4.000000
25% 43.000000
50% 84.000000
75% 107.000000
max 160.000000
"""
import sys
import numpy as np
import pandas as pd
from pohmm import Pohmm, PohmmClassifier
def features(x):
    """Derive per-click timing features from one sample of click events.

    Reads the ``timepress``, ``timerelease`` and ``button`` columns of *x*
    and returns a new DataFrame, indexed like *x*, with the columns:

    * ``tau`` -- difference between consecutive ``timepress`` values; the
      first row (which has no predecessor) gets the median difference.
    * ``duration`` -- ``timerelease`` minus ``timepress``.
    * ``button`` -- copied unchanged from *x*.

    Zero values of ``tau`` and ``duration`` are replaced by the median of
    the respective series (presumably because both are later modelled with
    lognormal emissions, which need strictly positive values).
    """
    press_times = x['timepress']

    # Time between consecutive presses; compute the diff only once.
    press_gaps = press_times.diff()
    tau = press_gaps.fillna(press_gaps.median())

    duration = x['timerelease'] - press_times

    # Swap exact zeros for the series median (median is taken before the
    # replacement, so zeros still contribute to it).
    tau_median = tau.median()
    zero_tau = tau == 0
    tau[zero_tau] = tau_median

    duration_median = duration.median()
    zero_duration = duration == 0
    duration[zero_duration] = duration_median

    columns = {'tau': tau, 'duration': duration, 'button': x['button']}
    return pd.DataFrame(columns, index=x.index)
def pohmm_factory():
    """Return a new, unfitted ``Pohmm`` configured for the click features.

    The model uses two hidden states and lognormal emissions for both the
    ``tau`` and ``duration`` features, with ``'freq'`` smoothing. A fresh
    instance is created on every call; the function is handed to
    ``PohmmClassifier`` as its model factory.
    """
    emission_spec = [('tau', 'lognormal'), ('duration', 'lognormal')]
    return Pohmm(
        n_hidden_states=2,
        init_spread=2,
        thresh=1e-6,
        max_iter=1000,
        emissions=emission_spec,
        smoothing='freq',
    )
if __name__ == '__main__':
    # Script entry point: load the clicks CSV, train one model per label on
    # that label's first sample, classify the remaining samples, and report
    # accuracy plus a summary of the training-sample sizes.
    if len(sys.argv) != 2:
        print('Usage: $ python hci_clicks_example.py <clicks_file.csv>')
        sys.exit(1)

    fname = sys.argv[1]

    # The first two CSV columns become a two-level index. The code below
    # uses level 0 as the class label and level 1 to tell samples apart.
    df_raw = pd.read_csv(fname, index_col=[0, 1])

    # features() returns a frame indexed like its input. group_keys=False
    # keeps the result on the original two-level index; without it,
    # pandas >= 2.0 prepends the group keys again (four index levels) and
    # the level-based selections below would address the wrong levels.
    df = df_raw.groupby(level=[0, 1], group_keys=False).apply(features)

    def _is_first_sample(group):
        # Boolean mask over the rows of one label's data: True for rows
        # belonging to the first level-1 value encountered.
        sample_ids = group.index.get_level_values(1)
        return sample_ids == sample_ids.unique()[0]

    def _is_later_sample(group):
        # Boolean mask selecting rows of every sample except the first.
        sample_ids = group.index.get_level_values(1)
        return sample_ids.isin(sample_ids.unique()[1:])

    # Split per label: first sample -> train, all other samples -> test.
    # apply() prepends the label as an extra index level, which is dropped
    # again so both frames keep the original two-level index.
    per_label = df.groupby(level=[0])
    train = per_label.apply(
        lambda g: g[_is_first_sample(g)]).reset_index(level=0, drop=True)
    test = per_label.apply(
        lambda g: g[_is_later_sample(g)]).reset_index(level=0, drop=True)

    cl = PohmmClassifier(pohmm_factory)

    # Each group key is a (label, sample id) pair; only the label is used.
    train_user_session, train_samples = zip(*train.groupby(level=[0, 1]))
    train_labels, _ = zip(*train_user_session)
    cl.fit_df(train_labels, train_samples, pstate_col='button')

    test_user_session, test_samples = zip(*test.groupby(level=[0, 1]))
    test_labels, _ = zip(*test_user_session)

    # predict_df returns a sequence whose first element is the label.
    predict_labels = [
        cl.predict_df(sample, pstate_col='button')[0]
        for sample in test_samples
    ]

    # Fraction of test samples whose predicted label matches the true one.
    acc = np.mean(np.array(predict_labels) == np.array(test_labels))

    print('Accuracy (%d samples):' % len(test_labels), acc)
    print('Training clicks/sample:\n',
          train.groupby(level=[0, 1]).size().describe())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment