# vmonaco/hci_clicks_example.py

## hci_clicks_example.py
"""
Identify users by mouse click timings.
Train one POHMM per user on a single sample, then test on the remaining samples.

Using the clicks from task 3 (Star Bubbles) in the HCI dataset:
https://bitbucket.org/vmonaco/dataset-four-hci-tasks/

$ python hci_clicks_example.py data/task3.click.csv

Accuracy (88 samples): 0.375
Training clicks/sample:
 count     19.000000
mean      80.526316
std       46.797873
min        4.000000
25%       43.000000
50%       84.000000
75%      107.000000
max      160.000000
"""
import sys
import numpy as np
import pandas as pd
from pohmm import Pohmm, PohmmClassifier


def features(x):
    """Build the per-click feature table for one group of click events.

    Reads the ``timepress``, ``timerelease`` and ``button`` columns of ``x``
    and returns a DataFrame, indexed like ``x``, with three columns:

    * ``tau`` -- difference between consecutive ``timepress`` values; the
      undefined first entry is filled with the median of those differences.
    * ``duration`` -- ``timerelease`` minus ``timepress``.
    * ``button`` -- copied unchanged from ``x``.

    Zero entries of ``tau`` and ``duration`` are overwritten with the median
    of their own column, computed before the overwrite.
    NOTE(review): presumably zeros are removed because both features are
    modelled with lognormal emissions -- confirm.
    """
    press_gaps = x['timepress'].diff()
    tau = press_gaps.fillna(press_gaps.median())
    tau_is_zero = tau == 0
    tau[tau_is_zero] = tau.median()

    duration = x['timerelease'] - x['timepress']
    duration_is_zero = duration == 0
    duration[duration_is_zero] = duration.median()

    columns = {'tau': tau, 'duration': duration, 'button': x['button']}
    return pd.DataFrame(columns, index=x.index)

def pohmm_factory():
    """Return a new ``Pohmm`` configured for the click features.

    The model is created with two hidden states, lognormal emissions for
    both the ``tau`` and ``duration`` features, and ``'freq'`` smoothing.
    """
    emission_spec = [('tau', 'lognormal'), ('duration', 'lognormal')]
    return Pohmm(
        n_hidden_states=2,
        init_spread=2,
        thresh=1e-6,
        max_iter=1000,
        emissions=emission_spec,
        smoothing='freq',
    )

if __name__ == '__main__':
    # The script takes exactly one argument: the path to the clicks CSV.
    if len(sys.argv) != 2:
        print('Usage: $ python hci_clicks_example.py <clicks_file.csv>')
        sys.exit(1)

    clicks_path = sys.argv[1]
    # The first two CSV columns become a two-level index. Level 0 is used as
    # the class label below; level 1 separates the samples of one label
    # (presumably user and session -- verify against the dataset).
    raw_clicks = pd.read_csv(clicks_path, index_col=[0, 1])

    # Compute the tau/duration/button features separately for every sample.
    click_features = raw_clicks.groupby(level=[0, 1]).apply(features)

    def _first_sample(group):
        """Keep only the rows of the first level-1 value seen in ``group``."""
        sample_ids = group.index.get_level_values(1)
        return group[sample_ids == sample_ids.unique()[0]]

    def _other_samples(group):
        """Keep the rows of every level-1 value except the first one."""
        sample_ids = group.index.get_level_values(1)
        return group[sample_ids.isin(sample_ids.unique()[1:])]

    # Per level-0 group: the first sample is for training, the rest for
    # testing. The outermost index level of each result is dropped again.
    train = click_features.groupby(level=[0]).apply(_first_sample)
    train = train.reset_index(level=0, drop=True)
    test = click_features.groupby(level=[0]).apply(_other_samples)
    test = test.reset_index(level=0, drop=True)

    classifier = PohmmClassifier(pohmm_factory)

    # Each group key is a (level 0, level 1) pair; the label is level 0.
    train_keys, train_samples = zip(*train.groupby(level=[0, 1]))
    train_labels = tuple(key[0] for key in train_keys)

    classifier.fit_df(train_labels, train_samples, pstate_col='button')

    test_keys, test_samples = zip(*test.groupby(level=[0, 1]))
    test_labels = tuple(key[0] for key in test_keys)

    # predict_df's result is indexed with [0] to obtain the predicted label.
    predict_labels = [
        classifier.predict_df(sample, pstate_col='button')[0]
        for sample in test_samples
    ]

    n_correct = (np.array(predict_labels) == np.array(test_labels)).sum()
    acc = n_correct / len(test_labels)
    print('Accuracy (%d samples):' % len(test_labels), acc)

    clicks_per_sample = train.groupby(level=[0, 1]).size()
    print('Training clicks/sample:\n', clicks_per_sample.describe())
	"""
	Identify users by mouse click timings.
	Train one POHMM per user on a single sample, then test on the remaining samples.

	Using the clicks from task 3 (Star Bubbles) in the HCI dataset:
	https://bitbucket.org/vmonaco/dataset-four-hci-tasks/

	$ python hci_clicks_example.py data/task3.click.csv

	Accuracy (88 samples): 0.375
	Training clicks/sample:
	count 19.000000
	mean 80.526316
	std 46.797873
	min 4.000000
	25% 43.000000
	50% 84.000000
	75% 107.000000
	max 160.000000
	"""
	import sys
	import numpy as np
	import pandas as pd
	from pohmm import Pohmm, PohmmClassifier


	def features(x):
		"""Build the per-click feature table for one group of click events.

		Reads the ``timepress``, ``timerelease`` and ``button`` columns of
		``x`` and returns a DataFrame, indexed like ``x``, with the columns
		``tau`` (difference between consecutive ``timepress`` values, first
		entry filled with the median difference), ``duration``
		(``timerelease`` minus ``timepress``) and ``button`` (copied from
		``x``).

		Zero entries of ``tau`` and ``duration`` are replaced by the median
		of their own column, computed before the replacement.
		"""
		# Bind the difference once; it is needed for the values and the fill.
		press_diff = x['timepress'].diff()
		tau = press_diff.fillna(press_diff.median())
		duration = x['timerelease'] - x['timepress']
		tau[tau == 0] = tau.median()
		duration[duration == 0] = duration.median()
		return pd.DataFrame({'tau': tau, 'duration': duration, 'button': x['button']}, index=x.index)

	def pohmm_factory():
		"""Return a new ``Pohmm`` configured for the click features.

		The model is created with two hidden states, lognormal emissions for
		both the ``tau`` and ``duration`` features, and ``'freq'`` smoothing.
		"""
		hmm = Pohmm(n_hidden_states=2, init_spread=2, thresh=1e-6, max_iter=1000,
					emissions=[('tau', 'lognormal'), ('duration', 'lognormal')], smoothing='freq')
		return hmm

	if __name__ == '__main__':
		# Exactly one argument is required: the path to the clicks CSV.
		if len(sys.argv) != 2:
			print('Usage: $ python hci_clicks_example.py <clicks_file.csv>')
			sys.exit(1)

		fname = sys.argv[1]
		# The first two CSV columns become a two-level index. Level 0 is
		# used as the class label below; level 1 separates the samples of
		# one label (presumably user and session -- verify against the data).
		df_raw = pd.read_csv(fname, index_col=[0,1])

		# Compute the tau/duration/button features for every sample.
		df = df_raw.groupby(level=[0,1]).apply(features)

		# Per level-0 group: rows of the first level-1 value are for training
		# and all other rows for testing; the outermost index level of each
		# result is dropped again.
		train = df.groupby(level=[0]).apply(lambda x: x[x.index.get_level_values(1) == x.index.get_level_values(1).unique()[0]]).reset_index(level=0, drop=True)
		test = df.groupby(level=[0]).apply(lambda x: x[x.index.get_level_values(1).isin(x.index.get_level_values(1).unique()[1:])]).reset_index(level=0, drop=True)

		cl = PohmmClassifier(pohmm_factory)

		# Group keys are (level 0, level 1) pairs; the label is level 0.
		train_user_session, train_samples = zip(*train.groupby(level=[0,1]))
		train_labels, _ = zip(*train_user_session)

		cl.fit_df(train_labels, train_samples, pstate_col='button')

		test_user_session, test_samples = zip(*test.groupby(level=[0,1]))
		test_labels, _ = zip(*test_user_session)

		# predict_df's result is indexed with [0] to get the predicted label.
		predict_labels = []
		for sample in test_samples:
			predict_labels.append(cl.predict_df(sample, pstate_col='button')[0])

		# Fraction of test samples whose predicted label equals the true one.
		acc = (np.array(predict_labels)==np.array(test_labels)).sum()/len(test_labels)
		print('Accuracy (%d samples):' % len(test_labels), acc)
		print('Training clicks/sample:\n', train.groupby(level=[0,1]).size().describe())