# olinguyen/shogun_cv_error.py

## shogun_cv_error.py
"""
The following code crashes with this error message:

---> 15 result = cv.evaluate()

SystemError: [ERROR] In file /build/shogun-v9ad6W/shogun-6.0.0+1SNAPSHOT201704270057/src/shogun/labels/Labels.cpp
line 67: assertion m_current_values.vector && idx < get_num_labels() failed in
virtual float64_t shogun::CLabels::get_value(int32_t)
file /build/shogun-v9ad6W/shogun-6.0.0+1SNAPSHOT201704270057/src/shogun/labels/Labels.cpp line 67
"""

import numpy as np
import pandas as pd
from modshogun import *

# Synthetic binary problem: 50 samples with 8 features each, and targets
# drawn from {0, 1} that are remapped to {-1, +1}.
samples = np.random.randn(50, 8)
targets = np.random.randint(0, 2, 50)
targets = 2 * targets - 1  # 0 -> -1, 1 -> +1

# Side note: MulticlassLabels don't work with int64 input, whereas
# np.array(targets).astype(np.float) works. The disabled attempt was:
#     print(targets.dtype)
#     y_train = MulticlassLabels(np.array(targets[:split]))
#     y_test = MulticlassLabels(np.array(targets[split:]))

# The sample matrix is handed over transposed (8 x 50).
features = RealFeatures(np.array(samples).T)
labels = BinaryLabels(np.array(targets))

num_folds = 10
stratified_split = StratifiedCrossValidationSplitting(labels, num_folds)

regularization = 0.1
classifier = LibLinear(regularization, features, labels)
classifier.train()

# ROCEvaluation is the metric used when the crash occurs; swapping in the
# line below instead makes the script work:
#     metric = AccuracyMeasure()
metric = ROCEvaluation()

cv = CrossValidation(classifier, features, labels, stratified_split, metric)

# The error message quoted at the top of this file points at this call.
generic_result = cv.evaluate()
result = CrossValidationResult.obtain_from_generic(generic_result)

# The "mean" field of the result holds the mean of the performance metric.
print("Testing", metric.get_name(), result.mean)
	"""
	The following code crashes with this error message:

	---> 15 result = cv.evaluate()

	SystemError: [ERROR] In file /build/shogun-v9ad6W/shogun-6.0.0+1SNAPSHOT201704270057/src/shogun/labels/Labels.cpp
	line 67: assertion m_current_values.vector && idx < get_num_labels() failed in
	virtual float64_t shogun::CLabels::get_value(int32_t)
	file /build/shogun-v9ad6W/shogun-6.0.0+1SNAPSHOT201704270057/src/shogun/labels/Labels.cpp line 67
	"""

	import numpy as np
	import pandas as pd
	from modshogun import *

	# Synthetic binary problem: 50 samples with 8 features each, and targets
	# drawn from {0, 1} that are remapped to {-1, +1}.
	samples = np.random.randn(50, 8)
	targets = np.random.randint(0, 2, 50)
	targets = 2 * targets - 1  # 0 -> -1, 1 -> +1

	# Side note: MulticlassLabels don't work with int64 input, whereas
	# np.array(targets).astype(np.float) works. The disabled attempt was:
	#     print(targets.dtype)
	#     y_train = MulticlassLabels(np.array(targets[:split]))
	#     y_test = MulticlassLabels(np.array(targets[split:]))

	# The sample matrix is handed over transposed (8 x 50).
	features = RealFeatures(np.array(samples).T)
	labels = BinaryLabels(np.array(targets))

	num_folds = 10
	stratified_split = StratifiedCrossValidationSplitting(labels, num_folds)

	regularization = 0.1
	classifier = LibLinear(regularization, features, labels)
	classifier.train()

	# ROCEvaluation is the metric used when the crash occurs; swapping in the
	# line below instead makes the script work:
	#     metric = AccuracyMeasure()
	metric = ROCEvaluation()

	cv = CrossValidation(classifier, features, labels, stratified_split, metric)

	# The error message quoted in the string ahead of this code points at
	# this call.
	generic_result = cv.evaluate()
	result = CrossValidationResult.obtain_from_generic(generic_result)

	# The "mean" field of the result holds the mean of the performance metric.
	print("Testing", metric.get_name(), result.mean)