Last active
April 17, 2017 05:30
-
-
Save disa-mhembere/a4f2c8461390b735a011f16eb1dc8ce7 to your computer and use it in GitHub Desktop.
Serial code composed of two examples adapted from the Intel DAAL examples.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import environ | |
from os.path import join as jp | |
import numpy as np | |
from time import time | |
from sys import argv | |
from daal.data_management import HomogenNumericTable, BlockDescriptor_Float64, readOnly | |
from daal.data_management import ( | |
FileDataSource, DataSourceIface | |
) | |
from daal.algorithms.kmeans import ( | |
Batch_Float64LloydDense, init, data, inputCentroids, | |
assignments, centroids, goalFunction, nIterations | |
) | |
import inspect, sys, os.path | |
def getArrayFromNT(table, nrows=0):
    """Read the leading rows of a numeric table into an array.

    Args:
        table: Object exposing ``getNumberOfRows``, ``getBlockOfRows`` and
            ``releaseBlockOfRows``.
        nrows: Number of rows to request, starting at row 0. The default
            of 0 means "use the table's own row count".

    Returns:
        The value produced by the block descriptor's ``getArray()`` for
        the requested rows.
    """
    block = BlockDescriptor_Float64()
    row_count = table.getNumberOfRows() if nrows == 0 else nrows
    table.getBlockOfRows(0, row_count, readOnly, block)
    values = block.getArray()
    # Hand the block back to the table once the array has been obtained.
    table.releaseBlockOfRows(block)
    return values
def printNT(table, nrows=0, message=''):
    """Print ``message``, a newline, then the leading rows of ``table``.

    Args:
        table: Numeric table forwarded to ``getArrayFromNT``.
        nrows: Row count forwarded to ``getArrayFromNT``; 0 selects the
            table's full row count.
        message: Label printed ahead of the array.
    """
    print(message, '\n', getArrayFromNT(table, nrows))
# K-Means algorithm parameters, read from the command line:
#   argv[1] - dataset file name handed to the data source
#   argv[2] - number of clusters (k)
#   argv[3] - iteration count handed to the K-Means algorithm
# All three are indexed below, so all three must be present. An explicit
# check is used instead of ``assert`` because asserts are skipped entirely
# under ``python -O``.
if len(argv) < 4:
    raise SystemExit("usage: {} <dataset> <k> <niters>".format(argv[0]))
datasetFileName = argv[1]
nClusters = int(argv[2])
niter = int(argv[3])
print("Running: {}, with k: {}, and niters: {}\n".format(datasetFileName, nClusters, niter))
if __name__ == "__main__":
    # The timer starts before the data is loaded, so the time reported
    # below covers loading and centroid initialization as well as the
    # K-Means computation itself.
    start = time()

    # Initialize FileDataSource to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Retrieve the data from the input file
    dataSource.loadDataBlock()

    # Get initial clusters for the K-Means algorithm
    initAlg = init.Batch_Float64RandomDense(nClusters)
    initAlg.input.set(init.data, dataSource.getNumericTable())
    res = initAlg.compute()
    centroidsResult = res.get(init.centroids)

    # Create an algorithm object for the K-Means algorithm
    algorithm = Batch_Float64LloydDense(nClusters, niter)
    algorithm.input.set(data, dataSource.getNumericTable())
    algorithm.input.set(inputCentroids, centroidsResult)
    res = algorithm.compute()

    print("\nAlg computation time: {} sec\n".format(time()-start))
    printNT(res.get(nIterations))

    # Print the clusterization results
    printNT(res.get(assignments), 10, "First 10 cluster assignments:")
    # NOTE(review): the 10 here is printNT's row limit, so this prints up
    # to 10 rows of the centroids table with every column -- confirm the
    # "dimensions" wording in the label is what is intended.
    printNT(res.get(centroids), 10, "First 10 dimensions of centroids:")
    # The label has to be passed by keyword: printNT's second positional
    # parameter is nrows, so a positional string would be used as the row
    # count instead of being printed.
    printNT(res.get(goalFunction), message="Goal function value:")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment