Skip to content

Instantly share code, notes, and snippets.

@disa-mhembere
Last active April 17, 2017 05:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save disa-mhembere/a4f2c8461390b735a011f16eb1dc8ce7 to your computer and use it in GitHub Desktop.
Save disa-mhembere/a4f2c8461390b735a011f16eb1dc8ce7 to your computer and use it in GitHub Desktop.
Serial k-means code composed of two examples adapted from the Intel DAAL examples
from os import environ
from os.path import join as jp
import numpy as np
from time import time
from sys import argv
from daal.data_management import HomogenNumericTable, BlockDescriptor_Float64, readOnly
from daal.data_management import (
FileDataSource, DataSourceIface
)
from daal.algorithms.kmeans import (
Batch_Float64LloydDense, init, data, inputCentroids,
assignments, centroids, goalFunction, nIterations
)
import inspect, sys, os.path
def getArrayFromNT(table, nrows=0):
    """Return the leading rows of a numeric table as a NumPy array.

    Args:
        table: Numeric table object providing ``getNumberOfRows``,
            ``getBlockOfRows`` and ``releaseBlockOfRows``.
        nrows: Number of rows to read, starting at row 0. ``0`` (the
            default) means "every row in the table".

    Returns:
        A NumPy array holding a private copy of the requested rows, read
        through a ``BlockDescriptor_Float64``.
    """
    bd = BlockDescriptor_Float64()
    if nrows == 0:
        nrows = table.getNumberOfRows()
    table.getBlockOfRows(0, nrows, readOnly, bd)
    try:
        # Copy while the block is still held: the descriptor is released
        # below and destroyed when this function returns, so the caller must
        # not be handed an array that may alias the descriptor's buffer.
        # NOTE(review): if the binding's getArray() already copies, this is
        # merely a cheap defensive duplicate.
        npa = np.copy(bd.getArray())
    finally:
        # Always hand the block back, even if reading the array failed.
        table.releaseBlockOfRows(bd)
    return npa
def printNT(table, nrows=0, message=''):
    """Print ``message`` followed by rows of a numeric table.

    Args:
        table: Numeric table to display; passed straight to
            ``getArrayFromNT``.
        nrows: Number of leading rows to show; ``0`` shows all rows.
        message: Caption printed before the array.
    """
    # The caption, a newline and the array are emitted by a single print call.
    print(message, '\n', getArrayFromNT(table, nrows))
# K-Means algorithm parameters, taken from the command line:
#   argv[1] = dataset file, argv[2] = number of clusters, argv[3] = iterations.
# All three are indexed below, so require all of them up front. An explicit
# check is used instead of `assert` because asserts are stripped under
# `python -O`, which would turn a usage error into a bare IndexError.
if len(argv) < 4:
    raise SystemExit("<dataset> <k> <niters>")
datasetFileName = argv[1]
nClusters = int(argv[2])
niter = int(argv[3])
print("Running: {}, with k: {}, and niters: {}\n".format(datasetFileName, nClusters, niter))
if __name__ == "__main__":
    # NOTE: the timer starts before the data is loaded, so the time reported
    # below covers loading and centroid initialisation as well as the
    # K-Means iterations themselves.
    start = time()

    # Initialize FileDataSource to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Retrieve the data from the input file
    dataSource.loadDataBlock()
    inputTable = dataSource.getNumericTable()

    # Get initial clusters for the K-Means algorithm
    initAlg = init.Batch_Float64RandomDense(nClusters)
    initAlg.input.set(init.data, inputTable)
    centroidsResult = initAlg.compute().get(init.centroids)

    # Create an algorithm object for the K-Means algorithm
    algorithm = Batch_Float64LloydDense(nClusters, niter)
    algorithm.input.set(data, inputTable)
    algorithm.input.set(inputCentroids, centroidsResult)
    res = algorithm.compute()

    print("\nAlg computation time: {} sec\n".format(time()-start))

    printNT(res.get(nIterations))

    # Print the clusterization results
    printNT(res.get(assignments), 10, "First 10 cluster assignments:")
    printNT(res.get(centroids), 10, "First 10 dimensions of centroids:")
    # The caption must be passed by keyword: printNT's second positional
    # parameter is the row count, not the message.
    printNT(res.get(goalFunction), message="Goal function value:")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment