# disa-mhembere/kmeans_intel_daal.py

## kmeans_intel_daal.py
from os import environ
from os.path import join as jp
import numpy as np
from time import time
from sys import argv

from daal.data_management import HomogenNumericTable, BlockDescriptor_Float64, readOnly

from daal.data_management import (
    FileDataSource, DataSourceIface
)

from daal.algorithms.kmeans import (
    Batch_Float64LloydDense, init, data, inputCentroids,
        assignments, centroids, goalFunction, nIterations
)

import inspect, sys, os.path

def getArrayFromNT(table, nrows=0):
    """Read the leading rows of a numeric table via a Float64 block descriptor.

    table -- object providing getNumberOfRows(), getBlockOfRows() and
             releaseBlockOfRows().
    nrows -- number of rows to request, starting at row 0; the default 0
             means "use table.getNumberOfRows()".

    Returns the value produced by the block descriptor's getArray()
    (presumably a NumPy array -- confirm against the DAAL documentation).
    """
    block = BlockDescriptor_Float64()
    row_count = nrows if nrows != 0 else table.getNumberOfRows()
    table.getBlockOfRows(0, row_count, readOnly, block)
    values = block.getArray()
    # NOTE(review): the array is fetched before the block is released and
    # returned afterwards -- confirm it stays valid once the block is released.
    table.releaseBlockOfRows(block)
    return values

def printNT(table, nrows = 0, message=''):
    """Print `message`, a newline separator, then the leading rows of `table`.

    table   -- numeric table handed to getArrayFromNT().
    nrows   -- row count forwarded to getArrayFromNT(); 0 selects every row.
    message -- label printed ahead of the values.
    """
    print(message, '\n', getArrayFromNT(table, nrows))
# K-Means algorithm parameters, taken from the command line:
#   argv[1] -- path of the dataset file
#   argv[2] -- number of clusters (k)
#   argv[3] -- iteration count handed to the K-Means algorithm

# All three positional arguments are read below, so require all of them and
# exit with a usage message rather than failing later with an IndexError.
# An explicit check is used because `assert` is stripped under `python -O`.
if len(argv) < 4:
    raise SystemExit("usage: {} <dataset> <k> <niters>".format(argv[0]))
datasetFileName = argv[1]
nClusters = int(argv[2])
niter = int(argv[3])

print("Running: {}, with k: {}, and niters: {}\n".format(datasetFileName, nClusters, niter))

if __name__ == "__main__":

    # The timer starts before the input is read, so the reported time covers
    # data loading and centroid initialization as well as the K-Means run.
    start = time()

    # Initialize FileDataSource to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    # Retrieve the data from the input file
    dataSource.loadDataBlock()
    inputTable = dataSource.getNumericTable()

    # Get initial clusters for the K-Means algorithm
    initAlg = init.Batch_Float64RandomDense(nClusters)
    initAlg.input.set(init.data, inputTable)
    centroidsResult = initAlg.compute().get(init.centroids)

    # Create an algorithm object for the K-Means algorithm
    algorithm = Batch_Float64LloydDense(nClusters, niter)
    algorithm.input.set(data, inputTable)
    algorithm.input.set(inputCentroids, centroidsResult)

    res = algorithm.compute()

    print("\nAlg computation time: {} sec\n".format(time()-start))
    printNT(res.get(nIterations))

    # Print the clusterization results
    printNT(res.get(assignments), 10, "First 10 cluster assignments:")
    printNT(res.get(centroids), 10, "First 10 dimensions of centroids:")
    # The label must be passed by keyword: printNT's second positional
    # parameter is the row count, not the message.
    printNT(res.get(goalFunction), message="Goal function value:")