# NeptuneProjects/rapids_cuml_demo.py

## rapids_cuml_demo.py
"""
Written by Billy Jenkins, 27 Oct 2020, Scripps Institution of Oceanography
wjenkins  (at)  ucsd.edu

Demonstration of interchangeability between the scikit-learn & cuML libraries.
For more information, visit https://docs.rapids.ai/api/cuml/stable/

Unfortunately, CUML only supports 'Linux-like' platforms at this time, so for
Apple or Windows machines, you are stuck with the scikit-learn library and your
CPU.  If you have a Linux machine with a CUDA device available, the CUML
library is nearly interchangeable with scikit-learn syntax, so you can keep the
body of your code the same and just import the library according to the system
on which you are running your code.  The larger the dataset, the greater the
disparity in execution time.
"""
import sys
import time

import numpy as np
from sklearn.datasets import make_blobs

# Select the estimator backend.  cuML is only published for Linux, and even on
# Linux it may not be installed, so the platform check alone is not enough:
# attempt the import and fall back to scikit-learn if it fails.  The status
# message is printed according to which backend was actually imported.
# NOTE(review): a successful import does not prove that a usable CUDA device
# is present; cuML may still fail at fit time -- confirm on the target machine.
try:
    if sys.platform != 'linux':
        raise ImportError('cuML is only supported on Linux.')
    from cuml import KMeans, TSNE
except ImportError:
    print('CUDA unavailable, using CPU.')
    from sklearn.cluster import KMeans
    from sklearn.manifold import TSNE
else:
    print('CUDA available, using GPU.')

# Start the stopwatch!  perf_counter() is meant for measuring intervals and is
# not affected by adjustments to the system clock.
start_time = time.perf_counter()

# Create some random data with 3 blobs:
centers = [(-10, -10), (0, 0), (10, 10)]
cluster_std = [0.1, 1, 10]  # One standard deviation per blob.
n_clusters = len(centers)
x, labels_true = make_blobs(
    n_samples=10000,  # <---- Make this bigger or smaller.
    cluster_std=cluster_std,
    centers=centers,
    # The dimensionality of the data is that of the center coordinates (2-D
    # here), not the number of clusters.
    n_features=len(centers[0]),
    random_state=2009,
)

# Run k-means clustering; `labels` holds the predicted cluster of each sample.
labels = KMeans(n_clusters=n_clusters, random_state=2009).fit_predict(x)
# Run t-SNE to obtain a 2-component embedding of the samples.
x_embedded = TSNE(n_components=2, random_state=2009).fit_transform(x)

# Stop the stopwatch!  The interval covers data generation, k-means and t-SNE.
end_time = time.perf_counter()
print(f"Completed in {end_time - start_time:.2f} s.")