First, a quick code example of K-Means in scikit-learn:
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
# Number of clusters to generate.
n_centers = 5
# make_blobs takes the cluster count (or explicit centre coordinates) through
# its `centers` keyword; it has no `n_centers` parameter, so passing that name
# raises TypeError. The second return value (the labels) is discarded.
X, _ = make_blobs(n_samples=10000, centers=n_centers)
import socket
import sys
import time
import struct

# Endpoint settings. The trailing "| |" table residue that followed each line
# was not Python and has been removed; all values are unchanged.
host = 'localhost'
port = 8888
# NOTE(review): presumably the byte count per send/recv call -- confirm
# against the code that uses it (not visible in this snippet).
buffersize = 1024
# NOTE(review): purpose not visible here; looks like an iteration or message
# count -- confirm against usage.
N = 1000000
# (host, port) pair, the address form taken by AF_INET sockets.
server_address = (host, port)
These commands are based on an Ask Ubuntu answer: http://askubuntu.com/a/581497
To install gcc-6 (gcc-6.1.1), I had to take some additional steps, as shown below.
USE THOSE COMMANDS AT YOUR OWN RISK. I SHALL NOT BE RESPONSIBLE FOR ANYTHING. | |
ABSOLUTELY NO WARRANTY. | |
If you are still reading, let's carry on with the code.
sudo apt-get update && \ | |
sudo apt-get install build-essential software-properties-common -y && \ | |
sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y && \ |
import gc | |
import os | |
import time | |
import numpy as np | |
import pandas as pd | |
from pyarrow.compat import guid | |
import pyarrow as pa | |
import pyarrow.parquet as pq | |
import snappy |
#!/bin/bash | |
## This gist contains step-by-step instructions to install CUDA v9.0 and cuDNN 7.3 on Ubuntu 18.04
### steps #### | |
# verify the system has a cuda-capable gpu | |
# download and install the nvidia cuda toolkit and cudnn | |
# set up environment variables
# verify the installation | |
### |
#!/usr/bin/env bash | |
# Abort on the first failing command and on expansion of any unset variable.
set -eu

# Directory containing this script, resolved from BASH_SOURCE so it does not
# depend on the caller's working directory.
# NOTE(review): this assignment overwrites the shell-maintained PWD variable.
# The name is kept because code outside this view may read it; a distinct
# name (e.g. SCRIPT_DIR) would be safer if nothing else depends on it.
PWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
# Parent of the script directory, canonicalized by realpath.
SRC_DIR=$(realpath "${PWD}/..")
# Path to the cpp/ subdirectory of SRC_DIR.
CXX_SRC=${SRC_DIR}/cpp

# The following can be set
# CMAKE falls back to "cmake" when it is unset or empty in the environment.
: "${CMAKE:=cmake}"
#!/bin/bash | |
#SBATCH --job-name spark-cluster | |
#SBATCH --account=qh82 | |
#SBATCH --time=02:00:00 | |
# --- Master resources --- | |
#SBATCH --nodes=1 | |
#SBATCH --mem-per-cpu=1G | |
#SBATCH --cpus-per-task=1 | |
#SBATCH --ntasks-per-node=1 | |
# --- Worker resources --- |