Skip to content

Instantly share code, notes, and snippets.

@kumagi
Last active August 29, 2015 14:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kumagi/2834fbcc00800ae64952 to your computer and use it in GitHub Desktop.
Save kumagi/2834fbcc00800ae64952 to your computer and use it in GitHub Desktop.
# Image that installs Jubatus from its apt repository and starts a
# jubaclustering server with the configuration file copied in below.
FROM ubuntu
MAINTAINER Hiroki Kumazaki

# Register the Jubatus apt repository, then refresh the package index and
# install in a single layer so a cached `apt-get update` layer can never be
# paired with a newer install step.
RUN echo "deb http://download.jubat.us/apt binary/" >> /etc/apt/sources.list
RUN apt-get update && apt-get install -y --allow-unauthenticated jubatus

# Port published for clients of the server started by CMD.
EXPOSE 9199

# Make the binaries, shared libraries and headers under /opt/jubatus
# visible to the shell, the dynamic loader and the C++ toolchain.
ENV JUBATUS_HOME /opt/jubatus
ENV PATH /opt/jubatus/bin:${PATH}
ENV LD_LIBRARY_PATH /opt/jubatus/lib:${LD_LIBRARY_PATH}
# Fixed: the search path previously read "-L$/opt/jubatus/lib" (stray "$").
ENV LDFLAGS -L/opt/jubatus/lib ${LDFLAGS}
# Fixed: variable names are case-sensitive; the previous value expanded
# ${CPLUS_INCLUDE_Path}, which is a different variable.
ENV CPLUS_INCLUDE_PATH /opt/jubatus/include:${CPLUS_INCLUDE_PATH}

# set filename: change "kmeans.json" to ship a different configuration.
ADD kmeans.json /tmp/config.json
CMD jubaclustering -f /tmp/config.json
#!/usr/bin/env python
# coding: utf-8
import sys
import json
import random
import jubatus
from jubatus.common import Datum
# Connection settings for the Jubatus clustering server this script talks to.
host = '127.0.0.1'
port = 9199
name = 'test'
client = jubatus.Clustering(host, port, name)

# Build two groups of random 2-D points. Even iterations feed the first
# group, odd iterations feed the second, so each ends up with 5000 points.
clusters = [[], []]
# `range` (rather than the Python-2-only `xrange`) keeps the script runnable
# on both Python 2 and Python 3.
for x in range(10000):
    if x & 1 == 0:
        # cluster 1: centred on (100, 50)
        d = Datum({
            'x': 100 + random.randint(-10, 10),
            'y': 50 + random.randint(-20, 20),
        })
        clusters[0].append(d)
    else:
        # cluster 2: centred on (-200, 10)
        d = Datum({
            'x': -200 + random.randint(-10, 10),
            'y': 10 + random.randint(-10, 10),
        })
        clusters[1].append(d)

# Send each group to the server in one call and report how many were sent.
for cluster in clusters:
    client.push(cluster)
    print("{size} data pushed".format(size=len(cluster)))

# Fetch the cluster centers computed by the server and print each one.
centers = client.get_k_center()
print("total {size} centers clustered".format(size=len(centers)))
for center in centers:
    print(center.num_values)

# Ask the server which center is closest to a single probe point.
data = {'x': -130, 'y': -2}
nearest = client.get_nearest_center(Datum(data))
print("data nearest {d} is {n}".format(d=data, n=nearest.num_values))
{
"converter" : {
"num_rules" : [
{ "key" : "*", "type" : "num" }
]
},
"parameter" : {
"k" : 2,
"compressor_method" : "compressive_kmeans",
"bucket_size" : 1000,
"compressed_bucket_size" : 100,
"bicriteria_base_size" : 10,
"bucket_length" : 4,
"forgetting_factor" : 0,
"forgetting_threshold" : 0.5
},
"method" : "kmeans"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment