Last active
December 9, 2015 21:45
-
-
Save jdurbin/843066247d090a176e47 to your computer and use it in GitHub Desktop.
K-means clustering using Weka and WekaMine wrappers for Groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env groovy

// K-means clustering of a tab-delimited numeric data file using Weka's
// SimpleKMeans through the WekaMine (durbin.weka) Groovy wrappers.
//
// Arguments:
//   args[0]  path to the tab-delimited data file
//   args[1]  k, the number of clusters (parsed as an int)
//
// Output: one line per instance on stdout, "<instance ID>\t<cluster index>".

import durbin.weka.*
import static WM.*

// Fail with a readable message instead of an ArrayIndexOutOfBoundsException
// when the script is started without its two required arguments.
if (args.length < 2) {
    System.err.println "Usage: <script> <dataFile> <k>"
    System.exit(1)
}

dataFile = args[0]
k = args[1] as int

wm = new WM()

// Read a features x samples tab delimited file
data = wm.readNumericTab(dataFile)

// Many weka algorithms do not like string attributes, so remove string instance ID..
noIDinstances = wm.removeID(data)

// Create clusterer with a factory method...
// NOTE(review): the named arguments presumably map onto Weka's SimpleKMeans
// command-line options (-I max iterations, -N number of clusters,
// -O preserve instance order) -- confirm against the WekaMine factory.
km = SimpleKMeans(I:100,N:k,O:true)
km.buildClusterer(noIDinstances)

// Print out the original instance IDs along with cluster assignments
// data[instanceID][featureName] is some of the syntax sugar that comes
// with WekaMine.
// The ID is looked up in the original `data` (which still carries the ID
// attribute) by position, so this relies on removeID() keeping instance order.
noIDinstances.eachWithIndex{instance,i->
    println "${data[i]['ID']}\t"+km.clusterInstance(instance)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment