Skip to content

Instantly share code, notes, and snippets.

@cympfh
Last active February 23, 2017 04:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cympfh/2c970e4656f52adf57f91483f9b1e75c to your computer and use it in GitHub Desktop.
Save cympfh/2c970e4656f52adf57f91483f9b1e75c to your computer and use it in GitHub Desktop.
import sys
import subprocess
import tempfile
import click
import numpy
from sklearn.manifold import TSNE
def shorten(label):
    """Abbreviate a long label by keeping only its head and tail.

    Labels of at most 20 characters are returned unchanged.  Longer labels
    are replaced by their first 10 characters, the marker ``'..'`` and
    their last 10 characters.
    """
    max_len = 20
    if len(label) <= max_len:
        return label
    half = max_len // 2
    return '..'.join((label[:half], label[-half:]))
def gnuplot_template(dim):
    """Build the gnuplot script used to show the labelled points.

    For ``dim == 2`` the script uses ``plot`` with columns 1:2:3; for any
    other value it uses ``splot`` with columns 1:2:3:4.  The returned text
    still contains one ``{}`` placeholder, which the caller fills in with
    the path of the data file via ``str.format``.
    """
    if dim == 2:
        command, columns = 'plot', '1:2:3'
    else:
        command, columns = 'splot', '1:2:3:4'
    plot_line = command + ' "{}" using ' + columns + ' with labels notitle boxed center'
    script_lines = [
        '',  # the script starts with an empty line
        'set terminal qt',
        'set grid',
        'set datafile separator "\t"',  # a literal TAB ends up in the script
        'set style textbox opaque',
        plot_line,
        'pause mouse close',
        '',  # ... and ends with a newline
    ]
    return '\n'.join(script_lines)
def load(d1='\t', d2=' '):
    """Read labelled vectors from standard input.

    Every non-blank line is expected to have the form
    ``<label><d1><x1><d2><x2>...``: the text before the first ``d1`` is the
    label, everything after it is the vector, whose components are
    separated by ``d2``.

    Args:
        d1: Delimiter between the label and its vector.
        d2: Delimiter between the vector components.

    Returns:
        A ``(labels, X)`` pair.  ``labels`` is the list of labels after
        ``shorten`` has been applied; ``X`` is a ``numpy.float32`` array
        built from the parsed vectors, in input order.

    Raises:
        ValueError: If a vector component cannot be parsed as a float.
    """
    # NOTE: the delimiters passed by the caller are honoured here; they used
    # to be overwritten with '\t' and ' ', which made the -d/-c command-line
    # options ineffective.
    labels = []
    X = []
    for line in sys.stdin:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no data.
            continue
        label, _, rest = line.partition(d1)
        # Empty tokens come from repeated or trailing delimiters; drop them
        # instead of failing on float('').
        vect = [float(x) for x in rest.split(d2) if x]
        labels.append(shorten(label))
        X.append(vect)
    X = numpy.array(X).astype(numpy.float32)
    return labels, X
def tsne(X, dim=2):
    """Project ``X`` to ``dim`` components with scikit-learn's t-SNE.

    As a side effect, numpy's print options are switched to
    ``suppress=True``.  Returns whatever ``TSNE.fit_transform`` returns
    for ``X``.
    """
    numpy.set_printoptions(suppress=True)
    reducer = TSNE(n_components=dim)
    embedded = reducer.fit_transform(X)
    return embedded
@click.command()
@click.option('-d', default='\t', help='deliminator between the label and its vector')
@click.option('-c', default=' ', help='deliminator for the vector')
@click.option('--dim', default=2, type=click.IntRange(2, 3))
@click.option('--output', '-o', help='When no --output is specified, run gnuplot/Qt directly')
def main(d, c, dim, output):
    """Embed labelled vectors read from stdin with t-SNE, then plot or save them."""
    # NOTE: click shows the docstring above as the command's --help text, so
    # it is kept to one user-facing sentence; details live in these comments.
    #
    # d, c   -- label/vector and vector-component delimiters, passed to load()
    # dim    -- number of embedding dimensions (2 or 3)
    # output -- path of the data file to write; when None the data goes to a
    #           temporary file and gnuplot is launched on it.
    labels, X = load(d, c)
    X = tsne(X, dim)

    if output is None:
        dat = tempfile.NamedTemporaryFile(mode='w')
    else:
        dat = open(output, 'w')

    # The context managers guarantee that both files are closed (and the
    # temporary ones removed) even if writing or plotting raises.
    with dat:
        for label, row in zip(labels, X):
            vec = '\t'.join(str(x) for x in row.tolist())
            dat.write("{}\t\"{}\"\n".format(vec, label))
        # gnuplot reads the file by name, so the data must be on disk first.
        dat.flush()

        if output is not None:
            return

        with tempfile.NamedTemporaryFile(mode='w') as gp:
            gp.write(gnuplot_template(dim).format(dat.name))
            gp.flush()
            try:
                result = subprocess.call(['gnuplot', gp.name, '-p'])
            except OSError as err:
                # Typically raised when the gnuplot executable is missing.
                click.secho('gnuplot could not be started: {}'.format(err),
                            fg='red', err=True)
                return
            if result != 0:
                click.secho('gnuplot returns not 0', fg='red', err=True)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
#!/bin/bash
# Smoke test for the t-SNE visualizer: generate two groups of random
# 10-dimensional vectors and plot their 2-D embedding.
# Assumes the Python script is saved next to this file as test.py.

# Abort on the first failing command or unset variable, so a failed data
# generation step never feeds an empty or stale test.dat to the visualizer.
set -eu

echo "generating toy data..."
# Group A: 100 vectors with components drawn from rand.
# Group B: 100 vectors whose components are the sum of three rand draws.
# Output format per line: <label><TAB><space-separated components>.
ruby -e '
  100.times { |i|
    vec = (1..10).map { rand }
    puts "A#{i}\t#{vec.join " "}"
  }
  100.times { |i|
    vec = (1..10).map { rand + rand + rand }
    puts "B#{i}\t#{vec.join " "}"
  }
' > test.dat

echo "visualizing..."
python test.py --dim 2 <test.dat
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment