Skip to content

Instantly share code, notes, and snippets.

@bdesham
Created September 3, 2013 13:55
Show Gist options
  • Save bdesham/6424289 to your computer and use it in GitHub Desktop.
Save bdesham/6424289 to your computer and use it in GitHub Desktop.
Visualizing song repetition with Python. For more information, see <http://www.bdesham.info/2013/09/visualizing-repetition>.
#!/usr/bin/env python
# repetition.py
#
# Usage: python repetition.py input.txt output.svg
#
# Given a text file containing song lyrics, generates an SVG image showing the
# relationships between the lines of text. For more information, read the
# article at <http://www.bdesham.info/2013/09/visualizing-repetition>.
#
# Copyright (c) 2013, Benjamin Esham. This software is released under the
# following version of the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following condition: the above copyright
# notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# The software is provided "as is", without warranty of any kind, express or
# implied, including but not limited to the warranties of merchantability,
# fitness for a particular purpose and noninfringement. In no event shall the
# authors or copyright holders be liable for any claim, damages or other
# liability, whether in an action of contract, tort or otherwise, arising
# from, out of or in connection with the software or the use or other dealings
# in the software.
import sys
from re import findall, UNICODE
from math import sqrt
from svg_grid import svg_for_matrix
def normalize_line(line):
    """Break a line of text into a list of lowercased words.

    Trailing whitespace is stripped and the text is lowercased before
    matching. Each word is a maximal run of word characters (``\\w``,
    matched with the UNICODE flag) and apostrophes; everything else acts
    as a separator and is discarded.

    Returns a list of strings, which is empty when the line has no words.
    """
    cleaned = line.rstrip().lower()
    word_pattern = r"(?:\w|')+"
    return findall(word_pattern, cleaned, UNICODE)
def lines_for_file(text):
    """Normalize each line of `text`, keeping only lines that contain words.

    `text` is iterated once and every item is passed to normalize_line();
    items whose normalized form is an empty list are dropped.

    Returns a list of word lists, in the order the lines were read.
    """
    kept = []
    for raw_line in text:
        words = normalize_line(raw_line)
        if words:
            kept.append(words)
    return kept
def normalized_lines_in_file(filename):
    """Read `filename` and return its non-empty lines as lists of words.

    The file is opened in text mode and handed to lines_for_file(), so each
    line is normalized exactly once and the filtering rule lives in a
    single place.

    Returns a list of word lists; lines containing no words are omitted.
    Raises whatever open() raises if the file cannot be read.
    """
    with open(filename, 'r') as f:
        # lines_for_file() builds its whole result before returning, so the
        # file is fully consumed while it is still open.
        return lines_for_file(f)
def corpus_from_lines(lines):
    """Return the distinct words found in `lines`, in first-seen order.

    `lines` is an iterable of word sequences. The result is a list in which
    each word appears exactly once, positioned by its first occurrence.
    Words must be hashable.
    """
    # The set gives constant-time membership tests; the list preserves the
    # order of first appearance, which fixes the vector component order.
    seen = set()
    corpus = []
    for line in lines:
        for word in line:
            if word not in seen:
                seen.add(word)
                corpus.append(word)
    return corpus
def vector_for_line(line, corpus):
    """Return the word-count vector of `line` with respect to `corpus`.

    The result has one entry per corpus word, in corpus order, holding the
    number of times that word occurs in `line`. Words in `line` that are
    not in `corpus` are ignored. Words must be hashable.
    """
    # Tally the line once instead of rescanning it for every corpus word.
    counts = {}
    for word in line:
        counts[word] = counts.get(word, 0) + 1
    return [counts.get(test_word, 0) for test_word in corpus]
def vectors_from_lines(lines):
    """Convert each word list in `lines` into a word-count vector.

    The vocabulary is built from all of `lines` with corpus_from_lines(),
    then every line is converted with vector_for_line() against that shared
    vocabulary. `lines` is traversed twice, so it must be re-iterable
    (e.g. a list).

    Returns a list with one vector per input line, in the same order.
    """
    vocabulary = corpus_from_lines(lines)
    vectors = []
    for words in lines:
        vectors.append(vector_for_line(words, vocabulary))
    return vectors
def vector_length(vector):
    """Return the Euclidean length of `vector`.

    This is the square root of the sum of the squared components; an empty
    vector has length 0.0.
    """
    squares = (component * component for component in vector)
    return sqrt(sum(squares))
def dot_product(vector1, vector2):
    """Return the dot product of `vector1` and `vector2`.

    Each component of `vector1` is multiplied by the component of `vector2`
    at the same index and the products are summed. `vector2` is indexed by
    position, so it must be at least as long as `vector1` (otherwise an
    IndexError is raised); any extra trailing components of `vector2` are
    ignored.
    """
    products = (
        component * vector2[index]
        for index, component in enumerate(vector1)
    )
    return sum(products)
def cosine_between(vector1, vector2):
    """Return the cosine of the angle between `vector1` and `vector2`.

    The value is the dot product divided by the product of the two vector
    lengths. The cosine is undefined when either vector has zero length;
    in that case 0.0 ("no similarity") is returned instead of raising
    ZeroDivisionError.
    """
    magnitude = vector_length(vector1) * vector_length(vector2)
    if magnitude == 0:
        # At least one vector is all zeros (or empty): nothing to compare.
        return 0.0
    return dot_product(vector1, vector2) / magnitude
def generate_matrix(lines):
    """Build the square matrix of pairwise cosines for `lines`.

    Despite its name, `lines` is expected to hold numeric vectors (the
    script passes the output of vectors_from_lines()). The entry at row j,
    column i is cosine_between(lines[i], lines[j]).

    Returns a list of rows, each a list with one entry per input vector.
    """
    matrix = []
    for row_vector in lines:
        row = [cosine_between(column_vector, row_vector)
               for column_vector in lines]
        matrix.append(row)
    return matrix
def matrix_for_file(filename):
    """Return the line-similarity matrix for the text file `filename`.

    The file's non-empty lines are read and normalized into word lists,
    converted to word-count vectors over a shared vocabulary, and compared
    pairwise with generate_matrix().

    Raises whatever open() raises if the file cannot be read.
    """
    # Read the file's contents (not the characters of its name) and turn
    # the word lists into numeric vectors before comparing them.
    lines = normalized_lines_in_file(filename)
    return generate_matrix(vectors_from_lines(lines))
if __name__ == '__main__':
    # Command-line entry point: repetition.py input.txt output.svg
    if len(sys.argv) != 3:
        # A parenthesized single argument is valid both as a Python 2 print
        # statement and as a Python 3 print() call.
        print('Usage: repetition.py input.txt output.svg')
        # sys.exit is always available; the bare exit() helper is only
        # installed by the site module.
        sys.exit(1)

    # Lyrics -> word lists -> word-count vectors -> cosine matrix.
    lines = normalized_lines_in_file(sys.argv[1])
    vectors = vectors_from_lines(lines)
    matrix = generate_matrix(vectors)

    # Render the matrix as a grid of colored squares and save it.
    with open(sys.argv[2], 'w') as f:
        f.write(svg_for_matrix(matrix, low_color='132B43', high_color='55B1F7'))
# svg_grid.py
#
# Given a matrix (i.e. list of lists), the function svg_for_matrix produces an
# SVG image made up of a grid of squares. Each square is colored according to
# the corresponding element of the matrix, with a value of `low` giving a
# square colored `low_color`, a value of `high` giving a `high_color` square,
# and intermediate values an interpolated color. For an application of this
# function, see <http://www.bdesham.info/2013/09/visualizing-repetition>.
#
# Copyright (c) 2013, Benjamin Esham. This software is released under the
# following version of the MIT license:
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following condition: the above copyright
# notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# The software is provided "as is", without warranty of any kind, express or
# implied, including but not limited to the warranties of merchantability,
# fitness for a particular purpose and noninfringement. In no event shall the
# authors or copyright holders be liable for any claim, damages or other
# liability, whether in an action of contract, tort or otherwise, arising
# from, out of or in connection with the software or the use or other dealings
# in the software.
from xml.etree.ElementTree import Element, SubElement, tostring
# XML prolog that svg_for_matrix prepends to the serialized <svg> element.
xml_declaration = '<?xml version="1.0" encoding="utf-8"?>'
def svg_for_matrix(matrix, low=0.0, high=1.0, low_color='ffffff',
                   high_color='0000ff'):
    """Render `matrix` as an SVG grid of colored squares.

    Parameters:
        matrix: a list of rows, each a list of numbers. One square is drawn
            per element, rows top to bottom and elements left to right.
        low, high: the values that map to `low_color` and `high_color`;
            values in between get an interpolated color.
        low_color, high_color: six-digit hex colors without a leading '#'.

    Returns the SVG document as a string, starting with an XML declaration.
    The image is at most 512 pixels on a side when the matrix has up to 512
    rows; larger matrices get one pixel per row. An empty matrix produces
    an SVG with no squares and zero width and height.
    """
    total_size = 512
    row_count = len(matrix)

    # Floor division keeps box_size an integer on both Python 2 and 3.
    # Never let a box shrink to zero pixels, and avoid dividing by zero
    # when the matrix is empty.
    if row_count:
        box_size = max(1, total_size // row_count)
    else:
        box_size = 0

    # box_size was rounded down to an integer, so make the total size
    # smaller if appropriate
    total_size = box_size * row_count

    svg = Element('svg')
    svg.set('xmlns:svg', 'http://www.w3.org/2000/svg')
    svg.set('xmlns', 'http://www.w3.org/2000/svg')
    svg.set('width', str(total_size))
    svg.set('height', str(total_size))
    svg.set('version', '1.0')

    y_position = 0
    for row in matrix:
        x_position = 0
        for item in row:
            color = interpolate_color(item, low, high, low_color, high_color)
            box = SubElement(svg, 'rect')
            box.set('style', 'fill: #{0}'.format(color))
            # make the boxes 2px larger than necessary to prevent borders
            # from showing up when the image is scaled
            box.set('width', str(box_size + 2))
            box.set('height', str(box_size + 2))
            box.set('x', str(x_position))
            box.set('y', str(y_position))
            x_position += box_size
        y_position += box_size

    # tostring() returns a byte string; on Python 3 that is `bytes` and
    # cannot be concatenated with the declaration, so decode it there. The
    # default serialization is ASCII-only, which makes the decode lossless.
    markup = tostring(svg)
    if not isinstance(markup, str):
        markup = markup.decode('ascii')
    return xml_declaration + markup
def interpolate_color(value, low, high, low_color, high_color):
    """Return the hex color for `value` on the `low`..`high` color scale.

    Each of the three color channels is interpolated linearly between its
    level in `low_color` (at value == low) and in `high_color` (at
    value == high). Values outside the range are clamped to the nearest
    valid channel level.

    Parameters:
        value: the number to map to a color.
        low, high: the numeric endpoints of the scale.
        low_color, high_color: six-digit hex colors without a leading '#'.

    Returns a six-digit lowercase hex color string without a leading '#'.
    Raises ZeroDivisionError if `low` equals `high`.
    """
    low_rgb = list_for_hex_color(low_color)
    high_rgb = list_for_hex_color(high_color)

    # Force float division so integer endpoints are not truncated on
    # Python 2.
    span = float(high - low)

    channels = []
    for low_channel, high_channel in zip(low_rgb, high_rgb):
        level = ((high_channel - low_channel) / span) * (value - low) \
            + low_channel
        # Hex formatting needs integers on Python 3, and a channel must
        # stay within 0..255 to yield exactly two hex digits.
        channels.append(min(255, max(0, int(level))))
    return hex_color_for_list(channels)
def list_for_hex_color(string):
    """Parse a six-digit hex color into a list of three channel integers.

    The first six characters of `string` are read as three two-digit
    hexadecimal numbers (no leading '#'), e.g. '132B43' -> [19, 43, 67].
    Raises ValueError if any pair is not valid hexadecimal.
    """
    channels = []
    for start in (0, 2, 4):
        pair = string[start:start + 2]
        channels.append(int(pair, 16))
    return channels
def hex_color_for_list(color):
    """Format a sequence of channel levels as a lowercase hex color string.

    Each element of `color` becomes two hex digits, so a three-element list
    such as [19, 43, 67] yields '132b43' (no leading '#'). Fractional
    levels are truncated to integers, and levels outside 0..255 are clamped
    so that every channel is exactly two digits.
    """
    # '%02x' rejects floats on Python 3, so convert to int before formatting.
    return ''.join('%02x' % min(255, max(0, int(c))) for c in color)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment