Skip to content

Instantly share code, notes, and snippets.

@frafra
Last active August 29, 2015 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save frafra/55817f9d0369a290761a to your computer and use it in GitHub Desktop.
Save frafra/55817f9d0369a290761a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
#
# frafra-display-levenshtein.py
#
# Copyright (C) 2014 - Francesco Frassinelli
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
from Levenshtein import editops, apply_edit
from random import shuffle
from math import factorial
from multiprocessing import Process, Array, cpu_count
from sys import maxint
words = 'lorem ipsum dolor sit amet'.split()
def clean(plain, words):
used = set([0])
for word in words:
index = 0
for letter in word:
index = plain.index(letter, index)
used.add(index)
index += 1
return ''.join([plain[i] for i in sorted(used)])
def discover(value, iterations, lowerbound=-1):
try:
for index in xrange(1, iterations):
shuffle(words)
current = words[0]
for word in words[1:]:
edit = editops(current, word)
customEdit = [('insert', s, d) for op, s, d in edit if op != 'delete']
current = apply_edit(customEdit, current, word)
current = clean(current, words)
if len(current) < len(value.value):
value.value = current[:]
print('%s (%d)' % (value.value, len(value.value)))
if len(value.value) <= lowerbound:
raise KeyboardInterrupt
except:
pass
processes = []
value = Array('c', ''.join(words))
for i in xrange(cpu_count()):
iterations = factorial(sum(map(len, words)))
lowerbound = len(set(''.join(words)))+sum(map(lambda s: len(s)-len(set(s)), words))
if iterations > maxint:
iterations = maxint
p = Process(target=discover, args=(value, iterations/cpu_count()+1, lowerbound))
p.start()
processes.append(p)
try:
for process in processes:
process.join()
except KeyboardInterrupt:
pass
print()
plain = value.value
print("Original size: %d" % sum(map(len, words)))
print("Compressed: %d" % len(plain), end='\n'*2)
for word in words:
index = 0
for letter in plain:
if index >= len(word):
print(letter, end='')
elif letter == word[index]:
print('\033[96m%s\033[0m' % letter, end='')
index += 1
else:
print(letter, end='')
print(' (%s)' % word)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment