Skip to content

Instantly share code, notes, and snippets.

Created October 25, 2013 00:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save StephenKrewson/7147625 to your computer and use it in GitHub Desktop.
Save StephenKrewson/7147625 to your computer and use it in GitHub Desktop.
JPEG Semantic Glitcher
import time
start_time = time.clock()
# Command-line arguments (sys.argv[1..7]):
#   inputDir | print histogram? | min word length | delete? | char shift | Oulipo swap? | N + ? offset
import sys, re, string, random # Import required modules
from collections import defaultdict
from itertools import chain
from os import walk, path
# All seven positional arguments are required; fail with a usage line
# instead of a bare IndexError when some are missing.
if len(sys.argv) < 8:
    sys.exit('Usage: {0} inputDir print? min_word delete? shift Oulipo? N+?'.format(sys.argv[0]))

hist = int(sys.argv[2]) # Parameter for printing histogram (1 = print)
word_min = int(sys.argv[3]) # Parameter for ignoring short words (words must be longer than this)
delete = int(sys.argv[4]) # Parameter for deleting semantic content (1 = blank words out)
shift = int(sys.argv[5]) # Parameter for shifting chars
oulipo = int(sys.argv[6]) # Parameter for Oulipean word swap (1 = swap)
shift2 = int(sys.argv[7]) # Parameter for N + ?

# Word list: whitespace-separated, lower-cased. The file is closed as soon
# as it has been read.
with open('dict2.txt', 'r') as lexicon_file:
    lexicon = set(lexicon_file.read().lower().split())
def indexChars(input_string): # Function 1
    '''Gets freqs of all chars, indexes alphabetical chars

    input_string: the sequence of characters to scan.

    Returns a tuple (d, l):
      d -- dict mapping each distinct char to the number of times it occurs
      l -- list of (char, position) tuples, in order of appearance, for
           every char matching [a-zA-Z]
    '''
    d = {} # Stores char types and freqs
    l = [] # Indexes all [a-zA-Z] chars
    is_letter = re.compile('[a-zA-Z]').match # Compile once, not per char
    for i, char in enumerate(input_string):
        try:
            if is_letter(char):
                l.append((char, i))
            d[char] = d.get(char, 0) + 1
        except UnicodeDecodeError:
            # A char that cannot be decoded is skipped entirely
            continue
    return (d, l)
def printHistogram(d): # Function 2
    '''Prints out chars by frequency

    d: dict mapping char -> count (as returned by indexChars).

    One line is printed per char, least frequent first:
    the char, its count, a bar of that char scaled so the most frequent
    char gets 50 repetitions, and the running total of counts so far.
    Nothing is printed for an empty dict.
    '''
    if not d: # No data: ordered[-1] below would raise IndexError
        return
    ordered = sorted(d, key=d.get)
    max_val = d[ordered[-1]] # Normalize max value to 50 chars
    total = 0 # Running sum of all counts printed so far
    for key in ordered:
        count = int(d[key])
        total += count
        # Floor division keeps the bar length an integer on every Python version
        bar = key * (count * 50 // max_val)
        print('{0} {1} {2} {3}'.format(key, d[key], bar, total))
def findWords(l, lexicon): # Function 3
    '''Finds English words in alphabetical chars

    l:       list of (char, position) tuples, e.g. the second item returned
             by indexChars.
    lexicon: set of lower-case words to look for.

    Every run of consecutive tuples in l whose chars (lower-cased) spell a
    lexicon word longer than the module-level word_min is collected.
    When the module-level oulipo flag is 1, the chars of each found word are
    replaced by those of the word shift2 places further along the sorted
    list of same-length lexicon words (wrapping around); the original
    upper/lower case pattern and positions are kept.

    Returns a list of words, each a list of (char, position) tuples.
    '''
    if not l or not lexicon: # max() below would raise on an empty lexicon
        return []
    max_len = max(map(len, lexicon)) # Longest word in the set of words
    same_length_cache = {} # word length -> sorted lexicon words of that length
    words_found = [] # list of words found, starts empty
    for i, _ in enumerate(l): # for each possible starting position in the corpus
        chunk = l[i:i + max_len] # chunk that is the size of the longest word
        test = '' # Candidate word, grown one letter per step
        # Prefix lengths run from 1 up to and including len(chunk), so the
        # full chunk (a longest word, or a word ending on the last letter)
        # is tested as well.
        for j in range(1, len(chunk) + 1):
            test += chunk[j - 1][0].lower() # Build up word
            if test in lexicon and len(test) > word_min:
                word = chunk[:j]
                if oulipo == 1: # Option for N + ? glitch
                    size = len(test)
                    if size not in same_length_cache:
                        same_length_cache[size] = sorted(
                            x for x in lexicon if len(x) == size)
                    same_length = same_length_cache[size]
                    new_word = same_length[
                        (same_length.index(test) + shift2) % len(same_length)]
                    # Carry the original capitalisation over to the new word
                    word = [
                        (new_word[y].upper() if char.isupper() else new_word[y], pos)
                        for y, (char, pos) in enumerate(word)
                    ]
                words_found.append(word) # Add list of tuples to master word list
    return words_found # Returns array of valid words and their indexes
def glitchFile(filename):
    '''Transforms found words in JPG and writes a modified file

    filename: path of the file to glitch.

    The file is read in binary mode and scanned with indexChars and
    findWords. Every char of every found word is then overwritten in place:
    with a space when the module-level delete flag is 1, otherwise with the
    letter shift places further along string.ascii_letters (wrapping at 52).
    The result is written next to the input with 'MOD' inserted before the
    extension (e.g. photo.jpg -> photoMOD.jpg); the input file is untouched.
    '''
    with open(filename, 'rb') as f: # Open JPEG and read it in binary mode
        input_string = f.read()
    results = indexChars(input_string) # Call indexChars function
    if hist == 1:
        printHistogram(results[0]) # Optional print histogram call
    words_found = findWords(results[1], lexicon)

    # Mutable copy of the input: single-byte overwrites are O(1) here,
    # whereas re-slicing an immutable string copies the whole file per char.
    new_bytes = bytearray(input_string)
    letters = string.ascii_letters
    for word in words_found:
        for char, pos in word:
            if delete == 1:
                replace = ' ' # Semantic content 'whited' out
            elif char in letters:
                replace = letters[(letters.index(char) + shift) % 52]
            else:
                # An Oulipo-swapped word may contain a non-letter char
                # (e.g. an apostrophe); write it through unshifted.
                replace = char
            new_bytes[pos] = ord(replace)

    # splitext only splits at the final extension, so dots in directory
    # names and files without an extension are handled.
    root, ext = path.splitext(filename)
    with open(root + 'MOD' + ext, 'wb') as target:
        target.write(new_bytes)
# Walk the input directory tree and glitch every original file. Files whose
# base name already ends in 'MOD' are outputs of a previous run and are
# skipped, whatever the length of their extension.
root_dir = path.abspath(sys.argv[1])
file_count = 0 # Every file seen in the tree, across all subdirectories
for dirpath, dirnames, filenames in walk(root_dir):
    for name in filenames:
        file_count += 1
        if not path.splitext(name)[0].endswith('MOD'):
            glitchFile(path.join(dirpath, name)) # Call glitch function on original files

# Run summary. The directory and file count describe the whole walk rather
# than only the last subdirectory visited.
# NOTE: start_time is taken with time.clock() at the top of the file;
# time.clock() is only available on Python versions before 3.8.
summary = ('Program:\t{0}\nDuration (s):\t{1}\nDirectory:\t{2}\nNo. of files:\t{3}\n'
           'Histogram?\t{4}\nWord length >\t{5}\nDelete words?\t{6}\nChars shifted:\t{7}\n'
           'Oulipo swap?\t{8}\nIf yes, N + ?:\t{9}')
print(summary.format(
    path.basename(__file__), time.clock() - start_time, root_dir, file_count,
    hist, word_min, delete, shift, oulipo, shift2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment