Last active
March 30, 2016 03:42
-
-
Save coblezc/c1f8618e6a9fcd15b1ef55f06a6795c2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# cribbed from Allison Parrish and Ross Goodwin | |
# https://gist.github.com/aparrish/ea3911c31cec8c858bd0/revisions | |
# https://gist.github.com/rossgoodwin/d45cba970add12c6190d | |
import random | |
from string import punctuation | |
# strip first ~50 lines of metadata | |
def not_with_semicolon(line):
    """Return True when *line* should be kept as a dictionary entry.

    A line is rejected when it is empty or when its first character is an
    ASCII punctuation mark.  Because ``;`` is itself punctuation, this also
    rejects the ``;;;`` comment lines at the top of the dictionary file.
    """
    # Test for emptiness first: indexing ``line[0]`` on a blank line would
    # raise IndexError.
    return bool(line) and line[0] not in punctuation
# load and clean cmu dictionary
with open('cmudict-0.7b') as infile:
    raw_entries = infile.read().strip().split('\n')

# Keep real entries only: skip blank lines, comment/punctuation lines, and
# any line containing '(' (alternate pronunciations).
lines = [entry for entry in raw_entries
         if entry and not_with_semicolon(entry) and '(' not in entry]

# put the words in the dictionary: lowercased word -> list of phonemes
cmu_dict = dict()
for entry in lines:
    # Split on any run of whitespace.  Splitting on a single ' ' leaves an
    # empty-string "phoneme" at the front of the list when the word and its
    # phonemes are separated by more than one space, and keeps a trailing
    # '\r' on files with CRLF line endings.
    fields = entry.split()
    if len(fields) < 2:
        # a word with no phonemes carries no usable information
        continue
    cmu_dict[fields[0].lower()] = fields[1:]
# load markov'd file
# (alternate input used during development:
#  /Users/user/Copy/itp/rwet/wk4/hw/markov-rhyme/drake-rhymes.txt)
# The ``with`` block closes the file handle once the lines are read.
with open("/Users/user/Copy/itp/rwet/wk4/hw/markov-rhyme/drake-lyrics.txt") as lyrics_file:
    lines = [line.strip() for line in lyrics_file if line.strip()]

# get phones and lines for last words
# Each entry of last_words is [phonemes_of_last_word, full_line].
last_words = list()
for line in lines:
    # every line is non-empty after stripping, so there is always a last word
    last_word = line.split()[-1].strip('.,?!;:')  # drop trailing punctuation
    last_word_lower = last_word.lower()
    # keep the line only if its last word is in the cmu dictionary
    # (ie no slang, abbreviations, etc)
    if last_word_lower in cmu_dict:
        last_words.append([cmu_dict[last_word_lower], line])
# generate alliterative lines | |
def _count_shared_leading(phones_a, phones_b):
    """Return how many phonemes match, in order, from the start of both lists."""
    shared = 0
    for p1, p2 in zip(phones_a, phones_b):
        if p1 != p2:
            break
        shared += 1
    return shared


def get_alliteration_couplet(line_db, min_shared=2):
    """Pick two different lines whose phoneme lists start the same way.

    line_db    -- sequence of ``[phonemes, line_text]`` entries.
    min_shared -- how many leading phonemes must be identical for two
                  entries to count as a match (default 2, the original
                  ``len(matching_phons) > 1`` rule).

    Returns ``[first_line_text, second_line_text]``.
    Raises IndexError when no two distinct lines in *line_db* match (this
    includes an empty *line_db*).
    """
    # Try the starting lines in random order so the result stays random but
    # a starting line without any partner does not end the search.
    starters = list(line_db)
    random.shuffle(starters)
    for first in starters:
        # The match count is computed fresh for every candidate, and lines
        # with the same text as the starting line are excluded so the
        # couplet never repeats one line twice.
        partners = [
            entry[1] for entry in line_db
            if entry[1] != first[1]
            and _count_shared_leading(first[0], entry[0]) >= min_shared
        ]
        if partners:
            return [first[1], random.choice(partners)]
    raise IndexError('no alliterative couplet found in line_db')
# generate chorus: two independently drawn couplets, printed as four lines
couplet1 = get_alliteration_couplet(last_words)
couplet2 = get_alliteration_couplet(last_words)

verse_line_one, verse_line_two = couplet1[0], couplet1[1]
verse_line_three, verse_line_four = couplet2[0], couplet2[1]

for verse_line in (verse_line_one, verse_line_two,
                   verse_line_three, verse_line_four):
    print(verse_line)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# cribbed from Ross Goodwin | |
# https://gist.github.com/rossgoodwin/d45cba970add12c6190d | |
from string import punctuation | |
# strip first ~50 lines of metadata | |
def not_with_semicolon(line):
    """Return True when *line* should be kept as a dictionary entry.

    A line is rejected when it is empty or when its first character is an
    ASCII punctuation mark.  Because ``;`` is itself punctuation, this also
    rejects the ``;;;`` comment lines at the top of the dictionary file.
    """
    # Test for emptiness first: indexing ``line[0]`` on a blank line would
    # raise IndexError.
    return bool(line) and line[0] not in punctuation
with open('cmudict-0.7b') as infile:
    raw_entries = infile.read().strip().split('\n')

# Keep real entries only: skip blank lines, comment/punctuation lines, and
# any line containing '(' (alternate pronunciations).
lines = [entry for entry in raw_entries
         if entry and not_with_semicolon(entry) and '(' not in entry]

# put the words in the dictionary: lowercased word -> list of phonemes
cmu_dict = dict()
for entry in lines:
    # Split on any run of whitespace.  Splitting on a single ' ' leaves an
    # empty-string "phoneme" at the front of the list when the word and its
    # phonemes are separated by more than one space, and keeps a trailing
    # '\r' on files with CRLF line endings.
    fields = entry.split()
    if len(fields) < 2:
        # a word with no phonemes carries no usable information
        continue
    cmu_dict[fields[0].lower()] = fields[1:]
def rhymes(word1, word2, min_shared=3, pron_dict=None):
    """Return True when two words begin with the same run of phonemes.

    Phonemes are compared from the START of each word, in order, and the
    comparison stops at the first difference.  A phoneme only matches when
    it is identical including its stress digit.  (Reversing both lists
    with ``[::-1]`` before zipping would compare word endings instead.)

    word1, word2 -- keys to look up in the pronunciation dictionary.
    min_shared   -- minimum number of leading phonemes that must match.
                    The default of 3 is the original ``> 2`` rule; lower
                    values accept more pairs.  At least one phoneme must
                    always match.
    pron_dict    -- mapping of word -> list of phonemes; defaults to the
                    module-level ``cmu_dict``.

    Raises Exception('Word not in dictionary.') if either word is missing.
    """
    if pron_dict is None:
        pron_dict = cmu_dict
    try:
        phon1 = pron_dict[word1]
        phon2 = pron_dict[word2]
    except KeyError:
        raise Exception('Word not in dictionary.')

    shared = 0
    for p1, p2 in zip(phon1, phon2):
        if p1 != p2:
            break
        shared += 1
    # max(..., 1) keeps the original guarantee that zero matches is False
    return shared >= max(min_shared, 1)
def rhyme_finder(word):
    """Return every dictionary word, other than *word*, that ``rhymes`` accepts.

    Candidates come from iterating ``cmu_dict``; each one is tested with
    ``rhymes(candidate, word)``.
    """
    return [candidate for candidate in cmu_dict
            if candidate != word and rhymes(candidate, word)]
# demo: show the matches found for one word, then how many there are
z = rhyme_finder('alligator')
print(z)
print(len(z))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# cribbed from Ross Goodwin | |
# https://gist.github.com/rossgoodwin/d45cba970add12c6190d | |
from string import punctuation | |
# strip first ~50 lines of metadata | |
def not_with_semicolon(line):
    """Return True when *line* should be kept as a dictionary entry.

    A line is rejected when it is empty or when its first character is an
    ASCII punctuation mark.  Because ``;`` is itself punctuation, this also
    rejects the ``;;;`` comment lines at the top of the dictionary file.
    """
    # Test for emptiness first: indexing ``line[0]`` on a blank line would
    # raise IndexError.
    return bool(line) and line[0] not in punctuation
with open('cmudict-0.7b') as infile:
    raw_entries = infile.read().strip().split('\n')

# Keep real entries only: skip blank lines, comment/punctuation lines, and
# any line containing '(' (alternate pronunciations).
lines = [entry for entry in raw_entries
         if entry and not_with_semicolon(entry) and '(' not in entry]

# put the words in the dictionary: lowercased word -> list of phonemes
cmu_dict = dict()
for entry in lines:
    # Split on any run of whitespace.  Splitting on a single ' ' leaves an
    # empty-string "phoneme" at the front of the list when the word and its
    # phonemes are separated by more than one space, and keeps a trailing
    # '\r' on files with CRLF line endings.
    fields = entry.split()
    if len(fields) < 2:
        # a word with no phonemes carries no usable information
        continue
    cmu_dict[fields[0].lower()] = fields[1:]
def rhymes(word1, word2, pron_dict=None):
    """Return True when two words share an ending that starts on a vowel.

    Phonemes are compared from the END of each word backwards, stopping at
    the first difference.  The pair matches when the earliest phoneme of
    that shared ending carries a stress digit (``0``, ``1`` or ``2``).

    word1, word2 -- keys to look up in the pronunciation dictionary.
    pron_dict    -- mapping of word -> list of phonemes; defaults to the
                    module-level ``cmu_dict``.

    Raises Exception('Word not in dictionary.') if either word is missing.
    """
    if pron_dict is None:
        pron_dict = cmu_dict
    try:
        # Drop empty-string tokens: indexing ``''[-1]`` below would raise
        # IndexError if one ended up as the boundary phoneme.
        phon1 = [p for p in pron_dict[word1] if p]
        phon2 = [p for p in pron_dict[word2] if p]
    except KeyError:
        raise Exception('Word not in dictionary.')

    # boundary = the last phoneme that still matched while walking backwards
    boundary = None
    for p1, p2 in zip(reversed(phon1), reversed(phon2)):
        if p1 != p2:
            break
        boundary = p1
    if boundary is None:
        return False
    # the final character of the boundary phoneme is its stress digit, if any
    return boundary[-1] in ('0', '1', '2')
def rhyme_finder(word):
    """Return every dictionary word, other than *word*, that ``rhymes`` accepts.

    Candidates come from iterating ``cmu_dict``; each one is tested with
    ``rhymes(candidate, word)``.
    """
    return [candidate for candidate in cmu_dict
            if candidate != word and rhymes(candidate, word)]
# demo: show the matches found for one word, then how many there are
z = rhyme_finder('alligator')
print(z)
print(len(z))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# cribbed from Ross Goodwin | |
# https://gist.github.com/rossgoodwin/d45cba970add12c6190d | |
from string import punctuation | |
# strip first ~50 lines of metadata | |
def not_with_semicolon(line):
    """Return True when *line* should be kept as a dictionary entry.

    A line is rejected when it is empty or when its first character is an
    ASCII punctuation mark.  Because ``;`` is itself punctuation, this also
    rejects the ``;;;`` comment lines at the top of the dictionary file.
    """
    # Test for emptiness first: indexing ``line[0]`` on a blank line would
    # raise IndexError.
    return bool(line) and line[0] not in punctuation
with open('cmudict-0.7b') as infile:
    raw_entries = infile.read().strip().split('\n')

# Keep real entries only: skip blank lines, comment/punctuation lines, and
# any line containing '(' (alternate pronunciations).
lines = [entry for entry in raw_entries
         if entry and not_with_semicolon(entry) and '(' not in entry]

# put the words in the dictionary: lowercased word -> list of phonemes
cmu_dict = dict()
for entry in lines:
    # Split on any run of whitespace.  Splitting on a single ' ' leaves an
    # empty-string "phoneme" at the front of the list when the word and its
    # phonemes are separated by more than one space, and keeps a trailing
    # '\r' on files with CRLF line endings.
    fields = entry.split()
    if len(fields) < 2:
        # a word with no phonemes carries no usable information
        continue
    cmu_dict[fields[0].lower()] = fields[1:]
def rhymes(word1, word2, pron_dict=None):
    """Return True when two words share an ending that starts on an unstressed vowel.

    Phonemes are compared from the END of each word backwards, stopping at
    the first difference.  The pair matches when the earliest phoneme of
    that shared ending has the stress digit ``0``.

    word1, word2 -- keys to look up in the pronunciation dictionary.
    pron_dict    -- mapping of word -> list of phonemes; defaults to the
                    module-level ``cmu_dict``.

    Raises Exception('Word not in dictionary.') if either word is missing.
    """
    if pron_dict is None:
        pron_dict = cmu_dict
    try:
        # Drop empty-string tokens: indexing ``''[-1]`` below would raise
        # IndexError if one ended up as the boundary phoneme.
        phon1 = [p for p in pron_dict[word1] if p]
        phon2 = [p for p in pron_dict[word2] if p]
    except KeyError:
        raise Exception('Word not in dictionary.')

    # boundary = the last phoneme that still matched while walking backwards
    boundary = None
    for p1, p2 in zip(reversed(phon1), reversed(phon2)):
        if p1 != p2:
            break
        boundary = p1
    if boundary is None:
        return False
    # boundary[-1] is the stress number of the boundary phoneme, if it has one
    return boundary[-1] in ('0',)
def rhyme_finder(word):
    """Return every dictionary word, other than *word*, that ``rhymes`` accepts.

    Candidates come from iterating ``cmu_dict``; each one is tested with
    ``rhymes(candidate, word)``.
    """
    return [candidate for candidate in cmu_dict
            if candidate != word and rhymes(candidate, word)]
# demo: show the matches found for one word, then how many there are
z = rhyme_finder('hammer')
print(z)
print(len(z))
# NOTE: the result of this call is discarded; its only visible effect is
# the exception rhymes() raises when a word is not in the dictionary.
rhymes('abbeville', 'turbeville')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment