Last active
July 6, 2017 22:23
-
-
Save hanskamin/2ce1277f829b465c42c48827694ea460 to your computer and use it in GitHub Desktop.
Training a Bigram Markov Chain with Song Lyrics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
# Hans Kamin
# Spring 2017
# Bigram Markov Chain Model
def train_markov_chain(lyrics):
    """Train a bigram Markov chain on a collection of song lyrics.

    Each song is tokenized on whitespace after its newline characters
    have been replaced by the literal token "<N>". Every song begins at
    the bigram (None, "<START>"), and the last bigram of every song is
    followed by the token "<END>".

    Args:
      - lyrics: a list of strings, where each string represents
        the lyrics of one song by an artist.

    Returns:
      A dict that maps a tuple of 2 words ("bigram") to a list of
      words that follow that bigram, representing the Markov
      chain trained on the lyrics. Followers are kept with
      repetition, in the order they were encountered.
    """
    start_bigram = (None, "<START>")

    # The start bigram is always present, even when no lyrics are given.
    chain = {start_bigram: []}

    for lyric in lyrics:
        # Replace newline characters with our tag so that line breaks
        # survive the whitespace split as ordinary tokens.
        words = lyric.replace('\n', ' <N> ').split()

        # Tuple representing the most recent (current) bigram.
        last_2 = start_bigram
        for word in words:
            # Record the word as one that follows the current bigram,
            # creating the follower list on first sight of the bigram.
            chain.setdefault(last_2, []).append(word)
            # Shift the current bigram to account for the newly added word.
            last_2 = (last_2[1], word)

        # Mark the end of this song after its final bigram. For an empty
        # song this lands directly on the start bigram.
        chain.setdefault(last_2, []).append("<END>")

    return chain
# Load the pickled lyrics object that we created earlier.
import pickle

# Open the file in a context manager so the handle is closed as soon as
# loading finishes instead of being left open until garbage collection.
# NOTE: unpickling can execute arbitrary code; only load files you created
# yourself or otherwise trust.
with open("lyrics.pkl", "rb") as lyrics_file:
    lyrics = pickle.load(lyrics_file)

# Train a Markov Chain over all of Logic's lyrics.
chain = train_markov_chain(lyrics)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment