Last active
March 2, 2019 05:59
-
-
Save mrysav/4a8901f7c24d40f1304ee7eb97ca31a5 to your computer and use it in GitHub Desktop.
Generating Markov sentences from character dialog
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# frozen_string_literal: true | |
require 'rubygems' | |
require 'bundler/setup' | |
require 'markovite' | |
# Collects the dialog attributed to one character in a single book.
#
# The book is split into paragraphs on blank lines. A paragraph's quoted
# dialog is attributed to the character when the character's name appears in
# the narration, i.e. in the text that remains once every smart-quoted span
# has been removed from the paragraph.
#
# from_book: the filename (ending in .txt) of the Freddy book to read in
# character: the character's name to find dialog for (e.g. Charles or Freddy)
#
# Returns one String containing all attributed dialog separated by single
# spaces; the String is empty when nothing is attributed to the character.
def get_dialog(from_book, character)
  # The matcher below contains Unicode smart quotes, so decode the book as
  # UTF-8 explicitly instead of relying on the locale's default encoding.
  # CRLF line endings are normalized so blank-line splitting still works.
  book = File.read(from_book, encoding: 'UTF-8').gsub("\r\n", "\n")

  # One paragraph per blank-line-separated chunk, flattened onto a single
  # line so that a quotation wrapped across lines can still be matched.
  paragraphs = book.split("\n\n").map { |text| text.tr("\n", ' ') }

  # Matches any text between smart quotes, e.g. “Freddy the detective”
  dialog_matcher = /\u201c(.+?)\u201d/

  spoken = paragraphs.each_with_object([]) do |paragraph, collected|
    dialogs = paragraph.scan(dialog_matcher).flatten
    next if dialogs.empty?

    # Remove the quoted spans themselves (quotes included). Deleting the bare
    # dialog text instead could hit an identical substring in the narration
    # and corrupt the very name being searched for.
    narration = paragraph.gsub(dialog_matcher, '')

    # If the narration mentions the character, assume that character is the
    # one doing the talking in this paragraph.
    next unless narration.index(character)

    collected << dialogs.join(' ')
  end

  spoken.join(' ')
end
# Builds a Markov chain from a character's dialog across every book in a
# directory and asks the chain for one sentence.
#
# character: the character's name, passed through to get_dialog
# books_dir: directory containing the book files (defaults to 'books')
# length:    value handed to the chain's make_sentence_of_length
#            (defaults to 140)
#
# Returns whatever Markovite::Chain#make_sentence_of_length returns.
def generate_sentence_for(character, books_dir: 'books', length: 140)
  # Dir.entries also lists '.', '..' and sub-directories, so keep regular
  # files only. Sorting makes the corpus order independent of the order in
  # which the filesystem happens to list the directory.
  book_paths = Dir.entries(books_dir).sort
                  .map { |entry| File.join(books_dir, entry) }
                  .select { |path| File.file?(path) }

  corpus = book_paths.map { |path| get_dialog(path, character) }.join(' ')

  chain = Markovite::Chain.new
  chain << corpus
  chain.make_sentence_of_length length
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment