Skip to content

Instantly share code, notes, and snippets.

@kjhenner
Created February 6, 2015 19:43
Show Gist options
  • Save kjhenner/a0983f113ae17fe80cf9 to your computer and use it in GitHub Desktop.
Save kjhenner/a0983f113ae17fe80cf9 to your computer and use it in GitHub Desktop.
require 'ruby_rhymes'
require 'treat'
require 'json'
include Treat::Core::DSL
# Builds the string key under which a bigram is stored in the Markov hash.
#
# @param bigram [Array] indexable pair; only elements 0 and 1 are used
# @return [String] the two elements joined by an underscore
def bigram_to_s(bigram)
  format('%s_%s', bigram[0], bigram[1])
end
# Replaces every newline and carriage-return character with a single space.
#
# @param string [String] text that may span several lines
# @return [String] a new string of the same length with no line breaks
def remove_linebreaks(string)
  string.tr("\n\r", ' ')
end
# Reads a text file, tokenizes it and returns the token values.
#
# The text is wrapped with `phrase` (presumably provided by the Treat DSL
# included at the top of the file) and tokenized with `do(:tokenize)`.
#
# @param filename [String] path of the text file to read
# @return [Array] the `value` of every element yielded by the tokenized phrase
# @raise [SystemCallError] if the file cannot be read
def load_tokens_from_text(filename)
  # File.read rather than Kernel#open: Kernel#open treats a path starting
  # with "|" as a command to run, which must never happen for a file name.
  contents = File.read(filename)
  text = phrase(remove_linebreaks(contents))
  text.do(:tokenize)
  text.collect { |token| token.value }
end
# Pairs every token with the token that follows it.
#
# @param tokens [Array] token values in text order
# @return [Array<Array>] one [token, next_token] pair per input token; the
#   final pair uses '' as its second element because nothing follows it
def get_bigrams_from_tokens(tokens)
  tokens.each_with_index.map do |token, index|
    following = tokens[index + 1]
    [token, following || '']
  end
end
# Builds the Markov transition table from a list of overlapping bigrams.
#
# Each bigram's key (see bigram_to_s) maps to the list of bigrams found two
# positions later, i.e. the next bigram that shares no token with it.
# The last two bigrams have no such successor, so nil is appended for them.
#
# @param bigrams [Array<Array>] bigrams in text order
# @return [Hash{String => Array}] key => successors (may contain nil)
def get_markov_hash_from_bigrams(bigrams)
  bigrams.each_with_index.each_with_object({}) do |(pair, position), table|
    key = bigram_to_s(pair)
    (table[key] ||= []) << bigrams[position + 2]
  end
end
# Writes the Markov hash as JSON to "<name>.json", replacing any existing file.
#
# @param hash [Hash] transition table to store
# @param name [String] output path without the ".json" extension
# @return [Integer] number of bytes written
def serialize_markov_hash(hash, name)
  File.write("#{name}.json", hash.to_json)
end
# Loads a Markov hash previously stored as "<name>.json".
#
# @param name [String] input path without the ".json" extension
# @return [Object] the parsed JSON document (a Hash for files written by
#   serialize_markov_hash)
def load_hash_from_json(name)
  json_text = File.read("#{name}.json")
  JSON.parse(json_text)
end
# Generates text by walking the Markov hash for n steps.
#
# Every step appends "<first> <second> " for the current bigram, then moves
# to a random successor of that bigram. When there is no usable successor
# (nil end-of-text marker, missing key) the walk restarts from a random
# starting bigram.
#
# @param hash [Hash{String => Array}] transition table keyed by bigram_to_s
# @param n [Integer] number of bigrams to emit
# @return [String] the generated text; every bigram is followed by a space
# @raise [ArgumentError] if the hash offers no bigram to start from
def generate(hash, n)
  # Candidate starts: the first successor stored under each key. nil entries
  # (end-of-text markers, nil values) are dropped so a start is always a
  # real bigram instead of crashing on nil[0].
  starts = hash.values.map { |successors| (successors || []).first }.compact
  raise ArgumentError, 'markov hash contains no bigrams' if starts.empty?

  bigram = starts.sample
  pieces = []
  n.times do
    pieces << "#{bigram[0]} #{bigram[1]} "
    # A missing key is treated like an empty successor list.
    successors = hash[bigram_to_s(bigram)] || []
    bigram = successors.sample || starts.sample
  end
  pieces.join
end
# Generates one line of n bigrams by walking the Markov hash.
#
# The walk continues from last_bigram when given, otherwise from a random
# starting bigram. When rhyme_key is given, a completed line that does not
# satisfy is_rhyme(line, rhyme_key) loses its final bigram and the walk
# goes on from that rejected bigram until a rhyming ending is found.
# NOTE: with a rhyme_key that no reachable ending can satisfy this loop
# does not terminate.
#
# @param hash [Hash{String => Array}] transition table keyed by bigram_to_s
# @param n [Integer] number of bigrams in the line
# @param rhyme_key [Array, nil] the line to rhyme with (it is passed
#   to is_rhyme as-is), or nil for no rhyme constraint
# @param last_bigram [Array, nil] bigram to continue from
# @return [Array<Array>] the n bigrams of the line
# @raise [ArgumentError] if the hash offers no bigram to start from
def generate_line(hash, n, rhyme_key = nil, last_bigram = nil)
  line = []
  return line if n <= 0

  # Candidate restarts: the first successor stored under each key, without
  # nil end-of-text markers, so nil is never pushed into the line.
  starts = hash.values.map { |successors| (successors || []).first }.compact
  raise ArgumentError, 'markov hash contains no bigrams' if starts.empty?

  bigram = last_bigram || starts.sample
  while line.size < n
    # A missing key is treated like an empty successor list.
    successors = hash[bigram_to_s(bigram)] || []
    bigram = successors.sample || starts.sample
    line << bigram
    line.pop if line.size == n && rhyme_key && !is_rhyme(line, rhyme_key)
  end
  line
end
# Tells whether two lines rhyme without ending in the same word.
#
# @param a [Array] a line (array of bigrams)
# @param b [Array] the line to compare against
# @return [Boolean] true when both lines share a rhyme key
#   (bigram_rhyme_key) and is_identity reports different final words
def is_rhyme(a, b)
  same_sound = bigram_rhyme_key(a) == bigram_rhyme_key(b)
  same_sound && !is_identity(a, b)
end
# Tells whether two lines end in the same word, as reported by flatten_line.
#
# @param a [Array] a line (array of bigrams)
# @param b [Array] another line
# @return [Boolean] true when flatten_line gives equal results for both
def is_identity(a, b)
  ending_a = flatten_line(a)
  ending_b = flatten_line(b)
  ending_a == ending_b
end
# Returns the lower-cased last word of a line.
#
# Tokens that contain any punctuation character are skipped, so a trailing
# "." or "," does not count as the last word.
#
# @param l [Array] a line (array of bigrams); it is flattened before use
# @return [String] the last remaining token, lower-cased, or '' when the
#   line holds no usable token (the original raised NoMethodError there)
def flatten_line(l)
  last_word = l.flatten.reject { |t| t =~ /[[:punct:]]/ }.last
  last_word ? last_word.downcase : ''
end
# Computes the rhyme key of a line.
#
# The line's tokens are joined with spaces, every punctuation character is
# removed, and the result is handed to String#to_phrase (presumably added
# by the ruby_rhymes gem required at the top of the file).
#
# @param line [Array] a line (array of bigrams)
# @return [Object] whatever `rhyme_key` returns for the cleaned phrase
def bigram_rhyme_key(line)
  words = line.flatten.join(' ')
  cleaned = words.gsub(/[[:punct:]]/, '')
  cleaned.to_phrase.rhyme_key
end
# Generates n lines that follow a rhyme scheme.
#
# The scheme is a string of letters, one per line, repeated as needed; a
# line is asked to rhyme with the most recent earlier line that has the
# same letter. Every line after the first continues the Markov walk from
# the last bigram of the line before it.
#
# @param hash [Hash{String => Array}] transition table for generate_line
# @param n [Integer] number of lines; 0 or less yields no lines
# @param line_length [Integer] bigrams per line
# @param scheme [String] rhyme scheme letters
# @return [Array<Array>] the generated lines, each an array of bigrams
def generate_lines(hash, n, line_length, scheme = "abbacddc")
  return [] if n <= 0

  lines = [generate_line(hash, line_length)]
  # Most recent line generated for each scheme letter.
  scheme_hash = { scheme[0] => lines[0] }
  while lines.size < n
    letter = rhyme_in_scheme(lines.size, scheme)
    # scheme_hash[letter] is nil the first time a letter occurs, which
    # means "no rhyme constraint" to generate_line.
    next_line = generate_line(hash, line_length, scheme_hash[letter], lines[-1][-1])
    scheme_hash[letter] = next_line
    lines << next_line
  end
  lines
end
# Looks up the scheme letter for line number i, wrapping around when i
# runs past the end of the scheme.
#
# @param i [Integer] zero-based line number
# @param scheme [String] rhyme scheme letters
# @return [String] the scheme element at position i modulo the scheme size
def rhyme_in_scheme(i, scheme)
  position = i.modulo(scheme.length)
  scheme[position]
end
# Merges the successor lists of hash_two into hash_one, in place.
#
# For keys present in both hashes the successors of hash_two are appended
# to those of hash_one; keys only present in hash_two are added.
#
# @param hash_one [Hash{String => Array}] table that receives the entries
# @param hash_two [Hash{String => Array}] table to merge in (not modified)
# @return [Hash{String => Array}] hash_one
def merge_hashes(hash_one, hash_two)
  # Iterate over key/value pairs: iterating over `keys` yields no value, so
  # the successors were always nil.
  hash_two.each do |key, successors|
    # Appending to a fresh array for new keys keeps hash_one from sharing
    # (and later mutating) the arrays owned by hash_two.
    (hash_one[key] ||= []).concat(successors)
  end
  hash_one
end
# One-off model build, left disabled. It tokenizes mobydick.txt and stores
# the transition table as mobydick_hash.json:
#   tokens = load_tokens_from_text('mobydick.txt')
#   bigrams = get_bigrams_from_tokens(tokens)
#   hash = get_markov_hash_from_bigrams(bigrams)
#   serialize_markov_hash(hash, 'mobydick_hash')

# Load the stored table and print 16 lines of three bigrams each.
markov_hash = load_hash_from_json("mobydick_hash")
poem = generate_lines(markov_hash, 16, 3)
poem.each { |line| puts line.flatten.join(' ') }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment