Skip to content

Instantly share code, notes, and snippets.

@suisea
Forked from d-baker/pseudomarkov.py
Created November 3, 2015 14:59
Show Gist options
  • Save suisea/c88615b22caac8c7b041 to your computer and use it in GitHub Desktop.
Save suisea/c88615b22caac8c7b041 to your computer and use it in GitHub Desktop.
Word chainer made for NaNoGenMo 2014. Not a Markov chain.
import random
class PseudoMarkov(object):
    """Word chainer that strings words together from observed successors.

    ``associations`` maps a word to a plain list of words that were seen
    directly after it in the corpus.  Text is generated by repeatedly picking
    a random entry from the list belonging to the most recently emitted word.
    """

    def __init__(self):
        # word -> list of words observed immediately after it
        self.associations = {}

    def gen(self, filepath, max_words=1000):
        """Learn successors from a corpus file and return one generated line.

        Every line of the file is treated as one sentence and fed to
        ``breakup``.  A random non-blank sentence then supplies the seed (its
        first word), and successors are chained until the last word has no
        recorded successor or ``max_words`` is reached.

        Args:
            filepath: path of the corpus file, one sentence per line.
            max_words: upper bound on the number of words in the result.  The
                seed and one chained word are always emitted, so values below
                2 behave like 2.  The bound exists because a table made only
                of cycles (e.g. a corpus of just ``a b a``) would otherwise
                never terminate.

        Returns:
            The generated words joined by single spaces, or ``""`` when the
            corpus contains no non-blank line.
        """
        with open(filepath) as fp:
            tokenised = [line.split() for line in fp]

        for words in tokenised:
            self.breakup(words)

        # Blank lines carry no seed word; picking one used to raise.
        candidates = [words for words in tokenised if words]
        if not candidates:
            return ""

        # NOTE(review): the original seed-index loop could never execute, so
        # the seed was always the first word with an empty prefix.  That
        # effective behaviour is kept here.
        seedword = random.choice(candidates)[0]

        # chain() returns "<word> "; strip the padding and join once at the
        # end instead of re-splitting a growing string on every step.
        # NOTE(review): the first step is unconditional, so a seed without
        # successors is emitted twice (chain() falls back to the seed itself).
        generated = [seedword, self.chain(seedword).strip()]
        while len(generated) < max_words and generated[-1] in self.associations:
            generated.append(self.chain(generated[-1]).strip())

        return " ".join(generated)

    def breakup(self, sentence):
        """Record the successor of every adjacent word pair in ``sentence``.

        The pairs of each suffix of the sentence are visited in turn, which is
        the same visiting order as the earlier recursive implementation, so
        the resulting table is unchanged.  Iterating avoids hitting the
        recursion limit on very long lines.

        Args:
            sentence: list of words (already split) making up one sentence.
        """
        last = len(sentence) - 1
        for start in range(last):
            for i in range(start, last):
                word = sentence[i]
                successor = sentence[i + 1]
                if word not in self.associations:
                    # First sighting of the word: successor kept in its
                    # original case.
                    self.associations[word] = [successor]
                    continue
                followers = self.associations[word]
                # NOTE(review): membership is tested on the original case but
                # the lower-cased form is stored, so a successor containing
                # upper-case letters is appended again on every visit.
                if successor not in followers:
                    followers.append(successor.lower())

    def chain(self, seedword):
        """Return a random recorded successor of ``seedword`` plus a space.

        Falls back to ``seedword`` itself (plus a space) when the word has no
        entry in ``associations``.
        """
        if seedword in self.associations:
            return random.choice(self.associations[seedword]) + " "
        return seedword + " "
if __name__ == "__main__":
    # Script entry point: learn from corpus.txt in the working directory and
    # print one generated line.
    chainer = PseudoMarkov()
    # Parenthesised call form is valid on Python 3 and, with a single
    # argument, prints the same text on Python 2.
    print(chainer.gen("corpus.txt"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment