Skip to content

Instantly share code, notes, and snippets.

@Zetaphor
Created February 17, 2024 07:11
Show Gist options
  • Save Zetaphor/778e39638763bdecd5bc6fe96343ae11 to your computer and use it in GitHub Desktop.
Save Zetaphor/778e39638763bdecd5bc6fe96343ae11 to your computer and use it in GitHub Desktop.
ChatGPT authored Markov Chain in Python
"""
Read a collection of JSON files from a directory as the inputs to a Markov Chain.
Also adds the user inputs to the dictionary as the conversation progresses.
Assumes it's using the formatted data from the NeoLLaMder project.
Written by ChatGPT
"""
import json
import random
import re
import os
class MarkovChatbot:
    """Word-level Markov chain chatbot that keeps learning while it talks.

    The chain maps a state (a tuple of ``order`` consecutive lower-cased
    words) to the list of words observed right after that state. Successors
    are stored with repetition, so ``random.choice`` picks them in proportion
    to how often they were seen.
    """

    # Single tokenizer shared by training and generation so both always
    # split text the same way; compiled once instead of on every call.
    _WORD_RE = re.compile(r'\b\w+\b')

    def __init__(self, order=2):
        """Create an empty chatbot.

        Args:
            order: Number of words that make up one state of the chain.

        Raises:
            ValueError: If ``order`` is smaller than 1. A zero/negative order
                makes the ``words[-order:]`` slices select the wrong words.
        """
        if order < 1:
            raise ValueError("order must be a positive integer")
        self.order = order
        self.markov_chain = {}
        self.default_response = "I'm not sure how to respond to that."
        self.inputs_processed = 0  # Number of records learned from JSON files

    def _tokenize(self, text):
        """Return the lower-cased words found in ``text``."""
        return self._WORD_RE.findall(text.lower())

    def _learn_words(self, words):
        """Record every (state -> next word) transition in a word sequence."""
        order = self.order
        for i in range(len(words) - order):
            state = tuple(words[i:i + order])
            self.markov_chain.setdefault(state, []).append(words[i + order])

    def add_data_from_json(self, file_path):
        """Add data to the Markov chain from a JSON file.

        The file must hold a JSON list. Every element that is an object with
        a string ``content`` field is learned and counted; anything else
        (non-objects, missing or non-string ``content``) is skipped instead
        of aborting the whole load.

        Args:
            file_path: Path of the UTF-8 encoded JSON file to read.
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        if not isinstance(data, list):
            return  # Not the expected list-of-records layout
        for item in data:
            if not isinstance(item, dict):
                continue
            content = item.get('content')
            if not isinstance(content, str):
                continue
            self._process_line(content.strip())
            self.inputs_processed += 1

    def add_data_from_directory(self, directory_path):
        """Add data to the Markov chain from all JSON files in a directory.

        Only regular files whose name ends in ``.json`` are read (the
        directory is not searched recursively). Files are visited in sorted
        order so the resulting chain does not depend on the OS listing order.

        Args:
            directory_path: Directory containing the JSON files.
        """
        for filename in sorted(os.listdir(directory_path)):
            if not filename.endswith('.json'):
                continue
            file_path = os.path.join(directory_path, filename)
            if os.path.isfile(file_path):
                self.add_data_from_json(file_path)

    def _process_line(self, line):
        """Process a single line of text for the Markov chain."""
        self._learn_words(self._tokenize(line))

    def generate_response(self, input_text, max_words=20):
        """Generate a response using the Markov chain.

        The user's text is learned first. The reply is then seeded with the
        last ``order`` words of the input and extended one random successor
        at a time. The finished reply is fed back into the chain as well.

        Args:
            input_text: The user's message.
            max_words: Maximum number of words appended after the seed state.

        Returns:
            The generated reply, or ``default_response`` when the seed state
            has never been seen (including empty or too-short input).
        """
        words = self._tokenize(input_text)
        self._learn_words(words)  # Learn from user input

        state = tuple(words[-self.order:])
        if state not in self.markov_chain:
            return self.default_response

        response = list(state)
        for _ in range(max_words):
            successors = self.markov_chain.get(state)
            if not successors:
                break  # Dead end: nothing was ever observed after this state
            response.append(random.choice(successors))
            state = tuple(response[-self.order:])

        # Learn from the bot's own reply. The reply is already a list of
        # tokens, so joining and re-tokenizing it would yield the same words.
        self._learn_words(response)
        return ' '.join(response)

    def get_inputs_processed_count(self):
        """Get the number of inputs processed from the files."""
        return self.inputs_processed
# Example Usage
def main():
    """Train a chatbot from a data directory and run an interactive chat loop.

    The directory defaults to the author's local path; pass a different one
    as the first command-line argument. Type ``quit`` (or send EOF / Ctrl-C)
    to leave the loop.
    """
    import sys  # Local import: only the script entry point needs it

    # Replace with your directory path, or pass it on the command line
    data_dir = '/home/zetaphor/Code/neollamder/formatted_data'
    if len(sys.argv) > 1:
        data_dir = sys.argv[1]

    chatbot = MarkovChatbot()
    chatbot.add_data_from_directory(data_dir)
    print(f"Total inputs processed: {chatbot.get_inputs_processed_count()}")

    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            print()  # Finish the prompt line before exiting cleanly
            break
        if user_input.strip().lower() == 'quit':
            break
        response = chatbot.generate_response(user_input)
        print("Bot:", response)


# Guarded so that importing this module does not load data or block on input().
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment