Created
February 17, 2024 07:11
-
-
Save Zetaphor/778e39638763bdecd5bc6fe96343ae11 to your computer and use it in GitHub Desktop.
ChatGPT authored Markov Chain in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Read a collection of JSON files from a directory as the inputs to a Markov Chain.
Also adds the user inputs to the dictionary as the conversation progresses.
Assumes it's using the formatted data from the NeoLLaMder project.
Written by ChatGPT
"""
import json
import os
import random
import re
import sys
class MarkovChatbot:
    """Word-level Markov chain chatbot that keeps learning while it chats.

    The chain maps a state (a tuple of ``order`` consecutive lowercase
    words) to the list of words observed right after that state.  Successor
    words are stored with repetition, so ``random.choice`` over the list
    samples them proportionally to how often they were seen.

    Attributes:
        order: Number of words that make up one state.
        markov_chain: Dict mapping state tuples to lists of successor words.
        default_response: Reply returned when the input's trailing words do
            not form a known state.
        inputs_processed: Number of records loaded from JSON files.
    """

    # One pattern for both training and querying, so text is always split
    # into words the same way.  Compiled once instead of on every call.
    _WORD_PATTERN = re.compile(r'\b\w+\b')

    def __init__(self, order=2):
        """Create an empty chatbot.

        Args:
            order: Number of consecutive words per state; must be >= 1.

        Raises:
            ValueError: If ``order`` is smaller than 1.
        """
        # order == 0 would make ``words[-order:]`` return the *whole* list
        # and negative orders produce meaningless slices, so reject both.
        if order < 1:
            raise ValueError(f"order must be at least 1, got {order!r}")
        self.order = order
        self.markov_chain = {}
        self.default_response = "I'm not sure how to respond to that."
        self.inputs_processed = 0  # Counter for the number of inputs processed

    def _tokenize(self, text) -> list:
        """Return the lowercase words found in ``text``."""
        return self._WORD_PATTERN.findall(text.lower())

    def add_data_from_json(self, file_path) -> None:
        """Add data to the Markov chain from a JSON file.

        The file is expected to hold a list of objects, each with a string
        ``content`` field.  A single top-level object is treated as a
        one-element list.  Records that are not objects, or whose
        ``content`` is missing or not a string, are skipped instead of
        aborting the whole load.

        Args:
            file_path: Path of the JSON file to read (UTF-8).

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        if isinstance(data, dict):
            data = [data]
        elif not isinstance(data, list):
            return  # Scalars carry no records.

        for item in data:
            # ``'content' in item`` on a str is a substring test and on a
            # number raises, so only look inside real JSON objects.
            if not isinstance(item, dict):
                continue
            content = item.get('content')
            if not isinstance(content, str):
                continue
            self._process_line(content.strip())
            self.inputs_processed += 1  # Count each record that was learned

    def add_data_from_directory(self, directory_path) -> None:
        """Add data to the Markov chain from all JSON files in a directory.

        Only entries whose name ends in ``.json`` are read; the directory is
        not searched recursively.

        Args:
            directory_path: Directory containing the JSON files.
        """
        # os.listdir returns names in arbitrary order; sorting makes the
        # resulting chain identical from run to run.
        for filename in sorted(os.listdir(directory_path)):
            if filename.endswith('.json'):
                self.add_data_from_json(os.path.join(directory_path, filename))

    def _process_line(self, line) -> None:
        """Record every (state -> next word) transition found in ``line``.

        Lines with ``order`` words or fewer contain no transition and leave
        the chain unchanged.
        """
        words = self._tokenize(line)
        order = self.order
        for i in range(len(words) - order):
            state = tuple(words[i:i + order])
            self.markov_chain.setdefault(state, []).append(words[i + order])

    def generate_response(self, input_text, max_words=20) -> str:
        """Generate a response using the Markov chain.

        The reply is seeded with the last ``order`` words of the input and
        extended by randomly walking the chain.  Both the user's input and
        the generated reply are fed back into the chain.

        Args:
            input_text: The user's message.
            max_words: Maximum number of words generated *after* the seed,
                so a reply holds at most ``order + max_words`` words.

        Returns:
            The generated reply, or ``default_response`` when the seed is
            not a known state (including inputs shorter than ``order``).
        """
        self._process_line(input_text)  # Learn from user input

        state = tuple(self._tokenize(input_text)[-self.order:])
        if state not in self.markov_chain:
            return self.default_response

        response = list(state)
        for _ in range(max_words):
            candidates = self.markov_chain.get(state)
            if not candidates:
                break  # Dead end: nothing was ever seen after this state
            response.append(random.choice(candidates))
            state = tuple(response[-self.order:])

        reply = ' '.join(response)
        self._process_line(reply)  # Learn from bot's response
        return reply

    def get_inputs_processed_count(self) -> int:
        """Get the number of inputs processed from the files."""
        return self.inputs_processed
# Example usage: train on a directory of JSON files, then chat on stdin.
DEFAULT_DATA_DIRECTORY = '/home/zetaphor/Code/neollamder/formatted_data'  # Replace with your directory path


def main():
    """Train a chatbot and run an interactive chat loop on the terminal.

    The data directory is taken from the first command-line argument when
    one is given, otherwise ``DEFAULT_DATA_DIRECTORY`` is used.  The loop
    ends when the user types ``quit`` or closes/interrupts the input
    stream (Ctrl-D / Ctrl-C).
    """
    data_directory = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_DATA_DIRECTORY

    chatbot = MarkovChatbot()
    chatbot.add_data_from_directory(data_directory)
    print(f"Total inputs processed: {chatbot.get_inputs_processed_count()}")

    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # End of input is a normal way to leave a REPL, not an error.
            print()
            break
        if user_input.strip().lower() == 'quit':
            break
        print("Bot:", chatbot.generate_response(user_input))


# Guarded so that importing this module does not start a blocking chat loop.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment