Zetaphor/markov_chain.py

## markov_chain.py
"""
Build a word-level Markov chain from every JSON file in a directory and chat with it.
The chain also learns from the user's inputs and from the bot's own replies as the conversation progresses.
Assumes the input is the formatted data produced by the NeoLLaMder project.
Written by ChatGPT.
"""

import json
import random
import re
import os

class MarkovChatbot:
    """Word-level Markov-chain chatbot that keeps learning while it talks.

    The chain maps a state (a tuple of ``order`` consecutive lowercase words)
    to the list of words observed right after that state. Repeated
    observations are stored as repeated list entries, so ``random.choice``
    picks a follower in proportion to how often it was seen.
    """

    # Compiled once and shared by training and generation so that both
    # split text into words in exactly the same way.
    _WORD_PATTERN = re.compile(r'\b\w+\b')

    def __init__(self, order=2):
        """Create an empty chatbot.

        Args:
            order: Number of consecutive words that form one state.

        Raises:
            ValueError: If ``order`` is not a positive integer. With
                ``order == 0`` the ``[-order:]`` slices used during
                generation return the whole list instead of an empty one,
                so the chain could never be followed correctly.
        """
        if not isinstance(order, int) or order < 1:
            raise ValueError(f"order must be a positive integer, got {order!r}")
        self.order = order
        self.markov_chain = {}
        self.default_response = "I'm not sure how to respond to that."
        self.inputs_processed = 0  # Number of 'content' entries read from files

    def _tokenize(self, text):
        """Return the lowercase word tokens of ``text`` (punctuation is dropped)."""
        return self._WORD_PATTERN.findall(text.lower())

    def add_data_from_json(self, file_path):
        """Add data to the Markov chain from a JSON file.

        Every record (dict) with a string ``'content'`` value is fed to the
        chain and counted in ``inputs_processed``. Records of any other
        shape are skipped instead of aborting the whole load.

        Args:
            file_path: Path of a UTF-8 encoded JSON file. Presumably a list
                of message dicts (NeoLLaMder format) - verify against the
                data; a single top-level value is treated as a one-item list.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        if not isinstance(data, list):
            data = [data]

        for item in data:
            if not isinstance(item, dict):
                continue
            content = item.get('content')
            if not isinstance(content, str):
                continue  # e.g. a null 'content'; nothing to learn from
            self._process_line(content.strip())
            self.inputs_processed += 1

    def add_data_from_directory(self, directory_path):
        """Add data to the Markov chain from all ``.json`` files in a directory.

        Files are loaded in sorted name order so that the resulting chain
        (and therefore a seeded ``random``) is reproducible across runs;
        ``os.listdir`` itself returns names in arbitrary order.
        """
        for filename in sorted(os.listdir(directory_path)):
            if filename.endswith('.json'):
                self.add_data_from_json(os.path.join(directory_path, filename))

    def _process_line(self, line):
        """Record every (state -> next word) transition found in ``line``.

        Lines with ``order`` words or fewer contain no transition and leave
        the chain unchanged.
        """
        words = self._tokenize(line)
        order = self.order
        for i in range(len(words) - order):
            state = tuple(words[i:i + order])
            self.markov_chain.setdefault(state, []).append(words[i + order])

    def generate_response(self, input_text, max_words=20):
        """Generate a response using the Markov chain.

        The input is learned first, then its last ``order`` words are used
        as the starting state. The reply consists of those seed words
        followed by up to ``max_words`` generated words.

        Args:
            input_text: The user's message.
            max_words: Maximum number of words generated after the seed
                words. Defaults to 20.

        Returns:
            The generated reply, or ``self.default_response`` when the
            starting state has never been seen (including empty input or
            input shorter than ``order`` words).
        """
        self._process_line(input_text)  # Learn from user input
        state = tuple(self._tokenize(input_text)[-self.order:])

        if state not in self.markov_chain:
            return self.default_response

        response = list(state)
        for _ in range(max_words):
            followers = self.markov_chain.get(state)
            if not followers:
                break  # Dead end: this state was never followed by anything
            response.append(random.choice(followers))
            state = tuple(response[-self.order:])

        reply = ' '.join(response)
        # Learn from the bot's own reply. Every transition in it already
        # exists in the chain, so this only increases their weights.
        self._process_line(reply)
        return reply

    def get_inputs_processed_count(self):
        """Get the number of inputs processed from the files."""
        return self.inputs_processed


# Example Usage
def main():
    """Run an interactive chat session on the terminal.

    Loads every JSON file from the data directory, prints how many inputs
    were read, then answers each line typed by the user until they enter
    'quit' or close the input stream (Ctrl-D / Ctrl-C).

    The data directory is taken from the first command-line argument when
    one is given; otherwise the original hard-coded path is used.
    """
    import sys  # Local import: only this command-line entry point needs it

    # Replace with your directory path, or pass it as the first argument.
    default_directory = '/home/zetaphor/Code/neollamder/formatted_data'
    data_directory = sys.argv[1] if len(sys.argv) > 1 else default_directory

    chatbot = MarkovChatbot()
    chatbot.add_data_from_directory(data_directory)
    print(f"Total inputs processed: {chatbot.get_inputs_processed_count()}")

    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            print()  # Finish the prompt line before leaving
            break
        # Ignore surrounding whitespace so that ' quit ' also ends the chat.
        if user_input.strip().lower() == 'quit':
            break
        print("Bot:", chatbot.generate_response(user_input))


# Guarded so that importing this module neither reads the data directory
# nor blocks waiting for keyboard input.
if __name__ == "__main__":
    main()
	"""
	Build a word-level Markov chain from every JSON file in a directory and chat with it.
	The chain also learns from the user's inputs and from the bot's own replies as the conversation progresses.
	Assumes the input is the formatted data produced by the NeoLLaMder project.
	Written by ChatGPT.
	"""

	import json
	import random
	import re
	import os

	class MarkovChatbot:
	    """Word-level Markov-chain chatbot that keeps learning while it talks.

	    The chain maps a state (a tuple of ``order`` consecutive lowercase words)
	    to the list of words observed right after that state. Repeated
	    observations are stored as repeated list entries, so ``random.choice``
	    picks a follower in proportion to how often it was seen.
	    """

	    # Compiled once and shared by training and generation so that both
	    # split text into words in exactly the same way.
	    _WORD_PATTERN = re.compile(r'\b\w+\b')

	    def __init__(self, order=2):
	        """Create an empty chatbot.

	        Args:
	            order: Number of consecutive words that form one state.

	        Raises:
	            ValueError: If ``order`` is not a positive integer. With
	                ``order == 0`` the ``[-order:]`` slices used during
	                generation return the whole list instead of an empty one,
	                so the chain could never be followed correctly.
	        """
	        if not isinstance(order, int) or order < 1:
	            raise ValueError(f"order must be a positive integer, got {order!r}")
	        self.order = order
	        self.markov_chain = {}
	        self.default_response = "I'm not sure how to respond to that."
	        self.inputs_processed = 0  # Number of 'content' entries read from files

	    def _tokenize(self, text):
	        """Return the lowercase word tokens of ``text`` (punctuation is dropped)."""
	        return self._WORD_PATTERN.findall(text.lower())

	    def add_data_from_json(self, file_path):
	        """Add data to the Markov chain from a JSON file.

	        Every record (dict) with a string ``'content'`` value is fed to the
	        chain and counted in ``inputs_processed``. Records of any other
	        shape are skipped instead of aborting the whole load.

	        Args:
	            file_path: Path of a UTF-8 encoded JSON file. Presumably a list
	                of message dicts (NeoLLaMder format) - verify against the
	                data; a single top-level value is treated as a one-item list.

	        Raises:
	            OSError: If the file cannot be opened.
	            json.JSONDecodeError: If the file is not valid JSON.
	        """
	        with open(file_path, 'r', encoding='utf-8') as file:
	            data = json.load(file)

	        if not isinstance(data, list):
	            data = [data]

	        for item in data:
	            if not isinstance(item, dict):
	                continue
	            content = item.get('content')
	            if not isinstance(content, str):
	                continue  # e.g. a null 'content'; nothing to learn from
	            self._process_line(content.strip())
	            self.inputs_processed += 1

	    def add_data_from_directory(self, directory_path):
	        """Add data to the Markov chain from all ``.json`` files in a directory.

	        Files are loaded in sorted name order so that the resulting chain
	        (and therefore a seeded ``random``) is reproducible across runs;
	        ``os.listdir`` itself returns names in arbitrary order.
	        """
	        for filename in sorted(os.listdir(directory_path)):
	            if filename.endswith('.json'):
	                self.add_data_from_json(os.path.join(directory_path, filename))

	    def _process_line(self, line):
	        """Record every (state -> next word) transition found in ``line``.

	        Lines with ``order`` words or fewer contain no transition and leave
	        the chain unchanged.
	        """
	        words = self._tokenize(line)
	        order = self.order
	        for i in range(len(words) - order):
	            state = tuple(words[i:i + order])
	            self.markov_chain.setdefault(state, []).append(words[i + order])

	    def generate_response(self, input_text, max_words=20):
	        """Generate a response using the Markov chain.

	        The input is learned first, then its last ``order`` words are used
	        as the starting state. The reply consists of those seed words
	        followed by up to ``max_words`` generated words.

	        Args:
	            input_text: The user's message.
	            max_words: Maximum number of words generated after the seed
	                words. Defaults to 20.

	        Returns:
	            The generated reply, or ``self.default_response`` when the
	            starting state has never been seen (including empty input or
	            input shorter than ``order`` words).
	        """
	        self._process_line(input_text)  # Learn from user input
	        state = tuple(self._tokenize(input_text)[-self.order:])

	        if state not in self.markov_chain:
	            return self.default_response

	        response = list(state)
	        for _ in range(max_words):
	            followers = self.markov_chain.get(state)
	            if not followers:
	                break  # Dead end: this state was never followed by anything
	            response.append(random.choice(followers))
	            state = tuple(response[-self.order:])

	        reply = ' '.join(response)
	        # Learn from the bot's own reply. Every transition in it already
	        # exists in the chain, so this only increases their weights.
	        self._process_line(reply)
	        return reply

	    def get_inputs_processed_count(self):
	        """Get the number of inputs processed from the files."""
	        return self.inputs_processed


	# Example Usage
	def main():
	    """Run an interactive chat session on the terminal.

	    Loads every JSON file from the data directory, prints how many inputs
	    were read, then answers each line typed by the user until they enter
	    'quit' or close the input stream (Ctrl-D / Ctrl-C).

	    The data directory is taken from the first command-line argument when
	    one is given; otherwise the original hard-coded path is used.
	    """
	    import sys  # Local import: only this command-line entry point needs it

	    # Replace with your directory path, or pass it as the first argument.
	    default_directory = '/home/zetaphor/Code/neollamder/formatted_data'
	    data_directory = sys.argv[1] if len(sys.argv) > 1 else default_directory

	    chatbot = MarkovChatbot()
	    chatbot.add_data_from_directory(data_directory)
	    print(f"Total inputs processed: {chatbot.get_inputs_processed_count()}")

	    while True:
	        try:
	            user_input = input("You: ")
	        except (EOFError, KeyboardInterrupt):
	            print()  # Finish the prompt line before leaving
	            break
	        # Ignore surrounding whitespace so that ' quit ' also ends the chat.
	        if user_input.strip().lower() == 'quit':
	            break
	        print("Bot:", chatbot.generate_response(user_input))


	# Guarded so that importing this module neither reads the data directory
	# nor blocks waiting for keyboard input.
	if __name__ == "__main__":
	    main()