@ruvnet
Last active March 8, 2024 22:43
Q* (Q-Star)
# - Q* (Q-Star)
# /\__/\ - q.py
# ( o.o ) - v0.0.1
# >^< - by @rUv
# 01110010 01110101 01110110
# This is a proof-of-concept implementation inspired by the rumored Q* (AGI) leak from OpenAI.
# This Python code defines a sophisticated Q-learning agent for reinforcement learning.
# It includes dynamic exploration, learning from experiences, and checks for convergence.
# The agent's capabilities are refined iteratively to optimize its decision-making strategy in a given environment.
import numpy as np
import random
class SophisticatedQLearningAgent:
    def __init__(self, states, actions, learning_rate=0.1, discount_factor=0.95, exploration_rate=1.0, min_exploration_rate=0.01, exploration_decay_rate=0.995, max_episodes=10000, max_steps_per_episode=200):
        # Initialize the Q-learning agent with specified parameters.
        self.states = states  # Number of states in the environment
        self.actions = actions  # Number of possible actions
        self.learning_rate = learning_rate  # Rate at which the agent learns
        self.discount_factor = discount_factor  # Factor for discounting future rewards
        self.exploration_rate = exploration_rate  # Initial exploration rate
        self.min_exploration_rate = min_exploration_rate  # Minimum exploration rate
        self.exploration_decay_rate = exploration_decay_rate  # Rate of decay for exploration
        self.max_episodes = max_episodes  # Maximum number of training episodes
        self.max_steps_per_episode = max_steps_per_episode  # Maximum steps per episode
        self.q_table = np.zeros((states, actions))  # Initialize Q-table with zeros
    def choose_action(self, state):
        # Choose an action for the current state using an epsilon-greedy exploration-exploitation strategy.
        if random.uniform(0, 1) < self.exploration_rate:
            action = random.randint(0, self.actions - 1)  # Explore: choose a random action
        else:
            action = np.argmax(self.q_table[state, :])  # Exploit: choose the best known action
        return action
    def learn(self, state, action, reward, next_state):
        # Update the Q-table based on the action taken and the resulting state.
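        # Tabular Q-learning (Bellman) update:
        #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))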
        predict = self.q_table[state, action]
        target = reward + self.discount_factor * np.max(self.q_table[next_state, :])
        self.q_table[state, action] += self.learning_rate * (target - predict)
    def update_exploration_rate(self):
        # Decrease exploration rate over time.
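        # With the defaults (1.0 * 0.995**n), the rate reaches the 0.01 floor after roughly
        # 920 episodes, since log(0.01) / log(0.995) is about 919.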
        self.exploration_rate = max(self.min_exploration_rate, self.exploration_rate * self.exploration_decay_rate)
    def has_converged(self, threshold=0.005):
        # Check if the Q-values have converged.
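        # Heuristic check: every Q-value in a row must lie within `threshold` of that row's maximum.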
        return np.all(np.abs(self.q_table - np.max(self.q_table, axis=1, keepdims=True)) < threshold)
    def train(self):
        # Train the agent over a series of episodes.
        for episode in range(self.max_episodes):
            state = random.randint(0, self.states - 1)
            for step in range(self.max_steps_per_episode):
                action = self.choose_action(state)
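                # Stand-in for a real environment: transitions are sampled uniformly at random
                # (the chosen action has no effect) and only the last state pays a reward of 1.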
                next_state = random.randint(0, self.states - 1)
                reward = 1 if next_state == self.states - 1 else 0
                self.learn(state, action, reward, next_state)
                state = next_state
            self.update_exploration_rate()
            if self.has_converged():
                return True, episode
        return False, self.max_episodes
    def resize_q_table(self, new_states, new_actions):
        # Resize the Q-table if the number of states or actions increases.
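        # Existing Q-values are copied into the top-left block of the new table; new entries start at zero.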
        if new_states > self.states or new_actions > self.actions:
            new_q_table = np.zeros((new_states, new_actions))
            new_q_table[:self.states, :self.actions] = self.q_table
            self.q_table = new_q_table
            self.states = new_states
            self.actions = new_actions
# Initialize and refine the sophisticated agent
sophisticated_agent = SophisticatedQLearningAgent(states=30, actions=4)
max_refinements = 5
refinement_count = 0
converged = False
# Refine the agent until it converges or reaches the maximum number of refinements
while not converged and refinement_count < max_refinements:
    new_states = sophisticated_agent.states + 10  # Increase the number of states
    new_actions = sophisticated_agent.actions  # Keep the number of actions constant
    sophisticated_agent.resize_q_table(new_states, new_actions)  # Resize the Q-table
    sophisticated_agent.max_episodes += 5000  # Increase the number of episodes
    sophisticated_agent.max_steps_per_episode += 50  # Increase the steps per episode
    converged, episodes = sophisticated_agent.train()  # Train the agent
    refinement_count += 1
# Result of training after refinements
print(f"Converged: {converged}, Episodes: {episodes}, Refinements: {refinement_count}")
print(f"Q-Table:\n{sophisticated_agent.q_table}")