Skip to content

Instantly share code, notes, and snippets.

@DollarAkshay
Created December 27, 2016 05:43
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save DollarAkshay/0453d8ef144b2dd35056180229ec42f4 to your computer and use it in GitHub Desktop.
Save DollarAkshay/0453d8ef144b2dd35056180229ec42f4 to your computer and use it in GitHub Desktop.
Solving BipedalWalkerHardcore-v2 using a Genetic Algorithm and Neural Networks
import time, math, random, bisect, copy
import gym
import numpy as np
class NeuralNet:
    """Feed-forward network whose parameters are stored as nested Python lists.

    Attributes:
        fitness: score assigned by the caller; starts at 0.0.
        nodeCount: sequence of layer widths, input layer first.
        weights: one ``nodeCount[i] x nodeCount[i+1]`` nested list per layer
            transition, drawn uniformly from [-1, 1).
        biases: one list of length ``nodeCount[i+1]`` per layer transition,
            drawn uniformly from [-1, 1).
    """

    def __init__(self, nodeCount):
        """Create a network with random weights and biases.

        Args:
            nodeCount: sequence of layer widths, input layer first.
        """
        self.fitness = 0.0
        self.nodeCount = nodeCount
        self.weights = []
        self.biases = []
        # Draw weights before biases for every layer so the sequence of
        # random numbers consumed is the same as it has always been.
        for i in range(len(nodeCount) - 1):
            fanIn, fanOut = nodeCount[i], nodeCount[i + 1]
            self.weights.append(
                np.random.uniform(low=-1, high=1, size=(fanIn, fanOut)).tolist()
            )
            self.biases.append(
                np.random.uniform(low=-1, high=1, size=(fanOut)).tolist()
            )

    def printWeightsandBiases(self):
        """Print all weights and biases, formatted to two decimals.

        The text is assembled with joins rather than by printing a trailing
        comma and erasing it with a backspace character, so the output is
        also readable when stdout is redirected to a file or a log.
        """
        rule = "--------------------------------"
        lines = [rule, "Weights :", "["]
        for layer in self.weights:
            rows = [
                "[" + ",".join(" %5.2f" % w for w in row) + "]"
                for row in layer
            ]
            # Continuation rows are indented to line up under the first row.
            lines.append(" [ " + ",\n   ".join(rows) + " ],")
        lines += ["]", "", "Biases :", "["]
        lines.extend(
            " [" + ",".join(" %5.2f" % b for b in layer) + "],"
            for layer in self.biases
        )
        lines += ["]", rule, ""]
        print("\n".join(lines))

    def getOutput(self, input):
        """Propagate ``input`` through every layer and return the result.

        Each layer computes ``output @ weights + biases``; no activation
        function is applied between layers.

        Args:
            input: vector with ``nodeCount[0]`` entries. The name shadows the
                builtin but is kept because it is part of the signature.

        Returns:
            numpy array of shape ``(nodeCount[-1],)``, or ``input`` itself
            when the network has no layer transitions.
        """
        output = input
        for i in range(len(self.nodeCount) - 1):
            width = self.nodeCount[i + 1]
            activation = np.matmul(output, self.weights[i]) + self.biases[i]
            output = np.reshape(activation, (width,))
        return output
class Population:
    """A fixed-size set of NeuralNet individuals evolved by selection and crossover.

    Attributes:
        nodeCount: layer widths passed to every NeuralNet that is created.
        popCount: number of individuals kept in each generation.
        m_rate: per-gene probability that a child keeps its own freshly
            randomised value instead of inheriting one from a parent.
        population: list of the current individuals.
    """

    def __init__(self, populationCount, mutationRate, nodeCount):
        """Create ``populationCount`` randomly initialised networks."""
        self.nodeCount = nodeCount
        self.popCount = populationCount
        self.m_rate = mutationRate
        self.population = [NeuralNet(nodeCount) for _ in range(populationCount)]

    @staticmethod
    def _parentBias(fitness1, fitness2):
        """Return the probability of inheriting a gene from the first parent.

        For two positive fitnesses this is ``fitness1 / (fitness1 + fitness2)``.
        The other cases are handled explicitly because that formula divides
        by zero when the sum is 0 and prefers the *worse* parent when both
        values are negative.
        """
        if fitness1 > 0 and fitness2 > 0:
            return fitness1 / (fitness1 + fitness2)
        if fitness1 < 0 and fitness2 < 0:
            # The less negative parent is fitter, so weight each parent by
            # the other's magnitude.
            return fitness2 / (fitness1 + fitness2)
        if fitness1 == fitness2:
            return 0.5
        # Mixed signs, or exactly one zero: the better parent always wins.
        return 1.0 if fitness1 > fitness2 else 0.0

    @staticmethod
    def _pickIndex(cumulative):
        """Pick an index with probability proportional to its weight.

        Args:
            cumulative: non-empty list of running weight totals, one entry
                per candidate (no leading zero).

        Returns:
            An index into ``cumulative``. When the total weight is not
            positive every index is equally likely.
        """
        total = cumulative[-1]
        if not total > 0:
            return random.randrange(len(cumulative))
        r = random.uniform(0, total)
        # bisect_left finds the first running total >= r, i.e. the slot whose
        # interval contains r; the min() only guards float round-off.
        return min(bisect.bisect_left(cumulative, r), len(cumulative) - 1)

    def createChild(self, nn1, nn2):
        """Build a child network by gene-wise crossover of two parents.

        Every weight and bias of a freshly randomised network is, with
        probability ``1 - m_rate``, overwritten by the matching value from
        one parent; otherwise the random value is kept as a mutation.

        Args:
            nn1: first parent.
            nn2: second parent.

        Returns:
            The new NeuralNet.
        """
        child = NeuralNet(self.nodeCount)
        # The parent preference depends only on the two fitness values, so
        # compute it once instead of once per gene.
        pFirst = self._parentBias(nn1.fitness, nn2.fitness)
        for i, layer in enumerate(child.weights):
            for j, row in enumerate(layer):
                for k in range(len(row)):
                    if random.random() > self.m_rate:
                        donor = nn1 if random.random() < pFirst else nn2
                        row[k] = donor.weights[i][j][k]
        for i, layer in enumerate(child.biases):
            for j in range(len(layer)):
                if random.random() > self.m_rate:
                    donor = nn1 if random.random() < pFirst else nn2
                    layer[j] = donor.biases[i][j]
        return child

    def createNewGeneration(self, bestNN):
        """Replace the population with survivors plus newly bred children.

        Individuals are ranked by fitness; rank ``i`` survives with
        probability ``(popCount - i) / popCount``, so the top-ranked one
        always survives. Children are then bred from the survivors, parents
        being drawn with weight ``(fitness - lowest survivor fitness) ** 4``,
        until the population is back to ``popCount``.

        Args:
            bestNN: accepted for interface compatibility; not used.
        """
        self.population.sort(key=lambda x: x.fitness, reverse=True)
        count = self.popCount
        survivors = [
            copy.deepcopy(nn)
            for rank, nn in enumerate(self.population[:count])
            if random.random() < float(count - rank) / count
        ]
        if not survivors:
            # Nothing to breed from (empty population or popCount <= 0).
            self.population = []
            return

        minFit = min(nn.fitness for nn in survivors)
        # Running totals WITHOUT a leading zero, so entry i belongs to
        # survivors[i]. A leading zero shifts every lookup by one slot, which
        # stops the fittest survivor from ever being chosen as a parent.
        cumulative = []
        running = 0.0
        for nn in survivors:
            running += (nn.fitness - minFit) ** 4
            cumulative.append(running)

        nextGen = list(survivors)
        while len(nextGen) < count:
            first = survivors[self._pickIndex(cumulative)]
            second = survivors[self._pickIndex(cumulative)]
            nextGen.append(self.createChild(first, second))
        self.population = nextGen
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    Evaluated as ``exp(-log(1 + exp(-x)))`` through ``np.logaddexp`` so that
    large negative inputs give 0.0 without an overflow in ``exp``.

    Args:
        x: a number or numpy array.

    Returns:
        numpy scalar or array with the same shape as ``x``.
    """
    return np.exp(-np.logaddexp(0, -x))
def replayBestBots(bestNeuralNets, steps, sleep):
    """Optionally replay every ``steps``-th stored network on screen.

    Prompts on stdin first; anything other than ``Y``/``y`` skips the replay.
    Uses the module-level ``env`` and ``MAX_STEPS``.

    Args:
        bestNeuralNets: networks in generation order.
        steps: replay the networks whose 1-based position is a multiple of
            this value.
        sleep: seconds to pause after rendering each frame.
    """
    answer = input("Do you want to watch the replay ?[Y/N] : ")
    if answer not in ("Y", "y"):
        return

    for generation, net in enumerate(bestNeuralNets, start=1):
        if generation % steps != 0:
            continue

        observation = env.reset()
        totalReward = 0
        for _ in range(MAX_STEPS):
            env.render()
            time.sleep(sleep)
            action = net.getOutput(observation)
            observation, reward, done, info = env.step(action)
            totalReward += reward
            if done:
                break

        # Compare the fitness recorded during evolution with this run's score.
        print("Generation %3d | Expected Fitness of %4d | Actual Fitness = %4d" % (generation, net.fitness, totalReward))
def recordBestBots(bestNeuralNets):
    """Run one rendered episode per stored network with the env monitor active.

    Uses the module-level ``env``, ``GAME`` and ``MAX_STEPS``. The monitor is
    started on the directory ``'Artificial Intelligence/' + GAME`` and is
    closed in a ``finally`` clause, so it is also closed when an episode
    raises.

    Args:
        bestNeuralNets: networks in generation order.
    """
    print("\n Recording Best Bots ")
    print("---------------------")
    env.monitor.start('Artificial Intelligence/'+GAME, force=True)
    try:
        for generation, net in enumerate(bestNeuralNets, start=1):
            totalReward = 0
            observation = env.reset()
            for _ in range(MAX_STEPS):
                env.render()
                action = net.getOutput(observation)
                observation, reward, done, info = env.step(action)
                totalReward += reward
                if done:
                    break
            print("Generation %3d | Expected Fitness of %4d | Actual Fitness = %4d" % (generation, net.fitness, totalReward))
    finally:
        # Always release the monitor, even if an episode failed part-way.
        env.monitor.close()
def uploadSimulation(apiKeyPath='/home/dollarakshay/Documents/API Keys/Open AI Key.txt'):
    """Upload the directory ``'Artificial Intelligence/' + GAME`` via ``gym.upload``.

    Args:
        apiKeyPath: text file holding the API key; trailing whitespace is
            stripped. Defaults to the path this script has always used.
    """
    # Read through a context manager so the file handle is closed promptly.
    with open(apiKeyPath, 'r') as keyFile:
        apiKey = keyFile.read().rstrip()
    gym.upload('Artificial Intelligence/'+GAME, api_key=apiKey)
def mapRange(value, leftMin, leftMax, rightMin, rightMax):
    """Linearly map ``value`` from [leftMin, leftMax] onto [rightMin, rightMax].

    Values outside the source interval are extrapolated, not clamped.
    Raises ZeroDivisionError when ``leftMin == leftMax``.
    """
    # Position of value inside the source interval, as a fraction.
    fraction = float(value - leftMin) / float(leftMax - leftMin)
    # Stretch that fraction over the target interval.
    return rightMin + (fraction * (rightMax - rightMin))
def normalizeArray(aVal, aMin, aMax):
    """Map each ``aVal[i]`` from [aMin[i], aMax[i]] onto [-1, 1].

    Returns a new list with one entry per element of ``aVal``.
    """
    return [
        mapRange(item, aMin[pos], aMax[pos], -1, 1)
        for pos, item in enumerate(aVal)
    ]
def scaleArray(aVal, aMin, aMax):
    """Map each ``aVal[i]`` from [-1, 1] onto [aMin[i], aMax[i]].

    Returns a new list with one entry per element of ``aVal``.
    """
    return [
        mapRange(item, -1, 1, aMin[pos], aMax[pos])
        for pos, item in enumerate(aVal)
    ]
# ---- Environment and run configuration ----
GAME = 'BipedalWalkerHardcore-v2'
env = gym.make(GAME)
MAX_STEPS = env.spec.timestep_limit
MAX_GENERATIONS = 1000
POPULATION_COUNT = 100
MUTATION_RATE = 0.01

# Sizes and bounds of the observation and action spaces.
in_dimen = env.observation_space.shape[0]
out_dimen = env.action_space.shape[0]
obsMin = env.observation_space.low
obsMax = env.observation_space.high
actionMin = env.action_space.low
actionMax = env.action_space.high

# Every individual maps an observation to an action through three hidden layers.
pop = Population(
    POPULATION_COUNT,
    MUTATION_RATE,
    [in_dimen, 13, 8, 13, out_dimen],
)
bestNeuralNets = []

print("\nObservation\n--------------------------------")
print("Shape :", in_dimen, " \n High :", obsMax, " \n Low :", obsMin)
print("\nAction\n--------------------------------")
print("Shape :", out_dimen, " | High :", actionMax, " | Low :", actionMin,"\n")

# ---- Evolution loop: evaluate every individual, then breed the next generation ----
for gen in range(MAX_GENERATIONS):
    genAvgFit = 0.0
    minFit = 1000000
    maxFit = -1000000
    maxNeuralNet = None

    for cr, nn in enumerate(pop.population):
        observation = env.reset()
        totalReward = 0
        for step in range(MAX_STEPS):
            # Only the first individual of each generation is rendered.
            if cr == 0:
                env.render()
            action = nn.getOutput(observation)
            observation, reward, done, info = env.step(action)
            totalReward += reward
            if done:
                break

        # The episode's total reward is the individual's fitness.
        nn.fitness = totalReward
        genAvgFit += nn.fitness
        if nn.fitness < minFit:
            minFit = nn.fitness
        if nn.fitness > maxFit:
            maxFit = nn.fitness
            # Snapshot the network so later changes to the population cannot touch it.
            maxNeuralNet = copy.deepcopy(nn)

    bestNeuralNets.append(maxNeuralNet)
    genAvgFit /= pop.popCount
    print("Generation : %3d | Min : %5.0f | Avg : %5.0f | Max : %5.0f " % (gen+1, minFit, genAvgFit, maxFit) )
    pop.createNewGeneration(maxNeuralNet)

# ---- Post-processing: record, upload, then optionally replay every tenth generation ----
recordBestBots(bestNeuralNets)
uploadSimulation()
replayBestBots(bestNeuralNets, max(1, int(math.ceil(MAX_GENERATIONS/10.0))), 0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment