Skip to content

Instantly share code, notes, and snippets.

View liketheflower's full-sized avatar

jimmy shen liketheflower

View GitHub Profile
from stable_baselines3 import PPO
import gym
# Train a PPO agent on CartPole-v1, write it to disk, then load it back.
env = gym.make("CartPole-v1")
# The policy is selected by name ("MlpPolicy"); verbose=1 is forwarded to PPO
# (presumably enables progress logging — confirm in the stable-baselines3 docs).
model = PPO(policy = "MlpPolicy",env = env, verbose=1)
# Run training for a budget of 25000 timesteps.
model.learn(total_timesteps=25000)
model.save("ppo_cartpole") # saving the model to ppo_cartpole.zip
# Rebinds `model` to the instance restored from disk, replacing the in-memory
# object that was just trained.
model = PPO.load("ppo_cartpole") # loading the model from ppo_cartpole.zip
import gym
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.env_util import make_vec_env
import os
import time
import gym
# Inspect the MountainCar-v0 environment: print the env object itself, then
# its action space, observation space and reward range, each under a label.
env = gym.make("MountainCar-v0")
print("MountainCar env")
print(env)
print("action_space")
print(env.action_space)
print("observation_space")
print(env.observation_space)
print("reward_range")
# Every other label is followed by its value; emit the reward range as well so
# the final label is not left dangling.
print(env.reward_range)
import gym
import time
import numpy as np
import matplotlib.pyplot as plt
# Build a MountainCar-v0 environment and reset it before use.
env = gym.make("MountainCar-v0")
env.reset()
# Starts empty; nothing is appended to it in the lines visible here.
observations = []
# NOTE(review): the only line following this loop header is a disabled render
# call, so no loop body is visible — confirm the rest of the loop.
for t in range(1000):
# env.render()
import gym
import time
# Build a MountainCar-v0 environment and reset it once up front.
env = gym.make('MountainCar-v0')
env.reset()
# NOTE(review): the lines after this loop header carry no indentation here;
# presumably they form the loop body — confirm against the original layout.
for t in range(10):
# Separator banner: 20 dashes, the iteration index, 20 dashes.
print("-"*20 + " "+ str(t) + " "+ "-"*20)
env.render()
# Resets the environment again and keeps whatever reset() returns.
observation = env.reset()
# Draws an action via action_space.sample(). NOTE(review): the action is not
# passed to env.step() in the visible lines — the snippet may be cut short.
action = env.action_space.sample()
import gym
# Report how many environments are registered with gym and preview the first
# four entries, each preceded by a dashed separator line.
envs = gym.envs.registry.all()
print(f"In total we have {len(envs)} envs available!")
print("The first 4 envs are: ")
# Materialise the registry listing before slicing (it is sliced via list() in
# the original too; presumably the returned view is not directly sliceable).
for env in list(envs)[:4]:
    print("-" * 20)
    print(env)
import itertools
from functools import lru_cache
from typing import List
class Solution:
    """Scores contiguous partitions of a list by the sum of group averages."""

    def largestSumOfAverages(self, a: List[int], k: int) -> float:
        """Return the largest sum of averages over contiguous groupings of ``a``.

        ``a`` is cut into ``min(k, len(a))`` non-empty contiguous groups and a
        grouping is scored by adding up the average of every group. For
        non-negative inputs, splitting a group never lowers the score, so this
        is also the best score achievable with *at most* ``k`` groups.

        Args:
            a: The numbers to partition.
            k: Number of groups allowed; values above ``len(a)`` are clamped
                because every group must hold at least one element.

        Returns:
            The best achievable score, or ``0.0`` when ``a`` is empty.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be at least 1")
        n = len(a)
        if n == 0:
            return 0.0
        groups = min(k, n)

        # prefix[i] == sum(a[:i]), so sum(a[i:j]) == prefix[j] - prefix[i].
        prefix = list(itertools.accumulate([0] + a))

        @lru_cache(None)
        def dp(i, parts):
            """Best score for the suffix a[i:] cut into exactly ``parts`` groups."""
            if parts == 1:
                return (prefix[n] - prefix[i]) / (n - i)
            # The first group is a[i..j]; stop early enough that each of the
            # remaining parts - 1 groups can still receive one element.
            return max(
                (prefix[j + 1] - prefix[i]) / (j - i + 1) + dp(j + 1, parts - 1)
                for j in range(i, n - parts + 1)
            )

        return dp(0, groups)
class Solution:
def largestSumOfAverages(self, a: List[int], k: int) -> float:
# Prefix sums with a leading 0: cusum[i] == sum(a[:i]), so the sum of a[i:j]
# is cusum[j] - cusum[i].
cusum = list(itertools.accumulate([0]+a))
N=len(a)
# Recurrence sketch (only described in comments; no table is filled in the
# visible lines):
# dp[0][k] is the best score for indices 0..N-1 inclusive using at most k groups.
# dp[0][k] is the maximum over the following choices of the first group:
#   average(a[:1]) + dp[1][k-1]  -> indices 1..N-1 inclusive, at most k-1 groups
#   average(a[:2]) + dp[2][k-1]  -> indices 2..N-1 inclusive, at most k-1 groups
#   ...
#   average(a[:N-1]) + dp[N-1][k-1]  -> index N-1 only, at most k-1 groups
# NOTE(review): no return statement is visible in this excerpt, so as shown the
# method falls off the end — confirm whether the implementation was cut off.
from functools import lru_cache
# NOTE(review): as written the cache decorator applies to `abbreviation`, not
# to the recursive helper `dp` defined inside it — confirm which was intended.
@lru_cache(None)
def abbreviation(a, b):
# dp(i, j) inspects a[i-1] and b[j-1]; presumably it answers whether the first
# i characters of `a` can be matched to the first j characters of `b` — verify.
def dp(i,j):
# Both prefixes are empty.
if i==0 and j==0:return True
# `a` is used up while part of `b` is still left.
if i==0:return False
# `b` is used up: a lowercase a[i-1] defers to dp(i-1, j); anything else is
# False.
if j==0:
if a[i-1].islower():return dp(i-1, j)
else:return False
# NOTE(review): the body of this branch is not visible in this excerpt.
if a[i-1]==b[j-1]:
def abbreviation(a, b):
m, n = len(a), len(b)
dp = [[False]*(m+1) for _ in range(n+1)]
dp[0][0] = True
for j in range(1, m+1):
if a[j-1].islower():
dp[0][j] = dp[0][j-1]
for i in range(1, n+1):
for j in range(1,m+1):
if a[j-1] == b[i-1]: