.. _examples-testgymcartpole:

******************
testGymCartpole.py
******************

You can view and download this file on Github: `testGymCartpole.py `_

.. code-block:: python
    :linenos:

    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # This is an EXUDYN example
    #
    # Details:  This file shows integration with OpenAI gym by testing a cart-pole example
    #           Needs input file testGymCartpoleEnv.py which defines the model in the gym environment
    #           Works well with Python3.8!
    #
    # Author:   Johannes Gerstmayr, Grzegorz Orzechowski
    # Date:     2022-05-17
    #
    # Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
    #
    #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    #+++++++++++++++++++++++++++++++++++++++++++++++++
    #conda create -n venvGym python=3.10 numpy matplotlib spyder-kernels=2.4 ipykernel -y
    #pip install wheel==0.38.4 setuptools==66.0.0
    #      => this downgrades setuptools to be able to install gym==0.21
    #pip install stable-baselines3==1.7.0

    import time
    from math import sin, cos
    from testGymCartpoleEnv import CartPoleEnv

    #if True, train with more time steps and show the trained model in the renderer
    useGraphics = True

    if True: #test the model by just integrating in Exudyn and apply force

        env = CartPoleEnv()
        env.useRenderer = False #set this true to show visualization
        observation, info = env.reset(seed=42, return_info=True)

        #number of integration steps; also used for the FPS computation below,
        #so that loop length and reported rate cannot get out of sync
        nSteps = 10000

        #perf_counter is monotonic and intended for measuring short durations
        ts = -time.perf_counter()
        for i in range(nSteps):
            force = 0.1*(cos(i/50)) #slowly varying force applied in every step
            env.integrateStep(force)
            # action = env.action_space.sample()
            # observation, reward, done, info = env.step(action)
            # if done:
                # observation, info = env.reset(return_info=True)
            # env.render()
            # time.sleep(0.01)
        ts = ts+time.perf_counter()
        print('measured max. step FPS:', int(nSteps/ts))
        env.close()


    #+++++++++++++++++++++++++++++++++++++++++++++++++
    #reinforcement learning algorithm

    if True: #do some reinforcement learning with exudyn model
        import gym

        env = CartPoleEnv(thresholdFactor=5,forceFactor=2)
        env.useRenderer = False

        total_timesteps = 1000 #for quick test only; does not stabilize
        if useGraphics:
            total_timesteps = 100_000 #works sometimes, may need more steps

        from stable_baselines3 import A2C
        model = A2C('MlpPolicy',
                    env,
                    device='cpu',  #usually cpu is faster for this size of networks
                    verbose=1)

        #measure the time spent for training only
        ts = -time.perf_counter()
        model.learn(total_timesteps=total_timesteps)
        print('time spent=',ts+time.perf_counter())

        model.save('solution/cartpoleLearn')

        #training is finished: close the training environment here, because
        #'env' is rebound to a new environment below (same as in the first test)
        env.close()

        #%%+++++++++++++++++++++++++++++++++++++++
        if useGraphics:
            env = CartPoleEnv(10) #test with larger threshold
            env.useRenderer = True
            obs = env.reset()
            for i in range(100):
                #let the trained model choose the action for the current observation
                action, _state = model.predict(obs, deterministic=True)
                obs, reward, done, info = env.step(action)
                env.render()
                if done:
                    obs = env.reset() #episode finished: start a new one
                time.sleep(0.05) #to see results ...

            env.close()