.. _examples-testgymdoublependulum: ************************ testGymDoublePendulum.py ************************ You can view and download this file on Github: `testGymDoublePendulum.py `_ .. code-block:: python :linenos: #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # This is an EXUDYN example # # Details: This file shows integration with OpenAI gym by testing a double pendulum example # Needs input file testGymDoublePendulumEnv.py which defines the model in the gym environment # # Author: Johannes Gerstmayr # Date: 2022-05-18 # # Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details. # #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ import time from math import sin, cos from testGymDoublePendulumEnv import DoublePendulumEnv useGraphics = True #%%+++++++++++++++++++++++++++++++++++++++++++++ if False: #test the model by just integrating in Exudyn and apply force env = DoublePendulumEnv() env.useRenderer = True #set this true to show visualization observation, info = env.reset(seed=42, return_info=True) for i in range(10000): force = 0.1*(cos(i/50)) env.integrateStep(force) env.render() time.sleep(0.01) env.close() #%%+++++++++++++++++++++++++++++++++++++++++++++ if False: #testing the model with some random input import gym env = DoublePendulumEnv(5) env.useRenderer = True #set this true to show visualization observation, info = env.reset(seed=42, return_info=True) ts = -time.time() for _ in range(1000): action = env.action_space.sample() observation, reward, done, info = env.step(action)[:4] #accomodate for steps which return > 4 args env.render() if done: observation, info = env.reset(return_info=True) env.close() print('time spent=',ts+time.time()) #%%+++++++++++++++++++++++++++++++++++++++++++++++++ #reinforment learning algorithm #pip install gym[spaces] #pip install pyglet if True: #do some reinforcement learning with exudyn model import gym from stable_baselines3 import A2C total_timesteps = 1000 #for quick test only; does not stabilize if useGraphics: total_timesteps = 1000_000 #works sometimes already good doLearning = True if doLearning: env = DoublePendulumEnv(1) env.useRenderer = False #env = gym.make('CartPole-v1') ts = -time.time() model = A2C('MlpPolicy', env, verbose=1) model.learn(total_timesteps=total_timesteps) print('time spent=',ts+time.time()) #%%++++++++++++++++++++++++ if useGraphics: env = DoublePendulumEnv(10) #allow larger threshold for testing env.useRenderer = True obs = env.reset() for i in range(5000): action, _state = model.predict(obs, deterministic=True) obs, reward, done, info = env.step(action)[:4] env.render() time.sleep(0.01) if done: obs = env.reset() if env.mbs.GetRenderEngineStopFlag(): #stop if user press Q break env.close()