# testGymDoublePendulum.py
# (Source available for viewing/download on GitHub: testGymDoublePendulum.py)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file shows integration with OpenAI gym by testing a double pendulum example.
#           Needs input file testGymDoublePendulumEnv.py which defines the model in the gym environment.
#
# Author:   Johannes Gerstmayr
# Date:     2022-05-18
#
# Copyright: This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#pip install gym
#pip install pygame
import time
from math import sin, cos

from testGymDoublePendulumEnv import DoublePendulumEnv

#%%+++++++++++++++++++++++++++++++++++++++++++++
# Manual test: integrate the double pendulum directly in Exudyn while
# applying a slowly varying cosine force (disabled by default; flip the
# flag to run it interactively).
if False:
    pendulumEnv = DoublePendulumEnv()
    pendulumEnv.useRenderer = True  # set True to show the visualization
    observation, info = pendulumEnv.reset(seed=42, return_info=True)

    for step in range(10000):
        appliedForce = 0.1 * cos(step / 50)  # slow cosine excitation
        pendulumEnv.integrateStep(appliedForce)
        pendulumEnv.render()
        time.sleep(0.01)  # throttle loop for roughly real-time viewing

    pendulumEnv.close()


#%%+++++++++++++++++++++++++++++++++++++++++++++
# Random-policy test: sample actions from the action space, step the
# environment, and reset whenever an episode terminates (disabled by default).
if False:
    import gym

    randomEnv = DoublePendulumEnv(5)
    randomEnv.useRenderer = True  # set True to show the visualization
    observation, info = randomEnv.reset(seed=42, return_info=True)

    startTime = time.time()
    for _ in range(1000):
        randomAction = randomEnv.action_space.sample()
        observation, reward, done, info = randomEnv.step(randomAction)
        randomEnv.render()
        if done:
            observation, info = randomEnv.reset(return_info=True)
    randomEnv.close()

    print('time spent=', time.time() - startTime)


#%%+++++++++++++++++++++++++++++++++++++++++++++++++
#reinforcement learning algorithm
#pip install gym[spaces]
#pip install pyglet

# Reinforcement learning with the Exudyn model: train an A2C agent using
# stable-baselines3, then replay the learned policy with rendering enabled.
if True:
    import gym

    from stable_baselines3 import A2C

    doLearning = True
    if doLearning:
        trainEnv = DoublePendulumEnv(1)
        trainEnv.useRenderer = False  # no rendering during training (speed)
        #trainEnv = gym.make('CartPole-v1')

        startTime = time.time()
        model = A2C('MlpPolicy', trainEnv, verbose=1)
        model.learn(total_timesteps=10000000)
        print('time spent=', time.time() - startTime)

    #%%++++++++++++++++++++++++
    # NOTE(review): if doLearning is False, 'model' is undefined below —
    # presumably a trained model is expected to exist in the session; confirm.
    testEnv = DoublePendulumEnv(10)  # allow larger threshold for testing
    testEnv.useRenderer = True
    obs = testEnv.reset()
    for _ in range(5000):
        action, _state = model.predict(obs, deterministic=True)
        obs, reward, done, info = testEnv.step(action)
        testEnv.render()
        time.sleep(0.01)  # throttle replay for viewing
        if done:
            obs = testEnv.reset()
        if testEnv.mbs.GetRenderEngineStopFlag():  # stop if user presses Q
            break

    testEnv.close()