testGymDoublePendulum.py

You can view and download this file on Github: testGymDoublePendulum.py

 1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2# This is an EXUDYN example
 3#
 4# Details:  This file shows integration with OpenAI gym by testing a double pendulum example
 5#           Needs input file testGymDoublePendulumEnv.py which defines the model in the gym environment
 6#
 7# Author:   Johannes Gerstmayr
 8# Date:     2022-05-18
 9#
10# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
11#
12#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
13#pip install gym
14#pip install pygame
15import time
16from math import sin, cos
17from testGymDoublePendulumEnv import DoublePendulumEnv
18
19#%%+++++++++++++++++++++++++++++++++++++++++++++
20if False: #disabled by default; set to True to test the model by just integrating in Exudyn and applying a prescribed force
21
22    env = DoublePendulumEnv() #environment class imported from testGymDoublePendulumEnv
23    env.useRenderer = True #set this true to show visualization
24    observation, info = env.reset(seed=42, return_info=True) #fixed seed 42; observation and info are not used further in this section
25
26    for i in range(10000):
27        force = 0.1*(cos(i/50)) #cosine-shaped force signal with amplitude 0.1, depending on the step index i
28        env.integrateStep(force) #NOTE(review): presumably advances the Exudyn model by one step using this force - confirm in testGymDoublePendulumEnv
29        env.render()
30        time.sleep(0.01) #pause 0.01 s after each step (presumably to keep the visualization watchable)
31
32    env.close()
33
34
35#%%+++++++++++++++++++++++++++++++++++++++++++++
36if False: #disabled by default; set to True for testing the model with some random input
37    import gym
38    env = DoublePendulumEnv(5) #NOTE(review): meaning of the argument 5 is defined in testGymDoublePendulumEnv - confirm there
39    env.useRenderer = True #set this true to show visualization
40    observation, info = env.reset(seed=42, return_info=True) #fixed seed 42 for the first reset
41
42    ts = -time.time() #store negative start time, such that ts+time.time() below gives the elapsed time
43    for _ in range(1000):
44        action = env.action_space.sample() #draw a random action from the environment's action space
45        observation, reward, done, info = env.step(action)
46        env.render()
47        if done: #environment signals end of episode: reset (without seed) and continue with the remaining steps
48            observation, info = env.reset(return_info=True)
49    env.close()
50
51    print('time spent=',ts+time.time()) #elapsed wall-clock time since ts was set
52
53
54#%%+++++++++++++++++++++++++++++++++++++++++++++++++
55#reinforcement learning algorithm
56#pip install gym[spaces]
57#pip install pyglet
58
59if True: #do some reinforcement learning with exudyn model
60    import gym
61
62    from stable_baselines3 import A2C
63
64    doLearning = True
65    if doLearning:
66        env = DoublePendulumEnv(1)
67        env.useRenderer = False
68        #env = gym.make('CartPole-v1')
69
70        ts = -time.time()
71        model = A2C('MlpPolicy', env, verbose=1)
72        model.learn(total_timesteps=10000000)
73        print('time spent=',ts+time.time())
74
75    #%%++++++++++++++++++++++++
76    env = DoublePendulumEnv(10) #allow larger threshold for testing
77    env.useRenderer = True
78    obs = env.reset()
79    for i in range(5000):
80        action, _state = model.predict(obs, deterministic=True)
81        obs, reward, done, info = env.step(action)
82        env.render()
83        time.sleep(0.01)
84        if done:
85          obs = env.reset()
86        if env.mbs.GetRenderEngineStopFlag(): #stop if user press Q
87            break
88
89    env.close()