# testGymDoublePendulum.py
# (Source available for viewing/download on GitHub: testGymDoublePendulum.py)
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file shows integration with OpenAI gym by testing a double pendulum example.
#           Needs input file testGymDoublePendulumEnv.py which defines the model in the gym environment.
#
# Author:   Johannes Gerstmayr
# Date:     2022-05-18
#
# Copyright: This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#pip install gym
#pip install pygame
import time
from math import sin, cos

from testGymDoublePendulumEnv import DoublePendulumEnv

#%%+++++++++++++++++++++++++++++++++++++++++++++
# Manual test: integrate the double pendulum directly in Exudyn while
# applying a slowly varying cosine force (disabled by default; flip the
# flag to run it interactively).
if False:
    pendulumEnv = DoublePendulumEnv()
    pendulumEnv.useRenderer = True  # set True to show the visualization
    observation, info = pendulumEnv.reset(seed=42, return_info=True)

    for step in range(10000):
        appliedForce = 0.1 * cos(step / 50)  # slow cosine excitation
        pendulumEnv.integrateStep(appliedForce)
        pendulumEnv.render()
        time.sleep(0.01)  # throttle loop for roughly real-time viewing

    pendulumEnv.close()


#%%+++++++++++++++++++++++++++++++++++++++++++++
# Random-policy test: sample actions from the action space, step the
# environment, and reset whenever an episode terminates (disabled by default).
if False:
    import gym

    randomEnv = DoublePendulumEnv(5)
    randomEnv.useRenderer = True  # set True to show the visualization
    observation, info = randomEnv.reset(seed=42, return_info=True)

    startTime = time.time()
    for _ in range(1000):
        randomAction = randomEnv.action_space.sample()
        observation, reward, done, info = randomEnv.step(randomAction)
        randomEnv.render()
        if done:
            observation, info = randomEnv.reset(return_info=True)
    randomEnv.close()

    print('time spent=', time.time() - startTime)


#%%+++++++++++++++++++++++++++++++++++++++++++++++++
#reinforcement learning algorithm
#pip install gym[spaces]
#pip install pyglet

# Reinforcement learning with the Exudyn model: train an A2C agent using
# stable-baselines3, then replay the learned policy with rendering enabled.
if True:
    import gym

    from stable_baselines3 import A2C

    doLearning = True
    if doLearning:
        trainEnv = DoublePendulumEnv(1)
        trainEnv.useRenderer = False  # no rendering during training (speed)
        #trainEnv = gym.make('CartPole-v1')

        startTime = time.time()
        model = A2C('MlpPolicy', trainEnv, verbose=1)
        model.learn(total_timesteps=10000000)
        print('time spent=', time.time() - startTime)

    #%%++++++++++++++++++++++++
    # NOTE(review): if doLearning is False, 'model' is undefined below —
    # presumably a trained model is expected to exist in the session; confirm.
    testEnv = DoublePendulumEnv(10)  # allow larger threshold for testing
    testEnv.useRenderer = True
    obs = testEnv.reset()
    for _ in range(5000):
        action, _state = model.predict(obs, deterministic=True)
        obs, reward, done, info = testEnv.step(action)
        testEnv.render()
        time.sleep(0.01)  # throttle replay for viewing
        if done:
            obs = testEnv.reset()
        if testEnv.mbs.GetRenderEngineStopFlag():  # stop if user presses Q
            break

    testEnv.close()