testGymCartpole.py

You can view and download this file on Github: testGymCartpole.py

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file shows integration with OpenAI gym by testing a cart-pole example
#           Needs input file testGymCartpoleEnv.py, which defines the model in the gym environment
#           Works well with Python 3.8!
#
# Author:   Johannes Gerstmayr, Grzegorz Orzechowski
# Date:     2022-05-17
#
# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

#+++++++++++++++++++++++++++++++++++++++++++++++++
#conda create -n venvGym python=3.10 numpy matplotlib spyder-kernels=2.4 ipykernel -y
#pip install gym[spaces]
#pip install stable-baselines3==1.7.0
#pip install exudyn

import time
from math import sin, cos
from testGymCartpoleEnv import CartPoleEnv

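#note (added sketch, not part of the original example): as used below, the imported
#CartPoleEnv is assumed to behave like a gym.Env (reset/step/render/close) and to provide:
#   CartPoleEnv(thresholdFactor=..., forceFactor=...)  #constructor arguments
#   env.useRenderer                                     #flag to switch the Exudyn renderer on/off
#   env.integrateStep(force)                            #advance the Exudyn model one step with a prescribed force
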
if True: #test the model by just integrating in Exudyn and applying a force

    env = CartPoleEnv()
    env.useRenderer = False #set this true to show visualization
    observation, info = env.reset(seed=42, return_info=True)
    ts = -time.time()

    for i in range(10000):
        force = 0.1*(cos(i/50))
        env.integrateStep(force)
        # action = env.action_space.sample()
        # observation, reward, done, info = env.step(action)
        # if done:
        #     observation, info = env.reset(return_info=True)
        # env.render()
        # time.sleep(0.01)
    ts = ts+time.time()
    print('measured max. step FPS:', int(10000/ts))
    env.close()


#+++++++++++++++++++++++++++++++++++++++++++++++++
#reinforcement learning algorithm

if True: #do some reinforcement learning with exudyn model
    import gym

    env = CartPoleEnv(thresholdFactor=5, forceFactor=2)

    env.useRenderer = False

    from stable_baselines3 import A2C
    model = A2C('MlpPolicy', env,
                device='cpu',  #usually cpu is faster for this size of networks
                verbose=1)
    ts = -time.time()
    model.learn(total_timesteps=10000)
    print('time spent=', ts+time.time())

    model.save('solution/cartpoleLearn')
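
    #sketch (not part of the original example): the trained agent could later be reloaded
    #without retraining; A2C.load is the standard stable-baselines3 call for this:
    if False:
        model = A2C.load('solution/cartpoleLearn', env=env)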

    #%%+++++++++++++++++++++++++++++++++++++++
    env = CartPoleEnv(10) #test with larger threshold
    env.useRenderer = True
    obs = env.reset()
    for i in range(100):
        action, _state = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            obs = env.reset()
        time.sleep(0.05) #to see results ...

    env.close()
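

#+++++++++++++++++++++++++++++++++++++++++++++++++
#sketch (not part of the original example): a quantitative check of the trained policy;
#evaluate_policy from stable-baselines3 runs several episodes and returns mean/std reward
if False:
    from stable_baselines3.common.evaluation import evaluate_policy
    env = CartPoleEnv() #fresh environment with default thresholds
    env.useRenderer = False
    meanReward, stdReward = evaluate_policy(model, env, n_eval_episodes=10)
    print('mean episode reward=', meanReward, ', std=', stdReward)
    env.close()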