openAIgymTriplePendulum.py

You can view and download this file on Github: openAIgymTriplePendulum.py

  1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  2# This is an EXUDYN example
  3#
  4# Details:  This file shows integration with OpenAI gym by testing a triple pendulum example
  5#
  6# Author:   Johannes Gerstmayr
  7# Date:     2022-05-18
  8#
  9# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
 10#
 11#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 12
 13
 14import exudyn as exu
 15from exudyn.utilities import *
 16from exudyn.artificialIntelligence import *
 17import math
 18
 19
 20class InvertedTriplePendulumEnv(OpenAIGymInterfaceEnv):
 21
 22    #**classFunction: OVERRIDE this function to create multibody system mbs and setup simulationSettings; call Assemble() at the end!
 23    #                 you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
 24    def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):
 25
 26        #%%++++++++++++++++++++++++++++++++++++++++++++++
 27        #this model uses kwargs: thresholdFactor
 28        thresholdFactor = 3
 29        if 'thresholdFactor' in kwargs:
 30            thresholdFactor = kwargs['thresholdFactor']
 31
 32        gravity = 9.81
 33        self.length = 1.
 34        width = 0.1*self.length
 35        masscart = 1.
 36        massarm = 0.1
 37        total_mass = massarm + masscart
 38        armInertia = self.length**2*0.5*massarm
 39        self.force_mag = 10.0*2 #must be larger for triple pendulum to be more reactive ...
 40        self.stepUpdateTime = 0.02  # seconds between state updates
 41
 42        background = GraphicsDataCheckerBoard(point= [0,0.5*self.length,-0.5*width],
 43                                              normal= [0,0,1], size=10, size2=6, nTiles=20, nTiles2=12)
 44
 45        oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0],  #x-pos,y-pos,angle
 46                                           visualization=VObjectGround(graphicsData= [background])))
 47        nGround=self.mbs.AddNode(NodePointGround())
 48
 49        gCart = GraphicsDataOrthoCubePoint(size=[0.5*self.length, width, width],
 50                                           color=color4dodgerblue)
 51        self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
 52        oCart = self.mbs.AddObject(RigidBody2D(physicsMass=masscart,
 53                                          physicsInertia=0.1*masscart, #not needed
 54                                          nodeNumber=self.nCart,
 55                                          visualization=VObjectRigidBody2D(graphicsData= [gCart])))
 56        mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
 57
 58        gArm1 = GraphicsDataOrthoCubePoint(size=[width, self.length, width], color=color4red)
 59        gArm1joint = GraphicsDataCylinder(pAxis=[0,-0.5*self.length,-0.6*width], vAxis=[0,0,1.2*width],
 60                                          radius=0.0625*self.length, color=color4darkgrey)
 61        self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
 62        oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=massarm,
 63                                          physicsInertia=armInertia, #not included in original paper
 64                                          nodeNumber=self.nArm1,
 65                                          visualization=VObjectRigidBody2D(graphicsData= [gArm1, gArm1joint])))
 66
 67        mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
 68        mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
 69        mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
 70
 71        gArm2 = GraphicsDataOrthoCubePoint(size=[width, self.length, width], color=color4red)
 72        self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
 73        oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=massarm,
 74                                          physicsInertia=armInertia, #not included in original paper
 75                                          nodeNumber=self.nArm2,
 76                                          visualization=VObjectRigidBody2D(graphicsData= [gArm2, gArm1joint])))
 77
 78        mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
 79        mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
 80        mArm2JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0, 0.5*self.length,0]))
 81
 82        gArm3 = GraphicsDataOrthoCubePoint(size=[width, self.length, width], color=color4red)
 83        self.nArm3 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,2.5*self.length,0]));
 84        oArm3 = self.mbs.AddObject(RigidBody2D(physicsMass=massarm,
 85                                          physicsInertia=armInertia, #not included in original paper
 86                                          nodeNumber=self.nArm3,
 87                                          visualization=VObjectRigidBody2D(graphicsData= [gArm3, gArm1joint])))
 88
 89        mArm3COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm3))
 90        mArm3Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm3, localPosition=[0,-0.5*self.length,0]))
 91
 92        mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
 93        mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
 94        mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
 95
 96        #gravity
 97        self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-masscart*gravity,0]))
 98        self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-massarm*gravity,0]))
 99        self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-massarm*gravity,0]))
100        self.mbs.AddLoad(Force(markerNumber=mArm3COM, loadVector=[0,-massarm*gravity,0]))
101
102        #control force
103        self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
104
105        #joints and constraints:
106        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
107        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
108        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm2JointB, mArm3Joint]))
109
110        self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
111
112
113
114
115        #%%++++++++++++++++++++++++
116        self.mbs.Assemble() #computes initial vector
117
118        self.simulationSettings.timeIntegration.numberOfSteps = 1
119        self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
120        self.simulationSettings.timeIntegration.verboseMode = 0
121        self.simulationSettings.solutionSettings.writeSolutionToFile = False
122        #self.simulationSettings.timeIntegration.simulateInRealtime = True
123
124        self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
125
126        self.SC.visualizationSettings.general.drawWorldBasis=True
127        self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
128        self.SC.visualizationSettings.openGL.multiSampling=4
129
130        #self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
131
132        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
133        # Angle at which to fail the episode
134        # these parameters are used in subfunctions
135        self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
136        self.x_threshold = thresholdFactor*2.4
137
138        #must return state size
139        stateSize = 8 #the number of states (position/velocity that are used by learning algorithm)
140        return stateSize
141
142    #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
143    def SetupSpaces(self):
144
145        high = np.array(
146            [
147                self.x_threshold * 2,
148                np.finfo(np.float32).max,
149                self.theta_threshold_radians * 2,
150                np.finfo(np.float32).max,
151                self.theta_threshold_radians * 2,
152                np.finfo(np.float32).max,
153                self.theta_threshold_radians * 2,
154                np.finfo(np.float32).max,
155            ],
156            dtype=np.float32,
157        )
158
159        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
160        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
161        #for Env:
162        self.action_space = spaces.Discrete(2)
163        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
164        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
165
166
167    #**classFunction: OVERRIDE this function to map the action given by learning algorithm to the multibody system, e.g. as a load parameter
168    def MapAction2MBS(self, action):
169        force = self.force_mag if action == 1 else -self.force_mag
170        self.mbs.SetLoadParameter(self.lControl, 'load', force)
171
172    #**classFunction: OVERRIDE this function to collect output of simulation and map to self.state tuple
173    #**output: return bool done which contains information if system state is outside valid range
174    def Output2StateAndDone(self):
175
176        #+++++++++++++++++++++++++
177        #compute some output:
178        cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
179        arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
180        arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
181        arm3Angle = self.mbs.GetNodeOutput(self.nArm3, variableType=exu.OutputVariableType.Coordinates)[2]
182        cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
183        arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
184        arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
185        arm3Angle_t = self.mbs.GetNodeOutput(self.nArm3, variableType=exu.OutputVariableType.Coordinates_t)[2]
186
187        #finally write updated state:
188        self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t, arm3Angle, arm3Angle_t)
189        #++++++++++++++++++++++++++++++++++++++++++++++++++
190
191        done = bool(
192            cartPosX < -self.x_threshold
193            or cartPosX > self.x_threshold
194            or arm1Angle < -self.theta_threshold_radians
195            or arm1Angle > self.theta_threshold_radians
196            or arm2Angle < -self.theta_threshold_radians
197            or arm2Angle > self.theta_threshold_radians
198            or arm3Angle < -self.theta_threshold_radians
199            or arm3Angle > self.theta_threshold_radians
200        )
201        return done
202
203
204    #**classFunction: OVERRIDE this function to maps the current state to mbs initial values
205    #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
206    def State2InitialValues(self):
207        #+++++++++++++++++++++++++++++++++++++++++++++
208        #set specific initial state:
209        (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t, phiArm3, phiArm3_t) = self.state
210
211        initialValues = np.zeros(12) #model has 4*3 redundant states
212        initialValues_t = np.zeros(12)
213
214        #build redundant cordinates from self.state
215        initialValues[0] = xCart
216        initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
217        initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
218        initialValues[3+2] = phiArm1
219
220        initialValues[6+0] = xCart - self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
221        initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
222        initialValues[6+2] = phiArm2
223
224        initialValues[9+0] = xCart - self.length * sin(phiArm1) - self.length * sin(phiArm2) - 0.5*self.length * sin(phiArm3)
225        initialValues[9+1] = self.length * cos(phiArm1) + self.length * cos(phiArm2) + 0.5*self.length * cos(phiArm3) - 2.5*self.length
226        initialValues[9+2] = phiArm3
227
228        initialValues_t[0] = xCart_t
229        initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
230        initialValues_t[3+1] = -0.5*self.length * sin(phiArm1)  * phiArm1_t
231        initialValues_t[3+2] = phiArm1_t
232
233        initialValues_t[6+0] = xCart_t - phiArm1_t*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
234        initialValues_t[6+1] = -self.length * sin(phiArm1)  * phiArm1_t - 0.5*self.length * sin(phiArm2)  * phiArm2_t
235        initialValues_t[6+2] = phiArm2_t
236
237        initialValues_t[9+0] = xCart_t - phiArm1_t*self.length * cos(phiArm1) - phiArm2_t*self.length * cos(phiArm2) - phiArm3_t*0.5*self.length * cos(phiArm3)
238        initialValues_t[9+1] = -self.length * sin(phiArm1)  * phiArm1_t - self.length * sin(phiArm2)  * phiArm2_t - 0.5*self.length * sin(phiArm3)  * phiArm3_t
239        initialValues_t[9+2] = phiArm3_t
240
241        return [initialValues,initialValues_t]
242
243
244
245
246
247
248
249
250
251#%%+++++++++++++++++++++++++++++++++++++++++++++
252if __name__ == '__main__': #this is only executed when file is direct called in Python
253    import time
254
255
256    #%%++++++++++++++++++++++++++++++++++++++++++++++++++
257    #use some learning algorithm:
258    #pip install stable_baselines3
259    from stable_baselines3 import A2C
260
261
262        #create model and do reinforcement learning
263    if False: #'scalar' environment:
264        env = InvertedTriplePendulumEnv() #(thresholdFactor=2)
265        #check if model runs:
266        # env.TestModel(numberOfSteps=1000, seed=42)
267
268        #main learning task; 1e7 steps take 2-3 hours
269        model = A2C('MlpPolicy',
270                    env,
271                    device='cpu',  #usually cpu is faster for this size of networks
272                    #device='cuda',  #usually cpu is faster for this size of networks
273                    verbose=1)
274        ts = -time.time()
275        model.learn(total_timesteps=2000)
276        #model.learn(total_timesteps=2e7)  #not sufficient ...
277        print('*** learning time total =',ts+time.time(),'***')
278
279        #save learned model
280        model.save("openAIgymTriplePendulum1e7d")
281    else:
282        #create vectorized environment, which is much faster for time
283        #  consuming environments (otherwise learning algo may be the bottleneck)
284        #  https://www.programcreek.com/python/example/121472/stable_baselines.common.vec_env.SubprocVecEnv
285        import torch #stable-baselines3 is based on pytorch
286        n_cores=14 #should be number of real cores (not threads)
287        torch.set_num_threads(n_cores) #seems to be ideal to match the size of subprocVecEnv
288
289        #test problem with nSteps=400 in time integration
290        #1 core: learning time total = 28.73 seconds
291        #4 core: learning time total = 8.10
292        #8 core: learning time total = 4.48
293        #14 core:learning time total = 3.77
294        #standard DummyVecEnv version: 15.14 seconds
295        print('using',n_cores,'cores')
296
297        from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
298        vecEnv = SubprocVecEnv([InvertedTriplePendulumEnv for i in range(n_cores)])
299
300
301        #main learning task; 1e7 steps take 2-3 hours
302        model = A2C('MlpPolicy',
303                    vecEnv,
304                    device='cpu',  #usually cpu is faster for this size of networks
305                    #device='cuda',  #optimal with 64 SubprocVecEnv, torch.set_num_threads(1)
306                    verbose=1)
307        ts = -time.time()
308        print('start learning...')
309        #model.learn(total_timesteps=50000)
310        model.learn(total_timesteps=7e7)  #not sufficient ...
311        print('*** learning time total =',ts+time.time(),'***')
312
313        #save learned model
314        model.save("openAIgymTriplePendulum1e7d")
315
316    if False:
317        #%%++++++++++++++++++++++++++++++++++++++++++++++++++
318        #only load and test
319        model = A2C.load("openAIgymTriplePendulum1e7")
320        env = InvertedTriplePendulumEnv(thresholdFactor=15) #larger threshold for testing
321        solutionFile='solution/learningCoordinates.txt'
322        env.TestModel(numberOfSteps=2500, model=model, solutionFileName=solutionFile,
323                      stopIfDone=False, useRenderer=False, sleepTime=0) #just compute solution file
324
325        #++++++++++++++++++++++++++++++++++++++++++++++
326        #visualize (and make animations) in exudyn:
327        from exudyn.interactive import SolutionViewer
328        env.SC.visualizationSettings.general.autoFitScene = False
329        solution = LoadSolutionFile(solutionFile)
330        SolutionViewer(env.mbs, solution) #loads solution file via name stored in mbs