openAIgymInterfaceTest.py

You can view and download this file on Github: openAIgymInterfaceTest.py

  1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  2# This is an EXUDYN example
  3#
  4# Details:  This file shows integration with OpenAI gym by testing a double pendulum example
  5#
  6# Author:   Johannes Gerstmayr
  7# Date:     2022-05-18
  8#
  9# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
 10#
 11#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 12
 13#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 14#to run this example the following worked:
 15#conda create -n venvGym python=3.10 numpy matplotlib spyder-kernels=2.4 ipykernel -y
 16#pip install gym[spaces]
 17#pip install stable-baselines3==1.7.0
 18#pip install exudyn
 19
 20import exudyn as exu
 21from exudyn.utilities import *
 22from exudyn.artificialIntelligence import *
 23import math
 24
 25# import sys
 26# sys.exit()
 27
 28#%%++++++++++++++++++++++++++++++++++++++++++++++
 29
 30#**class: interface class to set up Exudyn model which can be used as model in open AI gym;
 31#         see specific class functions which contain 'OVERRIDE' to integrate your model;
 32#         in general, set up a model with CreateMBS(), map state to initial values, initial values to state and action to mbs;
 33class InvertedDoublePendulumEnv(OpenAIGymInterfaceEnv):
 34
 35    #**classFunction: OVERRIDE this function to create multibody system mbs and setup simulationSettings; call Assemble() at the end!
 36    #                 you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
 37    def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):
 38
 39        #this model uses kwargs: thresholdFactor
 40        thresholdFactor = kwargs['thresholdFactor']
 41        gravity = 9.81
 42        self.length = 1.
 43        width = 0.1*self.length
 44        masscart = 1.
 45        massarm = 0.1
 46        total_mass = massarm + masscart
 47        armInertia = self.length**2*0.5*massarm
 48        self.force_mag = 10.0
 49        self.stepUpdateTime = 0.02  # seconds between state updates
 50
 51        background = GraphicsDataCheckerBoard(point= [0,0.5*self.length,-0.5*width],
 52                                              normal= [0,0,1], size=10, size2=6, nTiles=20, nTiles2=12)
 53
 54        oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0],  #x-pos,y-pos,angle
 55                                           visualization=VObjectGround(graphicsData= [background])))
 56        nGround=self.mbs.AddNode(NodePointGround())
 57
 58        gCart = GraphicsDataOrthoCubePoint(size=[0.5*self.length, width, width],
 59                                           color=color4dodgerblue)
 60        self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
 61        oCart = self.mbs.AddObject(RigidBody2D(physicsMass=masscart,
 62                                          physicsInertia=0.1*masscart, #not needed
 63                                          nodeNumber=self.nCart,
 64                                          visualization=VObjectRigidBody2D(graphicsData= [gCart])))
 65        mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
 66
 67        gArm1 = GraphicsDataOrthoCubePoint(size=[width, self.length, width], color=color4red)
 68        gArm1joint = GraphicsDataCylinder(pAxis=[0,-0.5*self.length,-0.6*width], vAxis=[0,0,1.2*width],
 69                                          radius=0.0625*self.length, color=color4darkgrey)
 70        self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
 71        oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=massarm,
 72                                          physicsInertia=armInertia, #not included in original paper
 73                                          nodeNumber=self.nArm1,
 74                                          visualization=VObjectRigidBody2D(graphicsData= [gArm1, gArm1joint])))
 75
 76        mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
 77        mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
 78        mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
 79
 80        gArm2 = GraphicsDataOrthoCubePoint(size=[width, self.length, width], color=color4red)
 81        self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
 82        oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=massarm,
 83                                          physicsInertia=armInertia, #not included in original paper
 84                                          nodeNumber=self.nArm2,
 85                                          visualization=VObjectRigidBody2D(graphicsData= [gArm2, gArm1joint])))
 86
 87        mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
 88        mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
 89
 90        mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
 91        mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
 92        mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
 93
 94        #gravity
 95        self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-masscart*gravity,0]))
 96        self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-massarm*gravity,0]))
 97        self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-massarm*gravity,0]))
 98
 99        #control force
100        self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
101
102        #joints and constraints:
103        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
104        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
105        self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
106
107
108
109
110        #%%++++++++++++++++++++++++
111        self.mbs.Assemble() #computes initial vector
112
113        self.simulationSettings.timeIntegration.numberOfSteps = 1
114        self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
115        self.simulationSettings.timeIntegration.verboseMode = 0
116        self.simulationSettings.solutionSettings.writeSolutionToFile = False
117        #self.simulationSettings.timeIntegration.simulateInRealtime = True
118
119        self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
120
121        self.SC.visualizationSettings.general.drawWorldBasis=True
122        self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
123        self.SC.visualizationSettings.openGL.multiSampling=4
124
125        #self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
126
127        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
128        # Angle at which to fail the episode
129        # these parameters are used in subfunctions
130        self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
131        self.x_threshold = thresholdFactor*2.4
132
133        #must return state size
134        stateSize = 6 #the number of states (position/velocity that are used by learning algorithm)
135        return stateSize
136
137    #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
138    def SetupSpaces(self):
139
140        high = np.array(
141            [
142                self.x_threshold * 2,
143                np.finfo(np.float32).max,
144                self.theta_threshold_radians * 2,
145                np.finfo(np.float32).max,
146                self.theta_threshold_radians * 2,
147                np.finfo(np.float32).max,
148            ],
149            dtype=np.float32,
150        )
151
152        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
153        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
154        #for Env:
155        self.action_space = spaces.Discrete(2)
156        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
157        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
158
159
160    #**classFunction: OVERRIDE this function to map the action given by learning algorithm to the multibody system, e.g. as a load parameter
161    def MapAction2MBS(self, action):
162        force = self.force_mag if action == 1 else -self.force_mag
163        self.mbs.SetLoadParameter(self.lControl, 'load', force)
164
165    #**classFunction: OVERRIDE this function to collect output of simulation and map to self.state tuple
166    #**output: return bool done which contains information if system state is outside valid range
167    def Output2StateAndDone(self):
168
169        #+++++++++++++++++++++++++
170        #compute some output:
171        cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
172        arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
173        arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
174        cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
175        arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
176        arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
177
178        #finally write updated state:
179        self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t)
180        #++++++++++++++++++++++++++++++++++++++++++++++++++
181
182        done = bool(
183            cartPosX < -self.x_threshold
184            or cartPosX > self.x_threshold
185            or arm1Angle < -self.theta_threshold_radians
186            or arm1Angle > self.theta_threshold_radians
187            or arm2Angle < -self.theta_threshold_radians
188            or arm2Angle > self.theta_threshold_radians
189        )
190        return done
191
192
193    #**classFunction: OVERRIDE this function to maps the current state to mbs initial values
194    #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
195    def State2InitialValues(self):
196        #+++++++++++++++++++++++++++++++++++++++++++++
197        #set specific initial state:
198        (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t) = self.state
199
200        initialValues = np.zeros(9) #model has 3*3 = 9  redundant states
201        initialValues_t = np.zeros(9)
202
203        #build redundant cordinates from self.state
204        initialValues[0] = xCart
205        initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
206        initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
207        initialValues[3+2] = phiArm1
208        initialValues[6+0] = xCart - 1.*self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
209        initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
210        initialValues[6+2] = phiArm2
211
212        initialValues_t[0] = xCart_t
213        initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
214        initialValues_t[3+1] = -0.5*self.length * sin(phiArm1)  * phiArm1_t
215        initialValues_t[3+2] = phiArm1_t
216        initialValues_t[6+0] = xCart_t - phiArm1_t*1.*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
217        initialValues_t[6+1] = -self.length * sin(phiArm1)  * phiArm1_t - 0.5*self.length * sin(phiArm2)  * phiArm2_t
218        initialValues_t[6+2] = phiArm2_t
219
220        return [initialValues,initialValues_t]
221
222
223
224
225
226
227
228
229
230#%%+++++++++++++++++++++++++++++++++++++++++++++
231if __name__ == '__main__': #this is only executed when file is direct called in Python
232    import time
233
234
235    #%%++++++++++++++++++++++++++++++++++++++++++++++++++
236    #create model and do reinforcement learning
237    env = InvertedDoublePendulumEnv(thresholdFactor=1)
238
239    #check if model runs:
240    #env.TestModel(numberOfSteps=1000, seed=42)
241
242    #use some learning algorithm:
243    from stable_baselines3 import A2C
244
245    #main learning task; 1e7 steps take 2-3 hours
246    ts = -time.time()
247    model = A2C('MlpPolicy', env, verbose=1)
248    model.learn(total_timesteps=1e4) #1e6 may work, 2e7 is quite robust!
249    print('*** learning time total =',ts+time.time(),'***')
250
251    #save learned model
252    model.save("openAIgymDoublePendulum1e4")
253    del model
254
255    #%%++++++++++++++++++++++++++++++++++++++++++++++++++
256    #only load and test
257    model = A2C.load("openAIgymDoublePendulum1e5")
258    env = InvertedDoublePendulumEnv(thresholdFactor=15) #larger threshold for testing
259    solutionFile='solution/learningCoordinates.txt'
260    env.TestModel(numberOfSteps=2500, model=model, solutionFileName=solutionFile,
261                  stopIfDone=False, useRenderer=True, sleepTime=0) #just compute solution file
262
263    #++++++++++++++++++++++++++++++++++++++++++++++
264    #visualize (and make animations) in exudyn:
265    from exudyn.interactive import SolutionViewer
266    env.SC.visualizationSettings.general.autoFitScene = False
267    solution = LoadSolutionFile(solutionFile)
268    env.mbs.SolutionViewer(solution) #loads solution file via name stored in mbs