openAIgymNLinkContinuous.py

You can view and download this file on Github: openAIgymNLinkContinuous.py

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file shows the integration of Exudyn with OpenAI gym by
#           training an inverted n-link pendulum on a cart (n configurable below)
#
# Author:    Johannes Gerstmayr
# edited by: Peter Manzl
# Date:      2022-05-25
#
# Copyright: This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# import sys
# sys.exudynFast = True #this variable is used to signal to load the fast exudyn module

import exudyn as exu
from exudyn.utilities import *
from exudyn.robotics import *
from exudyn.artificialIntelligence import *
import math

import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

#%% here the number of links can be changed; note that larger n requires a
#   larger actuator force, which is set accordingly below
nLinks = 2 #number of inverted links on the cart
flagContinuous = True #True: continuous action space (SAC); False: discrete actions (A2C)
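# the continuous case uses a Box action space in [-1,1], scaled by force_mag in
# MapAction2MBS, and is trained with SAC; the discrete case uses Discrete(2)
# (mapped to +/- force_mag) and is trained with A2C (see getModel below)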


class InvertedNPendulumEnv(OpenAIGymInterfaceEnv):

    #**classFunction: OVERRIDE this function to create multibody system mbs and setup simulationSettings; call Assemble() at the end!
    #                 you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
    def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):

        #%%++++++++++++++++++++++++++++++++++++++++++++++
        #this model uses kwargs: thresholdFactor

        global nLinks
        global flagContinuous
        self.nLinks = nLinks
        self.flagContinuous = flagContinuous
        self.nTotalLinks = nLinks+1 #pendulum links + cart
        # self.steps_beyond_done = False
        thresholdFactor = 1

        gravity = 9.81
        self.length = 1.
        width = 0.1*self.length
        masscart = 1.
        massarm = 0.1
        total_mass = massarm + masscart
        armInertia = self.length**2*0.5*massarm

        # environment variables and force magnitudes are taken from the paper
        # "Reliability evaluation of reinforcement learning methods for
        # mechanical systems with increasing complexity", Manzl et al.

        self.force_mag = 40 #base value for nLinks=2; must be larger for more links to keep the system reactive
        if self.nLinks == 1:
            self.force_mag = 12
        if self.nLinks == 3:
            self.force_mag = self.force_mag*1.5
            thresholdFactor = 2
        if self.nLinks == 4:
            self.force_mag = self.force_mag*3
            thresholdFactor = 5

        if 'thresholdFactor' in kwargs:
            thresholdFactor = kwargs['thresholdFactor']

        self.stepUpdateTime = 0.02  # seconds between state updates
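        # each call of step() integrates the multibody system over stepUpdateTime,
        # i.e., the agent acts with a 50 Hz sampling rate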

        background = GraphicsDataCheckerBoard(point= [0,0.5*self.length,-0.5*width],
                                              normal= [0,0,1], size=10, size2=6, nTiles=20, nTiles2=12)

        oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0],
                                           visualization=VObjectGround(graphicsData= [background])))


        L = self.length
        w = width
        gravity3D = [0.,-gravity,0]
        graphicsBaseList = [GraphicsDataOrthoCubePoint(size=[L*4, 0.8*w, 0.8*w], color=color4grey)] #rail

        newRobot = Robot(gravity=gravity3D,
                      base = RobotBase(visualization=VRobotBase(graphicsData=graphicsBaseList)),
                      tool = RobotTool(HT=HTtranslate([0,0.5*L,0]), visualization=VRobotTool(graphicsData=[
                          GraphicsDataOrthoCubePoint(size=[w, L, w], color=color4orange)])),
                      referenceConfiguration = []) #referenceConfiguration created with 0s automatically

        #cart: prismatic joint (Px) along the rail
        Jlink = RigidBodyInertia(masscart, np.diag([0.1*masscart,0.1*masscart,0.1*masscart]), [0,0,0])
        link = RobotLink(Jlink.Mass(), Jlink.COM(), Jlink.InertiaCOM(),
                         jointType='Px', preHT=HT0(),
                         # PDcontrol=(pControl, dControl),
                         visualization=VRobotLink(linkColor=color4lawngreen))
        newRobot.AddLink(link)



        #pendulum links: revolute joints (Rz), each shifted by L along the previous link
        for i in range(self.nLinks):

            Jlink = RigidBodyInertia(massarm, np.diag([armInertia,0.1*armInertia,armInertia]), [0,0.5*L,0]) #only inertia_ZZ is important
            #Jlink = Jlink.Translated([0,0.5*L,0])
            preHT = HT0()
            if i > 0:
                preHT = HTtranslateY(L)

            link = RobotLink(Jlink.Mass(), Jlink.COM(), Jlink.InertiaCOM(),
                             jointType='Rz', preHT=preHT,
                             #PDcontrol=(pControl, dControl),
                             visualization=VRobotLink(linkColor=color4blue))
            newRobot.AddLink(link)

        self.Jlink = Jlink


        sKT = []
        dKT = newRobot.CreateKinematicTree(mbs)
        self.oKT = dKT['objectKinematicTree']
        self.nKT = dKT['nodeGeneric']

        # sKT+=[mbs.AddSensor(SensorKinematicTree(objectNumber=oKT, linkNumber=self.nTotalLinks-1,
        #                                         localPosition=locPos, storeInternal=True,
        #                                         outputVariableType=exu.OutputVariableType.Position))]

        #control force acting on the cart's translational (x) coordinate
        mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nKT, coordinate=0))
        self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=0.))
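        # this scalar load is the only actuation of the (underactuated) system;
        # its value is set in every step by MapAction2MBS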

        #%%++++++++++++++++++++++++
        self.mbs.Assemble() #computes initial vector

        self.simulationSettings.timeIntegration.numberOfSteps = 1
        self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
        self.simulationSettings.timeIntegration.verboseMode = 0
        self.simulationSettings.solutionSettings.writeSolutionToFile = False
        #self.simulationSettings.timeIntegration.simulateInRealtime = True

        self.simulationSettings.timeIntegration.newton.useModifiedNewton = True

        self.SC.visualizationSettings.general.drawWorldBasis = True
        self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01
        self.SC.visualizationSettings.openGL.multiSampling = 4

        #self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"

        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
        # angle at which to fail the episode; these parameters are used in subfunctions
        self.theta_threshold_radians = thresholdFactor * 18 * 2 * math.pi / 360 #18 degrees, scaled by thresholdFactor
        self.x_threshold = thresholdFactor*3.6

        #must return the state size
        stateSize = self.nTotalLinks*2 #number of states (positions and velocities) used by the learning algorithm

        return stateSize

    #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
    def SetupSpaces(self):

        high = np.array(
            [
                self.x_threshold * 2,
            ] +
            [
                self.theta_threshold_radians * 2,
            ] * self.nLinks +
            [
                np.finfo(np.float32).max,
            ] * self.nTotalLinks,
            dtype=np.float32,
        )
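        # observation layout: [x_cart, phi_1..phi_n, x_cart_t, phi_1_t..phi_n_t];
        # positions are bounded at twice the done-thresholds (as in the gym cartpole
        # example), velocities are unbounded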


        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
        #for Env:
        if flagContinuous:
            self.action_space = spaces.Box(low=np.array([-1], dtype=np.float32),
                                           high=np.array([1], dtype=np.float32), dtype=np.float32)
        else:
            self.action_space = spaces.Discrete(2)

        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        #+++++++++++++++++++++++++++++++++++++++++++++++++++++


    #**classFunction: this function is overridden to map the action given by the learning algorithm to the multibody system (environment)
    def MapAction2MBS(self, action):
        if flagContinuous:
            force = action[0] * self.force_mag
            # print(force)
        else:
            force = self.force_mag if action == 1 else -self.force_mag
        self.mbs.SetLoadParameter(self.lControl, 'load', force)
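        # e.g., continuous: action = [0.5] gives force = 0.5*force_mag;
        # discrete: action 1 gives +force_mag, action 0 gives -force_mag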

    #**classFunction: this function is overridden to collect the output of the simulation and map it to the self.state tuple
    #**output: return bool done which contains information if system state is outside valid range
    def Output2StateAndDone(self):

        #+++++++++++++++++++++++++
        statesVector = self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates)
        statesVector_t = self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates_t)
        self.state = tuple(list(statesVector) + list(statesVector_t)) #sorting different from previous implementation
        cartPosX = statesVector[0]

        done = bool(
            cartPosX < -self.x_threshold
            or cartPosX > self.x_threshold
            or max(statesVector[1:self.nTotalLinks]) > self.theta_threshold_radians
            or min(statesVector[1:self.nTotalLinks]) < -self.theta_threshold_radians
            )
        # print("cartPosX: ", cartPosX, ", done: ", done)
        return done


    #**classFunction: OVERRIDE this function to map the current state to mbs initial values
    #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
    def State2InitialValues(self):
        #+++++++++++++++++++++++++++++++++++++++++++++
        initialValues = self.state[0:self.nTotalLinks]
        initialValues_t = self.state[self.nTotalLinks:]

        return [initialValues, initialValues_t]

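    #**classFunction: compute the reward for the current state; 1 for the upright,
    #                 centered configuration, decreasing linearly with cart offset and
    #                 first link angle towards 0 at the respective thresholds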
    def getReward(self):
        reward = 1 - 0.5*abs(self.state[0])/self.x_threshold - 0.5*abs(self.state[1])/self.theta_threshold_radians
        return reward

    #**classFunction: openAI gym interface function which is called to compute one step
    def step(self, action):
        err_msg = f"{action!r} ({type(action)}) invalid"
        assert self.action_space.contains(action), err_msg
        assert self.state is not None, "Call reset before using step method."

        #++++++++++++++++++++++++++++++++++++++++++++++++++
        #main steps:
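        # the stored state is written back to the mbs as initial conditions, so that
        # each RL step restarts the time integration from the current state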
        [initialValues, initialValues_t] = self.State2InitialValues()
        self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
        self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)

        self.MapAction2MBS(action)

        #this may be time consuming for larger models!
        self.IntegrateStep()

        done = self.Output2StateAndDone()
        # print('state:', self.state, 'done: ', done)
        #++++++++++++++++++++++++++++++++++++++++++++++++++
        #compute reward and done
        if not done:
            reward = self.getReward()
        elif self.steps_beyond_done is None:
            # Arm1 just fell!
            self.steps_beyond_done = 0
            reward = self.getReward()
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state, dtype=np.float32), reward, done, {}




#%%+++++++++++++++++++++++++++++++++++++++++++++
if __name__ == '__main__': #this is only executed when the file is directly called in Python
    import time

    #%%++++++++++++++++++++++++++++++++++++++++++++++++++
    #use some learning algorithm:
    #pip install stable_baselines3
    from stable_baselines3 import A2C, SAC


    # create the model (either for a vectorized or scalar environment), using SAC or A2C
    def getModel(flagContinuous, myEnv, modelType='SAC'):
        if flagContinuous:
            if modelType == 'SAC':
                model = SAC('MlpPolicy',
                       env=myEnv,
                       learning_rate=8e-4,
                       device='cpu', #usually cpu is faster for this size of networks
                       batch_size=128,
                       verbose=1)
            elif modelType == 'A2C':
                model = A2C('MlpPolicy',
                        myEnv,
                        device='cpu',
                        verbose=1)
            else:
                raise ValueError('Please specify a valid modelType (SAC or A2C).')
        else:
            model = A2C('MlpPolicy',
                    myEnv,
                    device='cpu',  #usually cpu is faster for this size of networks
                    #device='cuda',  #optimal with 64 SubprocVecEnv, torch.set_num_threads(1)
                    verbose=1)
        return model
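    # e.g., model = getModel(flagContinuous, env, modelType='A2C') trains the
    # continuous case with A2C instead of SAC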


    #create model and do reinforcement learning
    modelName = 'openAIgym{}Link{}'.format(nLinks, 'Continuous'*flagContinuous)
    if False: #'scalar' environment:
        env = InvertedNPendulumEnv()
        #check if model runs:
        #env.SetSolver(exu.DynamicSolverType.ExplicitMidpoint)
        #env.SetSolver(exu.DynamicSolverType.RK44) #very accurate
        #env.TestModel(numberOfSteps=200, seed=42, sleepTime=0.02*0, useRenderer=True)
        model = getModel(flagContinuous, env)
        # env.useRenderer = True
        # env.render()

        ts = -time.time()
        model.learn(total_timesteps=20000)

        print('*** learning time total =', ts+time.time(), '***')

        #save learned model

        model.save("solution/" + modelName)
    else:
        import torch #stable-baselines3 is based on pytorch
        n_cores = max(1, int(os.cpu_count()/2 - 1)) #should be the number of real cores (not threads)
        torch.set_num_threads(n_cores) #seems to be ideal to match the size of SubprocVecEnv

        print('using', n_cores, 'cores')

        from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
        vecEnv = SubprocVecEnv([InvertedNPendulumEnv for i in range(n_cores)])
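        # each worker process creates its own InvertedNPendulumEnv (and thus its own
        # mbs); for debugging, DummyVecEnv can be used instead to run in one process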


        #main learning task; with 20 cores, 800 000 steps take approximately
        #18 minutes in the continuous case (SAC); the discrete case (A2C) takes 2 minutes
        model = getModel(flagContinuous, vecEnv, modelType='SAC')

        ts = -time.time()
        print('start learning of agent with {}'.format(str(model.policy).split('(')[0]))
        # model.learn(total_timesteps=50000)
        model.learn(total_timesteps=1_000_000)
        print('*** learning time total =', ts+time.time(), '***')

        #save learned model
        model.save("solution/" + modelName)

    if True:
        #%%++++++++++++++++++++++++++++++++++++++++++++++++++
        #only load and test
        if False:
            if flagContinuous: #SAC is used for the continuous case above
                model = SAC.load("solution/" + modelName)
            else:
                model = A2C.load("solution/" + modelName)

        env = InvertedNPendulumEnv(thresholdFactor=5) #larger threshold for testing
        solutionFile = 'solution/learningCoordinates.txt'
        env.TestModel(numberOfSteps=1000, model=model, solutionFileName=solutionFile,
                      stopIfDone=False, useRenderer=False, sleepTime=0) #just compute solution file

        #++++++++++++++++++++++++++++++++++++++++++++++
        #visualize (and make animations) in exudyn:
        from exudyn.interactive import SolutionViewer
        env.SC.visualizationSettings.general.autoFitScene = False
        solution = LoadSolutionFile(solutionFile)
        SolutionViewer(env.mbs, solution) #loads the stored solution into the SolutionViewer
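
        # to watch the trained agent interactively instead (a sketch, using the same
        # TestModel arguments as above):
        # env.TestModel(numberOfSteps=500, model=model, stopIfDone=False,
        #               useRenderer=True, sleepTime=0.02)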