openAIgymNLinkContinuous.py

You can view and download this file on GitHub: openAIgymNLinkContinuous.py

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file shows integration with OpenAI gym by testing an inverted quadruple pendulum example
#
# Author:    Johannes Gerstmayr
# edited by: Peter Manzl
# Date:      2022-05-25
#
# Copyright: This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

#NOTE: this model uses stable-baselines3 version 1.7.0, which requires:
#pip install exudyn
#pip install wheel==0.38.4 setuptools==66.0.0
#      => this downgrades setuptools to be able to install gym==0.21
#pip install stable-baselines3==1.7.0
#tested within a virtual environment: conda create -n venvP311 python=3.11 scipy matplotlib tqdm spyder-kernels=2.5 ipykernel psutil -y

# import sys
# sys.exudynFast = True #this variable is used to signal to load the fast exudyn module

import exudyn as exu
from exudyn.utilities import *
from exudyn.robotics import *
from exudyn.artificialIntelligence import *
import math

import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
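#note: KMP_DUPLICATE_LIB_OK suppresses the "OMP: Error #15" abort that may occur when
#several packages (e.g. torch and numpy/MKL) each load their own OpenMP runtime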

##%% here the number of links can be changed; note that for larger n the actuator
# force must be increased (see force_mag below)
nLinks = 2 #number of inverted links ...
flagContinuous = True  #True: continuous action space (SAC or A2C); False: discrete actions (A2C)


class InvertedNPendulumEnv(OpenAIGymInterfaceEnv):

    #**classFunction: OVERRIDE this function to create multibody system mbs and set up simulationSettings; call Assemble() at the end!
    #                 you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
    def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):

        #%%++++++++++++++++++++++++++++++++++++++++++++++
        #this model uses kwargs: thresholdFactor

        global nLinks
        global flagContinuous
        self.nLinks = nLinks
        self.flagContinuous = flagContinuous
        self.nTotalLinks = nLinks+1
        # self.steps_beyond_done = False
        thresholdFactor = 1

        gravity = 9.81
        self.length = 1.
        width = 0.1*self.length
        masscart = 1.
        massarm = 0.1
        total_mass = massarm + masscart
        armInertia = self.length**2*0.5*massarm

        # environment variables and force magnitudes are taken from the paper
        # "Reliability evaluation of reinforcement learning methods for mechanical
        # systems with increasing complexity", Manzl et al.

        self.force_mag = 40 # 10 #10*2 works for 7e7 steps; must be larger for triple pendulum to be more reactive ...
        if self.nLinks == 1:
            self.force_mag = 12
        if self.nLinks == 3:
            self.force_mag = self.force_mag*1.5
            thresholdFactor = 2
        if self.nLinks == 4:
            self.force_mag = self.force_mag*3
            thresholdFactor = 5

        if 'thresholdFactor' in kwargs:
            thresholdFactor = kwargs['thresholdFactor']

        self.stepUpdateTime = 0.02  # seconds between state updates
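        #each call to step() advances the multibody simulation by stepUpdateTime,
        #i.e. the agent interacts with the system at 50 Hz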

        background = GraphicsDataCheckerBoard(point= [0,0.5*self.length,-0.5*width],
                                              normal= [0,0,1], size=10, size2=6, nTiles=20, nTiles2=12)

        oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0],  #x-pos,y-pos,angle
                                           visualization=VObjectGround(graphicsData= [background])))

        L = self.length
        w = width
        gravity3D = [0.,-gravity,0]
        graphicsBaseList = [GraphicsDataOrthoCubePoint(size=[L*4, 0.8*w, 0.8*w], color=color4grey)] #rail

        newRobot = Robot(gravity=gravity3D,
                      base = RobotBase(visualization=VRobotBase(graphicsData=graphicsBaseList)),
                      tool = RobotTool(HT=HTtranslate([0,0.5*L,0]), visualization=VRobotTool(graphicsData=[
                          GraphicsDataOrthoCubePoint(size=[w, L, w], color=color4orange)])),
                      referenceConfiguration = []) #referenceConfiguration created with 0s automatically

        #cart:
        Jlink = RigidBodyInertia(masscart, np.diag([0.1*masscart,0.1*masscart,0.1*masscart]), [0,0,0])
        link = RobotLink(Jlink.Mass(), Jlink.COM(), Jlink.InertiaCOM(),
                         jointType='Px', preHT=HT0(),
                         # PDcontrol=(pControl, dControl),
                         visualization=VRobotLink(linkColor=color4lawngreen))
        newRobot.AddLink(link)

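        #each pendulum arm is a RobotLink with a revolute joint about the local
        #z-axis ('Rz'); for i > 0, preHT=HTtranslateY(L) places the joint at the tip
        #of the previous arm, chaining the links into an N-link pendulum on the cart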
        for i in range(self.nLinks):

            Jlink = RigidBodyInertia(massarm, np.diag([armInertia,0.1*armInertia,armInertia]), [0,0.5*L,0]) #only inertia_ZZ is important
            #Jlink = Jlink.Translated([0,0.5*L,0])
            preHT = HT0()
            if i > 0:
                preHT = HTtranslateY(L)

            link = RobotLink(Jlink.Mass(), Jlink.COM(), Jlink.InertiaCOM(),
                             jointType='Rz', preHT=preHT,
                             #PDcontrol=(pControl, dControl),
                             visualization=VRobotLink(linkColor=color4blue))
            newRobot.AddLink(link)

        self.Jlink = Jlink

        sKT = []
        dKT = newRobot.CreateKinematicTree(mbs)
        self.oKT = dKT['objectKinematicTree']
        self.nKT = dKT['nodeGeneric']
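        #the kinematic tree collects cart and arms in a single object with one generic
        #node self.nKT, whose coordinates are [cart position x, angles phi1..phiN]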

        # sKT+=[mbs.AddSensor(SensorKinematicTree(objectNumber=oKT, linkNumber=self.nTotalLinks-1,
        #                                         localPosition=locPos, storeInternal=True,
        #                                         outputVariableType=exu.OutputVariableType.Position))]

        #control force
        mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nKT, coordinate=0))
        self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=0.))

        #%%++++++++++++++++++++++++
        self.mbs.Assemble() #computes initial vector

        self.simulationSettings.timeIntegration.numberOfSteps = 1
        self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
        self.simulationSettings.timeIntegration.verboseMode = 0
        self.simulationSettings.solutionSettings.writeSolutionToFile = False
        #self.simulationSettings.timeIntegration.simulateInRealtime = True

        self.simulationSettings.timeIntegration.newton.useModifiedNewton = True

        self.SC.visualizationSettings.general.drawWorldBasis=True
        self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01
        self.SC.visualizationSettings.openGL.multiSampling=4

        #self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"

        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
        # angle at which to fail the episode;
        # these parameters are used in subfunctions
        self.theta_threshold_radians = thresholdFactor * 18 * 2 * math.pi / 360
        self.x_threshold = thresholdFactor*3.6
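        #an episode ends (done=True) as soon as the cart leaves +/- x_threshold or any
        #link angle exceeds +/- theta_threshold_radians (see Output2StateAndDone below)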

        #must return the state size
        stateSize = (self.nTotalLinks)*2 #number of states (positions/velocities) used by the learning algorithm

        return stateSize

    #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
    def SetupSpaces(self):

        high = np.array(
            [
                self.x_threshold * 2,
            ] +
            [
                self.theta_threshold_radians * 2,
            ] * self.nLinks +
            [
                np.finfo(np.float32).max,
            ] * self.nTotalLinks
            ,
            dtype=np.float32,
        )

        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
        #for Env:
        if flagContinuous:
            self.action_space = spaces.Box(low=np.array([-1], dtype=np.float32),
                                           high=np.array([1], dtype=np.float32), dtype=np.float32)
        else:
            self.action_space = spaces.Discrete(2)

        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
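        #observation bounds: positions are limited to twice the fail thresholds,
        #velocities are unbounded; e.g. for nLinks=2 the observation vector reads
        #[x, phi1, phi2, x_t, phi1_t, phi2_t]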
        #+++++++++++++++++++++++++++++++++++++++++++++++++++++


    #**classFunction: this function is overwritten to map the action given by the learning algorithm to the multibody system (environment)
    def MapAction2MBS(self, action):
        if flagContinuous:
            force = action[0] * self.force_mag
            # print(force)
        else:
            force = self.force_mag if action == 1 else -self.force_mag
        self.mbs.SetLoadParameter(self.lControl, 'load', force)

    #**classFunction: this function is overwritten to collect output of simulation and map to self.state tuple
    #**output: return bool done which contains information if system state is outside valid range
    def Output2StateAndDone(self):

        #+++++++++++++++++++++++++
        statesVector = self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates)
        statesVector_t = self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates_t)
        self.state = tuple(list(statesVector) + list(statesVector_t)) #sorting different from previous implementation
        cartPosX = statesVector[0]

        done = bool(
            cartPosX < -self.x_threshold
            or cartPosX > self.x_threshold
            or max(statesVector[1:self.nTotalLinks]) > self.theta_threshold_radians
            or min(statesVector[1:self.nTotalLinks]) < -self.theta_threshold_radians
            )
        # print("cartPosX: ", cartPosX, ", done: ", done)
        return done


    #**classFunction: OVERRIDE this function to map the current state to mbs initial values
    #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
    def State2InitialValues(self):
        #+++++++++++++++++++++++++++++++++++++++++++++
        initialValues = self.state[0:self.nTotalLinks]
        initialValues_t = self.state[self.nTotalLinks:]

        return [initialValues,initialValues_t]

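    #the reward is 1 when the cart is centered and the first link upright; it decreases
    #linearly with |x| and |phi1| and reaches 0 when both are at their fail thresholds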
    def getReward(self):
        reward = 1 - 0.5 * abs(self.state[0])/self.x_threshold - 0.5 * abs(self.state[1]) / self.theta_threshold_radians
        return reward

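    #note: the following step function implements the gym<0.26 API used by
    #stable-baselines3 1.7.0: it returns the 4-tuple (observation, reward, done, info);
    #newer gymnasium versions return 5 values with separate terminated/truncated flags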
    #**classFunction: openAI gym interface function which is called to compute one step
    def step(self, action):
        err_msg = f"{action!r} ({type(action)}) invalid"
        assert self.action_space.contains(action), err_msg
        assert self.state is not None, "Call reset before using step method."

        #++++++++++++++++++++++++++++++++++++++++++++++++++
        #main steps: write current state into mbs initial values, apply action, integrate
        [initialValues,initialValues_t] = self.State2InitialValues()
        self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
        self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)

        self.MapAction2MBS(action)

        #this may be time consuming for larger models!
        self.IntegrateStep()

        done = self.Output2StateAndDone()
        # print('state:', self.state, 'done: ', done)
        #++++++++++++++++++++++++++++++++++++++++++++++++++
        #compute reward and done
        if not done:
            reward = self.getReward()
        elif self.steps_beyond_done is None:
            # Arm1 just fell!
            self.steps_beyond_done = 0
            reward = self.getReward()
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state, dtype=np.float32), reward, done, {}



#%%+++++++++++++++++++++++++++++++++++++++++++++
if __name__ == '__main__': #this is only executed when the file is directly called in Python
    import time

    #%%++++++++++++++++++++++++++++++++++++++++++++++++++
    #use some learning algorithm:
    #pip install stable_baselines3
    from stable_baselines3 import A2C, SAC


    # here the model is created (either for a vectorized or a scalar environment), using SAC or A2C
    def getModel(flagContinuous, myEnv, modelType='SAC'):
        if flagContinuous:
            if modelType == 'SAC':
                model = SAC('MlpPolicy',
                       env=myEnv,
                       learning_rate=8e-4,
                       device='cpu', #usually cpu is faster for this size of networks
                       batch_size=128,
                       verbose=1)
            elif modelType == 'A2C':
                model = A2C('MlpPolicy',
                        myEnv,
                        device='cpu',
                        verbose=1)
            else:
                raise ValueError('getModel: modelType must be "SAC" or "A2C"')
        else:
            model = A2C('MlpPolicy',
                    myEnv,
                    device='cpu',  #usually cpu is faster for this size of networks
                    #device='cuda',  #optimal with 64 SubprocVecEnv, torch.set_num_threads(1)
                    verbose=1)
        return model
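    #note: SAC is an off-policy actor-critic method for continuous actions that learns
    #from a replay buffer; A2C is an on-policy method that also supports discrete
    #action spaces, which is why it is used for flagContinuous=False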


    #create model and do reinforcement learning
    modelName = 'openAIgym{}Link{}'.format(nLinks, 'Continuous'*flagContinuous)
    if False: #'scalar' environment:
        env = InvertedNPendulumEnv()
        #check if model runs:
        #env.SetSolver(exu.DynamicSolverType.ExplicitMidpoint)
        #env.SetSolver(exu.DynamicSolverType.RK44) #very accurate
        #env.TestModel(numberOfSteps=200, seed=42, sleepTime=0.02*0, useRenderer=True)
        model = getModel(flagContinuous, env)
        # env.useRenderer = True
        # env.render()

        ts = -time.time()
        model.learn(total_timesteps=20000)

        print('*** learning time total =',ts+time.time(),'***')

        #save learned model
        model.save("solution/" + modelName)
    else:
        import torch #stable-baselines3 is based on pytorch
        n_cores = max(1,int(os.cpu_count()/2)) #n_cores should be number of real cores (not threads)
        n_cores = 14 #override: manually set to the number of physical cores of the machine used
        torch.set_num_threads(n_cores) #seems to be ideal to match the size of subprocVecEnv

        print('using',n_cores,'cores')

        from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
        vecEnv = SubprocVecEnv([InvertedNPendulumEnv for i in range(n_cores)])
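        #SubprocVecEnv runs n_cores independent copies of the environment, each in its
        #own subprocess; the agent gathers experience from all of them in parallel,
        #which considerably speeds up the simulation-dominated rollouts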

        #main learning task; with 20 cores, 800000 steps take approximately 18 minutes
        #in the continuous case (SAC), while the discrete case (A2C) takes 2 minutes
        model = getModel(flagContinuous, vecEnv, modelType='SAC')

        ts = -time.time()
        print('start learning of agent with {}'.format(str(model.policy).split('(')[0]))
        # model.learn(total_timesteps=50000)
        model.learn(total_timesteps=int(1_000_000))
        print('*** learning time total =',ts+time.time(),'***')

        #save learned model
        model.save("solution/" + modelName)

    if True:
        #%%++++++++++++++++++++++++++++++++++++++++++++++++++
        #only load and test
        if False:
            if flagContinuous: #the continuous model was trained with SAC (see getModel)
                model = SAC.load("solution/" + modelName)
            else:
                model = A2C.load("solution/" + modelName)

        env = InvertedNPendulumEnv(thresholdFactor=5) #larger threshold for testing
        solutionFile='solution/learningCoordinates.txt'
        env.TestModel(numberOfSteps=1000, model=model, solutionFileName=solutionFile,
                      stopIfDone=False, useRenderer=False, sleepTime=0) #just compute solution file

        #++++++++++++++++++++++++++++++++++++++++++++++
        #visualize (and make animations) in exudyn:
        from exudyn.interactive import SolutionViewer
        env.SC.visualizationSettings.general.autoFitScene = False
        solution = LoadSolutionFile(solutionFile)
        SolutionViewer(env.mbs, solution) #loads solution file via name stored in mbs
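
#note: SolutionViewer opens an interactive render window and replays the trajectory
#stored in solutionFile; as the comment above indicates, it can also be used to
#produce animations of the learned controller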