openAIgymNLinkContinuous.py
You can view and download this file on Github: openAIgymNLinkContinuous.py
1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2# This is an EXUDYN example
3#
4# Details: This file shows integration with OpenAI gym by testing a inverted quadruple pendulum example
5#
6# Author: Johannes Gerstmayr
7# edited by: Peter Manzl
8# Date: 2022-05-25
9#
10# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
11#
12#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
13
14#NOTE: this model is using the stable-baselines3 version 1.7.0, which requires:
15#pip install exudyn
16#pip install pip install wheel==0.38.4 setuptools==66.0.0
17# => this downgrades setuptools to be able to install gym==0.21
18#pip install stable-baselines3==1.7.0
19#tested within a virtual environment: conda create -n venvP311 python=3.11 scipy matplotlib tqdm spyder-kernels=2.5 ipykernel psutil -y
20
21# import sys
22# sys.exudynFast = True #this variable is used to signal to load the fast exudyn module
23
24import exudyn as exu
25from exudyn.utilities import *
26from exudyn.robotics import *
27from exudyn.artificialIntelligence import *
28import math
29
30import os
31os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
32
33##%% here the number of links can be changed. Note that for n < 3 the actuator
34# force might need to be increased
35nLinks = 2 #number of inverted links ...
36flagContinuous = True # to choose for agent between A2C and SAC
37
38
39class InvertedNPendulumEnv(OpenAIGymInterfaceEnv):
40
41 #**classFunction: OVERRIDE this function to create multibody system mbs and setup simulationSettings; call Assemble() at the end!
42 # you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
43 def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):
44
45 #%%++++++++++++++++++++++++++++++++++++++++++++++
46 #this model uses kwargs: thresholdFactor
47
48 global nLinks
49 global flagContinuous
50 self.nLinks = nLinks
51 self.flagContinuous = flagContinuous
52 self.nTotalLinks = nLinks+1
53 # self.steps_beyond_done = False
54 thresholdFactor = 1
55
56 gravity = 9.81
57 self.length = 1.
58 width = 0.1*self.length
59 masscart = 1.
60 massarm = 0.1
61 total_mass = massarm + masscart
62 armInertia = self.length**2*0.5*massarm
63
64 # environment variables and force magnitudes and are taken from the
65 # paper "Reliability evaluation of reinforcement learning methods for
66 # mechanical systems with increasing complexity", Manzl et al.
67
68 self.force_mag = 40 # 10 #10*2 works for 7e7 steps; must be larger for triple pendulum to be more reactive ...
69 if self.nLinks == 1:
70 self.force_mage = 12
71 if self.nLinks == 3:
72 self.force_mag = self.force_mag*1.5
73 thresholdFactor = 2
74 if self.nLinks == 4:
75 self.force_mag = self.force_mag*3
76 thresholdFactor = 5
77
78 if 'thresholdFactor' in kwargs:
79 thresholdFactor = kwargs['thresholdFactor']
80
81 self.stepUpdateTime = 0.02 # seconds between state updates
82
83 background = GraphicsDataCheckerBoard(point= [0,0.5*self.length,-0.5*width],
84 normal= [0,0,1], size=10, size2=6, nTiles=20, nTiles2=12)
85
86 oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0], #x-pos,y-pos,angle
87 visualization=VObjectGround(graphicsData= [background])))
88
89
90 L = self.length
91 w = width
92 gravity3D = [0.,-gravity,0]
93 graphicsBaseList = [GraphicsDataOrthoCubePoint(size=[L*4, 0.8*w, 0.8*w], color=color4grey)] #rail
94
95 newRobot = Robot(gravity=gravity3D,
96 base = RobotBase(visualization=VRobotBase(graphicsData=graphicsBaseList)),
97 tool = RobotTool(HT=HTtranslate([0,0.5*L,0]), visualization=VRobotTool(graphicsData=[
98 GraphicsDataOrthoCubePoint(size=[w, L, w], color=color4orange)])),
99 referenceConfiguration = []) #referenceConfiguration created with 0s automatically
100
101 #cart:
102 Jlink = RigidBodyInertia(masscart, np.diag([0.1*masscart,0.1*masscart,0.1*masscart]), [0,0,0])
103 link = RobotLink(Jlink.Mass(), Jlink.COM(), Jlink.InertiaCOM(),
104 jointType='Px', preHT=HT0(),
105 # PDcontrol=(pControl, dControl),
106 visualization=VRobotLink(linkColor=color4lawngreen))
107 newRobot.AddLink(link)
108
109
110
111 for i in range(self.nLinks):
112
113 Jlink = RigidBodyInertia(massarm, np.diag([armInertia,0.1*armInertia,armInertia]), [0,0.5*L,0]) #only inertia_ZZ is important
114 #Jlink = Jlink.Translated([0,0.5*L,0])
115 preHT = HT0()
116 if i > 0:
117 preHT = HTtranslateY(L)
118
119 link = RobotLink(Jlink.Mass(), Jlink.COM(), Jlink.InertiaCOM(),
120 jointType='Rz', preHT=preHT,
121 #PDcontrol=(pControl, dControl),
122 visualization=VRobotLink(linkColor=color4blue))
123 newRobot.AddLink(link)
124
125 self.Jlink = Jlink
126
127
128 sKT = []
129 dKT = newRobot.CreateKinematicTree(mbs)
130 self.oKT = dKT['objectKinematicTree']
131 self.nKT = dKT['nodeGeneric']
132
133 # sKT+=[mbs.AddSensor(SensorKinematicTree(objectNumber=oKT, linkNumber=self.nTotalLinks-1,
134 # localPosition=locPos, storeInternal=True,
135 # outputVariableType=exu.OutputVariableType.Position))]
136
137 #control force
138 mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nKT, coordinate=0))
139 self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=0.))
140
141 #%%++++++++++++++++++++++++
142 self.mbs.Assemble() #computes initial vector
143
144 self.simulationSettings.timeIntegration.numberOfSteps = 1
145 self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
146 self.simulationSettings.timeIntegration.verboseMode = 0
147 self.simulationSettings.solutionSettings.writeSolutionToFile = False
148 #self.simulationSettings.timeIntegration.simulateInRealtime = True
149
150 self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
151
152 self.SC.visualizationSettings.general.drawWorldBasis=True
153 self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01
154 self.SC.visualizationSettings.openGL.multiSampling=4
155
156 #self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
157
158 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
159 # Angle at which to fail the episode
160 # these parameters are used in subfunctions
161 self.theta_threshold_radians = thresholdFactor* 18 * 2 * math.pi / 360
162 self.x_threshold = thresholdFactor*3.6
163
164 #must return state size
165 stateSize = (self.nTotalLinks)*2 #the number of states (position/velocity that are used by learning algorithm)
166
167 return stateSize
168
169 #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
170 def SetupSpaces(self):
171
172 high = np.array(
173 [
174 self.x_threshold * 2,
175 ] +
176 [
177 self.theta_threshold_radians * 2,
178 ] * self.nLinks +
179 [
180 np.finfo(np.float32).max,
181 ] * self.nTotalLinks
182 ,
183 dtype=np.float32,
184 )
185
186
187 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
188 #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
189 #for Env:
190 if flagContinuous:
191 self.action_space = spaces.Box(low=np.array([-1 ], dtype=np.float32),
192 high=np.array([1], dtype=np.float32), dtype=np.float32)
193 else:
194 self.action_space = spaces.Discrete(2)
195
196 self.observation_space = spaces.Box(-high, high, dtype=np.float32)
197 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
198
199
200 #**classFunction: this function is overwritten to map the action given by learning algorithm to the multibody system (environment)
201 def MapAction2MBS(self, action):
202 if flagContinuous:
203 force = action[0] * self.force_mag
204 # print(force)
205 else:
206 force = self.force_mag if action == 1 else -self.force_mag
207 self.mbs.SetLoadParameter(self.lControl, 'load', force)
208
209 #**classFunction: this function is overwrritten to collect output of simulation and map to self.state tuple
210 #**output: return bool done which contains information if system state is outside valid range
211 def Output2StateAndDone(self):
212
213 #+++++++++++++++++++++++++
214 statesVector = self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates)
215 statesVector_t = self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates_t)
216 self.state = tuple(list(statesVector) + list(statesVector_t)) #sorting different from previous implementation
217 cartPosX = statesVector[0]
218
219 done = bool(
220 cartPosX < -self.x_threshold
221 or cartPosX > self.x_threshold
222 or max(statesVector[1:self.nTotalLinks]) > self.theta_threshold_radians
223 or min(statesVector[1:self.nTotalLinks]) < -self.theta_threshold_radians
224 )
225 # print("cartPosX: ", cartPosX, ", done: ", done)
226 return done
227
228
229 #**classFunction: OVERRIDE this function to maps the current state to mbs initial values
230 #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
231 def State2InitialValues(self):
232 #+++++++++++++++++++++++++++++++++++++++++++++
233 initialValues = self.state[0:self.nTotalLinks]
234 initialValues_t = self.state[self.nTotalLinks:]
235
236 return [initialValues,initialValues_t]
237
238 def getReward(self):
239 reward = 1 - 0.5 * abs(self.state[0])/self.x_threshold - 0.5 * abs(self.state[1]) / self.theta_threshold_radians
240 return reward
241
242 #**classFunction: openAI gym interface function which is called to compute one step
243 def step(self, action):
244 err_msg = f"{action!r} ({type(action)}) invalid"
245 assert self.action_space.contains(action), err_msg
246 assert self.state is not None, "Call reset before using step method."
247
248 #++++++++++++++++++++++++++++++++++++++++++++++++++
249 #main steps:
250 [initialValues,initialValues_t] = self.State2InitialValues()
251 self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
252 self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
253
254 self.MapAction2MBS(action)
255
256 #this may be time consuming for larger models!
257 self.IntegrateStep()
258
259 done = self.Output2StateAndDone()
260 # print('state:', self.state, 'done: ', done)
261 #++++++++++++++++++++++++++++++++++++++++++++++++++
262 #compute reward and done
263 # chi_x = []
264 # chi_phi = [0.1, 0.1]
265 x = self.state[0]
266 phi = self.state[1:3]
267 # self.state [0] +
268 # if done:
269 # print('done at step {}'.format(self.step))
270 if not done:
271
272 reward = self.getReward()
273 elif self.steps_beyond_done is None:
274 # Arm1 just fell!
275 self.steps_beyond_done = 0
276 reward = self.getReward()
277 else:
278
279 if self.steps_beyond_done == 0:
280 logger.warn(
281 "You are calling 'step()' even though this "
282 "environment has already returned done = True. You "
283 "should always call 'reset()' once you receive 'done = "
284 "True' -- any further steps are undefined behavior."
285 )
286 self.steps_beyond_done += 1
287 reward = 0.0
288
289 return np.array(self.state, dtype=np.float32), reward, done, {}
290
291
292
293
294
295
296#%%+++++++++++++++++++++++++++++++++++++++++++++
297if __name__ == '__main__': #this is only executed when file is direct called in Python
298 import time
299
300 #%%++++++++++++++++++++++++++++++++++++++++++++++++++
301 #use some learning algorithm:
302 #pip install stable_baselines3
303 from stable_baselines3 import A2C, SAC
304
305
306 # here the model is loaded (either for vectorized or scalar environment´using SAC or A2C).
307 def getModel(flagContinuous, myEnv, modelType='SAC'):
308 if flagContinuous :
309 if modelType=='SAC':
310 model = SAC('MlpPolicy',
311 env=myEnv,
312 learning_rate=8e-4,
313 device='cpu', #usually cpu is faster for this size of networks
314 batch_size=128,
315 verbose=1)
316 elif modelType == 'A2C':
317 model = A2C('MlpPolicy',
318 myEnv,
319 device='cpu',
320 verbose=1)
321 else:
322 print('Please specify the modelType.')
323 raise ValueError
324 else:
325 model = A2C('MlpPolicy',
326 myEnv,
327 device='cpu', #usually cpu is faster for this size of networks
328 #device='cuda', #optimal with 64 SubprocVecEnv, torch.set_num_threads(1)
329 verbose=1)
330 return model
331
332
333 #create model and do reinforcement learning
334 modelName = 'openAIgym{}Link{}'.format(nLinks, 'Continuous'*flagContinuous)
335 if False: #'scalar' environment:
336 env = InvertedNPendulumEnv()
337 #check if model runs:
338 #env.SetSolver(exu.DynamicSolverType.ExplicitMidpoint)
339 #env.SetSolver(exu.DynamicSolverType.RK44) #very acurate
340 #env.TestModel(numberOfSteps=200, seed=42, sleepTime=0.02*0, useRenderer=True)
341 model = getModel(flagContinuous, env)
342 # env.useRenderer = True
343 # env.render()
344
345 ts = -time.time()
346 model.learn(total_timesteps=20000)
347
348 print('*** learning time total =',ts+time.time(),'***')
349
350 #save learned model
351
352 model.save("solution/" + modelName)
353 else:
354 import torch #stable-baselines3 is based on pytorch
355 n_cores= max(1,int(os.cpu_count()/2)) #n_cores should be number of real cores (not threads)
356 n_cores = 14
357 torch.set_num_threads(n_cores) #seems to be ideal to match the size of subprocVecEnv
358
359 print('using',n_cores,'cores')
360
361 from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
362 vecEnv = SubprocVecEnv([InvertedNPendulumEnv for i in range(n_cores)])
363
364
365 #main learning task; with 20 cores 800 000 steps take in the continous
366 # case approximatly 18 minutes (SAC), discrete (A2C) takes 2 minutes.
367 model = getModel(flagContinuous, vecEnv, modelType='SAC')
368
369 ts = -time.time()
370 print('start learning of agent with {}'.format(str(model.policy).split('(')[0]))
371 # model.learn(total_timesteps=50000)
372 model.learn(total_timesteps=int(1_000_000))
373 print('*** learning time total =',ts+time.time(),'***')
374
375 #save learned model
376 model.save("solution/" + modelName)
377
378 if True:
379 #%%++++++++++++++++++++++++++++++++++++++++++++++++++
380 #only load and test
381 if False:
382 if flagContinuous and modelType == 'A2C':
383 model = SAC.load("solution/" + modelName)
384 else:
385 model = A2C.load("solution/" + modelName)
386
387 env = InvertedNPendulumEnv(thresholdFactor=5) #larger threshold for testing
388 solutionFile='solution/learningCoordinates.txt'
389 env.TestModel(numberOfSteps=1000, model=model, solutionFileName=solutionFile,
390 stopIfDone=False, useRenderer=False, sleepTime=0) #just compute solution file
391
392 #++++++++++++++++++++++++++++++++++++++++++++++
393 #visualize (and make animations) in exudyn:
394 from exudyn.interactive import SolutionViewer
395 env.SC.visualizationSettings.general.autoFitScene = False
396 solution = LoadSolutionFile(solutionFile)
397 SolutionViewer(env.mbs, solution) #loads solution file via name stored in mbs