reinforcementLearningRobot.py
You can view and download this file on Github: reinforcementLearningRobot.py
1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2# This is an EXUDYN example
3#
4# Details: Reinforcement learning example with stable-baselines3;
5# training a mobile platform with differential drives to meet target points
6# NOTE: frictional contact requires small enough step size to avoid artifacts!
7# GeneralContact works less stable than RollingDisc objects
8#
9# Author: Johannes Gerstmayr
10# Date: 2024-04-27
11#
12# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
13#
14#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
15
16#NOTE: this model is using the stable-baselines3 version 1.7.0, which requires:
17#pip install exudyn
18#pip install pip install wheel==0.38.4 setuptools==66.0.0
19# => this downgrades setuptools to be able to install gym==0.21
20#pip install stable-baselines3==1.7.0
21#tested within a virtual environment: conda create -n venvP311 python=3.11 scipy matplotlib tqdm spyder-kernels=2.5 ipykernel psutil -y
22
23import sys
24sys.exudynFast = True
25
26import exudyn as exu
27from exudyn.utilities import *
28from exudyn.robotics import *
29from exudyn.artificialIntelligence import *
30import math
31
32import copy
33import os
34os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
35import torch
36
37##%% here the number of links can be changed. Note that for n < 3 the actuator
38
39#**function: Add model of differential drive robot (two wheels which can be actuated independently);
40# model is parameterized for kinematics, inertia parameters as well as for graphics;
41# the model is created as a minimum coordinate model to use it together with explicit integration;
42# a contact model is added if it does not exist;
43def DifferentialDriveRobot(SC, mbs,
44 platformInertia = None,
45 wheelInertia = None,
46 platformPosition = [0,0,0], #this is the location of the platform ground centerpoint
47 wheelDistance = 0.4, #wheel midpoint-to-midpoint distance
48 platformHeight = 0.1,
49 platformRadius = 0.22,
50 platformMass = 5,
51 platformGroundOffset = 0.02,
52 planarPlatform = True,
53 dimGroundX = 8, dimGroundY = 8,
54 gravity = [0,0,-9.81],
55 wheelRadius = 0.04,
56 wheelThickness = 0.01,
57 wheelMass = 0.05,
58 pControl = 0,
59 dControl = 0.02,
60 usePenalty = True, #use penalty formulation in case useGeneralContact=False
61 frictionProportionalZone = 0.025,
62 frictionCoeff = 1, stiffnessGround = 1e5,
63 gContact = None,
64 frictionIndexWheel = None, frictionIndexFree = None,
65 useGeneralContact = False #generalcontact shows large errors currently
66 ):
67
68 #add class which can be returned to enable user to access parameters
69 class ddr: pass
70
71 #+++++++++++++++++++++++++++++++++++++++++++
72 #create contact (if not provided)
73 ddr.gGround = GraphicsDataCheckerBoard(normal= [0,0,1],
74 size=dimGroundX, size2=dimGroundY, nTiles=8)
75
76 ddr.oGround= mbs.AddObject(ObjectGround(referencePosition= [0,0,0],
77 visualization=VObjectGround(graphicsData= [ddr.gGround])))
78 ddr.mGround = mbs.AddMarker(MarkerBodyRigid(bodyNumber=ddr.oGround))
79
80
81 ddr.frictionCoeff = frictionCoeff
82 ddr.stiffnessGround = stiffnessGround
83 ddr.dampingGround = ddr.stiffnessGround*0.01
84 if gContact == None and useGeneralContact:
85 frictionIndexGround = 0
86 frictionIndexWheel = 0
87 frictionIndexFree = 1
88
89 ddr.gContact = mbs.AddGeneralContact()
90 ddr.gContact.frictionProportionalZone = frictionProportionalZone
91 #ddr.gContact.frictionVelocityPenalty = 1e4
92
93 ddr.gContact.SetFrictionPairings(np.diag([ddr.frictionCoeff,0])) #second index is for frictionless contact
94 ddr.gContact.SetSearchTreeCellSize(numberOfCells=[4,4,1]) #just a few contact cells
95
96 #add ground to contact
97 [meshPoints, meshTrigs] = GraphicsData2PointsAndTrigs(ddr.gGround) #could also use only 1 quad ...
98
99 ddr.gContact.AddTrianglesRigidBodyBased( rigidBodyMarkerIndex = ddr.mGround,
100 contactStiffness = ddr.stiffnessGround, contactDamping = ddr.dampingGround,
101 frictionMaterialIndex = frictionIndexGround,
102 pointList=meshPoints, triangleList=meshTrigs)
103
104
105 #+++++++++++++++++++++++++++++++++++++++++++
106 #create inertias (if not provided)
107 if wheelInertia == None:
108 ddr.iWheel = InertiaCylinder(wheelMass/(wheelRadius**2*pi*wheelThickness),
109 wheelThickness, wheelRadius, axis=0) #rotation about local X-axis
110 else:
111 ddr.iWheel = RigidBodyInertia(mass=wheelMass, inertiaTensorAtCOM=np.diag(wheelInertia))
112
113 if platformInertia == None:
114 ddr.iPlatform = InertiaCylinder(platformMass/(platformRadius**2*pi*platformHeight),
115 platformHeight, platformRadius, axis=0) #rotation about local X-axis
116 ddr.iPlatform = ddr.iPlatform.Translated([0,0,0.5*platformHeight+platformGroundOffset]) #put COM at mid of platform; but referencepoint is at ground level!
117 else:
118 ddr.iPlatform = RigidBodyInertia(mass=platformMass, inertiaTensorAtCOM=np.diag(platformInertia))
119
120 #+++++++++++++++++++++++++++++++++++++++++++
121 #create kinematic tree for wheeled robot
122 ddr.gPlatform = [GraphicsDataCylinder([0,0,platformGroundOffset], [0,0,platformHeight], platformRadius, color=color4steelblue, nTiles=64, addEdges=True, addFaces=False)]
123 ddr.gPlatform += [GraphicsDataCylinder([0,platformRadius*0.8,platformGroundOffset*1.5], [0,0,platformHeight], platformRadius*0.2, color=color4grey)]
124 ddr.gPlatform += [GraphicsDataBasis(length=0.1)]
125 ddr.gWheel = [GraphicsDataCylinder([-wheelThickness*0.5,0,0], [wheelThickness,0,0], wheelRadius, color=color4red, nTiles=32)]
126 ddr.gWheel += [GraphicsDataOrthoCubePoint([0,0,0],[wheelThickness*1.1,wheelRadius*1.3,wheelRadius*1.3], color=color4grey)]
127 ddr.gWheel += [GraphicsDataBasis(length=0.075)]
128
129 #create node for unknowns of KinematicTree
130 ddr.nJoints = 3+3+2 - 3*planarPlatform #6 (3 in planar case) for the platform and 2 for the wheels;
131 referenceCoordinates=[0.]*ddr.nJoints
132 referenceCoordinates[0:len(platformPosition)] = platformPosition
133 ddr.nKT = mbs.AddNode(NodeGenericODE2(referenceCoordinates=referenceCoordinates,
134 initialCoordinates=[0.]*ddr.nJoints,
135 initialCoordinates_t=[0.]*ddr.nJoints,
136 numberOfODE2Coordinates=ddr.nJoints))
137
138 ddr.linkMasses = []
139 ddr.gList = [] #list of graphics objects for links
140 ddr.linkCOMs = exu.Vector3DList()
141 ddr.linkInertiasCOM=exu.Matrix3DList()
142 ddr.jointTransformations=exu.Matrix3DList()
143 ddr.jointOffsets = exu.Vector3DList()
144 ddr.jointTypes = [exu.JointType.PrismaticX,exu.JointType.PrismaticY,exu.JointType.RevoluteZ]
145 ddr.linkParents = list(np.arange(3)-1)
146
147 ddr.platformIndex = 2
148 if not planarPlatform:
149 ddr.jointTypes+=[exu.JointType.PrismaticZ,exu.JointType.RevoluteY,exu.JointType.RevoluteX]
150 ddr.linkParents+=[2,3,4]
151 ddr.platformIndex = 5
152
153 #add data for wheels:
154 ddr.jointTypes += [exu.JointType.RevoluteX]*2
155 ddr.linkParents += [ddr.platformIndex]*2
156
157 #now create offsets, graphics list and inertia for all links
158 for i in range(len(ddr.jointTypes)):
159 ddr.jointTransformations.Append(np.eye(3))
160
161 if i < ddr.platformIndex:
162 ddr.gList += [[]]
163 ddr.jointOffsets.Append([0,0,0])
164 ddr.linkInertiasCOM.Append(np.zeros([3,3]))
165 ddr.linkCOMs.Append([0,0,0])
166 ddr.linkMasses.append(0)
167 elif i == ddr.platformIndex:
168 ddr.gList += [ddr.gPlatform]
169 ddr.jointOffsets.Append([0,0,0])
170 ddr.linkInertiasCOM.Append(ddr.iPlatform.InertiaCOM())
171 ddr.linkCOMs.Append(ddr.iPlatform.COM())
172 ddr.linkMasses.append(ddr.iPlatform.Mass())
173 else:
174 ddr.gList += [ddr.gWheel]
175 sign = -1+(i>ddr.platformIndex+1)*2
176 offZ = wheelRadius
177 if planarPlatform and (useGeneralContact or usePenalty):
178 offZ *= 0.999 #to ensure contact
179 ddr.jointOffsets.Append([sign*wheelDistance*0.5,0,offZ])
180 ddr.linkInertiasCOM.Append(ddr.iWheel.InertiaCOM())
181 ddr.linkCOMs.Append(ddr.iWheel.COM())
182 ddr.linkMasses.append(ddr.iWheel.Mass())
183
184
185 ddr.jointDControlVector = [0]*ddr.nJoints
186 ddr.jointPControlVector = [0]*ddr.nJoints
187 ddr.jointPositionOffsetVector = [0]*ddr.nJoints
188 ddr.jointVelocityOffsetVector = [0]*ddr.nJoints
189
190 ddr.jointPControlVector[-2:] = [pControl]*2
191 ddr.jointDControlVector[-2:] = [dControl]*2
192
193
194 #create KinematicTree
195 ddr.oKT = mbs.AddObject(ObjectKinematicTree(nodeNumber=ddr.nKT,
196 jointTypes=ddr.jointTypes,
197 linkParents=ddr.linkParents,
198 jointTransformations=ddr.jointTransformations,
199 jointOffsets=ddr.jointOffsets,
200 linkInertiasCOM=ddr.linkInertiasCOM,
201 linkCOMs=ddr.linkCOMs,
202 linkMasses=ddr.linkMasses,
203 baseOffset = [0.,0.,0.], gravity=gravity,
204 jointPControlVector=ddr.jointPControlVector,
205 jointDControlVector=ddr.jointDControlVector,
206 jointPositionOffsetVector=ddr.jointPositionOffsetVector,
207 jointVelocityOffsetVector=ddr.jointVelocityOffsetVector,
208 visualization=VObjectKinematicTree(graphicsDataList = ddr.gList)))
209
210 ddr.sPlatformPos = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
211 storeInternal=True, outputVariableType=exu.OutputVariableType.Position))
212 ddr.sPlatformVel = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
213 storeInternal=True, outputVariableType=exu.OutputVariableType.Velocity))
214 ddr.sPlatformAng = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
215 storeInternal=True, outputVariableType=exu.OutputVariableType.Rotation))
216 ddr.sPlatformAngVel = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
217 storeInternal=True, outputVariableType=exu.OutputVariableType.AngularVelocity))
218
219 #create markers for wheels and add contact
220 ddr.mWheels = []
221 for i in range(2):
222 mWheel = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
223 linkNumber=ddr.platformIndex+1+i,
224 localPosition=[0,0,0]))
225 ddr.mWheels.append(mWheel)
226 if useGeneralContact:
227 ddr.gContact.AddSphereWithMarker(mWheel,
228 radius=wheelRadius,
229 contactStiffness=ddr.stiffnessGround,
230 contactDamping=ddr.dampingGround,
231 frictionMaterialIndex=frictionIndexWheel)
232 #for 3D platform, we need additional support points:
233 if not planarPlatform:
234 rY = platformRadius-platformGroundOffset
235 mPlatformFront = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
236 linkNumber=ddr.platformIndex,
237 localPosition=[0,rY,1.01*platformGroundOffset]))
238 mPlatformBack = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
239 linkNumber=ddr.platformIndex,
240 localPosition=[0,-rY,1.01*platformGroundOffset]))
241
242 fact = 1
243 ddr.gContact.AddSphereWithMarker(mPlatformFront,
244 radius=platformGroundOffset,
245 contactStiffness=ddr.stiffnessGround*fact,
246 contactDamping=ddr.dampingGround*fact,
247 frictionMaterialIndex=frictionIndexFree)
248 ddr.gContact.AddSphereWithMarker(mPlatformBack,
249 radius=platformGroundOffset,
250 contactStiffness=ddr.stiffnessGround*fact,
251 contactDamping=ddr.dampingGround*fact,
252 frictionMaterialIndex=frictionIndexFree)
253 else:
254 if not planarPlatform:
255 raise ValueError('DifferentialDriveRobot: if useGeneralContact==False then planarPlatform must be True!')
256 if not usePenalty:
257 ddr.oRollingDisc = mbs.AddObject(ObjectJointRollingDisc(markerNumbers=[ddr.mGround , mWheel],
258 constrainedAxes=[i,1,1-planarPlatform], discRadius=wheelRadius,
259 visualization=VObjectJointRollingDisc(discWidth=wheelThickness,color=color4blue)))
260 else:
261 nGeneric = mbs.AddNode(NodeGenericData(initialCoordinates=[0,0,0], numberOfDataCoordinates=3))
262 ddr.oRollingDisc = mbs.AddObject(ObjectConnectorRollingDiscPenalty(markerNumbers=[ddr.mGround , mWheel],
263 nodeNumber = nGeneric,
264 discRadius=wheelRadius,
265 useLinearProportionalZone=True,
266 dryFrictionProportionalZone=0.05,
267 contactStiffness=ddr.stiffnessGround,
268 contactDamping=ddr.dampingGround,
269 dryFriction=[ddr.frictionCoeff]*2,
270 visualization=VObjectConnectorRollingDiscPenalty(discWidth=wheelThickness,color=color4blue)))
271
272
273
274 #compute wheel velocities for given forward and rotation velocity
275 def WheelVelocities(forwardVelocity, vRotation, wheelRadius, wheelDistance):
276 vLeft = -forwardVelocity/wheelRadius
277 vRight = vLeft
278 vOff = vRotation*wheelDistance*0.5/wheelRadius
279 vLeft += vOff
280 vRight -= vOff
281 return [vLeft, vRight]
282
283 ddr.WheelVelocities = WheelVelocities
284
285 #add some useful graphics settings
286
287 SC.visualizationSettings.general.circleTiling=200
288 SC.visualizationSettings.general.drawCoordinateSystem=True
289 SC.visualizationSettings.loads.show=False
290 SC.visualizationSettings.bodies.show=True
291 SC.visualizationSettings.markers.show=False
292 SC.visualizationSettings.bodies.kinematicTree.frameSize = 0.1
293 SC.visualizationSettings.bodies.kinematicTree.showJointFrames = False
294
295 SC.visualizationSettings.nodes.show=True
296 # SC.visualizationSettings.nodes.showBasis =True
297 SC.visualizationSettings.nodes.drawNodesAsPoint = False
298 SC.visualizationSettings.nodes.defaultSize = 0 #must not be -1, otherwise uses autocomputed size
299
300 SC.visualizationSettings.openGL.multiSampling = 4
301 # SC.visualizationSettings.openGL.shadow = 0.25
302 #SC.visualizationSettings.openGL.light0position = [-3,3,10,0]
303 # SC.visualizationSettings.contact.showBoundingBoxes = True
304 SC.visualizationSettings.contact.showTriangles = True
305 SC.visualizationSettings.contact.showSpheres = True
306
307 return ddr
308
309#%%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
310#for testing with a simple trajectory:
311if False:
312 SC = exu.SystemContainer()
313 mbs = SC.AddSystem()
314
315 useGeneralContact = False
316 usePenalty = True
317 wheelRadius = 0.04
318 wheelDistance = 0.4
319 ddr = DifferentialDriveRobot(SC, mbs,useGeneralContact=useGeneralContact,
320 usePenalty=usePenalty, planarPlatform=True,
321 wheelRadius=wheelRadius, wheelDistance=wheelDistance)
322 mbs.Assemble()
323
324 #create some nice trajectory
325 def PreStepUserFunction(mbs, t):
326 vSet = ddr.jointVelocityOffsetVector #nominal values
327 vSet[-2:] = [0,0]
328
329 if t < 2:
330 vSet[-2:] = ddr.WheelVelocities(0.5, 0, wheelRadius, wheelDistance)
331 elif t < 3: pass
332 elif t < 4:
333 vSet[-2:] = ddr.WheelVelocities(0, 0.5*pi, wheelRadius, wheelDistance)
334 elif t < 5: pass
335 elif t < 7:
336 vSet[-2:] = ddr.WheelVelocities(-1, 0, wheelRadius, wheelDistance)
337 elif t < 8: pass
338 elif t < 9:
339 vSet[-2:] = ddr.WheelVelocities(0.5, 0.5*pi, wheelRadius, wheelDistance)
340
341 mbs.SetObjectParameter(ddr.oKT, "jointVelocityOffsetVector", vSet)
342
343 return True
344
345 mbs.SetPreStepUserFunction(PreStepUserFunction)
346
347 tEnd = 12 #tEnd = 0.8 for test suite
348 stepSize = 0.002 #h= 0.0002 for test suite
349 if useGeneralContact or usePenalty:
350 stepSize = 2e-4
351 # h*=0.1
352 # tEnd*=3
353 simulationSettings = exu.SimulationSettings()
354 simulationSettings.solutionSettings.solutionWritePeriod = 0.01
355 simulationSettings.solutionSettings.writeSolutionToFile = False
356 simulationSettings.solutionSettings.coordinatesSolutionFileName = 'solution/coordinatesSolution.txt'
357
358 simulationSettings.solutionSettings.sensorsWritePeriod = stepSize*10
359 # simulationSettings.displayComputationTime = True
360 # simulationSettings.displayStatistics = True
361 # simulationSettings.timeIntegration.verboseMode = 1
362 #simulationSettings.timeIntegration.simulateInRealtime = True
363 simulationSettings.timeIntegration.discontinuous.maxIterations = 1 #speed up
364 #simulationSettings.timeIntegration.discontinuous.iterationTolerance = 1e-5
365
366
367 exu.StartRenderer()
368 if 'renderState' in exu.sys:
369 SC.SetRenderState(exu.sys['renderState'])
370 mbs.WaitForUserToContinue()
371
372 simulationSettings.timeIntegration.numberOfSteps = int(tEnd/stepSize)
373 simulationSettings.timeIntegration.endTime = tEnd
374 simulationSettings.timeIntegration.explicitIntegration.computeEndOfStepAccelerations = False #increase performance, accelerations less accurate
375
376 SC.visualizationSettings.window.renderWindowSize=[1600,1024]
377 SC.visualizationSettings.general.graphicsUpdateInterval = 0.02
378
379 if useGeneralContact or usePenalty:
380 mbs.SolveDynamic(simulationSettings, solverType=exu.DynamicSolverType.ExplicitEuler)
381 # mbs.SolveDynamic(simulationSettings, solverType=exu.DynamicSolverType.ExplicitMidpoint)
382 else:
383 mbs.SolveDynamic(simulationSettings)
384
385 SC.WaitForRenderEngineStopFlag()
386 exu.StopRenderer() #safely close rendering window!
387
388 if True:
389 mbs.PlotSensor(ddr.sPlatformVel, components=[0,1],closeAll=True)
390 mbs.PlotSensor(ddr.sPlatformAngVel, components=[0,1,2])
391
392def Rot2D(phi):
393 return np.array([[np.cos(phi),-np.sin(phi)],
394 [np.sin(phi), np.cos(phi)]])
395
396
397#%%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
398class RobotEnv(OpenAIGymInterfaceEnv):
399
400 #**classFunction: OVERRIDE this function to create multibody system mbs and setup simulationSettings; call Assemble() at the end!
401 # you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
402 def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):
403
404 #%%++++++++++++++++++++++++++++++++++++++++++++++
405 self.mbs = mbs
406 self.SC = SC
407
408 self.dimGroundX = 4 #dimension of ground
409 self.dimGroundY = 4
410 self.maxRotations = 0.6 #maximum number before learning stops
411
412 self.maxWheelSpeed = 2*pi #2*pi = 1 revolution/second
413 self.wheelRadius = 0.04
414 self.wheelDistance = 0.4
415 self.maxVelocity = self.wheelRadius * self.maxWheelSpeed
416 self.maxPlatformAngVel = self.wheelRadius/(self.wheelDistance*0.5)*self.maxWheelSpeed
417
418 useGeneralContact = False
419 usePenalty = True
420 ddr = DifferentialDriveRobot(SC, mbs,useGeneralContact=useGeneralContact,
421 dimGroundX=self.dimGroundY, dimGroundY=self.dimGroundY,
422 usePenalty=usePenalty,
423 planarPlatform=True,
424 stiffnessGround=1e4,
425 wheelRadius=self.wheelRadius,
426 wheelDistance=self.wheelDistance)
427
428 self.ddr = ddr
429 self.oKT = ddr.oKT
430 self.nKT = ddr.nKT
431
432 #add graphics for desination
433 gDestination = GraphicsDataSphere(point=[0,0,0.05],radius = 0.02, color=color4red, nTiles=16)
434 self.oDestination = mbs.CreateGround(graphicsDataList=[gDestination])
435
436 mbs.Assemble()
437 self.stepSize = 1e-3
438 self.stepUpdateTime = 0.05
439
440 simulationSettings.solutionSettings.solutionWritePeriod = 0.1
441
442 writeSolutionToFile = False
443 if 'writeSolutionToFile' in kwargs:
444 writeSolutionToFile = kwargs['writeSolutionToFile']
445
446 useGraphics = False
447 if 'useGraphics' in kwargs:
448 useGraphics = kwargs['useGraphics']
449
450 simulationSettings.solutionSettings.writeSolutionToFile = writeSolutionToFile
451 simulationSettings.solutionSettings.writeSolutionToFile = False
452
453 simulationSettings.solutionSettings.coordinatesSolutionFileName = 'solution/coordinatesSolution.txt'
454
455 # simulationSettings.displayComputationTime = True
456 #simulationSettings.displayStatistics = True
457 #simulationSettings.timeIntegration.verboseMode = 1
458 #simulationSettings.timeIntegration.simulateInRealtime = True
459 simulationSettings.timeIntegration.discontinuous.maxIterations = 1 #speed up
460 #simulationSettings.timeIntegration.discontinuous.iterationTolerance = 1e-5
461
462
463 simulationSettings.timeIntegration.numberOfSteps = int(self.stepUpdateTime/self.stepSize)
464 simulationSettings.timeIntegration.endTime = self.stepUpdateTime
465 simulationSettings.timeIntegration.explicitIntegration.computeEndOfStepAccelerations = False #increase performance, accelerations less accurate
466
467 SC.visualizationSettings.window.renderWindowSize=[1600,1024]
468 SC.visualizationSettings.general.graphicsUpdateInterval = 0.02
469
470 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
471 self.randomInitializationValue = [0.4*self.dimGroundX, 0.4*self.dimGroundY, self.maxRotations*2*pi*0.99,
472 self.maxVelocity*0,self.maxVelocity*0,self.maxPlatformAngVel*0,
473 0.3*self.dimGroundX, 0.3*self.dimGroundY, #destination points
474 ]
475
476 #must return state size
477 self.numberOfStates = 3 #posx, posy, rot
478 self.destinationStates = 2 #define here, if destination is included in states
479 self.destination = [0.,0.] #default value for destination
480
481 return self.destinationStates + self.numberOfStates * 2 #the number of states (position/velocity that are used by learning algorithm)
482
483 #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
484 def SetupSpaces(self):
485
486 high = np.array(
487 [
488 self.dimGroundX*0.5,
489 self.dimGroundY*0.5,
490 2*pi*self.maxRotations #10 full revolutions; no more should be needed for any task
491 ] +
492 [
493 np.finfo(np.float32).max,
494 ] * self.numberOfStates +
495 [self.dimGroundX*0.5,
496 self.dimGroundY*0.5]*(self.destinationStates>0)
497 ,
498 dtype=np.float32,
499 )
500
501
502 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
503 #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
504 #for Env:
505
506 self.action_space = spaces.Box(low=np.array([-self.maxWheelSpeed,-self.maxWheelSpeed], dtype=np.float32),
507 high=np.array([self.maxWheelSpeed,self.maxWheelSpeed], dtype=np.float32), dtype=np.float32)
508
509 self.observation_space = spaces.Box(-high, high, dtype=np.float32)
510 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
511
512
513 #**classFunction: this function is overwritten to map the action given by learning algorithm to the multibody system (environment)
514 def MapAction2MBS(self, action):
515 # force = action[0] * self.force_mag
516 # self.mbs.SetLoadParameter(self.lControl, 'load', force)
517 vSet = self.ddr.jointVelocityOffsetVector #nominal values
518 vSet[-2:] = action
519 # vSet[-1] = vSet[-2]
520 # vSet[-2:] = [2,2.5]
521 # print('action:', action)
522
523 self.mbs.SetObjectParameter(self.oKT, "jointVelocityOffsetVector", vSet)
524
525
526 #**classFunction: this function is overwrritten to collect output of simulation and map to self.state tuple
527 #**output: return bool done which contains information if system state is outside valid range
528 def Output2StateAndDone(self):
529
530 #+++++++++++++++++++++++++
531 #implemented for planar model only!
532 statesVector = self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates)[0:self.numberOfStates]
533 statesVectorGlob_t = self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates_t)[0:self.numberOfStates]
534
535 # vLoc = Rot2D(statesVector[2]).T @ statesVectorGlob_t[0:2]
536 # print('vLoc=',vLoc)
537 # statesVector_t = np.array([vLoc[1], statesVectorGlob_t[2]])
538 statesVector_t = statesVectorGlob_t #change to local in future!
539
540 self.state = list(statesVector) + list(statesVector_t)
541 if self.destinationStates:
542 self.state += list(self.destination)
543 self.state = tuple(self.state)
544
545 done = bool(
546 statesVector[0] < -self.dimGroundX
547 or statesVector[0] > self.dimGroundX
548 or statesVector[1] < -self.dimGroundY
549 or statesVector[1] > self.dimGroundY
550 or statesVector[2] < -self.maxRotations*2*pi
551 or statesVector[2] > self.maxRotations*2*pi
552 )
553
554 return done
555
556
557 #**classFunction: OVERRIDE this function to map the current state to mbs initial values
558 #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
559 def State2InitialValues(self):
560 #+++++++++++++++++++++++++++++++++++++++++++++
561 #states: x, y, phi, x_t, y_t, phi_t
562 initialValues = list(self.state[0:self.numberOfStates])+[0,0] #wheels do not initialize
563 initialValues_t = list(self.state[self.numberOfStates:2*self.numberOfStates])+[0,0]
564
565 if self.destinationStates:
566 if self.destination[0] != self.state[-2] or self.destination[1] != self.state[-1]:
567 # print('set new destination:', self.destination)
568 self.destination = self.state[-2:] #last two values are destination
569 self.mbs.SetObjectParameter(self.oDestination, 'referencePosition',
570 list(self.destination)+[0])
571
572 return [initialValues,initialValues_t]
573
574 def getReward(self):
575 X = self.dimGroundX
576 Y = self.dimGroundY
577 v = np.array([self.destination[0] - self.state[0], self.destination[1] - self.state[1]])
578 dist = NormL2(v)
579
580 phi = self.state[2]
581 localSpeed = Rot2D(phi).T @ [self.state[3],self.state[4]]
582 forwardSpeed = localSpeed[1]
583
584 reward = 1
585 #take power of 0.5 of dist to penalize small distances
586 #reward -= (dist/(0.5*NormL2([X,Y])))**0.5
587
588 #add penalty on rotations at a certain time (at beginning rotation may be needed...)
589 #reward -= 0.2*abs(self.state[5])/self.maxPlatformAngVel
590 # t = self.mbs.systemData.GetTime()
591 # if t > 4:
592 # fact = 1
593 # if t < 5: fact = 5-t
594 # reward -= fact*0.1*abs(self.state[5])/self.maxPlatformAngVel
595
596 #add penalty on reverse velocity: this supports solutions in forward direction!
597 # backwardMaxSpeed = 0.1
598 # if forwardSpeed < -backwardMaxSpeed*self.maxVelocity:
599 # reward -= abs(forwardSpeed)/self.maxVelocity+backwardMaxSpeed
600
601 reward -= 0.5*abs(forwardSpeed)
602
603 if dist > 0:
604 v0 = v*(1/dist)
605 vDir = Rot2D(phi) @ [0,1]
606 # print('v0=',v0,', dir=',vDir)
607 reward -= NormL2(vDir-v0)*0.5
608
609 # print('rew=', round(reward,3), ', vF=', round(0.5*abs(forwardSpeed),3),
610 # ', dir=', round(NormL2(vDir-v0)*0.5,4),
611 # 'v0=', v0, 'vDir=',vDir)
612
613 #reward -= max(0,abs(self.state[2])-pi)/(4*pi)
614 if reward < 0: reward = 0
615
616 # print('forwardSpeed',round(forwardSpeed/self.maxVelocity,3),
617 # ', reward',round(reward,3))
618
619 # print('reward=',reward, ', t=', self.mbs.systemData.GetTime())
620 return reward
621
622 #**classFunction: openAI gym interface function which is called to compute one step
623 def step(self, action):
624 err_msg = f"{action!r} ({type(action)}) invalid"
625 assert self.action_space.contains(action), err_msg
626 assert self.state is not None, "Call reset before using step method."
627
628 #++++++++++++++++++++++++++++++++++++++++++++++++++
629 #main steps:
630 [initialValues,initialValues_t] = self.State2InitialValues()
631 # print('initialValues_t:',initialValues_t)
632 # print(self.mbs)
633 qOriginal = self.mbs.systemData.GetODE2Coordinates(exu.ConfigurationType.Initial)
634 q_tOriginal = self.mbs.systemData.GetODE2Coordinates_t(exu.ConfigurationType.Initial)
635 initialValues[self.numberOfStates:] = qOriginal[self.numberOfStates:]
636 initialValues_t[self.numberOfStates:] = q_tOriginal[self.numberOfStates:]
637
638 self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
639 self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
640
641 self.MapAction2MBS(action)
642
643 #this may be time consuming for larger models!
644 self.IntegrateStep()
645
646 done = self.Output2StateAndDone()
647 if self.mbs.systemData.GetTime() > 16: #if it is too long, stop for now!
648 done = True
649
650 # print('state:', self.state, 'done: ', done)
651 #++++++++++++++++++++++++++++++++++++++++++++++++++
652 if not done:
653 reward = self.getReward()
654 elif self.steps_beyond_done is None:
655 self.steps_beyond_done = 0
656 reward = self.getReward()
657 else:
658
659 if self.steps_beyond_done == 0:
660 logger.warn(
661 "You are calling 'step()' even though this "
662 "environment has already returned done = True. You "
663 "should always call 'reset()' once you receive 'done = "
664 "True' -- any further steps are undefined behavior."
665 )
666 self.steps_beyond_done += 1
667 reward = 0.0
668
669 return np.array(self.state, dtype=np.float32), reward, done, {}
670
671
672
673
674# sys.exit()
675
676#%%+++++++++++++++++++++++++++++++++++++++++++++
677if __name__ == '__main__': #this is only executed when file is direct called in Python
678 import time
679
680 #%%++++++++++++++++++++++++++++++++++++++++++++++++++
681 #use some learning algorithm:
682 #pip install stable_baselines3
683 from stable_baselines3 import A2C, SAC
684
685
686 # here the model is loaded (either for vectorized or scalar environment´using SAC or A2C).
687 def GetModel(myEnv, modelType='SAC'):
688 if modelType=='SAC':
689 model = SAC('MlpPolicy',
690 env=myEnv,
691 #learning_rate=8e-4,
692 device='cpu', #usually cpu is faster for this size of networks
693 #batch_size=128,
694 verbose=1)
695 elif modelType == 'A2C':
696 model = A2C('MlpPolicy',
697 myEnv,
698 device='cpu',
699 #n_steps=5,
700 # policy_kwargs = dict(activation_fn=torch.nn.ReLU,
701 # net_arch=dict(pi=[8]*2, vf=[8]*2)),
702 verbose=1)
703 else:
704 print('Please specify the modelType.')
705 raise ValueError
706
707 return model
708
709 # sys.exit()
710 #create model and do reinforcement learning
711 modelType='A2C'
712 modelName = 'openAIgymDDrobot_'+modelType
713 if False: #'scalar' environment:
714 env = RobotEnv()
715 #check if model runs:
716 #env.SetSolver(exu.DynamicSolverType.ExplicitMidpoint)
717 #env.SetSolver(exu.DynamicSolverType.RK44) #very acurate
718 # env.TestModel(numberOfSteps=2000, seed=42, sleepTime=0.02*0, useRenderer=True)
719 model = GetModel(env, modelType=modelType)
720 env.useRenderer = True
721 # env.render()
722 exu.StartRenderer()
723
724 ts = -time.time()
725 model.learn(total_timesteps=200000)
726
727 print('*** learning time total =',ts+time.time(),'***')
728
729 #save learned model
730
731 model.save("solution/" + modelName)
732 else:
733 import torch #stable-baselines3 is based on pytorch
734 n_cores= max(1,int(os.cpu_count()/2)) #n_cores should be number of real cores (not threads)
735 n_cores = 26 #16 #vecEnv can handle number of threads, while torch should rather use real cores
736 #torch.set_num_threads(n_cores) #seems to be ideal to match the size of subprocVecEnv
737 torch.set_num_threads(14) #seems to be ideal to match the size of subprocVecEnv
738
739 print('using',n_cores,'cores')
740
741 from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
742 vecEnv = SubprocVecEnv([RobotEnv for i in range(n_cores)])
743
744
745 #main learning task; training of double pendulum: with 20 cores 800 000 steps take in the continous case approximatly 18 minutes (SAC), discrete (A2C) takes 2 minutes.
746 model = GetModel(vecEnv, modelType=modelType)
747
748 ts = -time.time()
749 print('start learning of agent with {}'.format(str(model.policy).split('(')[0]))
750 # model.learn(total_timesteps=50000)
751 model.learn(total_timesteps=int(500_000),log_interval=500)
752 print('*** learning time total =',ts+time.time(),'***')
753
754 #save learned model
755 model.save("solution/" + modelName)
756
757 if False: #set True to visualize results
758 #%%++++++++++++++++++++++++++++++++++++++++++++++++++
759 #only load and test
760 if False:
761 modelName = 'openAIgymDDrobot_A2C_16M'
762 modelType='A2C'
763 if modelType == 'SAC':
764 model = SAC.load("solution/" + modelName)
765 else:
766 model = A2C.load("solution/" + modelName)
767
768 env = RobotEnv() #larger threshold for testing
769 solutionFile='solution/learningCoordinates.txt'
770 env.TestModel(numberOfSteps=800, seed=3, model=model, solutionFileName=solutionFile,
771 stopIfDone=False, useRenderer=False, sleepTime=0) #just compute solution file
772
773 #++++++++++++++++++++++++++++++++++++++++++++++
774 #visualize (and make animations) in exudyn:
775 from exudyn.interactive import SolutionViewer
776 env.SC.visualizationSettings.general.autoFitScene = False
777 solution = LoadSolutionFile(solutionFile)
778 SolutionViewer(env.mbs, solution, timeout = 0.01, rowIncrement=2) #loads solution file via name stored in mbs