reinforcementLearningRobot.py

You can view and download this file on Github: reinforcementLearningRobot.py

  1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  2# This is an EXUDYN example
  3#
  4# Details:  Reinforcement learning example with stable-baselines3;
  5#           training a mobile platform with differential drives to meet target points
  6#           NOTE: frictional contact requires small enough step size to avoid artifacts!
  7#           GeneralContact works less stable than RollingDisc objects
  8#
  9# Author:    Johannes Gerstmayr
 10# Date:      2024-04-27
 11#
 12# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
 13#
 14#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 15
 16#NOTE: this model is using the stable-baselines3 version 1.7.0, which requires:
 17#pip install exudyn
 18#pip install pip install wheel==0.38.4 setuptools==66.0.0
 19#      => this downgrades setuptools to be able to install gym==0.21
 20#pip install stable-baselines3==1.7.0
 21#tested within a virtual environment: conda create -n venvP311 python=3.11 scipy matplotlib tqdm spyder-kernels=2.5 ipykernel psutil -y
 22
 23import sys
 24sys.exudynFast = True
 25
 26import exudyn as exu
 27from exudyn.utilities import *
 28from exudyn.robotics import *
 29from exudyn.artificialIntelligence import *
 30import math
 31
 32import copy
 33import os
 34os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
 35import torch
 36
 37##%% here the number of links can be changed. Note that for n < 3 the actuator
 38
 39#**function: Add model of differential drive robot (two wheels which can be actuated independently);
 40#            model is parameterized for kinematics, inertia parameters as well as for graphics;
 41#            the model is created as a minimum coordinate model to use it together with explicit integration;
 42#            a contact model is added if it does not exist;
 43def DifferentialDriveRobot(SC, mbs,
 44                           platformInertia = None,
 45                           wheelInertia = None,
 46                           platformPosition = [0,0,0], #this is the location of the platform ground centerpoint
 47                           wheelDistance = 0.4,        #wheel midpoint-to-midpoint distance
 48                           platformHeight = 0.1,
 49                           platformRadius = 0.22,
 50                           platformMass = 5,
 51                           platformGroundOffset = 0.02,
 52                           planarPlatform = True,
 53                           dimGroundX = 8, dimGroundY = 8,
 54                           gravity = [0,0,-9.81],
 55                           wheelRadius = 0.04,
 56                           wheelThickness = 0.01,
 57                           wheelMass = 0.05,
 58                           pControl = 0,
 59                           dControl = 0.02,
 60                           usePenalty = True, #use penalty formulation in case useGeneralContact=False
 61                           frictionProportionalZone = 0.025,
 62                           frictionCoeff = 1, stiffnessGround = 1e5,
 63                           gContact = None,
 64                           frictionIndexWheel = None, frictionIndexFree = None,
 65                           useGeneralContact = False #generalcontact shows large errors currently
 66                           ):
 67
 68    #add class which can be returned to enable user to access parameters
 69    class ddr: pass
 70
 71    #+++++++++++++++++++++++++++++++++++++++++++
 72    #create contact (if not provided)
 73    ddr.gGround = GraphicsDataCheckerBoard(normal= [0,0,1],
 74                                           size=dimGroundX, size2=dimGroundY, nTiles=8)
 75
 76    ddr.oGround= mbs.AddObject(ObjectGround(referencePosition= [0,0,0],
 77                                            visualization=VObjectGround(graphicsData= [ddr.gGround])))
 78    ddr.mGround = mbs.AddMarker(MarkerBodyRigid(bodyNumber=ddr.oGround))
 79
 80
 81    ddr.frictionCoeff = frictionCoeff
 82    ddr.stiffnessGround = stiffnessGround
 83    ddr.dampingGround = ddr.stiffnessGround*0.01
 84    if gContact == None and useGeneralContact:
 85        frictionIndexGround = 0
 86        frictionIndexWheel = 0
 87        frictionIndexFree = 1
 88
 89        ddr.gContact = mbs.AddGeneralContact()
 90        ddr.gContact.frictionProportionalZone = frictionProportionalZone
 91        #ddr.gContact.frictionVelocityPenalty = 1e4
 92
 93        ddr.gContact.SetFrictionPairings(np.diag([ddr.frictionCoeff,0])) #second index is for frictionless contact
 94        ddr.gContact.SetSearchTreeCellSize(numberOfCells=[4,4,1]) #just a few contact cells
 95
 96        #add ground to contact
 97        [meshPoints, meshTrigs] = GraphicsData2PointsAndTrigs(ddr.gGround) #could also use only 1 quad ...
 98
 99        ddr.gContact.AddTrianglesRigidBodyBased( rigidBodyMarkerIndex = ddr.mGround,
100                                            contactStiffness = ddr.stiffnessGround, contactDamping = ddr.dampingGround,
101                                            frictionMaterialIndex = frictionIndexGround,
102                                            pointList=meshPoints, triangleList=meshTrigs)
103
104
105    #+++++++++++++++++++++++++++++++++++++++++++
106    #create inertias (if not provided)
107    if wheelInertia == None:
108        ddr.iWheel = InertiaCylinder(wheelMass/(wheelRadius**2*pi*wheelThickness),
109                                 wheelThickness, wheelRadius, axis=0) #rotation about local X-axis
110    else:
111        ddr.iWheel = RigidBodyInertia(mass=wheelMass, inertiaTensorAtCOM=np.diag(wheelInertia))
112
113    if platformInertia == None:
114        ddr.iPlatform = InertiaCylinder(platformMass/(platformRadius**2*pi*platformHeight),
115                                 platformHeight, platformRadius, axis=0) #rotation about local X-axis
116        ddr.iPlatform = ddr.iPlatform.Translated([0,0,0.5*platformHeight+platformGroundOffset]) #put COM at mid of platform; but referencepoint is at ground level!
117    else:
118        ddr.iPlatform = RigidBodyInertia(mass=platformMass, inertiaTensorAtCOM=np.diag(platformInertia))
119
120    #+++++++++++++++++++++++++++++++++++++++++++
121    #create kinematic tree for wheeled robot
122    ddr.gPlatform = [GraphicsDataCylinder([0,0,platformGroundOffset], [0,0,platformHeight], platformRadius, color=color4steelblue, nTiles=64, addEdges=True, addFaces=False)]
123    ddr.gPlatform += [GraphicsDataCylinder([0,platformRadius*0.8,platformGroundOffset*1.5], [0,0,platformHeight], platformRadius*0.2, color=color4grey)]
124    ddr.gPlatform += [GraphicsDataBasis(length=0.1)]
125    ddr.gWheel = [GraphicsDataCylinder([-wheelThickness*0.5,0,0], [wheelThickness,0,0], wheelRadius, color=color4red, nTiles=32)]
126    ddr.gWheel += [GraphicsDataOrthoCubePoint([0,0,0],[wheelThickness*1.1,wheelRadius*1.3,wheelRadius*1.3], color=color4grey)]
127    ddr.gWheel += [GraphicsDataBasis(length=0.075)]
128
129    #create node for unknowns of KinematicTree
130    ddr.nJoints = 3+3+2 - 3*planarPlatform #6 (3 in planar case) for the platform and 2 for the wheels;
131    referenceCoordinates=[0.]*ddr.nJoints
132    referenceCoordinates[0:len(platformPosition)] = platformPosition
133    ddr.nKT = mbs.AddNode(NodeGenericODE2(referenceCoordinates=referenceCoordinates,
134                                               initialCoordinates=[0.]*ddr.nJoints,
135                                               initialCoordinates_t=[0.]*ddr.nJoints,
136                                               numberOfODE2Coordinates=ddr.nJoints))
137
138    ddr.linkMasses = []
139    ddr.gList = [] #list of graphics objects for links
140    ddr.linkCOMs = exu.Vector3DList()
141    ddr.linkInertiasCOM=exu.Matrix3DList()
142    ddr.jointTransformations=exu.Matrix3DList()
143    ddr.jointOffsets = exu.Vector3DList()
144    ddr.jointTypes = [exu.JointType.PrismaticX,exu.JointType.PrismaticY,exu.JointType.RevoluteZ]
145    ddr.linkParents = list(np.arange(3)-1)
146
147    ddr.platformIndex = 2
148    if not planarPlatform:
149        ddr.jointTypes+=[exu.JointType.PrismaticZ,exu.JointType.RevoluteY,exu.JointType.RevoluteX]
150        ddr.linkParents+=[2,3,4]
151        ddr.platformIndex = 5
152
153    #add data for wheels:
154    ddr.jointTypes += [exu.JointType.RevoluteX]*2
155    ddr.linkParents += [ddr.platformIndex]*2
156
157    #now create offsets, graphics list and inertia for all links
158    for i in range(len(ddr.jointTypes)):
159        ddr.jointTransformations.Append(np.eye(3))
160
161        if i < ddr.platformIndex:
162            ddr.gList += [[]]
163            ddr.jointOffsets.Append([0,0,0])
164            ddr.linkInertiasCOM.Append(np.zeros([3,3]))
165            ddr.linkCOMs.Append([0,0,0])
166            ddr.linkMasses.append(0)
167        elif i == ddr.platformIndex:
168            ddr.gList += [ddr.gPlatform]
169            ddr.jointOffsets.Append([0,0,0])
170            ddr.linkInertiasCOM.Append(ddr.iPlatform.InertiaCOM())
171            ddr.linkCOMs.Append(ddr.iPlatform.COM())
172            ddr.linkMasses.append(ddr.iPlatform.Mass())
173        else:
174            ddr.gList += [ddr.gWheel]
175            sign = -1+(i>ddr.platformIndex+1)*2
176            offZ = wheelRadius
177            if planarPlatform and (useGeneralContact or usePenalty):
178                offZ *= 0.999 #to ensure contact
179            ddr.jointOffsets.Append([sign*wheelDistance*0.5,0,offZ])
180            ddr.linkInertiasCOM.Append(ddr.iWheel.InertiaCOM())
181            ddr.linkCOMs.Append(ddr.iWheel.COM())
182            ddr.linkMasses.append(ddr.iWheel.Mass())
183
184
185    ddr.jointDControlVector = [0]*ddr.nJoints
186    ddr.jointPControlVector = [0]*ddr.nJoints
187    ddr.jointPositionOffsetVector = [0]*ddr.nJoints
188    ddr.jointVelocityOffsetVector = [0]*ddr.nJoints
189
190    ddr.jointPControlVector[-2:] = [pControl]*2
191    ddr.jointDControlVector[-2:] = [dControl]*2
192
193
194    #create KinematicTree
195    ddr.oKT = mbs.AddObject(ObjectKinematicTree(nodeNumber=ddr.nKT,
196                                      jointTypes=ddr.jointTypes,
197                                      linkParents=ddr.linkParents,
198                                      jointTransformations=ddr.jointTransformations,
199                                      jointOffsets=ddr.jointOffsets,
200                                      linkInertiasCOM=ddr.linkInertiasCOM,
201                                      linkCOMs=ddr.linkCOMs,
202                                      linkMasses=ddr.linkMasses,
203                                      baseOffset = [0.,0.,0.], gravity=gravity,
204                                      jointPControlVector=ddr.jointPControlVector,
205                                      jointDControlVector=ddr.jointDControlVector,
206                                      jointPositionOffsetVector=ddr.jointPositionOffsetVector,
207                                      jointVelocityOffsetVector=ddr.jointVelocityOffsetVector,
208                                      visualization=VObjectKinematicTree(graphicsDataList = ddr.gList)))
209
210    ddr.sPlatformPos = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
211                                                         storeInternal=True, outputVariableType=exu.OutputVariableType.Position))
212    ddr.sPlatformVel = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
213                                                         storeInternal=True, outputVariableType=exu.OutputVariableType.Velocity))
214    ddr.sPlatformAng = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
215                                                         storeInternal=True, outputVariableType=exu.OutputVariableType.Rotation))
216    ddr.sPlatformAngVel = mbs.AddSensor(SensorKinematicTree(objectNumber=ddr.oKT, linkNumber = ddr.platformIndex,
217                                                         storeInternal=True, outputVariableType=exu.OutputVariableType.AngularVelocity))
218
219    #create markers for wheels and add contact
220    ddr.mWheels = []
221    for i in range(2):
222        mWheel = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
223                                                        linkNumber=ddr.platformIndex+1+i,
224                                                        localPosition=[0,0,0]))
225        ddr.mWheels.append(mWheel)
226        if useGeneralContact:
227            ddr.gContact.AddSphereWithMarker(mWheel,
228                                             radius=wheelRadius,
229                                             contactStiffness=ddr.stiffnessGround,
230                                             contactDamping=ddr.dampingGround,
231                                             frictionMaterialIndex=frictionIndexWheel)
232            #for 3D platform, we need additional support points:
233            if not planarPlatform:
234                rY = platformRadius-platformGroundOffset
235                mPlatformFront = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
236                                                                linkNumber=ddr.platformIndex,
237                                                                localPosition=[0,rY,1.01*platformGroundOffset]))
238                mPlatformBack = mbs.AddMarker(MarkerKinematicTreeRigid(objectNumber=ddr.oKT,
239                                                                linkNumber=ddr.platformIndex,
240                                                                localPosition=[0,-rY,1.01*platformGroundOffset]))
241
242                fact = 1
243                ddr.gContact.AddSphereWithMarker(mPlatformFront,
244                                                 radius=platformGroundOffset,
245                                                 contactStiffness=ddr.stiffnessGround*fact,
246                                                 contactDamping=ddr.dampingGround*fact,
247                                                 frictionMaterialIndex=frictionIndexFree)
248                ddr.gContact.AddSphereWithMarker(mPlatformBack,
249                                                 radius=platformGroundOffset,
250                                                 contactStiffness=ddr.stiffnessGround*fact,
251                                                 contactDamping=ddr.dampingGround*fact,
252                                                 frictionMaterialIndex=frictionIndexFree)
253        else:
254            if not planarPlatform:
255                raise ValueError('DifferentialDriveRobot: if useGeneralContact==False then planarPlatform must be True!')
256            if not usePenalty:
257                ddr.oRollingDisc = mbs.AddObject(ObjectJointRollingDisc(markerNumbers=[ddr.mGround , mWheel],
258                                                     constrainedAxes=[i,1,1-planarPlatform], discRadius=wheelRadius,
259                                                     visualization=VObjectJointRollingDisc(discWidth=wheelThickness,color=color4blue)))
260            else:
261                nGeneric = mbs.AddNode(NodeGenericData(initialCoordinates=[0,0,0], numberOfDataCoordinates=3))
262                ddr.oRollingDisc = mbs.AddObject(ObjectConnectorRollingDiscPenalty(markerNumbers=[ddr.mGround , mWheel],
263                                                nodeNumber = nGeneric,
264                                                discRadius=wheelRadius,
265                                                useLinearProportionalZone=True,
266                                                dryFrictionProportionalZone=0.05,
267                                                contactStiffness=ddr.stiffnessGround,
268                                                contactDamping=ddr.dampingGround,
269                                                dryFriction=[ddr.frictionCoeff]*2,
270                                                visualization=VObjectConnectorRollingDiscPenalty(discWidth=wheelThickness,color=color4blue)))
271
272
273
274    #compute wheel velocities for given forward and rotation velocity
275    def WheelVelocities(forwardVelocity, vRotation, wheelRadius, wheelDistance):
276        vLeft = -forwardVelocity/wheelRadius
277        vRight = vLeft
278        vOff = vRotation*wheelDistance*0.5/wheelRadius
279        vLeft += vOff
280        vRight -= vOff
281        return [vLeft, vRight]
282
283    ddr.WheelVelocities = WheelVelocities
284
285    #add some useful graphics settings
286
287    SC.visualizationSettings.general.circleTiling=200
288    SC.visualizationSettings.general.drawCoordinateSystem=True
289    SC.visualizationSettings.loads.show=False
290    SC.visualizationSettings.bodies.show=True
291    SC.visualizationSettings.markers.show=False
292    SC.visualizationSettings.bodies.kinematicTree.frameSize = 0.1
293    SC.visualizationSettings.bodies.kinematicTree.showJointFrames = False
294
295    SC.visualizationSettings.nodes.show=True
296    # SC.visualizationSettings.nodes.showBasis =True
297    SC.visualizationSettings.nodes.drawNodesAsPoint = False
298    SC.visualizationSettings.nodes.defaultSize = 0 #must not be -1, otherwise uses autocomputed size
299
300    SC.visualizationSettings.openGL.multiSampling = 4
301    # SC.visualizationSettings.openGL.shadow = 0.25
302    #SC.visualizationSettings.openGL.light0position = [-3,3,10,0]
303    # SC.visualizationSettings.contact.showBoundingBoxes = True
304    SC.visualizationSettings.contact.showTriangles = True
305    SC.visualizationSettings.contact.showSpheres = True
306
307    return ddr
308
309#%%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
310#for testing with a simple trajectory:
311if False:
312    SC = exu.SystemContainer()
313    mbs = SC.AddSystem()
314
315    useGeneralContact = False
316    usePenalty = True
317    wheelRadius = 0.04
318    wheelDistance = 0.4
319    ddr = DifferentialDriveRobot(SC, mbs,useGeneralContact=useGeneralContact,
320                                 usePenalty=usePenalty, planarPlatform=True,
321                                 wheelRadius=wheelRadius, wheelDistance=wheelDistance)
322    mbs.Assemble()
323
324    #create some nice trajectory
325    def PreStepUserFunction(mbs, t):
326        vSet = ddr.jointVelocityOffsetVector #nominal values
327        vSet[-2:] = [0,0]
328
329        if t < 2:
330            vSet[-2:] = ddr.WheelVelocities(0.5, 0, wheelRadius, wheelDistance)
331        elif t < 3: pass
332        elif t < 4:
333            vSet[-2:] = ddr.WheelVelocities(0, 0.5*pi, wheelRadius, wheelDistance)
334        elif t < 5: pass
335        elif t < 7:
336            vSet[-2:] = ddr.WheelVelocities(-1, 0, wheelRadius, wheelDistance)
337        elif t < 8: pass
338        elif t < 9:
339            vSet[-2:] = ddr.WheelVelocities(0.5, 0.5*pi, wheelRadius, wheelDistance)
340
341        mbs.SetObjectParameter(ddr.oKT, "jointVelocityOffsetVector", vSet)
342
343        return True
344
345    mbs.SetPreStepUserFunction(PreStepUserFunction)
346
347    tEnd = 12 #tEnd = 0.8 for test suite
348    stepSize = 0.002 #h= 0.0002 for test suite
349    if useGeneralContact or usePenalty:
350        stepSize = 2e-4
351    # h*=0.1
352    # tEnd*=3
353    simulationSettings = exu.SimulationSettings()
354    simulationSettings.solutionSettings.solutionWritePeriod = 0.01
355    simulationSettings.solutionSettings.writeSolutionToFile = False
356    simulationSettings.solutionSettings.coordinatesSolutionFileName = 'solution/coordinatesSolution.txt'
357
358    simulationSettings.solutionSettings.sensorsWritePeriod = stepSize*10
359    # simulationSettings.displayComputationTime = True
360    # simulationSettings.displayStatistics = True
361    # simulationSettings.timeIntegration.verboseMode = 1
362    #simulationSettings.timeIntegration.simulateInRealtime = True
363    simulationSettings.timeIntegration.discontinuous.maxIterations = 1 #speed up
364    #simulationSettings.timeIntegration.discontinuous.iterationTolerance = 1e-5
365
366
367    exu.StartRenderer()
368    if 'renderState' in exu.sys:
369        SC.SetRenderState(exu.sys['renderState'])
370    mbs.WaitForUserToContinue()
371
372    simulationSettings.timeIntegration.numberOfSteps = int(tEnd/stepSize)
373    simulationSettings.timeIntegration.endTime = tEnd
374    simulationSettings.timeIntegration.explicitIntegration.computeEndOfStepAccelerations = False #increase performance, accelerations less accurate
375
376    SC.visualizationSettings.window.renderWindowSize=[1600,1024]
377    SC.visualizationSettings.general.graphicsUpdateInterval = 0.02
378
379    if useGeneralContact or usePenalty:
380        mbs.SolveDynamic(simulationSettings, solverType=exu.DynamicSolverType.ExplicitEuler)
381        # mbs.SolveDynamic(simulationSettings, solverType=exu.DynamicSolverType.ExplicitMidpoint)
382    else:
383        mbs.SolveDynamic(simulationSettings)
384
385    SC.WaitForRenderEngineStopFlag()
386    exu.StopRenderer() #safely close rendering window!
387
388    if True:
389        mbs.PlotSensor(ddr.sPlatformVel, components=[0,1],closeAll=True)
390        mbs.PlotSensor(ddr.sPlatformAngVel, components=[0,1,2])
391
392def Rot2D(phi):
393    return np.array([[np.cos(phi),-np.sin(phi)],
394                     [np.sin(phi), np.cos(phi)]])
395
396
397#%%++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
398class RobotEnv(OpenAIGymInterfaceEnv):
399
400    #**classFunction: OVERRIDE this function to create multibody system mbs and setup simulationSettings; call Assemble() at the end!
401    #                 you may also change SC.visualizationSettings() individually; kwargs may be used for special setup
402    def CreateMBS(self, SC, mbs, simulationSettings, **kwargs):
403
404        #%%++++++++++++++++++++++++++++++++++++++++++++++
405        self.mbs = mbs
406        self.SC = SC
407
408        self.dimGroundX = 4 #dimension of ground
409        self.dimGroundY = 4
410        self.maxRotations = 0.6 #maximum number before learning stops
411
412        self.maxWheelSpeed = 2*pi #2*pi = 1 revolution/second
413        self.wheelRadius = 0.04
414        self.wheelDistance = 0.4
415        self.maxVelocity = self.wheelRadius * self.maxWheelSpeed
416        self.maxPlatformAngVel = self.wheelRadius/(self.wheelDistance*0.5)*self.maxWheelSpeed
417
418        useGeneralContact = False
419        usePenalty = True
420        ddr = DifferentialDriveRobot(SC, mbs,useGeneralContact=useGeneralContact,
421                                     dimGroundX=self.dimGroundY, dimGroundY=self.dimGroundY,
422                                     usePenalty=usePenalty,
423                                     planarPlatform=True,
424                                     stiffnessGround=1e4,
425                                     wheelRadius=self.wheelRadius,
426                                     wheelDistance=self.wheelDistance)
427
428        self.ddr = ddr
429        self.oKT = ddr.oKT
430        self.nKT = ddr.nKT
431
432        #add graphics for desination
433        gDestination = GraphicsDataSphere(point=[0,0,0.05],radius = 0.02, color=color4red, nTiles=16)
434        self.oDestination = mbs.CreateGround(graphicsDataList=[gDestination])
435
436        mbs.Assemble()
437        self.stepSize = 1e-3
438        self.stepUpdateTime = 0.05
439
440        simulationSettings.solutionSettings.solutionWritePeriod = 0.1
441
442        writeSolutionToFile = False
443        if 'writeSolutionToFile' in kwargs:
444            writeSolutionToFile = kwargs['writeSolutionToFile']
445
446        useGraphics = False
447        if 'useGraphics' in kwargs:
448            useGraphics = kwargs['useGraphics']
449
450        simulationSettings.solutionSettings.writeSolutionToFile = writeSolutionToFile
451        simulationSettings.solutionSettings.writeSolutionToFile = False
452
453        simulationSettings.solutionSettings.coordinatesSolutionFileName = 'solution/coordinatesSolution.txt'
454
455        # simulationSettings.displayComputationTime = True
456        #simulationSettings.displayStatistics = True
457        #simulationSettings.timeIntegration.verboseMode = 1
458        #simulationSettings.timeIntegration.simulateInRealtime = True
459        simulationSettings.timeIntegration.discontinuous.maxIterations = 1 #speed up
460        #simulationSettings.timeIntegration.discontinuous.iterationTolerance = 1e-5
461
462
463        simulationSettings.timeIntegration.numberOfSteps = int(self.stepUpdateTime/self.stepSize)
464        simulationSettings.timeIntegration.endTime = self.stepUpdateTime
465        simulationSettings.timeIntegration.explicitIntegration.computeEndOfStepAccelerations = False #increase performance, accelerations less accurate
466
467        SC.visualizationSettings.window.renderWindowSize=[1600,1024]
468        SC.visualizationSettings.general.graphicsUpdateInterval = 0.02
469
470        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
471        self.randomInitializationValue = [0.4*self.dimGroundX, 0.4*self.dimGroundY, self.maxRotations*2*pi*0.99,
472                                          self.maxVelocity*0,self.maxVelocity*0,self.maxPlatformAngVel*0,
473                                          0.3*self.dimGroundX, 0.3*self.dimGroundY, #destination points
474                                          ]
475
476        #must return state size
477        self.numberOfStates = 3 #posx, posy, rot
478        self.destinationStates = 2 #define here, if destination is included in states
479        self.destination = [0.,0.] #default value for destination
480
481        return self.destinationStates + self.numberOfStates * 2 #the number of states (position/velocity that are used by learning algorithm)
482
483    #**classFunction: OVERRIDE this function to set up self.action_space and self.observation_space
484    def SetupSpaces(self):
485
486        high = np.array(
487            [
488                self.dimGroundX*0.5,
489                self.dimGroundY*0.5,
490                2*pi*self.maxRotations #10 full revolutions; no more should be needed for any task
491            ] +
492            [
493                np.finfo(np.float32).max,
494            ] * self.numberOfStates +
495            [self.dimGroundX*0.5,
496             self.dimGroundY*0.5]*(self.destinationStates>0)
497            ,
498            dtype=np.float32,
499        )
500
501
502        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
503        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
504        #for Env:
505
506        self.action_space = spaces.Box(low=np.array([-self.maxWheelSpeed,-self.maxWheelSpeed], dtype=np.float32),
507                                       high=np.array([self.maxWheelSpeed,self.maxWheelSpeed], dtype=np.float32), dtype=np.float32)
508
509        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
510        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
511
512
513    #**classFunction: this function is overwritten to map the action given by learning algorithm to the multibody system (environment)
514    def MapAction2MBS(self, action):
515        # force = action[0] * self.force_mag
516        # self.mbs.SetLoadParameter(self.lControl, 'load', force)
517        vSet = self.ddr.jointVelocityOffsetVector #nominal values
518        vSet[-2:] = action
519        # vSet[-1] = vSet[-2]
520        # vSet[-2:] = [2,2.5]
521        # print('action:', action)
522
523        self.mbs.SetObjectParameter(self.oKT, "jointVelocityOffsetVector", vSet)
524
525
526    #**classFunction: this function is overwrritten to collect output of simulation and map to self.state tuple
527    #**output: return bool done which contains information if system state is outside valid range
528    def Output2StateAndDone(self):
529
530        #+++++++++++++++++++++++++
531        #implemented for planar model only!
532        statesVector =  self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates)[0:self.numberOfStates]
533        statesVectorGlob_t =  self.mbs.GetNodeOutput(self.nKT, variableType=exu.OutputVariableType.Coordinates_t)[0:self.numberOfStates]
534
535        # vLoc = Rot2D(statesVector[2]).T @ statesVectorGlob_t[0:2]
536        # print('vLoc=',vLoc)
537        # statesVector_t = np.array([vLoc[1], statesVectorGlob_t[2]])
538        statesVector_t = statesVectorGlob_t #change to local in future!
539
540        self.state = list(statesVector) + list(statesVector_t)
541        if self.destinationStates:
542            self.state += list(self.destination)
543        self.state = tuple(self.state)
544
545        done = bool(
546            statesVector[0] < -self.dimGroundX
547            or statesVector[0] > self.dimGroundX
548            or statesVector[1] < -self.dimGroundY
549            or statesVector[1] > self.dimGroundY
550            or statesVector[2] < -self.maxRotations*2*pi
551            or statesVector[2] > self.maxRotations*2*pi
552            )
553
554        return done
555
556
557    #**classFunction: OVERRIDE this function to map the current state to mbs initial values
558    #**output: return [initialValues, initialValues\_t] where initialValues[\_t] are ODE2 vectors of coordinates[\_t] for the mbs
559    def State2InitialValues(self):
560        #+++++++++++++++++++++++++++++++++++++++++++++
561        #states: x, y, phi, x_t, y_t, phi_t
562        initialValues = list(self.state[0:self.numberOfStates])+[0,0] #wheels do not initialize
563        initialValues_t = list(self.state[self.numberOfStates:2*self.numberOfStates])+[0,0]
564
565        if self.destinationStates:
566            if self.destination[0] != self.state[-2] or self.destination[1] != self.state[-1]:
567                # print('set new destination:', self.destination)
568                self.destination = self.state[-2:] #last two values are destination
569                self.mbs.SetObjectParameter(self.oDestination, 'referencePosition',
570                                             list(self.destination)+[0])
571
572        return [initialValues,initialValues_t]
573
574    def getReward(self):
575        X = self.dimGroundX
576        Y = self.dimGroundY
577        v = np.array([self.destination[0] - self.state[0], self.destination[1] - self.state[1]])
578        dist = NormL2(v)
579
580        phi = self.state[2]
581        localSpeed = Rot2D(phi).T @ [self.state[3],self.state[4]]
582        forwardSpeed = localSpeed[1]
583
584        reward = 1
585        #take power of 0.5 of dist to penalize small distances
586        #reward -= (dist/(0.5*NormL2([X,Y])))**0.5
587
588        #add penalty on rotations at a certain time (at beginning rotation may be needed...)
589        #reward -= 0.2*abs(self.state[5])/self.maxPlatformAngVel
590        # t = self.mbs.systemData.GetTime()
591        # if t > 4:
592        #     fact = 1
593        #     if t < 5: fact  = 5-t
594        #     reward -= fact*0.1*abs(self.state[5])/self.maxPlatformAngVel
595
596        #add penalty on reverse velocity: this supports solutions in forward direction!
597        # backwardMaxSpeed = 0.1
598        # if forwardSpeed < -backwardMaxSpeed*self.maxVelocity:
599        #     reward -= abs(forwardSpeed)/self.maxVelocity+backwardMaxSpeed
600
601        reward -= 0.5*abs(forwardSpeed)
602
603        if dist > 0:
604            v0 = v*(1/dist)
605            vDir = Rot2D(phi) @ [0,1]
606            # print('v0=',v0,', dir=',vDir)
607            reward -= NormL2(vDir-v0)*0.5
608
609        # print('rew=', round(reward,3), ', vF=', round(0.5*abs(forwardSpeed),3),
610        #       ', dir=', round(NormL2(vDir-v0)*0.5,4),
611        #       'v0=', v0, 'vDir=',vDir)
612
613        #reward -= max(0,abs(self.state[2])-pi)/(4*pi)
614        if reward < 0: reward = 0
615
616        # print('forwardSpeed',round(forwardSpeed/self.maxVelocity,3),
617        #       ', reward',round(reward,3))
618
619        # print('reward=',reward, ', t=', self.mbs.systemData.GetTime())
620        return reward
621
622    #**classFunction: openAI gym interface function which is called to compute one step
623    def step(self, action):
624        err_msg = f"{action!r} ({type(action)}) invalid"
625        assert self.action_space.contains(action), err_msg
626        assert self.state is not None, "Call reset before using step method."
627
628        #++++++++++++++++++++++++++++++++++++++++++++++++++
629        #main steps:
630        [initialValues,initialValues_t] = self.State2InitialValues()
631        # print('initialValues_t:',initialValues_t)
632        # print(self.mbs)
633        qOriginal = self.mbs.systemData.GetODE2Coordinates(exu.ConfigurationType.Initial)
634        q_tOriginal = self.mbs.systemData.GetODE2Coordinates_t(exu.ConfigurationType.Initial)
635        initialValues[self.numberOfStates:] = qOriginal[self.numberOfStates:]
636        initialValues_t[self.numberOfStates:] = q_tOriginal[self.numberOfStates:]
637
638        self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
639        self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
640
641        self.MapAction2MBS(action)
642
643        #this may be time consuming for larger models!
644        self.IntegrateStep()
645
646        done = self.Output2StateAndDone()
647        if self.mbs.systemData.GetTime() > 16: #if it is too long, stop for now!
648            done = True
649
650        # print('state:', self.state, 'done: ', done)
651        #++++++++++++++++++++++++++++++++++++++++++++++++++
652        if not done:
653            reward = self.getReward()
654        elif self.steps_beyond_done is None:
655            self.steps_beyond_done = 0
656            reward = self.getReward()
657        else:
658
659            if self.steps_beyond_done == 0:
660                logger.warn(
661                    "You are calling 'step()' even though this "
662                    "environment has already returned done = True. You "
663                    "should always call 'reset()' once you receive 'done = "
664                    "True' -- any further steps are undefined behavior."
665                )
666            self.steps_beyond_done += 1
667            reward = 0.0
668
669        return np.array(self.state, dtype=np.float32), reward, done, {}
670
671
672
673
674# sys.exit()
675
676#%%+++++++++++++++++++++++++++++++++++++++++++++
677if __name__ == '__main__': #this is only executed when file is direct called in Python
678    import time
679
680    #%%++++++++++++++++++++++++++++++++++++++++++++++++++
681    #use some learning algorithm:
682    #pip install stable_baselines3
683    from stable_baselines3 import A2C, SAC
684
685
686    # here the model is loaded (either for vectorized or scalar environment´using SAC or A2C).
687    def GetModel(myEnv, modelType='SAC'):
688        if modelType=='SAC':
689            model = SAC('MlpPolicy',
690                   env=myEnv,
691                   #learning_rate=8e-4,
692                   device='cpu', #usually cpu is faster for this size of networks
693                   #batch_size=128,
694                   verbose=1)
695        elif modelType == 'A2C':
696            model = A2C('MlpPolicy',
697                    myEnv,
698                    device='cpu',
699                    #n_steps=5,
700                    # policy_kwargs = dict(activation_fn=torch.nn.ReLU,
701                    #  net_arch=dict(pi=[8]*2, vf=[8]*2)),
702                    verbose=1)
703        else:
704            print('Please specify the modelType.')
705            raise ValueError
706
707        return model
708
709    # sys.exit()
710    #create model and do reinforcement learning
711    modelType='A2C'
712    modelName = 'openAIgymDDrobot_'+modelType
713    if False: #'scalar' environment:
714        env = RobotEnv()
715        #check if model runs:
716        #env.SetSolver(exu.DynamicSolverType.ExplicitMidpoint)
717        #env.SetSolver(exu.DynamicSolverType.RK44) #very acurate
718        # env.TestModel(numberOfSteps=2000, seed=42, sleepTime=0.02*0, useRenderer=True)
719        model = GetModel(env, modelType=modelType)
720        env.useRenderer = True
721        # env.render()
722        exu.StartRenderer()
723
724        ts = -time.time()
725        model.learn(total_timesteps=200000)
726
727        print('*** learning time total =',ts+time.time(),'***')
728
729        #save learned model
730
731        model.save("solution/" + modelName)
732    else:
733        import torch #stable-baselines3 is based on pytorch
734        n_cores= max(1,int(os.cpu_count()/2)) #n_cores should be number of real cores (not threads)
735        n_cores = 26 #16 #vecEnv can handle number of threads, while torch should rather use real cores
736        #torch.set_num_threads(n_cores) #seems to be ideal to match the size of subprocVecEnv
737        torch.set_num_threads(14) #seems to be ideal to match the size of subprocVecEnv
738
739        print('using',n_cores,'cores')
740
741        from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
742        vecEnv = SubprocVecEnv([RobotEnv for i in range(n_cores)])
743
744
745        #main learning task;  training of double pendulum: with 20 cores 800 000 steps take in the continous case approximatly 18 minutes (SAC), discrete (A2C) takes 2 minutes.
746        model = GetModel(vecEnv, modelType=modelType)
747
748        ts = -time.time()
749        print('start learning of agent with {}'.format(str(model.policy).split('(')[0]))
750        # model.learn(total_timesteps=50000)
751        model.learn(total_timesteps=int(500_000),log_interval=500)
752        print('*** learning time total =',ts+time.time(),'***')
753
754        #save learned model
755        model.save("solution/" + modelName)
756
757    if False: #set True to visualize results
758        #%%++++++++++++++++++++++++++++++++++++++++++++++++++
759        #only load and test
760        if False:
761            modelName = 'openAIgymDDrobot_A2C_16M'
762            modelType='A2C'
763            if modelType == 'SAC':
764                model = SAC.load("solution/" + modelName)
765            else:
766                model = A2C.load("solution/" + modelName)
767
768        env = RobotEnv() #larger threshold for testing
769        solutionFile='solution/learningCoordinates.txt'
770        env.TestModel(numberOfSteps=800, seed=3, model=model, solutionFileName=solutionFile,
771                      stopIfDone=False, useRenderer=False, sleepTime=0) #just compute solution file
772
773        #++++++++++++++++++++++++++++++++++++++++++++++
774        #visualize (and make animations) in exudyn:
775        from exudyn.interactive import SolutionViewer
776        env.SC.visualizationSettings.general.autoFitScene = False
777        solution = LoadSolutionFile(solutionFile)
778        SolutionViewer(env.mbs, solution, timeout = 0.01, rowIncrement=2) #loads solution file via name stored in mbs