testGymDoublePendulumEnv.py

You can view and download this file on Github: testGymDoublePendulumEnv.py

  1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  2# This is an EXUDYN example
  3#
  4# Details:  This file serves as an input to testGymDoublePendulum.py
  5#
  6# Author:   Johannes Gerstmayr
  7# Date:     2022-05-18
  8# Update:   2023-05-20: derive from gym.Env to ensure compatibility with newer stable-baselines3
  9#
 10# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
 11#
 12#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 13
 14import exudyn as exu
 15from exudyn.utilities import *
 16import math
 17
 18import math
 19from typing import Optional, Union
 20
 21import numpy as np
 22
 23import gym
 24from gym import logger, spaces, Env
 25from gym.error import DependencyNotInstalled
 26
 27
 28class DoublePendulumEnv(Env):
 29
 30    #metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
 31    metadata = {"render_modes": ["human"], "render_fps": 50}
 32
 33    def __init__(self, thresholdFactor = 1.):
 34
 35        self.SC = exu.SystemContainer()
 36        self.mbs = self.SC.AddSystem()
 37
 38
 39        #%%+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 40        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
 41        self.gravity = 9.81
 42        self.length = 1.
 43        self.masscart = 1.
 44        self.massarm = 0.1
 45        self.total_mass = self.massarm + self.masscart
 46        self.armInertia = self.length**2*0.5*self.massarm
 47        self.force_mag = 10.0
 48        self.stepUpdateTime = 0.02  # seconds between state updates
 49
 50        # Angle at which to fail the episode
 51        self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
 52        self.x_threshold = thresholdFactor*2.4
 53
 54        high = np.array(
 55            [
 56                self.x_threshold * 2,
 57                np.finfo(np.float32).max,
 58                self.theta_threshold_radians * 2,
 59                np.finfo(np.float32).max,
 60                self.theta_threshold_radians * 2,
 61                np.finfo(np.float32).max,
 62            ],
 63            dtype=np.float32,
 64        )
 65
 66        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
 67        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
 68        #for Env:
 69        self.action_space = spaces.Discrete(2)
 70        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
 71        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
 72        self.state = None
 73        self.rendererRunning=None
 74        self.useRenderer = False #turn this on if needed
 75
 76        background = GraphicsDataCheckerBoard(point= [0,0,0], normal= [0,0,1], size=4)
 77
 78        oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0],  #x-pos,y-pos,angle
 79                                           visualization=VObjectGround(graphicsData= [background])))
 80        nGround=self.mbs.AddNode(NodePointGround())
 81
 82        gCart = GraphicsDataOrthoCubePoint(size=[0.5*self.length, 0.1*self.length, 0.1*self.length],
 83                                           color=color4dodgerblue)
 84        self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
 85        oCart = self.mbs.AddObject(RigidBody2D(physicsMass=self.masscart,
 86                                          physicsInertia=0.1*self.masscart, #not needed
 87                                          nodeNumber=self.nCart,
 88                                          visualization=VObjectRigidBody2D(graphicsData= [gCart])))
 89        mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
 90
 91        gArm1 = GraphicsDataOrthoCubePoint(size=[0.1*self.length, self.length, 0.1*self.length], color=color4red)
 92        self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
 93        oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
 94                                          physicsInertia=self.armInertia, #not included in original paper
 95                                          nodeNumber=self.nArm1,
 96                                          visualization=VObjectRigidBody2D(graphicsData= [gArm1])))
 97
 98        mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
 99        mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
100        mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
101
102        gArm2 = GraphicsDataOrthoCubePoint(size=[0.1*self.length, self.length, 0.1*self.length], color=color4red)
103        self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
104        oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
105                                          physicsInertia=self.armInertia, #not included in original paper
106                                          nodeNumber=self.nArm2,
107                                          visualization=VObjectRigidBody2D(graphicsData= [gArm2])))
108
109        mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
110        mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
111
112        mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
113        mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
114        mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
115
116        #gravity
117        self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-self.masscart*self.gravity,0]))
118        self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-self.massarm*self.gravity,0]))
119        self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-self.massarm*self.gravity,0]))
120
121        #control force
122        self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
123
124        #joints and constraints:
125        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
126        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
127        self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
128
129
130
131
132        #%%++++++++++++++++++++++++
133        self.mbs.Assemble() #computes initial vector
134
135        self.simulationSettings = exu.SimulationSettings() #takes currently set values or default values
136
137
138        self.simulationSettings.timeIntegration.numberOfSteps = 1
139        self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
140        self.simulationSettings.timeIntegration.verboseMode = 0
141        self.simulationSettings.solutionSettings.writeSolutionToFile = False
142        #self.simulationSettings.timeIntegration.simulateInRealtime = True
143
144        self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
145
146        self.SC.visualizationSettings.general.drawWorldBasis=True
147        self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
148
149        self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
150
151        self.dynamicSolver = exudyn.MainSolverImplicitSecondOrder()
152        self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings)
153        self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings) #to initialize all data
154
155    def integrateStep(self, force):
156        #exudyn simulation part
157        #index 2 solver
158        self.mbs.SetLoadParameter(self.lControl, 'load', force)
159
160        #progress integration time
161        currentTime = self.simulationSettings.timeIntegration.endTime
162        self.simulationSettings.timeIntegration.startTime = currentTime
163        self.simulationSettings.timeIntegration.endTime = currentTime+self.stepUpdateTime
164
165        # exu.SolveDynamic(self.mbs, self.simulationSettings, solverType=exu.DynamicSolverType.TrapezoidalIndex2,
166        #                  updateInitialValues=True) #use final value as new initial values
167        self.dynamicSolver.InitializeSolverInitialConditions(self.mbs, self.simulationSettings)
168        self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings)
169        currentState = self.mbs.systemData.GetSystemState() #get current values
170        self.mbs.systemData.SetSystemState(systemStateList=currentState,
171                                        configuration = exu.ConfigurationType.Initial)
172        self.mbs.systemData.SetODE2Coordinates_tt(coordinates = self.mbs.systemData.GetODE2Coordinates_tt(),
173                                                configuration = exu.ConfigurationType.Initial)
174
175
176
177    def step(self, action):
178        err_msg = f"{action!r} ({type(action)}) invalid"
179        assert self.action_space.contains(action), err_msg
180        assert self.state is not None, "Call reset before using step method."
181        self.setState2InitialValues()
182
183        force = self.force_mag if action == 1 else -self.force_mag
184
185        #++++++++++++++++++++++++++++++++++++++++++++++++++
186        #++++++++++++++++++++++++++++++++++++++++++++++++++
187        self.integrateStep(force)
188        #+++++++++++++++++++++++++
189        #compute some output:
190        cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
191        arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
192        arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
193        cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
194        arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
195        arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
196
197        #finally write updated state:
198        self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t)
199        #++++++++++++++++++++++++++++++++++++++++++++++++++
200        #++++++++++++++++++++++++++++++++++++++++++++++++++
201
202        done = bool(
203            cartPosX < -self.x_threshold
204            or cartPosX > self.x_threshold
205            or arm1Angle < -self.theta_threshold_radians
206            or arm1Angle > self.theta_threshold_radians
207            or arm2Angle < -self.theta_threshold_radians
208            or arm2Angle > self.theta_threshold_radians
209        )
210
211        if not done:
212            reward = 1.0
213        elif self.steps_beyond_done is None:
214            # Arm1 just fell!
215            self.steps_beyond_done = 0
216            reward = 1.0
217        else:
218            if self.steps_beyond_done == 0:
219                logger.warn(
220                    "You are calling 'step()' even though this "
221                    "environment has already returned done = True. You "
222                    "should always call 'reset()' once you receive 'done = "
223                    "True' -- any further steps are undefined behavior."
224                )
225            self.steps_beyond_done += 1
226            reward = 0.0
227
228        return np.array(self.state, dtype=np.float32), reward, done, {}
229
230    def setState2InitialValues(self):
231        #+++++++++++++++++++++++++++++++++++++++++++++
232        #set specific initial state:
233        (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t) = self.state
234
235        initialValues = np.zeros(9) #model has 3*3 = 9  redundant states
236        initialValues_t = np.zeros(9)
237
238        #build redundant cordinates from self.state
239        initialValues[0] = xCart
240        initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
241        initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
242        initialValues[3+2] = phiArm1
243        initialValues[6+0] = xCart - 1.*self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
244        initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
245        initialValues[6+2] = phiArm2
246
247        initialValues_t[0] = xCart_t
248        initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
249        initialValues_t[3+1] = -0.5*self.length * sin(phiArm1)  * phiArm1_t
250        initialValues_t[3+2] = phiArm1_t
251        initialValues_t[6+0] = xCart_t - phiArm1_t*1.*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
252        initialValues_t[6+1] = -self.length * sin(phiArm1)  * phiArm1_t - 0.5*self.length * sin(phiArm2)  * phiArm2_t
253        initialValues_t[6+2] = phiArm2_t
254
255        self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
256        self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
257
258
259    def reset(
260        self,
261        *,
262        seed: Optional[int] = None,
263        return_info: bool = False,
264        options: Optional[dict] = None,
265    ):
266        #super().reset(seed=seed)
267        self.state = np.random.uniform(low=-0.05, high=0.05, size=(6,))
268        self.steps_beyond_done = None
269
270
271        #+++++++++++++++++++++++++++++++++++++++++++++
272        #set state into initial values:
273        self.setState2InitialValues()
274
275        self.simulationSettings.timeIntegration.endTime = 0
276        self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings) #needed to update initial conditions
277
278        #+++++++++++++++++++++++++++++++++++++++++++++
279
280        if not return_info:
281            return np.array(self.state, dtype=np.float32)
282        else:
283            return np.array(self.state, dtype=np.float32), {}
284
285    def render(self, mode="human"):
286        if self.rendererRunning==None and self.useRenderer:
287            exu.StartRenderer()
288            self.rendererRunning = True
289
290    def close(self):
291        self.dynamicSolver.FinalizeSolver(self.mbs, self.simulationSettings)
292        if self.rendererRunning==True:
293            # SC.WaitForRenderEngineStopFlag()
294            exu.StopRenderer() #safely close rendering window!
295
296
297
298# #+++++++++++++++++++++++++++++++++++++++++++++
299# #reset:
300# self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
301# self.mbs.systemData.SetODE2Coordinates_t(initialValues, exu.ConfigurationType.Initial)
302# self.mbs.systemData.SetODE2Coordinates_tt(initialValues, exu.ConfigurationType.Initial)