testGymDoublePendulumEnv.py
You can view and download this file on Github: testGymDoublePendulumEnv.py
1#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2# This is an EXUDYN example
3#
4# Details: This file serves as an input to testGymDoublePendulum.py
5#
6# Author: Johannes Gerstmayr
7# Date: 2022-05-18
8# Update: 2023-05-20: derive from gym.Env to ensure compatibility with newer stable-baselines3
9#
10# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
11#
12#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
13
14import exudyn as exu
15from exudyn.utilities import *
16import math
17
18import math
19from typing import Optional, Union
20
21import numpy as np
22
23import gym
24from gym import logger, spaces, Env
25from gym.error import DependencyNotInstalled
26
27
28class DoublePendulumEnv(Env):
29
30 #metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
31 metadata = {"render_modes": ["human"], "render_fps": 50}
32
33 def __init__(self, thresholdFactor = 1.):
34
35 self.SC = exu.SystemContainer()
36 self.mbs = self.SC.AddSystem()
37
38
39 #%%+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
40 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
41 self.gravity = 9.81
42 self.length = 1.
43 self.masscart = 1.
44 self.massarm = 0.1
45 self.total_mass = self.massarm + self.masscart
46 self.armInertia = self.length**2*0.5*self.massarm
47 self.force_mag = 10.0
48 self.stepUpdateTime = 0.02 # seconds between state updates
49
50 # Angle at which to fail the episode
51 self.theta_threshold_radians = thresholdFactor* 12 * 2 * math.pi / 360
52 self.x_threshold = thresholdFactor*2.4
53
54 high = np.array(
55 [
56 self.x_threshold * 2,
57 np.finfo(np.float32).max,
58 self.theta_threshold_radians * 2,
59 np.finfo(np.float32).max,
60 self.theta_threshold_radians * 2,
61 np.finfo(np.float32).max,
62 ],
63 dtype=np.float32,
64 )
65
66 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
67 #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
68 #for Env:
69 self.action_space = spaces.Discrete(2)
70 self.observation_space = spaces.Box(-high, high, dtype=np.float32)
71 #+++++++++++++++++++++++++++++++++++++++++++++++++++++
72 self.state = None
73 self.rendererRunning=None
74 self.useRenderer = False #turn this on if needed
75
76 background = GraphicsDataCheckerBoard(point= [0,0,0], normal= [0,0,1], size=4)
77
78 oGround=self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0], #x-pos,y-pos,angle
79 visualization=VObjectGround(graphicsData= [background])))
80 nGround=self.mbs.AddNode(NodePointGround())
81
82 gCart = GraphicsDataOrthoCubePoint(size=[0.5*self.length, 0.1*self.length, 0.1*self.length],
83 color=color4dodgerblue)
84 self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]));
85 oCart = self.mbs.AddObject(RigidBody2D(physicsMass=self.masscart,
86 physicsInertia=0.1*self.masscart, #not needed
87 nodeNumber=self.nCart,
88 visualization=VObjectRigidBody2D(graphicsData= [gCart])))
89 mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
90
91 gArm1 = GraphicsDataOrthoCubePoint(size=[0.1*self.length, self.length, 0.1*self.length], color=color4red)
92 self.nArm1 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]));
93 oArm1 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
94 physicsInertia=self.armInertia, #not included in original paper
95 nodeNumber=self.nArm1,
96 visualization=VObjectRigidBody2D(graphicsData= [gArm1])))
97
98 mArm1COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm1))
99 mArm1JointA = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0,-0.5*self.length,0]))
100 mArm1JointB = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm1, localPosition=[0, 0.5*self.length,0]))
101
102 gArm2 = GraphicsDataOrthoCubePoint(size=[0.1*self.length, self.length, 0.1*self.length], color=color4red)
103 self.nArm2 = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,1.5*self.length,0]));
104 oArm2 = self.mbs.AddObject(RigidBody2D(physicsMass=self.massarm,
105 physicsInertia=self.armInertia, #not included in original paper
106 nodeNumber=self.nArm2,
107 visualization=VObjectRigidBody2D(graphicsData= [gArm2])))
108
109 mArm2COM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nArm2))
110 mArm2Joint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oArm2, localPosition=[0,-0.5*self.length,0]))
111
112 mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
113 mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
114 mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))
115
116 #gravity
117 self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-self.masscart*self.gravity,0]))
118 self.mbs.AddLoad(Force(markerNumber=mArm1COM, loadVector=[0,-self.massarm*self.gravity,0]))
119 self.mbs.AddLoad(Force(markerNumber=mArm2COM, loadVector=[0,-self.massarm*self.gravity,0]))
120
121 #control force
122 self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))
123
124 #joints and constraints:
125 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mArm1JointA]))
126 self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mArm1JointB, mArm2Joint]))
127 self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))
128
129
130
131
132 #%%++++++++++++++++++++++++
133 self.mbs.Assemble() #computes initial vector
134
135 self.simulationSettings = exu.SimulationSettings() #takes currently set values or default values
136
137
138 self.simulationSettings.timeIntegration.numberOfSteps = 1
139 self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
140 self.simulationSettings.timeIntegration.verboseMode = 0
141 self.simulationSettings.solutionSettings.writeSolutionToFile = False
142 #self.simulationSettings.timeIntegration.simulateInRealtime = True
143
144 self.simulationSettings.timeIntegration.newton.useModifiedNewton = True
145
146 self.SC.visualizationSettings.general.drawWorldBasis=True
147 self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz
148
149 self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"
150
151 self.dynamicSolver = exudyn.MainSolverImplicitSecondOrder()
152 self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings)
153 self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings) #to initialize all data
154
155 def integrateStep(self, force):
156 #exudyn simulation part
157 #index 2 solver
158 self.mbs.SetLoadParameter(self.lControl, 'load', force)
159
160 #progress integration time
161 currentTime = self.simulationSettings.timeIntegration.endTime
162 self.simulationSettings.timeIntegration.startTime = currentTime
163 self.simulationSettings.timeIntegration.endTime = currentTime+self.stepUpdateTime
164
165 # exu.SolveDynamic(self.mbs, self.simulationSettings, solverType=exu.DynamicSolverType.TrapezoidalIndex2,
166 # updateInitialValues=True) #use final value as new initial values
167 self.dynamicSolver.InitializeSolverInitialConditions(self.mbs, self.simulationSettings)
168 self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings)
169 currentState = self.mbs.systemData.GetSystemState() #get current values
170 self.mbs.systemData.SetSystemState(systemStateList=currentState,
171 configuration = exu.ConfigurationType.Initial)
172 self.mbs.systemData.SetODE2Coordinates_tt(coordinates = self.mbs.systemData.GetODE2Coordinates_tt(),
173 configuration = exu.ConfigurationType.Initial)
174
175
176
177 def step(self, action):
178 err_msg = f"{action!r} ({type(action)}) invalid"
179 assert self.action_space.contains(action), err_msg
180 assert self.state is not None, "Call reset before using step method."
181 self.setState2InitialValues()
182
183 force = self.force_mag if action == 1 else -self.force_mag
184
185 #++++++++++++++++++++++++++++++++++++++++++++++++++
186 #++++++++++++++++++++++++++++++++++++++++++++++++++
187 self.integrateStep(force)
188 #+++++++++++++++++++++++++
189 #compute some output:
190 cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
191 arm1Angle = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates)[2]
192 arm2Angle = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates)[2]
193 cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
194 arm1Angle_t = self.mbs.GetNodeOutput(self.nArm1, variableType=exu.OutputVariableType.Coordinates_t)[2]
195 arm2Angle_t = self.mbs.GetNodeOutput(self.nArm2, variableType=exu.OutputVariableType.Coordinates_t)[2]
196
197 #finally write updated state:
198 self.state = (cartPosX, cartPosX_t, arm1Angle, arm1Angle_t, arm2Angle, arm2Angle_t)
199 #++++++++++++++++++++++++++++++++++++++++++++++++++
200 #++++++++++++++++++++++++++++++++++++++++++++++++++
201
202 done = bool(
203 cartPosX < -self.x_threshold
204 or cartPosX > self.x_threshold
205 or arm1Angle < -self.theta_threshold_radians
206 or arm1Angle > self.theta_threshold_radians
207 or arm2Angle < -self.theta_threshold_radians
208 or arm2Angle > self.theta_threshold_radians
209 )
210
211 if not done:
212 reward = 1.0
213 elif self.steps_beyond_done is None:
214 # Arm1 just fell!
215 self.steps_beyond_done = 0
216 reward = 1.0
217 else:
218 if self.steps_beyond_done == 0:
219 logger.warn(
220 "You are calling 'step()' even though this "
221 "environment has already returned done = True. You "
222 "should always call 'reset()' once you receive 'done = "
223 "True' -- any further steps are undefined behavior."
224 )
225 self.steps_beyond_done += 1
226 reward = 0.0
227
228 return np.array(self.state, dtype=np.float32), reward, done, {}
229
230 def setState2InitialValues(self):
231 #+++++++++++++++++++++++++++++++++++++++++++++
232 #set specific initial state:
233 (xCart, xCart_t, phiArm1, phiArm1_t, phiArm2, phiArm2_t) = self.state
234
235 initialValues = np.zeros(9) #model has 3*3 = 9 redundant states
236 initialValues_t = np.zeros(9)
237
238 #build redundant cordinates from self.state
239 initialValues[0] = xCart
240 initialValues[3+0] = xCart - 0.5*self.length * sin(phiArm1)
241 initialValues[3+1] = 0.5*self.length * (cos(phiArm1)-1)
242 initialValues[3+2] = phiArm1
243 initialValues[6+0] = xCart - 1.*self.length * sin(phiArm1) - 0.5*self.length * sin(phiArm2)
244 initialValues[6+1] = self.length * cos(phiArm1) + 0.5*self.length * cos(phiArm2) - 1.5*self.length
245 initialValues[6+2] = phiArm2
246
247 initialValues_t[0] = xCart_t
248 initialValues_t[3+0] = xCart_t - phiArm1_t*0.5*self.length * cos(phiArm1)
249 initialValues_t[3+1] = -0.5*self.length * sin(phiArm1) * phiArm1_t
250 initialValues_t[3+2] = phiArm1_t
251 initialValues_t[6+0] = xCart_t - phiArm1_t*1.*self.length * cos(phiArm1) - phiArm2_t*0.5*self.length * cos(phiArm2)
252 initialValues_t[6+1] = -self.length * sin(phiArm1) * phiArm1_t - 0.5*self.length * sin(phiArm2) * phiArm2_t
253 initialValues_t[6+2] = phiArm2_t
254
255 self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
256 self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)
257
258
259 def reset(
260 self,
261 *,
262 seed: Optional[int] = None,
263 return_info: bool = False,
264 options: Optional[dict] = None,
265 ):
266 #super().reset(seed=seed)
267 self.state = np.random.uniform(low=-0.05, high=0.05, size=(6,))
268 self.steps_beyond_done = None
269
270
271 #+++++++++++++++++++++++++++++++++++++++++++++
272 #set state into initial values:
273 self.setState2InitialValues()
274
275 self.simulationSettings.timeIntegration.endTime = 0
276 self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings) #needed to update initial conditions
277
278 #+++++++++++++++++++++++++++++++++++++++++++++
279
280 if not return_info:
281 return np.array(self.state, dtype=np.float32)
282 else:
283 return np.array(self.state, dtype=np.float32), {}
284
285 def render(self, mode="human"):
286 if self.rendererRunning==None and self.useRenderer:
287 exu.StartRenderer()
288 self.rendererRunning = True
289
290 def close(self):
291 self.dynamicSolver.FinalizeSolver(self.mbs, self.simulationSettings)
292 if self.rendererRunning==True:
293 # SC.WaitForRenderEngineStopFlag()
294 exu.StopRenderer() #safely close rendering window!
295
296
297
298# #+++++++++++++++++++++++++++++++++++++++++++++
299# #reset:
300# self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
301# self.mbs.systemData.SetODE2Coordinates_t(initialValues, exu.ConfigurationType.Initial)
302# self.mbs.systemData.SetODE2Coordinates_tt(initialValues, exu.ConfigurationType.Initial)