# testGymCartpoleEnv.py
#
# You can view and download this file on GitHub: testGymCartpoleEnv.py
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# This is an EXUDYN example
#
# Details:  This file serves as an input to testGymCartpole.py
#
# Author:   Johannes Gerstmayr, Grzegorz Orzechowski
# Date:     2022-05-17
# Update:   2023-05-20: derive from gym.Env to ensure compatibility with newer stable-baselines3
#
# Copyright:This file is part of Exudyn. Exudyn is free software. You can redistribute it and/or modify it under the terms of the Exudyn license. See 'LICENSE.txt' for more details.
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

import exudyn as exu
from exudyn.utilities import *
import math

import math
from typing import Optional, Union

import numpy as np

import gym
from gym import logger, spaces, Env
from gym.error import DependencyNotInstalled


class CartPoleEnv(Env):
    """Cart-pole balancing environment simulated with an Exudyn 2D multibody model.

    The interface follows the classic gym ``CartPole`` environment:

    * observation: ``(cart x, cart x velocity, pole angle, pole angular velocity)``
      returned as a ``float32`` array of length 4;
    * action: ``0`` applies ``-force_mag`` and ``1`` applies ``+force_mag`` to the
      x coordinate of the cart;
    * ``step`` returns the 4-tuple ``(observation, reward, done, info)``.

    The episode is flagged as done as soon as the cart position leaves
    ``[-x_threshold, x_threshold]`` or the pole angle leaves
    ``[-theta_threshold_radians, theta_threshold_radians]``.
    """

    #metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50}
    metadata = {"render_modes": ["human"], "render_fps": 50}

    def __init__(self, thresholdFactor = 1., forceFactor = 1.):
        """Build the multibody model, the gym spaces and the dynamic solver.

        Args:
            thresholdFactor: scaling factor applied to both termination
                thresholds (pole angle of 12 degrees and cart position of 2.4).
            forceFactor: scaling factor applied to the control force
                magnitude of 10.0.
        """
        self.SC = exu.SystemContainer()
        self.mbs = self.SC.AddSystem()

        #%%+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
        #take variables from cartpole example in openAIgym
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = self.masspole + self.masscart
        self.lengthHalf = 0.5  # actually half the pole's self.length
        self.length = self.lengthHalf*2
        self.polemass_length = self.masspole * self.length
        self.force_mag = 10.0*forceFactor
        self.stepUpdateTime = 0.02  # seconds between state updates
        self.kinematics_integrator = "euler"

        # Angle at which to fail the episode
        self.theta_threshold_radians = thresholdFactor*12 * 2 * math.pi / 360
        self.x_threshold = thresholdFactor*2.4

        # Observation bounds: position and angle limits are twice the
        # termination thresholds, velocities are bounded by the float32 maximum.
        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
        #see https://github.com/openai/gym/blob/64b4b31d8245f6972b3d37270faf69b74908a67d/gym/core.py#L16
        #for Env:
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        #+++++++++++++++++++++++++++++++++++++++++++++++++++++
        self.state = None
        #defined here (not only in reset) so that step() never hits a missing attribute
        self.steps_beyond_done = None
        #random generator used by reset(); None means: use the global numpy generator
        self._resetRng = None
        self.rendererRunning=None
        self.useRenderer = False #turn this on if needed

        background = GraphicsDataCheckerBoard(point= [0,0,0], normal= [0,0,1], size=4)

        self.mbs.AddObject(ObjectGround(referencePosition= [0,0,0],  #x-pos,y-pos,angle
                                        visualization=VObjectGround(graphicsData= [background])))
        nGround=self.mbs.AddNode(NodePointGround())

        #cart: 2D rigid body located at the origin
        gCart = GraphicsDataOrthoCubePoint(size=[0.5*self.length, 0.1*self.length, 0.1*self.length],
                                           color=color4dodgerblue)
        self.nCart = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0,0]))
        self.mbs.AddObject(RigidBody2D(physicsMass=self.masscart,
                                       physicsInertia=0.1*self.masscart, #not needed
                                       nodeNumber=self.nCart,
                                       visualization=VObjectRigidBody2D(graphicsData= [gCart])))

        #pole: 2D rigid body with reference point half a pole length above the cart
        gPole = GraphicsDataOrthoCubePoint(size=[0.1*self.length, self.length, 0.1*self.length], color=color4red)
        self.nPole = self.mbs.AddNode(Rigid2D(referenceCoordinates=[0,0.5*self.length,0]))
        oPole = self.mbs.AddObject(RigidBody2D(physicsMass=self.masspole,
                                               physicsInertia=1e-6, #not included in original paper
                                               nodeNumber=self.nPole,
                                               visualization=VObjectRigidBody2D(graphicsData= [gPole])))

        mCartCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nCart))
        mPoleCOM = self.mbs.AddMarker(MarkerNodePosition(nodeNumber=self.nPole))
        #lower end of the pole, where it is hinged to the cart
        mPoleJoint = self.mbs.AddMarker(MarkerBodyPosition(bodyNumber=oPole, localPosition=[0,-0.5*self.length,0]))

        mCartCoordX = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=0))
        mCartCoordY = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=self.nCart, coordinate=1))
        mGroundNode = self.mbs.AddMarker(MarkerNodeCoordinate(nodeNumber=nGround, coordinate=0))

        #gravity
        self.mbs.AddLoad(Force(markerNumber=mCartCOM, loadVector=[0,-self.masscart*self.gravity,0]))
        self.mbs.AddLoad(Force(markerNumber=mPoleCOM, loadVector=[0,-self.masspole*self.gravity,0]))

        #control force; the load value is overwritten in every call to integrateStep
        self.lControl = self.mbs.AddLoad(LoadCoordinate(markerNumber=mCartCoordX, load=1.))

        #constraints:
        #hinge between cart and lower end of the pole
        self.mbs.AddObject(RevoluteJoint2D(markerNumbers=[mCartCOM, mPoleJoint]))
        #ties the y coordinate of the cart to the ground node coordinate
        self.mbs.AddObject(CoordinateConstraint(markerNumbers=[mCartCoordY, mGroundNode]))

        #%%++++++++++++++++++++++++
        self.mbs.Assemble() #computes initial vector

        self.simulationSettings = exu.SimulationSettings() #takes currently set values or default values

        self.simulationSettings.timeIntegration.numberOfSteps = 1
        self.simulationSettings.timeIntegration.endTime = 0 #will be overwritten in step
        self.simulationSettings.timeIntegration.verboseMode = 0
        self.simulationSettings.solutionSettings.writeSolutionToFile = False
        #self.simulationSettings.timeIntegration.simulateInRealtime = True

        self.simulationSettings.timeIntegration.newton.useModifiedNewton = True

        self.SC.visualizationSettings.general.drawWorldBasis=True
        self.SC.visualizationSettings.general.graphicsUpdateInterval = 0.01 #50Hz

        self.simulationSettings.solutionSettings.solutionInformation = "Open AI gym"

        #use the 'exu' alias imported by this file; the bare name 'exudyn' is
        #only available if the star import from exudyn.utilities leaks it
        self.dynamicSolver = exu.MainSolverImplicitSecondOrder()
        self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings)
        self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings) #to initialize all data

    def integrateStep(self, force):
        """Advance the simulation by ``stepUpdateTime`` with the given control force.

        Args:
            force: value written to the coordinate load acting on the x
                coordinate of the cart for this step.
        """
        #exudyn simulation part
        #index 2 solver
        self.mbs.SetLoadParameter(self.lControl, 'load', force)

        #progress integration time: the previous end time becomes the new start time
        timeIntegration = self.simulationSettings.timeIntegration
        currentTime = timeIntegration.endTime
        timeIntegration.startTime = currentTime
        timeIntegration.endTime = currentTime+self.stepUpdateTime

        # exu.SolveDynamic(self.mbs, self.simulationSettings, solverType=exu.DynamicSolverType.TrapezoidalIndex2,
        #                  updateInitialValues=True) #use final value as new initial values

        self.dynamicSolver.InitializeSolverInitialConditions(self.mbs, self.simulationSettings)
        self.dynamicSolver.SolveSteps(self.mbs, self.simulationSettings)

        #store the computed state (and accelerations) as initial values,
        #such that the next call continues from the end of this step
        systemData = self.mbs.systemData
        currentState = systemData.GetSystemState() #get current values
        systemData.SetSystemState(systemStateList=currentState,
                                  configuration = exu.ConfigurationType.Initial)
        systemData.SetODE2Coordinates_tt(coordinates = systemData.GetODE2Coordinates_tt(),
                                         configuration = exu.ConfigurationType.Initial)

    def step(self, action):
        """Apply one action, simulate one time step and evaluate termination.

        Args:
            action: element of ``action_space``; ``1`` pushes with
                ``+force_mag``, any other valid action with ``-force_mag``.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``observation``
            is a ``float32`` array of length 4, ``reward`` is 1.0 up to and
            including the step that sets ``done`` and 0.0 afterwards, and
            ``info`` is an empty dict.

        Raises:
            AssertionError: if ``action`` is not contained in ``action_space``
                or if ``reset`` has not been called before.
        """
        err_msg = f"{action!r} ({type(action)}) invalid"
        assert self.action_space.contains(action), err_msg
        assert self.state is not None, "Call reset before using step method."

        force = self.force_mag if action == 1 else -self.force_mag

        #++++++++++++++++++++++++++++++++++++++++++++++++++
        #++++++++++++++++++++++++++++++++++++++++++++++++++
        self.integrateStep(force)
        #+++++++++++++++++++++++++
        #compute some output:
        cartPosX = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates)[0]
        poleAngle = self.mbs.GetNodeOutput(self.nPole, variableType=exu.OutputVariableType.Coordinates)[2]
        cartPosX_t = self.mbs.GetNodeOutput(self.nCart, variableType=exu.OutputVariableType.Coordinates_t)[0]
        poleAngle_t = self.mbs.GetNodeOutput(self.nPole, variableType=exu.OutputVariableType.Coordinates_t)[2]

        #finally write updated state:
        self.state = (cartPosX, cartPosX_t, poleAngle, poleAngle_t)
        #++++++++++++++++++++++++++++++++++++++++++++++++++
        #++++++++++++++++++++++++++++++++++++++++++++++++++

        done = bool(
            cartPosX < -self.x_threshold
            or cartPosX > self.x_threshold
            or poleAngle < -self.theta_threshold_radians
            or poleAngle > self.theta_threshold_radians
        )

        if not done:
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            if self.steps_beyond_done == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned done = True. You "
                    "should always call 'reset()' once you receive 'done = "
                    "True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_done += 1
            reward = 0.0

        return np.array(self.state, dtype=np.float32), reward, done, {}

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        """Start a new episode from a small random initial state.

        All four state components are drawn uniformly from [-0.05, 0.05) and
        written to the multibody system as initial coordinates and velocities.

        Args:
            seed: if given, (re-)seeds the random generator used for the
                initial state of this and all following resets; if never
                given, the global numpy random generator is used.
            return_info: if True, an (empty) info dict is returned in addition
                to the observation.
            options: accepted for interface compatibility; not used.

        Returns:
            The initial observation as ``float32`` array of length 4, or the
            tuple ``(observation, {})`` if ``return_info`` is True.
        """
        #super().reset(seed=seed)
        #self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
        if seed is not None:
            #honor the seed argument to make episodes reproducible
            self._resetRng = np.random.RandomState(seed)
        sampler = np.random if self._resetRng is None else self._resetRng
        self.state = sampler.uniform(low=-0.05, high=0.05, size=(4,))
        self.steps_beyond_done = None

        #+++++++++++++++++++++++++++++++++++++++++++++
        #set initial values:
        #+++++++++++++++++++++++++++++++++++++++++++++
        #set specific initial state:
        (xCart, xCart_t, phiPole, phiPole_t) = self.state

        #coordinates 0..2 belong to the cart node, 3..5 to the pole node;
        #values are displacements relative to the reference configuration
        initialValues = np.zeros(6)
        initialValues_t = np.zeros(6)
        initialValues[0] = xCart
        #pole reference point follows from the hinge at the cart and the pole angle
        initialValues[3+0] = xCart - 0.5*self.length * math.sin(phiPole)
        initialValues[3+1] = 0.5*self.length * (math.cos(phiPole)-1)
        initialValues[3+2] = phiPole

        #velocities: time derivatives of the expressions above
        initialValues_t[0] = xCart_t
        initialValues_t[3+0] = xCart_t - phiPole_t*0.5*self.length * math.cos(phiPole)
        initialValues_t[3+1] = -0.5*self.length * math.sin(phiPole)  * phiPole_t
        initialValues_t[3+2] = phiPole_t

        self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
        self.mbs.systemData.SetODE2Coordinates_t(initialValues_t, exu.ConfigurationType.Initial)

        self.simulationSettings.timeIntegration.endTime = 0
        #self.dynamicSolver.FinalizeSolver(self.mbs, self.simulationSettings) #needed to update initial conditions
        self.dynamicSolver.InitializeSolver(self.mbs, self.simulationSettings) #needed to update initial conditions
        # self.dynamicSolver.InitializeSolverInitialConditions(self.mbs, self.simulationSettings) #needed to update initial conditions

        observation = np.array(self.state, dtype=np.float32)
        if return_info:
            return observation, {}
        return observation

    def render(self, mode="human"):
        """Start the Exudyn renderer once, if ``useRenderer`` has been enabled.

        Args:
            mode: accepted for interface compatibility; not used.
        """
        if self.rendererRunning is None and self.useRenderer:
            exu.StartRenderer()
            self.rendererRunning = True

    def close(self):
        """Finalize the solver and stop the renderer if it was started by ``render``."""
        self.dynamicSolver.FinalizeSolver(self.mbs, self.simulationSettings)
        if self.rendererRunning:
            # SC.WaitForRenderEngineStopFlag()
            exu.StopRenderer() #safely close rendering window!
            #mark as stopped: avoids a second StopRenderer and allows render() to restart
            self.rendererRunning = None


# #+++++++++++++++++++++++++++++++++++++++++++++
# #reset:
# self.mbs.systemData.SetODE2Coordinates(initialValues, exu.ConfigurationType.Initial)
# self.mbs.systemData.SetODE2Coordinates_t(initialValues, exu.ConfigurationType.Initial)
# self.mbs.systemData.SetODE2Coordinates_tt(initialValues, exu.ConfigurationType.Initial)