Using the Environment API

This document serves as a guide in how to implement and use the AbstractEnvironment API. You can find full documentation in the documentation section. Some of these details are out of date but will be updated after the API stabilizes.

Implementing a new environment

We are going to implement the Mountain Car in this document to get used to how we take advantage of the API, and some of the functionality we get by implementing the full API!

I often create a module which contains several environment constants (which are consistent across all versions of this environment). You can also accomplish this by creating a few functions for the type to inline these values, and many other ways. You want to make sure to declare these global variables constant to get nice compiler optimizations for global scope variables. There are also other ways of handling this such as: creating inline functions return the values you care about, others...

module MountainCarConst
const vel_limit = (-0.07, 0.07)
const pos_limit = (-1.2, 0.5)
const pos_initial_range = (-0.6, 0.4)

const Reverse=1
const Neutral=2
const Accelerate=3
end

We create the initial MountainCar struct which is a subtype of AbstractEnvironment. There are several functions we must implement if we want to take advantage of the MinimalRLCore API.

MinimalRLCore.reset!
MinimalRLCore.environment_step!

mutable struct MountainCar <: MinimalRLCore.AbstractEnvironment
    pos::Float64
    vel::Float64
    actions::AbstractSet
    normalized::Bool
    function MountainCar(pos=0.0, vel=0.0, normalized::Bool=false)
        mcc = MountainCarConst
        @boundscheck (pos >= mcc.pos_limit[1] && pos <= mcc.pos_limit[2])
        @boundscheck (vel >= mcc.vel_limit[1] && vel <= mcc.vel_limit[2])
        new(pos, vel, Set([mcc.Reverse, mcc.Neutral, mcc.Accelerate]), normalized)
    end
end

MinimalRLCore.get_actions(env::MountainCar) = env.actions
valid_action(env::MountainCar, action) = action in env.actions

function MinimalRLCore.reset!(env::MountainCar, rng::AbstractRNG; kwargs...)
    env.pos = (rand(rng)*(MountainCarConst.pos_initial_range[2]
                          - MountainCarConst.pos_initial_range[1])
               + MountainCarConst.pos_initial_range[1])
    env.vel = 0.0
end

function MinimalRLCore.reset!(env::MountainCar,
                        start_state::T;
                        kwargs...) where {T<:AbstractArray}
    if env.normalized
        env.pos = start_state[1]
        env.vel = start_state[2]
    else
        pos_limit = MountainCarConst.pos_limit
        vel_limit = MountainCarConst.vel_limit
        env.pos = (start_state[1]*(pos_limit[2] - pos_limit[1])) + pos_limit[1]
        env.vel = (start_state[2]*(vel_limit[2] - vel_limit[1])) + vel_limit[1]
    end
end

function MinimalRLCore.environment_step!(env::MountainCar,
                                   action,
                                   rng; kwargs...)
    
    @boundscheck valid_action(env, action)
    env.vel =
        clamp(env.vel + (action - 2)*0.001 - 0.0025*cos(3*env.pos),
              MountainCarConst.vel_limit...)
    env.pos = clamp(env.pos + env.vel,
                    MountainCarConst.pos_limit...)
end

function MinimalRLCore.get_reward(env::MountainCar) # -> determines if the agent_state is terminal
    if env.pos >= MountainCarConst.pos_limit[2]
        return 0
    end
    return -1
end


function MinimalRLCore.is_terminal(env::MountainCar) # -> determines if the agent_state is terminal
    return env.pos >= MountainCarConst.pos_limit[2]
end


function MinimalRLCore.get_state(env::MountainCar)
    if env.normalized
        return get_normalized_state(env)
    else
        return [env.pos, env.vel]
    end
end


function get_normalized_state(env::MountainCar)
    pos_limit = MountainCarConst.pos_limit
    vel_limit = MountainCarConst.vel_limit
    return [(env.pos - pos_limit[1])/(pos_limit[2] - pos_limit[1]),
            (env.vel - vel_limit[1])/(vel_limit[2] - vel_limit[1])]
end