Skip to content

Pass-through Wrappers

Stateless wrappers that transform observations, actions, or rewards without carrying any state between steps.

envrax.wrappers.jit_wrapper.JitWrapper

Bases: Wrapper[ObsSpaceT, ActSpaceT, StateT, ConfigT]

Wrap a JaxEnv so that reset and step are compiled with jax.jit on construction.

Parameters:

Name Type Description Default
env JaxEnv

Environment to wrap.

required
cache_dir Path | str | None

Directory for the persistent XLA compilation cache. Defaults to ~/.cache/envrax/xla_cache. Pass None to disable.

~/.cache/envrax/xla_cache
pre_warm bool

Run a dummy reset + step immediately to trigger XLA compilation. Set to False to defer compilation to the first real call or an explicit compile() call. Default is True.

True
Source code in envrax/wrappers/jit_wrapper.py
Python
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
class JitWrapper(Wrapper[ObsSpaceT, ActSpaceT, StateT, ConfigT]):
    """
    Wrap a `JaxEnv` so that `reset` and `step` are compiled with
    `jax.jit` on construction.

    Parameters
    ----------
    env : JaxEnv
        Environment to wrap.
    cache_dir : Path | str | None (optional)
        Directory for the persistent XLA compilation cache.
        Defaults to `~/.cache/envrax/xla_cache`. Pass `None` to disable.
    pre_warm : bool (optional)
        Run a dummy `reset` + `step` immediately to trigger XLA compilation.
        Set to `False` to defer compilation to the first real call or an
        explicit `compile()` call. Default is `True`.
    """

    def __init__(
        self,
        env: JaxEnv[ObsSpaceT, ActSpaceT, StateT, ConfigT],
        cache_dir: pathlib.Path | str | None = DEFAULT_CACHE_DIR,
        *,
        pre_warm: bool = True,
    ) -> None:
        super().__init__(env)
        # Configure the on-disk compilation cache before any jit tracing happens.
        setup_cache(cache_dir)

        # Jitted entry points built once at construction; `reset`/`step` below
        # dispatch straight to these.
        self._jit_reset = jax.jit(env.reset)
        self._jit_step = jax.jit(env.step)

        if pre_warm:
            self.compile()

    def compile(self) -> None:
        """
        Trigger XLA compilation by running a dummy `reset` + `step`.

        Safe to call multiple times — subsequent calls are near-instant
        because JAX caches the compiled kernels in memory.
        """
        # All outputs are discarded — this exists purely to force compilation,
        # so reusing one key for both reset and action sampling is fine.
        _key = jax.random.key(0)
        _, _state = self._jit_reset(_key)
        self._jit_step(_state, self._env.action_space.sample(_key))

    def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
        """Reset through the jitted `reset`; same contract as the inner env."""
        return self._jit_reset(rng)

    def step(
        self,
        state: StateT,
        action: chex.Array,
    ) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
        """Step through the jitted `step`; same contract as the inner env."""
        return self._jit_step(state, action)

compile()

Trigger XLA compilation by running a dummy reset + step.

Safe to call multiple times — subsequent calls are near-instant because JAX caches the compiled kernels in memory.

Source code in envrax/wrappers/jit_wrapper.py
Python
46
47
48
49
50
51
52
53
54
55
def compile(self) -> None:
    """
    Force XLA compilation by executing one throwaway `reset` + `step`.

    Idempotent in practice: repeat invocations return almost immediately,
    since JAX keeps the compiled executables cached in memory.
    """
    # Outputs are discarded; only the compilation side effect matters.
    dummy_key = jax.random.key(0)
    _, dummy_state = self._jit_reset(dummy_key)
    dummy_action = self._env.action_space.sample(dummy_key)
    self._jit_step(dummy_state, dummy_action)

envrax.wrappers.clip_reward.ClipReward

Bases: Wrapper[ObsSpaceT, ActSpaceT, StateT, ConfigT]

Clip rewards to the sign of the reward: {−1, 0, +1}.

Parameters:

Name Type Description Default
env JaxEnv

Inner environment to wrap.

required
Source code in envrax/wrappers/clip_reward.py
Python
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
class ClipReward(Wrapper[ObsSpaceT, ActSpaceT, StateT, ConfigT]):
    """
    Replace each reward with its sign, i.e. one of `{−1, 0, +1}`.

    Parameters
    ----------
    env : JaxEnv
        Inner environment to wrap.
    """

    def __init__(self, env: JaxEnv[ObsSpaceT, ActSpaceT, StateT, ConfigT]) -> None:
        super().__init__(env)

    def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
        # Pure pass-through: there is no reward to transform at reset time.
        return self._env.reset(rng)

    def step(
        self,
        state: StateT,
        action: chex.Array,
    ) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
        """
        Advance the inner environment one step and clip the reward to its sign.

        Parameters
        ----------
        state : StateT
            Current environment state
        action : chex.Array
            Action to take in the environment

        Returns
        -------
        obs  : chex.Array
            Observation from the inner step
        new_state : StateT
            Updated environment state
        reward  : chex.Array
            Reward clipped to sign: `{−1.0, 0.0, +1.0}`
        done  : chex.Array
            Terminal flag from the inner step
        info : Dict[str, Any]
            Info dict from the inner step
        """
        obs, next_state, raw_reward, done, info = self._env.step(state, action)
        clipped = jnp.sign(raw_reward).astype(jnp.float32)
        return obs, next_state, clipped, done, info

step(state, action)

Advance the environment by one step and clip the reward to {−1, 0, +1}.

Parameters:

Name Type Description Default
state StateT

Current environment state

required
action Array

Action to take in the environment

required

Returns:

Name Type Description
obs Array

Observation from the inner step

new_state StateT

Updated environment state

reward Array

Reward clipped to sign: {−1.0, 0.0, +1.0}

done Array

Terminal flag from the inner step

info Dict[str, Any]

Info dict from the inner step

Source code in envrax/wrappers/clip_reward.py
Python
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def step(
    self,
    state: StateT,
    action: chex.Array,
) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
    """
    Advance the inner environment one step and clip the reward to its sign.

    Parameters
    ----------
    state : StateT
        Current environment state
    action : chex.Array
        Action to take in the environment

    Returns
    -------
    obs  : chex.Array
        Observation from the inner step
    new_state : StateT
        Updated environment state
    reward  : chex.Array
        Reward clipped to sign: `{−1.0, 0.0, +1.0}`
    done  : chex.Array
        Terminal flag from the inner step
    info : Dict[str, Any]
        Info dict from the inner step
    """
    obs, next_state, raw_reward, done, info = self._env.step(state, action)
    clipped = jnp.sign(raw_reward).astype(jnp.float32)
    return obs, next_state, clipped, done, info

envrax.wrappers.discount.EpisodeDiscount

Bases: Wrapper[ObsSpaceT, ActSpaceT, StateT, ConfigT]

Convert the boolean done signal to a float32 episode discount.

The 4th return value of step() changes from bool to float32: 1.0 while the episode is running, 0.0 on termination.

Parameters:

Name Type Description Default
env JaxEnv

Inner environment to wrap.

required
Source code in envrax/wrappers/discount.py
Python
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
class EpisodeDiscount(Wrapper[ObsSpaceT, ActSpaceT, StateT, ConfigT]):
    """
    Replace the boolean `done` flag with a float32 episode discount.

    The 4th element returned by `step()` becomes `float32` instead of
    `bool`: `1.0` while the episode continues, `0.0` once it terminates.

    Parameters
    ----------
    env : JaxEnv
        Inner environment to wrap.
    """

    def __init__(self, env: JaxEnv[ObsSpaceT, ActSpaceT, StateT, ConfigT]) -> None:
        super().__init__(env)

    def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
        # Pure pass-through: reset produces no done flag to convert.
        return self._env.reset(rng)

    def step(
        self,
        state: StateT,
        action: chex.Array,
    ) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
        """
        Advance the environment and return a float32 discount instead of done.

        Parameters
        ----------
        state : StateT
            Current environment state
        action : chex.Array
            Action to take in the environment

        Returns
        -------
        obs  : chex.Array
            Observation from the inner step
        new_state : StateT
            Updated environment state
        reward  : chex.Array
            Reward from the inner step (unchanged)
        discount  : chex.Array
            `1.0` if the episode continues, `0.0` if it terminated
        info : Dict[str, Any]
            Info dict from the inner step
        """
        obs, next_state, reward, terminated, info = self._env.step(state, action)
        # 0.0 on termination, 1.0 otherwise.
        discount = jnp.where(terminated, jnp.float32(0.0), jnp.float32(1.0))
        return obs, next_state, reward, discount, info

step(state, action)

Advance the environment and return a float32 discount instead of done.

Parameters:

Name Type Description Default
state StateT

Current environment state

required
action Array

Action to take in the environment

required

Returns:

Name Type Description
obs Array

Observation from the inner step

new_state StateT

Updated environment state

reward Array

Reward from the inner step (unchanged)

discount Array

1.0 if the episode continues, 0.0 if it terminated

info Dict[str, Any]

Info dict from the inner step

Source code in envrax/wrappers/discount.py
Python
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def step(
    self,
    state: StateT,
    action: chex.Array,
) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
    """
    Advance the environment and return a float32 discount instead of done.

    Parameters
    ----------
    state : StateT
        Current environment state
    action : chex.Array
        Action to take in the environment

    Returns
    -------
    obs  : chex.Array
        Observation from the inner step
    new_state : StateT
        Updated environment state
    reward  : chex.Array
        Reward from the inner step (unchanged)
    discount  : chex.Array
        `1.0` if the episode continues, `0.0` if it terminated
    info : Dict[str, Any]
        Info dict from the inner step
    """
    obs, next_state, reward, terminated, info = self._env.step(state, action)
    # 0.0 on termination, 1.0 otherwise.
    discount = jnp.where(terminated, jnp.float32(0.0), jnp.float32(1.0))
    return obs, next_state, reward, discount, info

envrax.wrappers.expand_dims.ExpandDims

Bases: Wrapper[ObsSpaceT, ActSpaceT, StateT, ConfigT]

Add a trailing size-1 dimension to reward and done.

Transforms scalar outputs from step() so that reward and done have shape (..., 1) instead of (...).

Parameters:

Name Type Description Default
env JaxEnv

Inner environment to wrap.

required
Source code in envrax/wrappers/expand_dims.py
Python
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class ExpandDims(Wrapper[ObsSpaceT, ActSpaceT, StateT, ConfigT]):
    """
    Append a size-1 axis to `reward` and `done`.

    After wrapping, `step()` returns `reward` and `done` with shape
    `(..., 1)` rather than `(...)`.

    Parameters
    ----------
    env : JaxEnv
        Inner environment to wrap.
    """

    def __init__(self, env: JaxEnv[ObsSpaceT, ActSpaceT, StateT, ConfigT]) -> None:
        super().__init__(env)

    def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
        # Pure pass-through: reset returns no reward/done to expand.
        return self._env.reset(rng)

    def step(
        self,
        state: StateT,
        action: chex.Array,
    ) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
        """
        Advance the environment and expand `reward` and `done`.

        Parameters
        ----------
        state : StateT
            Current environment state
        action : chex.Array
            Action to take in the environment

        Returns
        -------
        obs  : chex.Array
            Observation after the step (unchanged)
        new_state : StateT
            Updated environment state
        reward  : chex.Array
            Reward with a trailing size-1 dimension
        done  : chex.Array
            Terminal flag with a trailing size-1 dimension
        info : Dict[str, Any]
            Auxiliary info dict (unchanged)
        """
        obs, next_state, reward, done, info = self._env.step(state, action)
        expanded_reward = jnp.expand_dims(reward, axis=-1)
        expanded_done = jnp.expand_dims(done, axis=-1)
        return obs, next_state, expanded_reward, expanded_done, info

step(state, action)

Advance the environment and expand reward and done.

Parameters:

Name Type Description Default
state StateT

Current environment state

required
action Array

Action to take in the environment

required

Returns:

Name Type Description
obs Array

Observation after the step (unchanged)

new_state StateT

Updated environment state

reward Array

Reward with a trailing size-1 dimension

done Array

Terminal flag with a trailing size-1 dimension

info Dict[str, Any]

Auxiliary info dict (unchanged)

Source code in envrax/wrappers/expand_dims.py
Python
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def step(
    self,
    state: StateT,
    action: chex.Array,
) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
    """
    Advance the environment and expand `reward` and `done`.

    Parameters
    ----------
    state : StateT
        Current environment state
    action : chex.Array
        Action to take in the environment

    Returns
    -------
    obs  : chex.Array
        Observation after the step (unchanged)
    new_state : StateT
        Updated environment state
    reward  : chex.Array
        Reward with a trailing size-1 dimension
    done  : chex.Array
        Terminal flag with a trailing size-1 dimension
    info : Dict[str, Any]
        Auxiliary info dict (unchanged)
    """
    obs, next_state, reward, done, info = self._env.step(state, action)
    expanded_reward = jnp.expand_dims(reward, axis=-1)
    expanded_done = jnp.expand_dims(done, axis=-1)
    return obs, next_state, expanded_reward, expanded_done, info

envrax.wrappers.grayscale.GrayscaleObservation

Bases: Wrapper[Box, ActSpaceT, StateT, ConfigT]

Convert RGB observations to grayscale using the NTSC luminance formula.

Wraps any environment whose reset / step return uint8[H, W, 3] observations and converts them to uint8[H, W].

Parameters:

Name Type Description Default
env JaxEnv

Inner environment to wrap. Must have a Box observation space of shape (H, W, 3) and dtype uint8.

required
Source code in envrax/wrappers/grayscale.py
Python
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
class GrayscaleObservation(Wrapper[Box, ActSpaceT, StateT, ConfigT]):
    """
    Turn RGB observations into grayscale via the NTSC luminance formula.

    Works with any environment whose `reset` / `step` emit
    `uint8[H, W, 3]` observations; wrapped observations are `uint8[H, W]`.

    Parameters
    ----------
    env : JaxEnv
        Inner environment to wrap. Must have a `Box` observation space
        of shape `(H, W, 3)` and dtype `uint8`.
    """

    def __init__(self, env: JaxEnv[Box, ActSpaceT, StateT, ConfigT]) -> None:
        super().__init__(env)
        # Fail fast unless the observation space is uint8[H, W, 3].
        require_box(
            env,
            type(self).__name__,
            rank=3,
            last_dim=3,
            dtype=jnp.uint8,
        )

    def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
        """
        Reset the inner environment and convert the observation to grayscale.

        Parameters
        ----------
        rng : chex.PRNGKey
            JAX PRNG key

        Returns
        -------
        obs  : chex.Array
            Grayscale observation
        state : StateT
            Inner environment state
        """
        rgb_obs, state = self._env.reset(rng)
        return to_gray(rgb_obs), state

    def step(
        self,
        state: StateT,
        action: chex.Array,
    ) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
        """
        Step the inner environment and convert the observation to grayscale.

        Parameters
        ----------
        state : StateT
            Current environment state
        action : chex.Array
            Action to take in the environment

        Returns
        -------
        obs  : chex.Array
            Grayscale observation
        new_state : StateT
            Updated environment state
        reward  : chex.Array
            Reward from the inner step
        done  : chex.Array
            Terminal flag from the inner step
        info : Dict[str, Any]
            Info dict from the inner step
        """
        rgb_obs, next_state, reward, done, info = self._env.step(state, action)
        return to_gray(rgb_obs), next_state, reward, done, info

    @property
    def observation_space(self) -> Box:
        # Same bounds and dtype as the inner space, minus the channel axis.
        inner = self._env.observation_space
        height, width = inner.shape[:2]
        return Box(low=inner.low, high=inner.high, shape=(height, width), dtype=inner.dtype)

reset(rng)

Reset the inner environment and convert the observation to grayscale.

Parameters:

Name Type Description Default
rng PRNGKey

JAX PRNG key

required

Returns:

Name Type Description
obs Array

Grayscale observation

state StateT

Inner environment state

Source code in envrax/wrappers/grayscale.py
Python
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
    """
    Reset the inner environment and convert the observation to grayscale.

    Parameters
    ----------
    rng : chex.PRNGKey
        JAX PRNG key

    Returns
    -------
    obs  : chex.Array
        Grayscale observation
    state : StateT
        Inner environment state
    """
    rgb_obs, state = self._env.reset(rng)
    return to_gray(rgb_obs), state

step(state, action)

Step the inner environment and convert the observation to grayscale.

Parameters:

Name Type Description Default
state StateT

Current environment state

required
action Array

Action to take in the environment

required

Returns:

Name Type Description
obs Array

Grayscale observation

new_state StateT

Updated environment state

reward Array

Reward from the inner step

done Array

Terminal flag from the inner step

info Dict[str, Any]

Info dict from the inner step

Source code in envrax/wrappers/grayscale.py
Python
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def step(
    self,
    state: StateT,
    action: chex.Array,
) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
    """
    Step the inner environment and convert the observation to grayscale.

    Parameters
    ----------
    state : StateT
        Current environment state
    action : chex.Array
        Action to take in the environment

    Returns
    -------
    obs  : chex.Array
        Grayscale observation
    new_state : StateT
        Updated environment state
    reward  : chex.Array
        Reward from the inner step
    done  : chex.Array
        Terminal flag from the inner step
    info : Dict[str, Any]
        Info dict from the inner step
    """
    rgb_obs, next_state, reward, done, info = self._env.step(state, action)
    return to_gray(rgb_obs), next_state, reward, done, info

envrax.wrappers.normalize_obs.NormalizeObservation

Bases: Wrapper[Box, ActSpaceT, StateT, ConfigT]

Normalises pixel observations from uint8 [0, 255] to float32 [0, 1].

Divides observations by 255.0 and casts to float32.

Parameters:

Name Type Description Default
env JaxEnv

Environment to wrap. Must have a Box observation space with dtype uint8.

required
Source code in envrax/wrappers/normalize_obs.py
Python
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
class NormalizeObservation(Wrapper[Box, ActSpaceT, StateT, ConfigT]):
    """
    Scale `uint8 [0, 255]` pixel observations to `float32 [0, 1]`.

    Observations are cast to float32 and divided by 255.0.

    Parameters
    ----------
    env : JaxEnv
        Environment to wrap. Must have a `Box` observation space with
        dtype `uint8`.
    """

    def __init__(self, env: JaxEnv[Box, ActSpaceT, StateT, ConfigT]) -> None:
        super().__init__(env)
        require_box(env, type(self).__name__, dtype=jnp.uint8)

    @staticmethod
    def _to_unit(obs: chex.Array) -> chex.Array:
        # Cast before dividing so the quotient is float32 in [0, 1].
        return obs.astype(jnp.float32) / jnp.float32(255.0)

    def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
        """
        Reset and return a normalised initial observation.

        Parameters
        ----------
        rng : chex.PRNGKey
            JAX PRNG key

        Returns
        -------
        obs  : chex.Array
            Normalised observation in `[0, 1]`
        state : StateT
            Inner environment state
        """
        obs, state = self._env.reset(rng)
        return self._to_unit(obs), state

    def step(
        self,
        state: StateT,
        action: chex.Array,
    ) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
        """
        Step and return a normalised observation.

        Parameters
        ----------
        state : StateT
            Current environment state
        action : chex.Array
            Action to take in the environment

        Returns
        -------
        obs  : chex.Array
            Normalised observation in `[0, 1]`
        new_state : StateT
            Updated environment state
        reward  : chex.Array
            Step reward
        done  : chex.Array
            Terminal flag
        info : Dict[str, Any]
            Environment metadata
        """
        obs, next_state, reward, done, info = self._env.step(state, action)
        return self._to_unit(obs), next_state, reward, done, info

    @property
    def observation_space(self) -> Box:
        # Shape is unchanged; bounds and dtype reflect the normalisation.
        inner_shape = self._env.observation_space.shape
        return Box(low=0.0, high=1.0, shape=inner_shape, dtype=jnp.float32)

reset(rng)

Reset and return a normalised initial observation.

Parameters:

Name Type Description Default
rng PRNGKey

JAX PRNG key

required

Returns:

Name Type Description
obs Array

Normalised observation in [0, 1]

state StateT

Inner environment state

Source code in envrax/wrappers/normalize_obs.py
Python
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
    """
    Reset and return a normalised initial observation.

    Parameters
    ----------
    rng : chex.PRNGKey
        JAX PRNG key

    Returns
    -------
    obs  : chex.Array
        Normalised observation in `[0, 1]`
    state : StateT
        Inner environment state
    """
    obs, state = self._env.reset(rng)
    # Cast before dividing so the quotient is float32 in [0, 1].
    scaled = obs.astype(jnp.float32) / jnp.float32(255.0)
    return scaled, state

step(state, action)

Step and return a normalised observation.

Parameters:

Name Type Description Default
state StateT

Current environment state

required
action Array

Action to take in the environment

required

Returns:

Name Type Description
obs Array

Normalised observation in [0, 1]

new_state StateT

Updated environment state

reward Array

Step reward

done Array

Terminal flag

info Dict[str, Any]

Environment metadata

Source code in envrax/wrappers/normalize_obs.py
Python
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def step(
    self,
    state: StateT,
    action: chex.Array,
) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
    """
    Step and return a normalised observation.

    Parameters
    ----------
    state : StateT
        Current environment state
    action : chex.Array
        Action to take in the environment

    Returns
    -------
    obs  : chex.Array
        Normalised observation in `[0, 1]`
    new_state : StateT
        Updated environment state
    reward  : chex.Array
        Step reward
    done  : chex.Array
        Terminal flag
    info : Dict[str, Any]
        Environment metadata
    """
    obs, next_state, reward, done, info = self._env.step(state, action)
    # Cast before dividing so the quotient is float32 in [0, 1].
    scaled = obs.astype(jnp.float32) / jnp.float32(255.0)
    return scaled, next_state, reward, done, info

envrax.wrappers.resize.ResizeObservation

Bases: Wrapper[Box, ActSpaceT, StateT, ConfigT]

Resize observations to (h, w) using bilinear interpolation.

Handles both:

  • Grayscale — uint8[H, W] → uint8[h, w]
  • RGB — uint8[H, W, C] → uint8[h, w, C]

The channel dimension is preserved automatically; no pre-processing step is required. For DQN-style pipelines, apply GrayscaleObservation first so the output is uint8[h, w] before stacking.

Parameters:

Name Type Description Default
env JaxEnv

Inner environment returning uint8[H, W] or uint8[H, W, C] observations.

required
h int

Output height in pixels. Default is 84.

84
w int

Output width in pixels. Default is 84.

84
Source code in envrax/wrappers/resize.py
Python
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
class ResizeObservation(Wrapper[Box, ActSpaceT, StateT, ConfigT]):
    """
    Bilinearly resize observations to `(h, w)`.

    Supported inputs:

    - **Grayscale** — `uint8[H, W]` → `uint8[h, w]`
    - **RGB** — `uint8[H, W, C]` → `uint8[h, w, C]`

    A trailing channel dimension is kept as-is, so no pre-processing step
    is needed.  For DQN-style pipelines, apply `GrayscaleObservation`
    first so the output is `uint8[h, w]` before stacking.

    Parameters
    ----------
    env : JaxEnv
        Inner environment returning `uint8[H, W]` or `uint8[H, W, C]` observations.
    h : int (optional)
        Output height in pixels. Default is `84`.
    w : int (optional)
        Output width in pixels. Default is `84`.
    """

    def __init__(
        self,
        env: JaxEnv[Box, ActSpaceT, StateT, ConfigT],
        *,
        h: int = 84,
        w: int = 84,
    ) -> None:
        super().__init__(env)
        # Accept rank-2 (grayscale) or rank-3 (channelled) uint8 observations.
        require_box(env, type(self).__name__, rank=(2, 3), dtype=jnp.uint8)
        self._h, self._w = h, w

    def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
        """
        Reset the inner environment and resize the observation.

        Parameters
        ----------
        rng : chex.PRNGKey
            JAX PRNG key

        Returns
        -------
        obs  : chex.Array
            Resized observation
        state : StateT
            Inner environment state
        """
        raw_obs, state = self._env.reset(rng)
        return resize(raw_obs, self._h, self._w), state

    def step(
        self,
        state: StateT,
        action: chex.Array,
    ) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
        """
        Step the inner environment and resize the observation.

        Parameters
        ----------
        state : StateT
            Current environment state
        action : chex.Array
            Action to take in the environment

        Returns
        -------
        obs  : chex.Array
            Resized observation
        new_state : StateT
            Updated environment state
        reward  : chex.Array
            Reward from the inner step
        done  : chex.Array
            Terminal flag from the inner step
        info : Dict[str, Any]
            Info dict from the inner step
        """
        raw_obs, next_state, reward, done, info = self._env.step(state, action)
        return resize(raw_obs, self._h, self._w), next_state, reward, done, info

    @property
    def observation_space(self) -> Box:
        inner = self._env.observation_space

        # Preserve the trailing channel axis when the inner space has one.
        if len(inner.shape) == 2:
            shape = (self._h, self._w)
        else:
            shape = (self._h, self._w, inner.shape[-1])

        return Box(low=inner.low, high=inner.high, shape=shape, dtype=inner.dtype)

reset(rng)

Reset the inner environment and resize the observation.

Parameters:

Name Type Description Default
rng PRNGKey

JAX PRNG key

required

Returns:

Name Type Description
obs Array

Resized observation

state StateT

Inner environment state

Source code in envrax/wrappers/resize.py
Python
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
    """
    Reset the inner environment and resize the observation.

    Parameters
    ----------
    rng : chex.PRNGKey
        JAX PRNG key

    Returns
    -------
    obs  : chex.Array
        Resized observation
    state : StateT
        Inner environment state
    """
    raw_obs, state = self._env.reset(rng)
    return resize(raw_obs, self._h, self._w), state

step(state, action)

Step the inner environment and resize the observation.

Parameters:

Name Type Description Default
state StateT

Current environment state

required
action Array

Action to take in the environment

required

Returns:

Name Type Description
obs Array

Resized observation

new_state StateT

Updated environment state

reward Array

Reward from the inner step

done Array

Terminal flag from the inner step

info Dict[str, Any]

Info dict from the inner step

Source code in envrax/wrappers/resize.py
Python
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def step(
    self,
    state: StateT,
    action: chex.Array,
) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
    """
    Delegate the step to the wrapped environment, then resize the
    resulting observation to (self._h, self._w). All other step outputs
    pass through untouched.

    Parameters
    ----------
    state : StateT
        Current environment state
    action : chex.Array
        Action to take in the environment

    Returns
    -------
    obs  : chex.Array
        Resized observation
    new_state : StateT
        Updated environment state
    reward  : chex.Array
        Reward from the inner step
    done  : chex.Array
        Terminal flag from the inner step
    info : Dict[str, Any]
        Info dict from the inner step
    """
    raw_obs, next_state, reward, done, info = self._env.step(state, action)
    resized_obs = resize(raw_obs, self._h, self._w)
    return resized_obs, next_state, reward, done, info

envrax.wrappers.record_video.RecordVideo

Bases: Wrapper[ObsSpaceT, ActSpaceT, StateT, ConfigT]

Save episode frames to MP4 based on configurable triggers.

Not JIT/vmap-compatible. Intended for evaluation, logging, and training visualisation.

Three optional triggers control when recording is active. They are OR'd together — if any trigger returns True, that episode is recorded. When no triggers are provided, every episode is recorded.

Each completed recording is written to <output_dir>/episode_<NNNN>.mp4 via imageio.

Requires imageio with the ffmpeg plugin (pip install "imageio[ffmpeg]").

Parameters:

Name Type Description Default
env JaxEnv

Inner environment to wrap that has a render() method.

required
output_dir str | Path

Directory where MP4 files are saved. Created automatically if it does not exist. Default is runs/recordings

required
fps int

Frames per second for the saved video. Default is 30.

required
episode_trigger Callable[[int], bool]

Called with the episode count at each reset(). If True, record this episode. Useful for "record every Nth episode". Default is None

required
step_trigger Callable[[int], bool]

Called with the global step count at each step(). If True, start recording from this step until the episode ends. Default is None

required
recording_trigger Callable[[], bool]

Zero-arg callable checked at each reset(). If True, record this episode. Useful for meta-learning where the framework controls when to record via an external flag. Default is None

required

Raises:

Name Type Description
TypeError

If the unwrapped environment does not implement render().

Source code in envrax/wrappers/record_video.py
Python
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
class RecordVideo(Wrapper[ObsSpaceT, ActSpaceT, StateT, ConfigT]):
    """
    Write episode frames out as MP4 files, gated by configurable triggers.

    **Not JIT/vmap-compatible.** Intended for evaluation, logging, and
    training visualisation.

    Up to three optional triggers decide whether an episode is recorded;
    they are combined with OR — a single `True` is enough. With no
    triggers configured, every episode is recorded.

    Each finished recording lands at `<output_dir>/episode_<NNNN>.mp4`,
    written through `imageio`.

    Requires `imageio` with the `ffmpeg` plugin
    (`pip install "imageio[ffmpeg]"`).

    Parameters
    ----------
    env : JaxEnv
        Inner environment to wrap that has a `render()` method.
    output_dir : str | Path (optional)
        Directory where MP4 files are saved. Created automatically if
        it does not exist. Default is `runs/recordings`
    fps : int (optional)
        Frames per second for the saved video. Default is `30`.
    episode_trigger : Callable[[int], bool] (optional)
        Called with the episode count at each `reset()`. If `True`,
        record this episode. Useful for "record every Nth episode".
        Default is `None`
    step_trigger : Callable[[int], bool] (optional)
        Called with the global step count at each `step()`. If `True`,
        start recording from this step until the episode ends.
        Default is `None`
    recording_trigger : Callable[[], bool] (optional)
        Zero-arg callable checked at each `reset()`. If `True`, record
        this episode. Useful for meta-learning where the framework
        controls when to record via an external flag.
        Default is `None`

    Raises
    ------
    TypeError
        If the unwrapped environment does not implement `render()`.
    """

    def __init__(
        self,
        env: JaxEnv[ObsSpaceT, ActSpaceT, StateT, ConfigT],
        *,
        output_dir: str | Path = "runs/recordings",
        fps: int = 30,
        episode_trigger: Callable[[int], bool] | None = None,
        step_trigger: Callable[[int], bool] | None = None,
        recording_trigger: Callable[[], bool] | None = None,
    ) -> None:
        super().__init__(env)

        # Fail fast: recording is impossible if the wrapped env never
        # overrode the base-class render().
        if type(self.unwrapped).render is JaxEnv.render:
            raise TypeError(
                f"RecordVideo requires an environment that implements render(). "
                f"{type(self.unwrapped).__name__} does not."
            )

        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.fps = fps

        self._episode_trigger = episode_trigger
        self._step_trigger = step_trigger
        self._recording_trigger = recording_trigger
        configured = (episode_trigger, step_trigger, recording_trigger)
        self._has_triggers = any(t is not None for t in configured)

        # Mutable recording state (makes this wrapper non-JIT-safe).
        self._frames: List[np.ndarray] = []    # frames buffered for the active recording
        self._episode_id: int = 0              # count of finished episodes; used in filenames
        self._global_step: int = 0             # total steps across all episodes
        self._recording: bool = False          # whether the current episode is being captured

    @property
    def recording(self) -> bool:
        """Whether the current episode is being recorded."""
        return self._recording

    def _should_record_episode(self) -> bool:
        """Evaluate the episode-level triggers (OR logic) at reset time."""
        if not self._has_triggers:
            # No triggers at all → record every episode.
            return True

        ep_trigger = self._episode_trigger
        rec_trigger = self._recording_trigger
        # step_trigger is deliberately excluded here; it can only start a
        # recording mid-episode, inside step().
        return bool(
            (ep_trigger is not None and ep_trigger(self._episode_id))
            or (rec_trigger is not None and rec_trigger())
        )

    def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
        """
        Reset the environment and optionally begin a new recording.

        Parameters
        ----------
        rng : chex.PRNGKey
            JAX PRNG key

        Returns
        -------
        obs : chex.Array
            First observation
        state : StateT
            Initial environment state
        """
        _ensure_not_traced(rng)
        obs, state = self._env.reset(rng)

        self._recording = self._should_record_episode()
        # Start the buffer with the initial frame when recording; otherwise
        # drop any frames left over from a previous, unfinished episode.
        self._frames = (
            [np.asarray(self._env.render(state))] if self._recording else []
        )

        return obs, state

    def step(
        self,
        state: StateT,
        action: chex.Array,
    ) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
        """
        Advance the environment by one step and record the frame if active.

        If a `step_trigger` is provided and fires, recording starts
        mid-episode and continues until the episode ends.

        Flushes accumulated frames to an MP4 file when `done` is `True`.

        Parameters
        ----------
        state : StateT
            Current environment state
        action : chex.Array
            Action to take in the environment

        Returns
        -------
        obs : chex.Array
            Observation after the step
        new_state : StateT
            Updated environment state
        reward : chex.Array
            Reward for this step
        done : chex.Array
            `True` when the episode has ended
        info : Dict[str, Any]
            Pass-through info dict from the inner environment
        """
        _ensure_not_traced(action)
        obs, new_state, reward, done, info = self._env.step(state, action)
        self._global_step += 1

        # step_trigger can flip recording on in the middle of an episode.
        trig = self._step_trigger
        if not self._recording and trig is not None and trig(self._global_step):
            self._recording = True

        if self._recording:
            self._frames.append(np.asarray(self._env.render(new_state)))

        if bool(done):
            if self._recording:
                # Episode over: write the buffered frames and stop capturing.
                self._flush()
                self._recording = False
            self._episode_id += 1

        return obs, new_state, reward, done, info

    def _flush(self) -> None:
        """Write buffered frames to `<output_dir>/episode_<NNNN>.mp4`, then clear the buffer."""
        if not self._frames:
            return

        # Imported lazily so the wrapper is usable without imageio until
        # a recording actually needs to be written.
        try:
            import imageio
        except ImportError as exc:
            raise ImportError(
                "imageio is required for video recording. "
                'Install it with: pip install "imageio[ffmpeg]"'
            ) from exc

        target = self.output_dir / f"episode_{self._episode_id:04d}.mp4"
        pending: list[Any] = self._frames
        imageio.mimwrite(str(target), pending, fps=self.fps)
        self._frames = []

recording property

Whether the current episode is being recorded.

reset(rng)

Reset the environment and optionally begin a new recording.

Parameters:

Name Type Description Default
rng PRNGKey

JAX PRNG key

required

Returns:

Name Type Description
obs Array

First observation

state StateT

Initial environment state

Source code in envrax/wrappers/record_video.py
Python
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
def reset(self, rng: chex.PRNGKey) -> Tuple[chex.Array, StateT]:
    """
    Reset the environment and optionally begin a new recording.

    Parameters
    ----------
    rng : chex.PRNGKey
        JAX PRNG key

    Returns
    -------
    obs : chex.Array
        First observation
    state : StateT
        Initial environment state
    """
    _ensure_not_traced(rng)
    obs, state = self._env.reset(rng)

    self._recording = self._should_record_episode()
    # Start the buffer with the initial frame when recording; otherwise
    # drop any frames left over from a previous, unfinished episode.
    self._frames = (
        [np.asarray(self._env.render(state))] if self._recording else []
    )

    return obs, state

step(state, action)

Advance the environment by one step and record the frame if active.

If a step_trigger is provided and fires, recording starts mid-episode and continues until the episode ends.

Flushes accumulated frames to an MP4 file when done is True.

Parameters:

Name Type Description Default
state StateT

Current environment state

required
action Array

Action to take in the environment

required

Returns:

Name Type Description
obs Array

Observation after the step

new_state StateT

Updated environment state

reward Array

Reward for this step

done Array

True when the episode has ended

info Dict[str, Any]

Pass-through info dict from the inner environment

Source code in envrax/wrappers/record_video.py
Python
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def step(
    self,
    state: StateT,
    action: chex.Array,
) -> Tuple[chex.Array, StateT, chex.Array, chex.Array, Dict[str, Any]]:
    """
    Advance the environment by one step and record the frame if active.

    If a `step_trigger` is provided and fires, recording starts
    mid-episode and continues until the episode ends.

    Flushes accumulated frames to an MP4 file when `done` is `True`.

    Parameters
    ----------
    state : StateT
        Current environment state
    action : chex.Array
        Action to take in the environment

    Returns
    -------
    obs : chex.Array
        Observation after the step
    new_state : StateT
        Updated environment state
    reward : chex.Array
        Reward for this step
    done : chex.Array
        `True` when the episode has ended
    info : Dict[str, Any]
        Pass-through info dict from the inner environment
    """
    _ensure_not_traced(action)
    obs, new_state, reward, done, info = self._env.step(state, action)
    self._global_step += 1

    # step_trigger can flip recording on in the middle of an episode.
    trig = self._step_trigger
    if not self._recording and trig is not None and trig(self._global_step):
        self._recording = True

    if self._recording:
        self._frames.append(np.asarray(self._env.render(new_state)))

    if bool(done):
        if self._recording:
            # Episode over: write the buffered frames and stop capturing.
            self._flush()
            self._recording = False
        self._episode_id += 1

    return obs, new_state, reward, done, info