# Source code for objectrl.nets.actor_nets

# -----------------------------------------------------------------------------------
# ObjectRL: An Object-Oriented Reinforcement Learning Codebase
# Copyright (C) 2025 ADIN Lab

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------------

from typing import Any, Literal

import torch
from torch import nn as nn

from objectrl.nets.layers.heads import SquashedGaussianHead
from objectrl.utils.net_utils import MLP



class ActorNetProbabilistic(nn.Module):
    """Probabilistic actor network with a squashed Gaussian output head.

    An MLP trunk maps an observation to ``2 * dim_act`` features per policy
    head, and those features are passed to a ``SquashedGaussianHead`` that
    produces the policy output.
    """

    def __init__(
        self,
        dim_state: int,
        dim_act: int,
        n_heads: int = 1,
        depth: int = 3,
        width: int = 256,
        act: Literal["relu", "crelu"] = "relu",
        has_norm: bool = False,
        upper_clamp: float = -2.0,
    ) -> None:
        """
        Probabilistic Actor Network that outputs a Gaussian distribution
        over actions using a squashed Gaussian head.

        Args:
            dim_state (int): Observation space dimension (assumed 1D tuple).
            dim_act (int): Action space dimension (assumed 1D tuple).
            n_heads (int): Number of policy heads (useful for ensemble methods).
            depth (int): Number of hidden layers.
            width (int): Width of each hidden layer.
            act (str): Activation function to use.
            has_norm (bool): Whether to include normalization layers.
            upper_clamp (float): Upper clamp value for log-variance in
                Squashed Gaussian.
        """
        super().__init__()
        self.dim_act = dim_act
        self.n_heads = n_heads

        # The trunk emits the features of all heads as one flat vector:
        # 2 * dim_act values per head (presumably the location and scale
        # parameters of the Gaussian -- confirm against SquashedGaussianHead).
        self.arch = MLP(dim_state, 2 * dim_act * n_heads, depth, width, act, has_norm)

        # Gaussian distribution head for action selection
        self.head = SquashedGaussianHead(self.dim_act, upper_clamp)

    def forward(self, x: torch.Tensor, is_training: bool = True) -> dict[str, Any]:
        """
        Forward pass to generate a squashed Gaussian action distribution.

        Args:
            x (Tensor): Input observation tensor.
            is_training (bool): Whether to sample actions stochastically.

        Returns:
            dict[str, Any]: Whatever ``self.head`` returns for the trunk
            features; the keys are defined by ``SquashedGaussianHead``.
        """
        f = self.arch(x)
        if self.n_heads > 1:
            # Split the flat trunk output into one feature row per head. All
            # leading dimensions are collapsed into a single batch dimension.
            f = f.view(-1, self.n_heads, 2 * self.dim_act)
        return self.head(f, is_training)





[docs]
class ActorNet(nn.Module):

[docs]
    def __init__(
        self,
        dim_state: int,
        dim_act: int,
        n_heads: int = 1,
        depth: int = 3,
        width: int = 256,
        act: Literal["crelu", "relu"] = "relu",
        has_norm: bool = False,
    ) -> None:
        """
        Deterministic Actor Network that outputs continuous actions.

        Args:
        dim_state (int): Observation space dimension.
        dim_act (int): Action space dimension.
        n_heads (int): Number of parallel output heads.
        depth (int): Number of hidden layers.
        width (int): Width of each hidden layer.
        act (str): Activation function name.
        has_norm (bool): Whether to use normalization layers.
        """
        super().__init__()

        self.dim_act = dim_act
        self.n_heads = n_heads

        self.arch = nn.Sequential(
            MLP(dim_state, dim_act * n_heads, depth, width, act, has_norm),
            nn.Tanh(),
        )



[docs]
    def forward(
        self, x: torch.Tensor, is_training: bool | None = None
    ) -> dict[str, torch.Tensor]:
        """
        Forward pass for deterministic action prediction.

        Args:
            x (Tensor): Input observation tensor.
            is_training (Optional[bool]): Unused; included for interface compatibility.
        """
        out = self.arch(x)
        if self.n_heads > 1:
            out = out.view(-1, self.n_heads, self.dim_act)
        return_dict = {
            "action": out,
        }
        return return_dict