# Source code for objectrl.loggers.logger

# -----------------------------------------------------------------------------------
# ObjectRL: An Object-Oriented Reinforcement Learning Codebase
# Copyright (C) 2025 ADIN Lab

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------------

import logging
import typing
from datetime import datetime
from pathlib import Path
from typing import Optional

import matplotlib.pyplot as plt
import numpy
import torch

if typing.TYPE_CHECKING:
    from objectrl.config.config import MainConfig


class Logger:
    """
    Logger class for experiment tracking, result storage, and evaluation plotting.

    Args:
        result_path (Path): Base directory where results will be stored.
        env_name (str): Name of the environment being used.
        model_name (str): Name of the model being trained.
        seed (int): Random seed for reproducibility.
        config (MainConfig, optional): Configuration object containing
            experiment parameters.

    Attributes:
        path (Path): Path to the directory where logs and plots are saved.
        eval_results (dict): Stores evaluation rewards keyed by training step.
        logger (logging.Logger): Python logger instance configured to write
            to a file.
    """

    def __init__(
        self,
        result_path: Path,
        env_name: str,
        model_name: str,
        seed: int,
        config: Optional["MainConfig"] = None,
    ):
        # Every run gets its own timestamped directory:
        # <result_path>/<env_name>/<model_name>/seed_XX/<timestamp>
        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        self.path = (
            result_path
            / env_name
            / model_name
            / f"seed_{str(seed).zfill(2)}"
            / timestamp
        )

        # Evaluation rewards, keyed by the step at which they were collected.
        self.eval_results = {}

        # Set up the file logger and record the run header.
        self.logger = self.create_logger()
        self.logger.info(f"Experiment with seed no {seed}")

        # If the user provided a config, log its details.
        if config is not None:
            self.logger.info(f"Args: \n{config}")

    def create_logger(self) -> logging.Logger:
        """
        Sets up a file-based logger writing to ``<self.path>/log.log``.

        This reconfigures the *root* logger: handlers already attached to it
        are detached and closed before the new file handler is installed.

        Returns:
            logging.Logger: The root logger, configured for recording logs.
        """
        self.path.mkdir(parents=True, exist_ok=True)

        # Detach existing handlers and close them so that a previously opened
        # log file does not leak its file handle.
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)
            handler.close()

        logging.basicConfig(
            filename=self.path / "log.log",
            format="%(asctime)s %(levelname)-8s %(message)s",
            datefmt="%m-%d %H:%M:%S",
            level=logging.INFO,
            filemode="w",
        )
        # logging.disable() suppresses DEBUG records process-wide, so even
        # with the logger level at DEBUG only INFO and above are emitted.
        logging.disable(logging.DEBUG)
        logger = logging.getLogger()
        logger.setLevel(logging.DEBUG)
        return logger

    def log(self, message: str) -> None:
        """
        Logs an informational message.

        Args:
            message (str): The message to be logged.
        """
        self.logger.info(message)

    def critical(self, message: str) -> None:
        """
        Logs a critical message (used for evaluation results).

        Args:
            message (str): The critical message to be logged.
        """
        self.logger.critical(message)

    def __call__(self, message: str) -> None:
        """
        Allows the logger instance to be called like a function.

        Args:
            message (str): The message to be logged as info.
        """
        self.log(message)

    def episode_summary(self, episode: int, steps: int, info: dict) -> None:
        """
        Logs a summary of a completed episode.

        Args:
            episode (int): Zero-based episode index (logged as ``episode + 1``).
            steps (int): Step count at episode end.
            info (dict): Dictionary whose ``"episode_rewards"`` entry is
                indexable by ``episode``.
        """
        reward = info["episode_rewards"][episode]
        self.log(
            f"Episode: {episode + 1:4d}\tN-steps: {steps:7d}\tReward: {reward:10.3f}"
        )

    def plot_rewards(self, rewards: numpy.ndarray, steps: numpy.ndarray) -> None:
        """
        Generates and saves a plot of per-episode rewards against steps.

        The figure is written to ``<self.path>/learning-curve.png``.

        Args:
            rewards (numpy.ndarray): Array of rewards per episode.
            steps (numpy.ndarray): Array of steps per episode.
        """
        fig, ax = plt.subplots()
        try:
            ax.plot(steps, rewards)
            ax.set_xlabel("Steps")
            ax.set_ylabel("Normalized Per-Episode Reward")
            fig.savefig(self.path / "learning-curve.png")
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)

    def save(self, info: dict, episode: int, n_step: int) -> None:
        """
        Saves episode and step rewards and regenerates the training curve.

        Nothing is written when ``info`` is empty.

        Args:
            info (dict): Dictionary with ``"episode_rewards"``,
                ``"episode_steps"`` and ``"step_rewards"`` entries.
            episode (int): Current episode index (inclusive upper bound).
            n_step (int): Current training step index (inclusive upper bound).
        """
        if not info:
            return

        # Keep only the entries filled in so far (both bounds are inclusive).
        episode_rewards = info["episode_rewards"][: episode + 1]
        episode_steps = info["episode_steps"][: episode + 1]
        step_rewards = info["step_rewards"][: n_step + 1]

        numpy.save(self.path / "episode_rewards.npy", episode_rewards)
        numpy.save(self.path / "step_rewards.npy", step_rewards)
        self.plot_rewards(episode_rewards, episode_steps)

    @staticmethod
    def IQM_reward_calculator(rewards: torch.Tensor) -> numpy.floating:
        """
        Computes the Interquartile Mean (IQM) of rewards.

        The IQM here is the mean of all values lying between the 25th and
        75th percentiles, both bounds inclusive.

        Args:
            rewards (torch.Tensor): Evaluation rewards. Tensors are detached
                and moved to the CPU first; array-likes are also accepted.

        Returns:
            numpy.floating: The IQM of the rewards.
        """
        if isinstance(rewards, torch.Tensor):
            # detach() + cpu() make the conversion valid for tensors that
            # require grad or live on an accelerator.
            values = rewards.detach().cpu().numpy()
        else:
            values = numpy.asarray(rewards)

        q1, q3 = numpy.percentile(values, [25, 75])
        return numpy.mean(values[(values >= q1) & (values <= q3)])

    def save_eval_results(self, n_step: int, rewards: torch.Tensor) -> None:
        """
        Saves evaluation results and logs IQM and mean rewards.

        Args:
            n_step (int): Step at which evaluation is performed.
            rewards (torch.Tensor): Reward values from evaluation episodes.
        """
        iqm = self.IQM_reward_calculator(rewards)
        # Reduce to a Python float so formatting is uniform across input types.
        mean_reward = float(rewards.mean())
        self.critical(
            f"EVALUATION\tN-steps: {n_step:7d}\tMean_Reward: {mean_reward:10.3f}\tIQM_Reward: {iqm:10.3f}"
        )

        self.eval_results[n_step] = rewards
        # The dict is stored as a pickled object array, so reading it back
        # requires numpy.load(..., allow_pickle=True).
        numpy.save(self.path / "eval_results.npy", self.eval_results)

        # Plot evaluation curve
        self._plot_eval_curve()

    def _plot_eval_curve(self) -> None:
        """
        Helper method to plot the evaluation curve (mean +/- std per step).

        The figure is written to ``<self.path>/eval-curve.png``.
        """
        x = list(self.eval_results)
        # float() extracts each statistic as a plain number, which works
        # regardless of the device the stored rewards live on.
        y_mean = numpy.array([float(self.eval_results[k].mean()) for k in x])
        y_std = numpy.array([float(self.eval_results[k].std()) for k in x])

        fig, ax = plt.subplots()
        try:
            ax.plot(x, y_mean)
            ax.fill_between(x, y_mean - y_std, y_mean + y_std, alpha=0.2)
            ax.set_xlabel("Steps")
            ax.set_ylabel("Eval Reward")
            fig.savefig(self.path / "eval-curve.png")
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)