[RLlib] Fix LearnerGroup.load_module_state() and simultaneously mark as deprecated. (ray-project#60354)

kamil-kaczmarek · peterxcli · commit a50b5b34fd3d · 2026-02-25T15:56:54.000+08:00
## Description

* allow passing a path on a cloud filesystem (for example `gcs` or `s3`)
to `LearnerGroup.load_module_state()`.
* mark `LearnerGroup.load_module_state()` as deprecated. Users should
use `Algorithm.restore_from_path(path=..., component=...)` instead.

* mark the `load_state_path` field in the `RLModuleSpec` dataclass as
deprecated and direct users to `Algorithm.restore_from_path(path=...,
component=...)` instead.
* add unit tests for `LearnerGroup.load_module_state()`

---------

Signed-off-by: Kamil Kaczmarek <kamil@anyscale.com>
Signed-off-by: peterxcli <peterxcli@gmail.com>
diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
@@ -3524,7 +3524,7 @@ def multi_agent(
             policy_map_capacity: Keep this many policies in the "policy_map" (before
                 writing least-recently used ones to disk/S3).
             policy_mapping_fn: Function mapping agent ids to policy ids. The signature
-                is: `(agent_id, episode, worker, **kwargs) -> PolicyID`.
+                is: `(agent_id, episode, **kwargs) -> PolicyID`.
             policies_to_train: Determines those policies that should be updated.
                 Options are:
                 - None, for training all policies.
diff --git a/rllib/core/learner/learner_group.py b/rllib/core/learner/learner_group.py
@@ -1,6 +1,5 @@
 import copy
 import itertools
-import pathlib
 from functools import partial
 from typing import (
     TYPE_CHECKING,
@@ -732,63 +731,75 @@ def update_from_episodes(self, episodes, **kwargs):
     def async_update(self, *args, **kwargs):
         pass
 
-    @Deprecated(new="LearnerGroup.load_from_path(path=..., component=...)", error=False)
+    @Deprecated(
+        old="LearnerGroup.load_module_state()",
+        help="To restore RLModule or MultiRLModule state "
+        "use LearnerGroup.restore_from_path(path=..., component=...). "
+        "See docs for more details: "
+        "https://docs.ray.io/en/latest/rllib/rl-modules.html#checkpointing-rlmodules",
+        error=False,
+    )
     def load_module_state(
         self,
         *,
         multi_rl_module_ckpt_dir: Optional[str] = None,
         modules_to_load: Optional[Set[str]] = None,
         rl_module_ckpt_dirs: Optional[Dict[ModuleID, str]] = None,
     ) -> None:
-        """Load the checkpoints of the modules being trained by this LearnerGroup.
+        """Load the checkpoints of the modules being trained by `LearnerGroup`.
 
         `load_module_state` can be used 3 ways:
-            1. Load a checkpoint for the MultiRLModule being trained by this
-                LearnerGroup. Limit the modules that are loaded from the checkpoint
-                by specifying the `modules_to_load` argument.
-            2. Load the checkpoint(s) for single agent RLModules that
-                are in the MultiRLModule being trained by this LearnerGroup.
-            3. Load a checkpoint for the MultiRLModule being trained by this
-                LearnerGroup and load the checkpoint(s) for single agent RLModules
-                that are in the MultiRLModule. The checkpoints for the single
-                agent RLModules take precedence over the module states in the
-                MultiRLModule checkpoint.
-
-        NOTE: At lease one of multi_rl_module_ckpt_dir or rl_module_ckpt_dirs is
-            must be specified. modules_to_load can only be specified if
-            multi_rl_module_ckpt_dir is specified.
+            1. Load a checkpoint for the `MultiRLModule` being trained by this
+                `LearnerGroup`. Optionally, limit the modules that are loaded
+                from the checkpoint by specifying the `modules_to_load` argument.
+            2. Load the checkpoint(s) for single agent `RLModules` that
+                are in the `MultiRLModule` being trained by this `LearnerGroup`.
+            3. Load a checkpoint for the `MultiRLModule` being trained by this
+                `LearnerGroup` and load the checkpoint(s) for single agent `RLModules`
+                that are in the `MultiRLModule`. The checkpoints for the single
+                agent `RLModules` take precedence over the module states in the
+                `MultiRLModule` checkpoint.
+
+        At least one of `multi_rl_module_ckpt_dir` or `rl_module_ckpt_dirs`
+        must be specified.
+        `modules_to_load` can only be specified if `multi_rl_module_ckpt_dir`
+        is provided.
 
         Args:
             multi_rl_module_ckpt_dir: The path to the checkpoint for the
-                MultiRLModule.
-            modules_to_load: A set of module ids to load from the checkpoint.
+                `MultiRLModule`.
+            modules_to_load: A set of `RLModule` ids to load from the checkpoint.
             rl_module_ckpt_dirs: A mapping from module ids to the path to a
-                checkpoint for a single agent RLModule.
+                checkpoint for a single agent `RLModule`.
         """
         if not (multi_rl_module_ckpt_dir or rl_module_ckpt_dirs):
             raise ValueError(
-                "At least one of `multi_rl_module_ckpt_dir` or "
-                "`rl_module_ckpt_dirs` must be provided!"
+                f"At least one of `multi_rl_module_ckpt_dir` or "
+                f"`rl_module_ckpt_dirs` must be provided. "
+                f"Got {multi_rl_module_ckpt_dir=} and {rl_module_ckpt_dirs=}."
+            )
+
+        if modules_to_load and not multi_rl_module_ckpt_dir:
+            raise ValueError(
+                f"`modules_to_load` can only be specified if a "
+                f"multi_rl_module_ckpt_dir is provided. "
+                f"Got {modules_to_load=} and {multi_rl_module_ckpt_dir=}."
             )
-        if multi_rl_module_ckpt_dir:
-            multi_rl_module_ckpt_dir = pathlib.Path(multi_rl_module_ckpt_dir)
-        if rl_module_ckpt_dirs:
-            for module_id, path in rl_module_ckpt_dirs.items():
-                rl_module_ckpt_dirs[module_id] = pathlib.Path(path)
 
         # MultiRLModule checkpoint is provided.
         if multi_rl_module_ckpt_dir:
             # Restore the entire MultiRLModule state.
             if modules_to_load is None:
                 self.restore_from_path(
-                    multi_rl_module_ckpt_dir,
+                    path=multi_rl_module_ckpt_dir,
                     component=COMPONENT_LEARNER + "/" + COMPONENT_RL_MODULE,
-                )
+                ),
             # Restore individual module IDs.
             else:
                 for module_id in modules_to_load:
+                    path = multi_rl_module_ckpt_dir + "/" + module_id
                     self.restore_from_path(
-                        multi_rl_module_ckpt_dir / module_id,
+                        path=path,
                         component=(
                             COMPONENT_LEARNER
                             + "/"
@@ -800,7 +811,7 @@ def load_module_state(
         if rl_module_ckpt_dirs:
             for module_id, path in rl_module_ckpt_dirs.items():
                 self.restore_from_path(
-                    path,
+                    path=path,
                     component=(
                         COMPONENT_LEARNER + "/" + COMPONENT_RL_MODULE + "/" + module_id
                     ),
diff --git a/rllib/core/rl_module/rl_module.py b/rllib/core/rl_module/rl_module.py
@@ -63,8 +63,12 @@ class RLModuleSpec:
             Note that `inference_only=True` AND `learner_only=True` is not allowed.
         model_config: The model config dict or default RLlib dataclass to use.
         catalog_class: The Catalog class to use.
-        load_state_path: The path to the module state to load from. NOTE: This must be
-            an absolute path.
+        load_state_path: The path to the RLModule state to load from.
+            Deprecated. This field will be removed in a future Ray release.
+            To restore RLModule state use
+            `Algorithm.restore_from_path(path=..., component=...)` instead.
+            See docs for more details:
+            https://docs.ray.io/en/latest/rllib/rl-modules.html#checkpointing-rlmodules
     """
 
     module_class: Optional[Type["RLModule"]] = None
diff --git a/rllib/examples/utils.py b/rllib/examples/utils.py
@@ -539,7 +539,7 @@ def run_rllib_example_script_experiment(
                 else 1
             ) * num_actual_learners
             # Define compute resources used.
-            config.resources(num_gpus=0)  # old API stack setting
+            config.resources(num_gpus=0)  # @OldAPIStack
             if args.num_learners is not None:
                 config.learners(num_learners=args.num_learners)
 
diff --git a/rllib/models/torch/mingpt.py b/rllib/models/torch/mingpt.py
@@ -1,3 +1,5 @@
+# @OldAPIStack
+
 # LICENSE: MIT
 """
 Adapted from https://github.com/karpathy/minGPT
diff --git a/rllib/offline/tests/test_offline_prelearner.py b/rllib/offline/tests/test_offline_prelearner.py
@@ -36,6 +36,9 @@ def setUpClass(cls):
     def tearDownClass(cls):
         ray.shutdown()
 
+        # Delete the cluster address just in case.
+        ray._common.utils.reset_ray_address()
+
     def setUp(self) -> None:
         data_path = "offline/tests/data/cartpole/cartpole-v1_large"
         self.base_path = Path(__file__).parents[2]
diff --git a/rllib/utils/tests/run_memory_leak_tests.py b/rllib/utils/tests/run_memory_leak_tests.py
@@ -1,3 +1,5 @@
+# @OldAPIStack
+
 #!/usr/bin/env python
 # Runs one or more memory leak tests.
 #

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# @OldAPIStack`
	`2`	`+`
`1`	`3`	`# LICENSE: MIT`
`2`	`4`	`"""`
`3`	`5`	`Adapted from https://github.com/karpathy/minGPT`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# @OldAPIStack`
	`2`	`+`
`1`	`3`	`#!/usr/bin/env python`
`2`	`4`	`# Runs one or more memory leak tests.`
`3`	`5`	`#`