[1/n] add application level autoscaling policy in schema (#57535)

abrarsheikh · elliot-barn · commit 3d628a7e198c · 2025-10-23T05:46:41.000Z
Part 1 of #56149. 1. Move `_serialized_policy_def` from `AutoscalingConfig` into `AutoscalingPolicy`; this is needed in order to reuse `AutoscalingPolicy` for application-level autoscaling. 2. Make `autoscaling_policy` a top-level config in `ServeApplicationSchema`. --------- Signed-off-by: abrar <abrar@anyscale.com> Signed-off-by: elliot-barn <elliot.barnwell@anyscale.com>
diff --git a/python/ray/serve/_private/autoscaling_state.py b/python/ray/serve/_private/autoscaling_state.py
@@ -106,7 +106,7 @@ def register(self, info: DeploymentInfo, curr_target_num_replicas: int) -> int:
 
         self._deployment_info = info
         self._config = config
-        self._policy = self._config.get_policy()
+        self._policy = self._config.policy.get_policy()
         self._target_capacity = info.target_capacity
         self._target_capacity_direction = info.target_capacity_direction
         self._policy_state = {}
diff --git a/python/ray/serve/config.py b/python/ray/serve/config.py
@@ -172,12 +172,39 @@ class AggregationFunction(str, Enum):
 
 @PublicAPI(stability="alpha")
 class AutoscalingPolicy(BaseModel):
-    name: Union[str, Callable] = Field(
+    # Cloudpickled policy definition.
+    _serialized_policy_def: bytes = PrivateAttr(default=b"")
+
+    policy_function: Union[str, Callable] = Field(
         default=DEFAULT_AUTOSCALING_POLICY_NAME,
-        description="Name of the policy function or the import path of the policy. "
-        "Will be the concatenation of the policy module and the policy name if user passed a callable.",
+        description="Policy function can be a string import path or a function callable. "
+        "If it's a string import path, it must be of the form `path.to.module:function_name`. ",
     )
 
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.serialize_policy()
+
+    def serialize_policy(self) -> None:
+        """Serialize policy with cloudpickle.
+
+        Import the policy if it's passed in as a string import path. Then cloudpickle
+        the policy and set `serialized_policy_def` if it's empty.
+        """
+        policy_path = self.policy_function
+
+        if isinstance(policy_path, Callable):
+            policy_path = f"{policy_path.__module__}.{policy_path.__name__}"
+
+        if not self._serialized_policy_def:
+            self._serialized_policy_def = cloudpickle.dumps(import_attr(policy_path))
+
+        self.policy_function = policy_path
+
+    def get_policy(self) -> Callable:
+        """Deserialize policy from cloudpickled bytes."""
+        return cloudpickle.loads(self._serialized_policy_def)
+
 
 @PublicAPI(stability="stable")
 class AutoscalingConfig(BaseModel):
@@ -247,9 +274,6 @@ class AutoscalingConfig(BaseModel):
         description="Function used to aggregate metrics across a time window.",
     )
 
-    # Cloudpickled policy definition.
-    _serialized_policy_def: bytes = PrivateAttr(default=b"")
-
     # Autoscaling policy. This policy is deployment scoped. Defaults to the request-based autoscaler.
     policy: AutoscalingPolicy = Field(
         default_factory=AutoscalingPolicy,
@@ -298,27 +322,6 @@ def aggregation_function_valid(cls, v: Union[str, AggregationFunction]):
             return v
         return AggregationFunction(str(v).lower())
 
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        self.serialize_policy()
-
-    def serialize_policy(self) -> None:
-        """Serialize policy with cloudpickle.
-
-        Import the policy if it's passed in as a string import path. Then cloudpickle
-        the policy and set `serialized_policy_def` if it's empty.
-        """
-        policy = self.policy
-        policy_name = policy.name
-
-        if isinstance(policy_name, Callable):
-            policy_name = f"{policy_name.__module__}.{policy_name.__name__}"
-
-        if not self._serialized_policy_def:
-            self._serialized_policy_def = cloudpickle.dumps(import_attr(policy_name))
-
-        self.policy = AutoscalingPolicy(name=policy_name)
-
     @classmethod
     def default(cls):
         return cls(
@@ -327,10 +330,6 @@ def default(cls):
             max_replicas=100,
         )
 
-    def get_policy(self) -> Callable:
-        """Deserialize policy from cloudpickled bytes."""
-        return cloudpickle.loads(self._serialized_policy_def)
-
     def get_upscaling_factor(self) -> PositiveFloat:
         if self.upscaling_factor:
             return self.upscaling_factor
diff --git a/python/ray/serve/schema.py b/python/ray/serve/schema.py
@@ -560,6 +560,15 @@ class ServeApplicationSchema(BaseModel):
         default=[],
         description="Deployment options that override options specified in the code.",
     )
+    autoscaling_policy: Optional[dict] = Field(
+        default=None,
+        description=(
+            "Application-level autoscaling policy. "
+            "If null, serve fallbacks to autoscaling policy in each deployment. "
+            "This option is under development and not yet supported."
+        ),
+    )
+
     args: Dict = Field(
         default={},
         description="Arguments that will be passed to the application builder.",
diff --git a/python/ray/serve/tests/test_autoscaling_policy.py b/python/ray/serve/tests/test_autoscaling_policy.py
@@ -1540,11 +1540,13 @@ def custom_autoscaling_policy(ctx: AutoscalingContext):
 @pytest.mark.parametrize(
     "policy",
     [
-        {"name": "ray.serve.tests.test_autoscaling_policy.custom_autoscaling_policy"},
+        {
+            "policy_function": "ray.serve.tests.test_autoscaling_policy.custom_autoscaling_policy"
+        },
         AutoscalingPolicy(
-            name="ray.serve.tests.test_autoscaling_policy.custom_autoscaling_policy"
+            policy_function="ray.serve.tests.test_autoscaling_policy.custom_autoscaling_policy"
         ),
-        AutoscalingPolicy(name=custom_autoscaling_policy),
+        AutoscalingPolicy(policy_function=custom_autoscaling_policy),
     ],
 )
 def test_e2e_scale_up_down_basic_with_custom_policy(serve_instance_with_signal, policy):
diff --git a/python/ray/serve/tests/test_controller.py b/python/ray/serve/tests/test_controller.py
@@ -180,7 +180,7 @@ def autoscaling_app():
                                     "upscale_delay_s": 30.0,
                                     "aggregation_function": "mean",
                                     "policy": {
-                                        "name": "ray.serve.autoscaling_policy:default_autoscaling_policy"
+                                        "policy_function": "ray.serve.autoscaling_policy:default_autoscaling_policy"
                                     },
                                 },
                                 "graceful_shutdown_wait_loop_s": 2.0,
diff --git a/python/ray/serve/tests/test_deploy_2.py b/python/ray/serve/tests/test_deploy_2.py
@@ -333,7 +333,9 @@ async def __call__(self):
         "smoothing_factor": 1.0,
         "initial_replicas": None,
         "aggregation_function": "mean",
-        "policy": {"name": "ray.serve.autoscaling_policy:default_autoscaling_policy"},
+        "policy": {
+            "policy_function": "ray.serve.autoscaling_policy:default_autoscaling_policy"
+        },
     }
 
 
@@ -397,7 +399,9 @@ async def __call__(self):
         "smoothing_factor": 1.0,
         "initial_replicas": None,
         "aggregation_function": "mean",
-        "policy": {"name": "ray.serve.autoscaling_policy:default_autoscaling_policy"},
+        "policy": {
+            "policy_function": "ray.serve.autoscaling_policy:default_autoscaling_policy"
+        },
     }
 
     for i in range(3):
diff --git a/python/ray/serve/tests/test_deploy_app_2.py b/python/ray/serve/tests/test_deploy_app_2.py
@@ -596,7 +596,9 @@ def test_num_replicas_auto_api(serve_instance):
         "smoothing_factor": 1.0,
         "initial_replicas": None,
         "aggregation_function": "mean",
-        "policy": {"name": "ray.serve.autoscaling_policy:default_autoscaling_policy"},
+        "policy": {
+            "policy_function": "ray.serve.autoscaling_policy:default_autoscaling_policy"
+        },
     }
 
 
@@ -651,7 +653,9 @@ def test_num_replicas_auto_basic(serve_instance):
         "smoothing_factor": 1.0,
         "initial_replicas": None,
         "aggregation_function": "mean",
-        "policy": {"name": "ray.serve.autoscaling_policy:default_autoscaling_policy"},
+        "policy": {
+            "policy_function": "ray.serve.autoscaling_policy:default_autoscaling_policy"
+        },
     }
 
     h = serve.get_app_handle(SERVE_DEFAULT_APP_NAME)
diff --git a/python/ray/serve/tests/unit/test_config.py b/python/ray/serve/tests/unit/test_config.py
@@ -784,7 +784,12 @@ def test_deployment_mode_to_proxy_location():
 
 
 @pytest.mark.parametrize(
-    "policy", [None, fake_policy, "ray.serve.tests.unit.test_config:fake_policy"]
+    "policy",
+    [
+        None,
+        {"policy_function": "ray.serve.tests.unit.test_config:fake_policy"},
+        {"policy_function": fake_policy},
+    ],
 )
 def test_autoscaling_policy_serializations(policy):
     """Test that autoscaling policy can be serialized and deserialized.
@@ -794,16 +799,19 @@ def test_autoscaling_policy_serializations(policy):
     """
     autoscaling_config = AutoscalingConfig()
     if policy:
-        autoscaling_config = AutoscalingConfig(_policy=policy)
+        autoscaling_config = AutoscalingConfig(policy=policy)
 
     config = DeploymentConfig.from_default(autoscaling_config=autoscaling_config)
     deserialized_autoscaling_policy = DeploymentConfig.from_proto_bytes(
         config.to_proto_bytes()
-    ).autoscaling_config.get_policy()
+    ).autoscaling_config.policy.get_policy()
 
-    # Right now we don't allow modifying the autoscaling policy, so this will always
-    # be the default autoscaling policy
-    assert deserialized_autoscaling_policy == default_autoscaling_policy
+    if policy is None:
+        assert deserialized_autoscaling_policy == default_autoscaling_policy
+    else:
+        # Compare function behavior instead of function objects
+        # since serialization/deserialization creates new function objects
+        assert deserialized_autoscaling_policy() == fake_policy()
 
 
 def test_autoscaling_policy_import_fails_for_non_existing_policy():
@@ -814,7 +822,8 @@ def test_autoscaling_policy_import_fails_for_non_existing_policy():
     """
     # Right now we don't allow modifying the autoscaling policy, so this will not fail
     policy = "i.dont.exist:fake_policy"
-    AutoscalingConfig(_policy=policy)
+    with pytest.raises(ModuleNotFoundError):
+        AutoscalingConfig(policy={"policy_function": policy})
 
 
 def test_default_autoscaling_policy_import_path():
diff --git a/src/ray/protobuf/serve.proto b/src/ray/protobuf/serve.proto
@@ -24,10 +24,11 @@ option java_multiple_files = true;
 
 // Configuration options for Serve's autoscaling policy
 message AutoscalingPolicy {
-  // Name of the policy function or the import path of the policy if user passed a string.
-  // Will be the concatenation of the policy module and the policy name if user passed a
-  // callable.
-  string name = 1;
+  // Policy function needs to be a string import path.
+  string policy_function = 1;
+
+  // The cloudpickled policy definition.
+  bytes _serialized_policy_def = 2;
 }
 
 // Configuration options for Serve's replica autoscaler.
@@ -66,28 +67,25 @@ message AutoscalingConfig {
   // [DEPRECATED] Use `downscaling_factor` instead.
   optional double downscale_smoothing_factor = 10;
 
-  // The cloudpickled policy definition.
-  bytes _serialized_policy_def = 11;
-
   // The autoscaling policy definition.
-  AutoscalingPolicy policy = 12;
+  AutoscalingPolicy policy = 11;
 
   // Target number of in flight requests per replica. This is the primary configuration
   // knob for replica autoscaler. Lower the number, the more rapidly the replicas
   // scales up. Must be a non-negative integer.
-  double target_ongoing_requests = 13;
+  double target_ongoing_requests = 12;
 
   // The multiplicative "gain" factor to limit upscale.
-  optional double upscaling_factor = 14;
+  optional double upscaling_factor = 13;
 
   // The multiplicative "gain" factor to limit downscale.
-  optional double downscaling_factor = 15;
+  optional double downscaling_factor = 14;
 
   // How long to wait before scaling down replicas from 1 to 0
-  optional double downscale_to_zero_delay_s = 16;
+  optional double downscale_to_zero_delay_s = 15;
 
   // How metrics are aggregated for autoscaling. One of "mean", "max", "min".
-  string aggregation_function = 17;
+  string aggregation_function = 16;
 }
 
 //[Begin] LOGGING CONFIG