Skip to content

Commit a6fb34d

Browse files
authored
Merge branch 'master' into cve-fix-upstream
2 parents d04b8bb + 33a2e95 commit a6fb34d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+8709
-4071
lines changed

.secrets.baseline

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -934,7 +934,7 @@
934934
"filename": "infra/feast-operator/api/v1/featurestore_types.go",
935935
"hashed_secret": "44e17306b837162269a410204daaa5ecee4ec22c",
936936
"is_verified": false,
937-
"line_number": 695
937+
"line_number": 725
938938
}
939939
],
940940
"infra/feast-operator/api/v1/zz_generated.deepcopy.go": [
@@ -943,21 +943,21 @@
943943
"filename": "infra/feast-operator/api/v1/zz_generated.deepcopy.go",
944944
"hashed_secret": "f914fc9324de1bec1ad13dec94a8ea2ddb41fc87",
945945
"is_verified": false,
946-
"line_number": 663
946+
"line_number": 681
947947
},
948948
{
949949
"type": "Secret Keyword",
950950
"filename": "infra/feast-operator/api/v1/zz_generated.deepcopy.go",
951951
"hashed_secret": "44e17306b837162269a410204daaa5ecee4ec22c",
952952
"is_verified": false,
953-
"line_number": 1206
953+
"line_number": 1249
954954
},
955955
{
956956
"type": "Secret Keyword",
957957
"filename": "infra/feast-operator/api/v1/zz_generated.deepcopy.go",
958958
"hashed_secret": "c2028031c154bbe86fd69bef740855c74b927dcf",
959959
"is_verified": false,
960-
"line_number": 1211
960+
"line_number": 1254
961961
}
962962
],
963963
"infra/feast-operator/api/v1alpha1/featurestore_types.go": [
@@ -1156,7 +1156,7 @@
11561156
"filename": "infra/feast-operator/internal/controller/services/services.go",
11571157
"hashed_secret": "36dc326eb15c7bdd8d91a6b87905bcea20b637d1",
11581158
"is_verified": false,
1159-
"line_number": 173
1159+
"line_number": 176
11601160
}
11611161
],
11621162
"infra/feast-operator/internal/controller/services/tls_test.go": [

Makefile

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,10 @@ benchmark-python-local: ## Run integration + benchmark tests for Python (local d
164164
##@ Tests
165165

166166
test-python-unit: ## Run Python unit tests (use pattern=<pattern> to filter tests, e.g., pattern=milvus, pattern=test_online_retrieval.py, pattern=test_online_retrieval.py::test_get_online_features_milvus)
167-
uv run python -m pytest -n 8 --color=yes $(if $(pattern),-k "$(pattern)") sdk/python/tests
167+
uv run python -m pytest -n 8 --color=yes $(if $(pattern),-k "$(pattern)") \
168+
--ignore=sdk/python/tests/component/ray \
169+
--ignore=sdk/python/tests/component/spark \
170+
sdk/python/tests
168171

169172
# Fast unit tests only
170173
test-python-unit-fast: ## Run fast unit tests only (no external dependencies)
@@ -186,7 +189,7 @@ test-python-integration: ## Run Python integration tests (CI)
186189
-k "(not snowflake or not test_historical_features_main)" \
187190
-m "not rbac_remote_integration_test and not ray_offline_stores_only" \
188191
--ignore=sdk/python/tests/integration/registration \
189-
--ignore=sdk/python/tests/integration/compute_engines/ray_compute \
192+
--ignore=sdk/python/tests/component/ray \
190193
--log-cli-level=INFO -s \
191194
sdk/python/tests
192195

@@ -203,7 +206,7 @@ test-python-integration-local: ## Run Python integration tests (local dev mode)
203206
uv run python -m pytest --tb=short -v -n auto --color=yes --integration --durations=10 --timeout=1200 --timeout_method=thread --dist loadgroup \
204207
-k "not test_lambda_materialization and not test_snowflake_materialization" \
205208
-m "not rbac_remote_integration_test and not ray_offline_stores_only" \
206-
--ignore=sdk/python/tests/integration/compute_engines/ray_compute \
209+
--ignore=sdk/python/tests/component/ray \
207210
--ignore=sdk/python/tests/integration/registration \
208211
--log-cli-level=INFO -s \
209212
sdk/python/tests

docs/how-to-guides/feast-on-kubernetes.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,11 @@ spec:
6565
> _More advanced FeatureStore CR examples can be found in the feast-operator [samples directory](../../infra/feast-operator/config/samples)._
6666
6767
{% hint style="success" %}
68-
**Scaling:** The Feast Operator supports horizontal scaling via static replicas, HPA autoscaling, or external autoscalers like [KEDA](https://keda.sh). Scaling requires DB-backed persistence for all enabled services.
68+
**Scaling & High Availability:** The Feast Operator supports horizontal scaling via static replicas, HPA autoscaling, or external autoscalers like [KEDA](https://keda.sh). Scaling requires DB-backed persistence for all enabled services.
6969
70-
See the [Horizontal Scaling with the Feast Operator](./scaling-feast.md#horizontal-scaling-with-the-feast-operator) guide for configuration details, or check the general recommendations on [how to scale Feast](./scaling-feast.md).
70+
When scaling is enabled, the operator auto-injects soft pod anti-affinity and zone topology spread constraints for resilience. You can also configure a PodDisruptionBudget to protect against voluntary disruptions.
71+
72+
See the [Horizontal Scaling with the Feast Operator](./scaling-feast.md#horizontal-scaling-with-the-feast-operator) guide for configuration details, including [HA options](./scaling-feast.md#high-availability), or check the general recommendations on [how to scale Feast](./scaling-feast.md).
7173
{% endhint %}
7274
7375
> _Sample scaling CRs are available at [`v1_featurestore_scaling_static.yaml`](../../infra/feast-operator/config/samples/v1_featurestore_scaling_static.yaml) and [`v1_featurestore_scaling_hpa.yaml`](../../infra/feast-operator/config/samples/v1_featurestore_scaling_hpa.yaml)._

docs/how-to-guides/scaling-feast.md

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ spec:
8686
target:
8787
type: Utilization
8888
averageUtilization: 70
89+
podDisruptionBudgets:
90+
maxUnavailable: 1
8991
onlineStore:
9092
persistence:
9193
store:
@@ -107,7 +109,7 @@ spec:
107109
```
108110

109111
{% hint style="info" %}
110-
When autoscaling is configured, the operator automatically sets the deployment strategy to `RollingUpdate` (instead of the default `Recreate`) to ensure zero-downtime scaling. You can override this by explicitly setting `deploymentStrategy` in the CR.
112+
When autoscaling is configured, the operator automatically sets the deployment strategy to `RollingUpdate` (instead of the default `Recreate`) to ensure zero-downtime scaling, and auto-injects soft pod anti-affinity and zone topology spread constraints. You can override any of these by explicitly setting `deploymentStrategy`, `affinity`, or `topologySpreadConstraints` in the CR.
111113
{% endhint %}
112114

113115
#### Validation Rules
@@ -117,6 +119,72 @@ The operator enforces the following rules:
117119
- Scaling with `replicas > 1` or any `autoscaling` config is **rejected** if any enabled service uses file-based persistence.
118120
- S3 (`s3://`) and GCS (`gs://`) backed registry file persistence is allowed with scaling, since these object stores support concurrent readers.
119121

122+
#### High Availability
123+
124+
When scaling is enabled (`replicas > 1` or `autoscaling`), the operator provides HA features to improve resilience:
125+
126+
**Pod Anti-Affinity** — The operator automatically injects a soft (`preferredDuringSchedulingIgnoredDuringExecution`) pod anti-affinity rule that prefers spreading pods across different nodes. This makes it less likely that multiple replicas are co-located on the same node (the rule is a preference, so the scheduler may still co-locate pods when no other node fits), improving resilience to node failures. You can override this by providing your own `affinity` configuration:
127+
128+
```yaml
129+
spec:
130+
replicas: 3
131+
services:
132+
# Override with custom affinity (e.g. strict anti-affinity)
133+
affinity:
134+
podAntiAffinity:
135+
requiredDuringSchedulingIgnoredDuringExecution:
136+
- topologyKey: kubernetes.io/hostname
137+
labelSelector:
138+
matchLabels:
139+
feast.dev/name: my-feast
140+
# ...
141+
```
142+
143+
**Topology Spread Constraints** — The operator automatically injects a soft zone-spread constraint (`whenUnsatisfiable: ScheduleAnyway`) that distributes pods across availability zones. This is a best-effort spread — if zones are unavailable, pods will still be scheduled. You can override this with explicit constraints or disable it with an empty array:
144+
145+
```yaml
146+
spec:
147+
replicas: 3
148+
services:
149+
# Override with custom topology spread (e.g. strict zone spreading)
150+
topologySpreadConstraints:
151+
- maxSkew: 1
152+
topologyKey: topology.kubernetes.io/zone
153+
whenUnsatisfiable: DoNotSchedule
154+
labelSelector:
155+
matchLabels:
156+
feast.dev/name: my-feast
157+
# ...
158+
```
159+
160+
To disable the auto-injected topology spread:
161+
162+
```yaml
163+
spec:
164+
replicas: 3
165+
services:
166+
topologySpreadConstraints: []
167+
# ...
168+
```
169+
170+
**PodDisruptionBudget** — You can configure a PDB to limit voluntary disruptions (e.g. during node drains or cluster upgrades). The PDB is only created when scaling is enabled. Exactly one of `minAvailable` or `maxUnavailable` must be set:
171+
172+
```yaml
173+
spec:
174+
replicas: 3
175+
services:
176+
podDisruptionBudgets:
177+
maxUnavailable: 1 # at most 1 pod unavailable during disruptions
178+
# -- OR --
179+
# podDisruptionBudgets:
180+
# minAvailable: "50%" # at least 50% of pods must remain available
181+
# ...
182+
```
183+
184+
{% hint style="info" %}
185+
The PDB is not auto-injected — you must explicitly configure it. This is intentional because a misconfigured PDB (e.g. `minAvailable` equal to the replica count) can block node drains and cluster upgrades.
186+
{% endhint %}
187+
120188
#### Using KEDA (Kubernetes Event-Driven Autoscaling)
121189

122190
[KEDA](https://keda.sh) is also supported as an external autoscaler. KEDA should target the FeatureStore's scale sub-resource directly (since it implements the Kubernetes scale API). This is the recommended approach because the operator manages the Deployment's replica count from `spec.replicas` — targeting the Deployment directly would conflict with the operator's reconciliation.

docs/reference/online-stores/dynamodb.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,47 @@ online_store:
2222
2323
The full set of configuration options is available in [DynamoDBOnlineStoreConfig](https://rtd.feast.dev/en/master/#feast.infra.online_stores.dynamodb.DynamoDBOnlineStoreConfig).
2424
25+
## Configuration
26+
27+
Below is an example with performance tuning options:
28+
29+
{% code title="feature_store.yaml" %}
30+
```yaml
31+
project: my_feature_repo
32+
registry: data/registry.db
33+
provider: aws
34+
online_store:
35+
type: dynamodb
36+
region: us-west-2
37+
batch_size: 100
38+
max_read_workers: 10
39+
consistent_reads: false
40+
```
41+
{% endcode %}
42+
43+
### Configuration Options
44+
45+
| Option | Type | Default | Description |
46+
| ------ | ---- | ------- | ----------- |
47+
| `region` | string | | AWS region for DynamoDB |
48+
| `table_name_template` | string | `{project}.{table_name}` | Template for table names |
49+
| `batch_size` | int | `100` | Number of items per BatchGetItem/BatchWriteItem request (max 100 for BatchGetItem; DynamoDB itself limits BatchWriteItem to 25 items per request) |
50+
| `max_read_workers` | int | `10` | Maximum parallel threads for batch read operations. Higher values improve throughput for large batch reads but increase resource usage |
51+
| `consistent_reads` | bool | `false` | Whether to use strongly consistent reads (higher latency, guaranteed latest data) |
52+
| `tags` | dict | `null` | AWS resource tags added to each table |
53+
| `session_based_auth` | bool | `false` | Use AWS session-based client authentication |
54+
55+
### Performance Tuning
56+
57+
**Parallel Batch Reads**: When reading features for many entities, DynamoDB's BatchGetItem is limited to 100 items per request. For 500 entities, this requires 5 batch requests. The `max_read_workers` option controls how many of these batches execute in parallel:
58+
59+
- **Sequential (old behavior)**: 5 batches × 10ms = 50ms total
60+
- **Parallel (with `max_read_workers: 10`)**: 5 batches in parallel ≈ 10ms total
61+
62+
For high-throughput workloads with large entity counts, increase `max_read_workers` (up to 20-30) based on your DynamoDB capacity and network conditions.
63+
64+
**Batch Size**: The default `batch_size` of 100 is also the maximum and minimizes the number of API calls. However, large batches may hit DynamoDB's 16MB response limit for tables with large feature values; lower `batch_size` in that case.
65+
2566
## Permissions
2667

2768
Feast requires the following permissions in order to execute commands for DynamoDB online store:

infra/feast-operator/api/v1/featurestore_types.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
batchv1 "k8s.io/api/batch/v1"
2323
corev1 "k8s.io/api/core/v1"
2424
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
25+
"k8s.io/apimachinery/pkg/util/intstr"
2526
)
2627

2728
const (
@@ -314,6 +315,21 @@ type FeatureStoreServices struct {
314315
// Scaling configures horizontal scaling for the FeatureStore deployment (e.g. HPA autoscaling).
315316
// For static replicas, use spec.replicas instead.
316317
Scaling *ScalingConfig `json:"scaling,omitempty"`
318+
// PodDisruptionBudgets configures a PodDisruptionBudget for the FeatureStore deployment.
319+
// Only created when scaling is enabled (replicas > 1 or autoscaling).
320+
// +optional
321+
PodDisruptionBudgets *PDBConfig `json:"podDisruptionBudgets,omitempty"`
322+
// TopologySpreadConstraints defines how pods are spread across topology domains.
323+
// When scaling is enabled and this is not set, the operator auto-injects a soft
324+
// zone-spread constraint (whenUnsatisfiable: ScheduleAnyway).
325+
// Set to an empty array to disable auto-injection.
326+
// +optional
327+
TopologySpreadConstraints []corev1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
328+
// Affinity defines the pod scheduling constraints for the FeatureStore deployment.
329+
// When scaling is enabled and this is not set, the operator auto-injects a soft
330+
// pod anti-affinity rule to prefer spreading pods across nodes.
331+
// +optional
332+
Affinity *corev1.Affinity `json:"affinity,omitempty"`
317333
}
318334

319335
// ScalingConfig configures horizontal scaling for the FeatureStore deployment.
@@ -342,6 +358,20 @@ type AutoscalingConfig struct {
342358
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
343359
}
344360

361+
// PDBConfig configures a PodDisruptionBudget for the FeatureStore deployment.
362+
// Exactly one of minAvailable or maxUnavailable must be set.
363+
// +kubebuilder:validation:XValidation:rule="[has(self.minAvailable), has(self.maxUnavailable)].exists_one(c, c)",message="Exactly one of minAvailable or maxUnavailable must be set."
364+
type PDBConfig struct {
365+
// MinAvailable specifies the minimum number/percentage of pods that must remain available.
366+
// Mutually exclusive with maxUnavailable.
367+
// +optional
368+
MinAvailable *intstr.IntOrString `json:"minAvailable,omitempty"`
369+
// MaxUnavailable specifies the maximum number/percentage of pods that can be unavailable.
370+
// Mutually exclusive with minAvailable.
371+
// +optional
372+
MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`
373+
}
374+
345375
// OfflineStore configures the offline store service
346376
type OfflineStore struct {
347377
// Creates a remote offline server container

infra/feast-operator/api/v1/zz_generated.deepcopy.go

Lines changed: 43 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)