Skip to content

Commit 2171fb7

Browse files
committed
fix(lib): address Codex review feedback on model capabilities
Two P1 corrections from the automated Codex review on PR #3124: 1. Enforce segment boundary in prefix matching. `startswith()` was matching unknown identifiers like `gpt-5.10` as `gpt-5.1` and `o1-previewed` as `o1-preview`, causing the documented `None` fallback to be skipped. The lookup now requires the family prefix to either equal the model exactly or be followed by a `-`. 2. Align gpt-5 family effort options with the canonical SDK docs in `src/openai/types/shared/reasoning.py`: - gpt-5.1 (and -codex / -codex-max / -mini): `none/low/medium/high` -- removed `minimal`, which would have surfaced an unsupported value and triggered avoidable 400 responses. - gpt-5.2 / gpt-5.3 / gpt-5.4: `none/low/medium/high/xhigh` (`xhigh` is supported for models *after* gpt-5.1-codex-max). - gpt-5-pro: split into its own family with the high-only effort scale per the SDK docstring ("defaults to and only supports high"). Other improvements: - Chat / search variant detection now matches dated snapshots like `gpt-4o-search-preview-2025-03-11` via a regex instead of a strict `endswith` check. - New `TestBoundaryMatching` class with parametrized regression tests for prefix-collision cases (`gpt-5.10`, `gpt-5.1foo`, `o1-previewed`) and dated chat / search variants. - New `TestGpt5ProFamily` class covering the high-only effort scale. 49 of 49 new tests pass; 154 of 154 lib tests pass overall.
1 parent 3b79e58 commit 2171fb7

2 files changed

Lines changed: 179 additions & 48 deletions

File tree

src/openai/lib/_models.py

Lines changed: 67 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,23 @@
44
capability matrix is documented behaviour rather than schema. When OpenAI ships
55
a new model family, the registry in this file should be updated to match.
66
7+
The canonical source for reasoning-effort behaviour is the docstring on the
8+
``Reasoning.effort`` parameter in ``src/openai/types/shared/reasoning.py``.
9+
710
Example:
811
>>> from openai import get_model_capabilities
912
>>> caps = get_model_capabilities("gpt-5.4-mini")
1013
>>> caps.supports_reasoning
1114
True
1215
>>> caps.reasoning_effort_options
13-
('none', 'minimal', 'low', 'medium', 'high', 'xhigh')
16+
('none', 'low', 'medium', 'high', 'xhigh')
1417
>>> get_model_capabilities("gpt-4.1").supports_reasoning
1518
False
1619
"""
1720

1821
from __future__ import annotations
1922

23+
import re
2024
from typing import Any, Tuple, Optional
2125
from dataclasses import dataclass
2226

@@ -81,28 +85,43 @@ def _caps(
8185
# ---------------------------------------------------------------------------
8286
# Family registry.
8387
#
84-
# Entries are matched by longest-prefix against the model string, with chat /
85-
# search variants checked via the suffix test in `get_model_capabilities`.
88+
# Entries are matched by longest *segment* prefix against the model string
89+
# (i.e. the registered prefix must either equal the model exactly or be
90+
# followed by a `-`), with chat / search variants checked via the
91+
# `_CHAT_VARIANT_RE` suffix regex in `get_model_capabilities`.
8692
#
8793
# When OpenAI ships a new family, add an entry here. Order within this tuple
8894
# does not matter; the lookup picks the longest matching prefix.
8995
# ---------------------------------------------------------------------------
9096

91-
# Effort scales reused across families.
97+
# Effort scales reused across families. These mirror the prose in
98+
# `src/openai/types/shared/reasoning.py`:
99+
#
100+
# - All models *before* gpt-5.1 default to medium and do NOT support `none`.
101+
# gpt-5 base accepts `minimal/low/medium/high`.
102+
# - gpt-5.1 supports `none/low/medium/high` (no `minimal`).
103+
# - `xhigh` is supported for models *after* gpt-5.1-codex-max, i.e.
104+
# gpt-5.2 onward, on top of the gpt-5.1 effort scale.
105+
# - gpt-5-pro defaults to and only supports `high`.
92106
_EFFORT_O_SERIES: Tuple[ReasoningEffort, ...] = ("low", "medium", "high")
93-
_EFFORT_GPT5: Tuple[ReasoningEffort, ...] = ("minimal", "low", "medium", "high")
94-
_EFFORT_GPT5_1: Tuple[ReasoningEffort, ...] = ("none", "minimal", "low", "medium", "high")
95-
_EFFORT_GPT5_4: Tuple[ReasoningEffort, ...] = ("none", "minimal", "low", "medium", "high", "xhigh")
107+
_EFFORT_GPT5_BASE: Tuple[ReasoningEffort, ...] = ("minimal", "low", "medium", "high")
108+
_EFFORT_GPT5_1: Tuple[ReasoningEffort, ...] = ("none", "low", "medium", "high")
109+
_EFFORT_GPT5_2_PLUS: Tuple[ReasoningEffort, ...] = ("none", "low", "medium", "high", "xhigh")
110+
_EFFORT_GPT5_PRO: Tuple[ReasoningEffort, ...] = ("high",)
96111

97112

98113
_FAMILIES: Tuple[ModelCapabilities, ...] = (
99114
# gpt-5.x reasoning models. Temperature is rejected unless you use a
100115
# `-chat-latest` variant or set `reasoning_effort="none"`.
101-
_caps("gpt-5.4", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_4),
102-
_caps("gpt-5.3", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_1),
103-
_caps("gpt-5.2", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_1),
116+
#
117+
# gpt-5-pro is a high-only reasoning model and must be registered as its
118+
# own family so longest-prefix matching beats the generic `gpt-5`.
119+
_caps("gpt-5-pro", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_PRO),
120+
_caps("gpt-5.4", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_2_PLUS),
121+
_caps("gpt-5.3", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_2_PLUS),
122+
_caps("gpt-5.2", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_2_PLUS),
104123
_caps("gpt-5.1", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_1),
105-
_caps("gpt-5", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5),
124+
_caps("gpt-5", supports_temperature=False, supports_reasoning=True, reasoning_effort_options=_EFFORT_GPT5_BASE),
106125
# Classic chat families.
107126
_caps("gpt-4.1", supports_temperature=True, supports_reasoning=False, reasoning_effort_options=None),
108127
_caps("gpt-4o", supports_temperature=True, supports_reasoning=False, reasoning_effort_options=None),
@@ -142,9 +161,22 @@ def _caps(
142161
)
143162

144163

145-
# Suffixes that override family defaults. A model ending in one of these is
146-
# treated as a non-reasoning chat variant regardless of its family.
147-
_CHAT_VARIANT_SUFFIXES: Tuple[str, ...] = ("-chat-latest", "-search-preview")
164+
# Matches `*-chat-latest` and `*-search-preview` (with an optional trailing
165+
# `-YYYY-MM-DD` snapshot date), e.g. `gpt-4o-search-preview-2025-03-11`.
166+
# These variants behave like classic chat models regardless of family.
167+
_CHAT_VARIANT_RE = re.compile(r"-(?:chat-latest|search-preview)(?:-\d{4}-\d{2}-\d{2})?$")
168+
169+
170+
def _matches_family(model: str, family: str) -> bool:
171+
"""Match ``model`` against a family prefix at a segment boundary.
172+
173+
A model matches when it equals the family exactly or extends it with a
174+
``-`` separator. This prevents collisions like ``gpt-5.10`` being
175+
misclassified as ``gpt-5.1``.
176+
"""
177+
if model == family:
178+
return True
179+
return model.startswith(family + "-")
148180

149181

150182
def get_model_capabilities(model: str) -> Optional[ModelCapabilities]:
@@ -155,11 +187,17 @@ def get_model_capabilities(model: str) -> Optional[ModelCapabilities]:
155187
decide which controls to render) but is only as fresh as this module's
156188
registry. New model families need a corresponding entry here.
157189
190+
Matching is segment-aware: the registered prefix must either equal the
191+
model exactly or be followed by a ``-`` separator. ``"gpt-5.10"`` will
192+
therefore *not* match the ``gpt-5.1`` family and ``"o1-previewed"`` will
193+
not match ``o1-preview``. ``"gpt-5.10"`` matches no family and yields ``None``;
194+
``"o1-previewed"`` can still match a shorter registered family such as ``o1``.
195+
158196
Args:
159197
model: A model identifier such as ``"gpt-5.4-mini"`` or
160198
``"gpt-4o-2024-08-06"``. Date suffixes and size variants
161-
(``-mini``, ``-nano``) are handled automatically by longest-prefix
162-
matching.
199+
(``-mini``, ``-nano``, ``-pro``) are handled automatically by
200+
longest-prefix matching.
163201
164202
Returns:
165203
A :class:`ModelCapabilities` describing the model, or ``None`` if no
@@ -169,11 +207,15 @@ def get_model_capabilities(model: str) -> Optional[ModelCapabilities]:
169207
170208
Example:
171209
>>> get_model_capabilities("gpt-5.4-mini").reasoning_effort_options
172-
('none', 'minimal', 'low', 'medium', 'high', 'xhigh')
210+
('none', 'low', 'medium', 'high', 'xhigh')
173211
>>> get_model_capabilities("gpt-5-chat-latest").supports_temperature
174212
True
175213
>>> get_model_capabilities("gpt-5").supports_temperature
176214
False
215+
>>> get_model_capabilities("gpt-5-pro").reasoning_effort_options
216+
('high',)
217+
>>> get_model_capabilities("gpt-5.10") is None
218+
True
177219
>>> get_model_capabilities("nonexistent-model") is None
178220
True
179221
"""
@@ -184,11 +226,12 @@ def get_model_capabilities(model: str) -> Optional[ModelCapabilities]:
184226
if not isinstance(candidate, str) or not candidate:
185227
return None
186228

187-
# Longest matching prefix wins so that "gpt-5.4" beats "gpt-5", and "o1-pro"
188-
# beats "o1".
229+
# Longest matching family wins so that "gpt-5.4" beats "gpt-5", and
230+
# "o1-pro" beats "o1". Segment boundary check rejects things like
231+
# "gpt-5.10" claiming to be "gpt-5.1".
189232
best: Optional[ModelCapabilities] = None
190233
for entry in _FAMILIES:
191-
if not candidate.startswith(entry.family):
234+
if not _matches_family(candidate, entry.family):
192235
continue
193236
if best is None or len(entry.family) > len(best.family):
194237
best = entry
@@ -199,8 +242,10 @@ def get_model_capabilities(model: str) -> Optional[ModelCapabilities]:
199242
# Chat / search variants override family defaults: gpt-5-chat-latest is a
200243
# non-reasoning model even though gpt-5* normally is one. We still report
201244
# the family so callers can group e.g. "gpt-5.2-chat-latest" with
202-
# "gpt-5.2".
203-
if any(candidate.endswith(suffix) for suffix in _CHAT_VARIANT_SUFFIXES):
245+
# "gpt-5.2". The regex tolerates a trailing date snapshot like
246+
# `-2025-03-11` so dated variants like `gpt-4o-search-preview-2025-03-11`
247+
# are recognized too.
248+
if _CHAT_VARIANT_RE.search(candidate):
204249
return ModelCapabilities(
205250
family=best.family,
206251
supports_temperature=True,

tests/lib/test_model_capabilities.py

Lines changed: 112 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,49 +39,69 @@ def test_gpt_5_chat_latest_is_non_reasoning(self) -> None:
3939

4040

4141
class TestGpt51Family:
42-
def test_gpt_5_1_adds_none_to_effort(self) -> None:
42+
def test_gpt_5_1_drops_minimal_adds_none(self) -> None:
43+
# Per src/openai/types/shared/reasoning.py the supported gpt-5.1
44+
# effort values are none/low/medium/high. `minimal` was removed and
45+
# would trigger 400 responses if surfaced in a UI.
4346
caps = get_model_capabilities("gpt-5.1")
4447
assert caps is not None
4548
assert caps.family == "gpt-5.1"
46-
assert caps.reasoning_effort_options == (
47-
"none",
48-
"minimal",
49-
"low",
50-
"medium",
51-
"high",
52-
)
49+
assert caps.reasoning_effort_options == ("none", "low", "medium", "high")
50+
assert "minimal" not in (caps.reasoning_effort_options or ())
5351

5452
def test_gpt_5_1_codex(self) -> None:
5553
caps = get_model_capabilities("gpt-5.1-codex")
5654
assert caps is not None
5755
assert caps.family == "gpt-5.1"
5856

59-
def test_gpt_5_2_uses_same_effort_scale(self) -> None:
57+
def test_gpt_5_1_codex_max(self) -> None:
58+
# gpt-5.1-codex-max is the boundary referenced in the SDK docs:
59+
# `xhigh` is supported for models *after* this one. So it should
60+
# still match the gpt-5.1 family (no xhigh).
61+
caps = get_model_capabilities("gpt-5.1-codex-max")
62+
assert caps is not None
63+
assert caps.family == "gpt-5.1"
64+
assert "xhigh" not in (caps.reasoning_effort_options or ())
65+
66+
67+
class TestGpt52PlusFamilies:
68+
def test_gpt_5_2_supports_xhigh(self) -> None:
69+
# Per the SDK docstring, `xhigh` is supported for all models *after*
70+
# gpt-5.1-codex-max, which means gpt-5.2 onward.
71+
caps = get_model_capabilities("gpt-5.2")
72+
assert caps is not None
73+
assert caps.family == "gpt-5.2"
74+
assert caps.reasoning_effort_options == ("none", "low", "medium", "high", "xhigh")
75+
76+
def test_gpt_5_2_pro(self) -> None:
6077
caps = get_model_capabilities("gpt-5.2-pro")
6178
assert caps is not None
6279
assert caps.family == "gpt-5.2"
63-
assert caps.reasoning_effort_options == (
64-
"none",
65-
"minimal",
66-
"low",
67-
"medium",
68-
"high",
69-
)
80+
81+
82+
class TestGpt5ProFamily:
83+
def test_gpt_5_pro_only_supports_high(self) -> None:
84+
# Per the SDK docs, gpt-5-pro defaults to (and only supports) `high`.
85+
caps = get_model_capabilities("gpt-5-pro")
86+
assert caps is not None
87+
assert caps.family == "gpt-5-pro"
88+
assert caps.supports_reasoning is True
89+
assert caps.reasoning_effort_options == ("high",)
90+
91+
def test_gpt_5_pro_dated_snapshot(self) -> None:
92+
caps = get_model_capabilities("gpt-5-pro-2025-10-06")
93+
assert caps is not None
94+
assert caps.family == "gpt-5-pro"
95+
assert caps.reasoning_effort_options == ("high",)
7096

7197

7298
class TestGpt54Family:
73-
def test_gpt_5_4_adds_xhigh(self) -> None:
99+
def test_gpt_5_4_full_effort_scale(self) -> None:
74100
caps = get_model_capabilities("gpt-5.4")
75101
assert caps is not None
76102
assert caps.family == "gpt-5.4"
77-
assert caps.reasoning_effort_options == (
78-
"none",
79-
"minimal",
80-
"low",
81-
"medium",
82-
"high",
83-
"xhigh",
84-
)
103+
assert caps.reasoning_effort_options == ("none", "low", "medium", "high", "xhigh")
104+
assert "minimal" not in (caps.reasoning_effort_options or ())
85105

86106
def test_gpt_5_4_size_variants(self) -> None:
87107
for size in ("gpt-5.4", "gpt-5.4-mini", "gpt-5.4-nano"):
@@ -181,6 +201,71 @@ def test_o4_mini(self) -> None:
181201
assert caps.supports_reasoning is True
182202

183203

204+
class TestBoundaryMatching:
205+
"""Family lookup must respect segment boundaries.
206+
207+
A registered prefix should only match the model when the prefix either
208+
equals the model exactly or is followed by ``-``. Without this, a future
209+
model name like ``gpt-5.10`` would silently impersonate ``gpt-5.1``.
210+
"""
211+
212+
@pytest.mark.parametrize(
213+
"model",
214+
[
215+
"gpt-5.10", # version typo / future model: must NOT match gpt-5.1
216+
"gpt-5.10-mini",
217+
"gpt-5.1foo",
218+
"o1-previewed", # near-typo of o1-preview
219+
"o1-previews",
220+
],
221+
)
222+
def test_boundary_collisions_do_not_misclassify(self, model: str) -> None:
223+
# Either we return None (no match) or we match a strictly different
224+
# family (e.g. `o1-previewed` can match a registered `o1` segment-wise --
225+
# in that case we want it to NOT claim to be o1-preview).
226+
caps = get_model_capabilities(model)
227+
if caps is not None:
228+
assert caps.family != "gpt-5.1", model
229+
assert caps.family != "o1-preview", model
230+
231+
def test_gpt_5_10_does_not_match_gpt_5_1(self) -> None:
232+
# `gpt-5.10` is hypothetical, but the lookup must not classify it as
233+
# gpt-5.1 just because of `startswith`. Today we'd expect None until
234+
# a 5.10 family is registered.
235+
assert get_model_capabilities("gpt-5.10") is None
236+
237+
def test_o1_previewed_does_not_match_o1_preview(self) -> None:
238+
# Without the boundary check, `o1-previewed` would be tagged as
239+
# o1-preview (which has supports_reasoning=False), masking what is
240+
# really an unknown model.
241+
caps = get_model_capabilities("o1-previewed")
242+
if caps is not None:
243+
assert caps.family != "o1-preview"
244+
245+
def test_exact_match_still_works(self) -> None:
246+
# Sanity: removing collisions must not break the exact-match path.
247+
for model in ("gpt-5.1", "o1-preview", "chatgpt-4o-latest", "gpt-5-pro"):
248+
assert get_model_capabilities(model) is not None, model
249+
250+
def test_dated_search_preview_still_recognized_as_chat_variant(self) -> None:
251+
# `gpt-4o-search-preview-2025-03-11` should still count as a chat
252+
# variant despite the trailing date snapshot. The previous suffix
253+
# check (`endswith("-search-preview")`) silently failed to match dated
254+
# models.
255+
caps = get_model_capabilities("gpt-4o-search-preview-2025-03-11")
256+
assert caps is not None
257+
assert caps.supports_temperature is True
258+
assert caps.supports_reasoning is False
259+
260+
def test_dated_chat_latest_still_recognized_as_chat_variant(self) -> None:
261+
caps = get_model_capabilities("gpt-5.2-chat-latest-2025-12-11")
262+
assert caps is not None
263+
assert caps.supports_temperature is True
264+
assert caps.supports_reasoning is False
265+
# Family should still be reported as the underlying gpt-5.2
266+
assert caps.family == "gpt-5.2"
267+
268+
184269
class TestUnknownAndEdgeCases:
185270
def test_unknown_model_returns_none(self) -> None:
186271
assert get_model_capabilities("nonexistent-model") is None
@@ -247,13 +332,14 @@ def effort_options(model: str) -> tuple[str, ...]:
247332
return tuple(opt for opt in caps.reasoning_effort_options if opt is not None)
248333

249334
assert effort_options("gpt-5") == ("minimal", "low", "medium", "high")
335+
assert effort_options("gpt-5.1") == ("none", "low", "medium", "high")
250336
assert effort_options("gpt-5.4") == (
251337
"none",
252-
"minimal",
253338
"low",
254339
"medium",
255340
"high",
256341
"xhigh",
257342
)
343+
assert effort_options("gpt-5-pro") == ("high",)
258344
assert effort_options("gpt-4.1") == ()
259345
assert effort_options("o3") == ("low", "medium", "high")

0 commit comments

Comments
 (0)