Skip to content

Commit dcad77c

Browse files
authored
chat: fix handling of space in reasoning markers (#22353)
* chat: fix handling of space in reasoning markers
* fix tests
* whitespace
1 parent 98dc141 commit dcad77c

3 files changed

Lines changed: 23 additions & 28 deletions

File tree

common/chat-diff-analyzer.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ void analyze_reasoning::compare_reasoning_presence() {
296296
return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest());
297297
});
298298
auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
299-
return p.tag("pre", p.marker() + p.space()) + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
299+
return p.tag("pre", p.marker() + p.space()) + p.literal(reasoning_content) + p.tag("post", (p.space() + p.marker() + p.space())) + p.rest();
300300
});
301301
// try the more aggressive parse first, if it fails, fall back to the delimiter one
302302
auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
@@ -306,11 +306,11 @@ void analyze_reasoning::compare_reasoning_presence() {
306306
if (result.result.success()) {
307307
if (!result.tags["pre"].empty() && !result.tags["post"].empty()) {
308308
mode = reasoning_mode::TAG_BASED;
309-
start = trim_leading_whitespace(result.tags["pre"]);
310-
end = trim_trailing_whitespace(result.tags["post"]);
309+
start = result.tags["pre"];
310+
end = result.tags["post"];
311311
} else if (!result.tags["post"].empty()) {
312312
mode = reasoning_mode::TAG_BASED;
313-
end = trim_trailing_whitespace(result.tags["post"]);
313+
end = result.tags["post"];
314314
}
315315
}
316316
}

tests/test-chat-auto-parser.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1331,7 +1331,7 @@ static void test_nemotron_reasoning_detection(testing & t) {
13311331

13321332
// Check reasoning markers
13331333
t.assert_equal("reasoning_start should be '<think>\\n'", "<think>\n", analysis.reasoning.start);
1334-
t.assert_equal("reasoning_end should be '</think>'", "</think>", analysis.reasoning.end);
1334+
t.assert_equal("reasoning_end should be '\\n</think>\\n'", "\n</think>\n", analysis.reasoning.end);
13351335

13361336
// Check reasoning mode detection
13371337
// Nemotron uses tag-based reasoning; prefill handles the template's forced markers

tests/test-chat.cpp

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,22 +1642,16 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
16421642
// Qwen3.5 (basically same as Nemotron, but keeping separate tests just in case)
16431643
auto tst = peg_tester("models/templates/Qwen3.5-4B.jinja", detailed_debug);
16441644

1645-
tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
1645+
tst.test("I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?")
16461646
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
16471647
.enable_thinking(true)
16481648
.expect(message_assist_thoughts)
16491649
.run();
16501650

1651-
tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
1651+
tst.test("I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?")
16521652
.enable_thinking(true)
16531653
.reasoning_format(COMMON_REASONING_FORMAT_NONE)
1654-
.expect_content("<think>\nI'm\nthinking\n</think>\nHello, world!\nWhat's up?")
1655-
.run();
1656-
1657-
tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
1658-
.enable_thinking(true)
1659-
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
1660-
.expect(message_assist_thoughts)
1654+
.expect_content("<think>\nI'm\nthinking\n</think>\n\nHello, world!\nWhat's up?")
16611655
.run();
16621656

16631657
tst.test(
@@ -1673,7 +1667,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
16731667
.run();
16741668

16751669
tst.test(
1676-
"I'm\nthinking\n</think>\n"
1670+
"I'm\nthinking\n</think>\n\n"
16771671
"<tool_call>\n"
16781672
"<function=special_function>\n"
16791673
"<parameter=arg1>\n1\n</parameter>\n"
@@ -1731,7 +1725,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
17311725

17321726
tst.test(
17331727
"I need to output the invoice details in JSON\n"
1734-
"</think>\n"
1728+
"</think>\n\n"
17351729
R"({"amount": 123.45, "date": "2025-12-03"})")
17361730
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
17371731
.enable_thinking(true)
@@ -1751,7 +1745,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
17511745
"hello()\n"
17521746
"</parameter>\n"
17531747
"</function>\n"
1754-
"</tool_call></think>\n"
1748+
"</tool_call>\n</think>\n\n"
17551749
"<tool_call>\n"
17561750
"<function=python>\n"
17571751
"<parameter=code>\n"
@@ -1994,7 +1988,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
19941988
"hello()\n"
19951989
"</parameter>\n"
19961990
"</function>\n"
1997-
"</tool_call></think>\n"
1991+
"</tool_call>\n</think>\n"
19981992
"<tool_call>\n"
19991993
"<function=python>\n"
20001994
"<parameter=code>\n"
@@ -3463,7 +3457,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
34633457
.run();
34643458

34653459
// Tool call with reasoning (enable_thinking=true)
3466-
tst.test("I'm\nthinking</think><tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>")
3460+
tst.test("I'm\nthinking\n</think>\n\n<tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>")
34673461
.enable_thinking(true)
34683462
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
34693463
.tools({ special_function_tool })
@@ -3487,7 +3481,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
34873481
.run();
34883482

34893483
// Tool call with reasoning and content
3490-
tst.test("I need to call a function</think>"
3484+
tst.test("I need to call a function\n</think>\n\n"
34913485
"Let me check the time.<tool_call>\n{\"name\": \"get_time\", \"arguments\": {\"city\": \"XYZCITY\"}}</tool_call>")
34923486
.enable_thinking(true)
34933487
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
@@ -3514,7 +3508,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
35143508

35153509
// fake tool call marker in reasoning
35163510
tst.test(
3517-
"Let me think about <tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 2}}</tool_call> hmm</think>"
3511+
"Let me think about <tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 2}}</tool_call> hmm\n</think>\n\n"
35183512
"<tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>")
35193513
.enable_thinking(true)
35203514
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
@@ -3542,19 +3536,19 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
35423536
// Format: <minimax:tool_call><invoke name="func"><parameter name="key">value</parameter></invoke></minimax:tool_call>
35433537
{
35443538
auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug);
3545-
tst.test("</think>Hello, world!\nWhat's up?").enable_thinking(true).reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(message_assist).run();
3539+
tst.test("\n</think>\n\nHello, world!\nWhat's up?").enable_thinking(true).reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(message_assist).run();
35463540

3547-
tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?").enable_thinking(true).reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(message_assist_thoughts).run();
3541+
tst.test("I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?").enable_thinking(true).reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(message_assist_thoughts).run();
35483542

3549-
tst.test("Let's call a tool:</think><minimax:tool_call>\n<invoke name=\"empty_args\">\n</invoke>\n</minimax:tool_call>").
3543+
tst.test("Let's call a tool:\n</think>\n\n<minimax:tool_call>\n<invoke name=\"empty_args\">\n</invoke>\n</minimax:tool_call>").
35503544
enable_thinking(true).
35513545
reasoning_format(COMMON_REASONING_FORMAT_AUTO).
35523546
tools({ empty_args_tool }).
35533547
expect(message_with_reasoning_and_tool_call("Let's call a tool:", "empty_args", "{}")).
35543548
run();
35553549

35563550
tst.test(
3557-
"</think><minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter "
3551+
"\n</think>\n\n<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter "
35583552
"name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>")
35593553
.tools({ special_function_tool })
35603554
.expect(message_assist_call)
@@ -3714,7 +3708,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
37143708
.enable_thinking(false)
37153709
.expect(message_assist)
37163710
.run();
3717-
tst.test("I'm\nthinking</think>\n\nHello, world!\nWhat's up?")
3711+
tst.test("I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?")
37183712
.enable_thinking(true)
37193713
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
37203714
.expect(message_assist_thoughts)
@@ -3729,7 +3723,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
37293723
.tools({ special_function_tool })
37303724
.expect(message_assist_call_content)
37313725
.run();
3732-
tst.test("I'm\nthinking</think>\n\n<tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n</tool_call>")
3726+
tst.test("I'm\nthinking\n</think>\n\n<tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n</tool_call>")
37333727
.enable_thinking(true)
37343728
.reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
37353729
.tools({ special_function_tool })
@@ -4006,7 +4000,8 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
40064000

40074001
{
40084002
auto tst = peg_tester("models/templates/StepFun3.5-Flash.jinja", detailed_debug);
4009-
tst.test("I was thinking</think>\nNow I'm not.").
4003+
4004+
tst.test("I was thinking\n</think>\nNow I'm not.").
40104005
enable_thinking(true).
40114006
reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).
40124007
expect_reasoning("I was thinking").

0 commit comments

Comments
 (0)