Skip to content

Commit 98bffff

Browse files
fix(json-repair): use char-based indexing to prevent out-of-bounds (#2549)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent 3ce465e commit 98bffff

2 files changed

Lines changed: 68 additions & 9 deletions

File tree

crates/forge_json_repair/src/parser.rs

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -957,30 +957,35 @@ impl JsonRepairParser {
957957
}
958958

959959
fn insert_before_last_whitespace(&self, text_to_insert: &str) -> String {
960-
let mut index = self.output.len();
960+
let chars: Vec<char> = self.output.chars().collect();
961+
let mut index = chars.len();
961962

962-
if index == 0
963-
|| !self.is_whitespace(self.output.chars().nth(index - 1).unwrap_or('\0'), true)
964-
{
963+
if index == 0 || !self.is_whitespace(chars[index - 1], true) {
965964
return format!("{}{}", self.output, text_to_insert);
966965
}
967966

968-
let chars: Vec<char> = self.output.chars().collect();
969967
while index > 0 && self.is_whitespace(chars[index - 1], true) {
970968
index -= 1;
971969
}
972970

971+
// Convert the char-based index back to a byte offset for string slicing.
972+
let byte_index = self
973+
.output
974+
.char_indices()
975+
.nth(index)
976+
.map_or(self.output.len(), |(i, _)| i);
977+
973978
format!(
974979
"{}{}{}",
975-
&self.output[..index],
980+
&self.output[..byte_index],
976981
text_to_insert,
977-
&self.output[index..]
982+
&self.output[byte_index..]
978983
)
979984
}
980985

981986
fn insert_before_last_whitespace_str(&self, text: &str, text_to_insert: &str) -> String {
982-
let mut index = text.len();
983987
let chars: Vec<char> = text.chars().collect();
988+
let mut index = chars.len();
984989

985990
if index == 0 || !self.is_whitespace(chars[index - 1], true) {
986991
return format!("{text}{text_to_insert}");
@@ -990,7 +995,18 @@ impl JsonRepairParser {
990995
index -= 1;
991996
}
992997

993-
format!("{}{}{}", &text[..index], text_to_insert, &text[index..])
998+
// Convert the char-based index back to a byte offset for string slicing.
999+
let byte_index = text
1000+
.char_indices()
1001+
.nth(index)
1002+
.map_or(text.len(), |(i, _)| i);
1003+
1004+
format!(
1005+
"{}{}{}",
1006+
&text[..byte_index],
1007+
text_to_insert,
1008+
&text[byte_index..]
1009+
)
9941010
}
9951011

9961012
fn remove_at_index(&self, start: usize, count: usize) -> String {

crates/forge_json_repair/tests/error_cases.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,46 @@ fn test_string_with_colon_at_start() {
5151
let expected = serde_json::json!(":");
5252
assert_eq!(actual, expected);
5353
}
54+
55+
#[test]
56+
fn test_multibyte_unicode_missing_end_quote() {
57+
// Triggers index out of bounds in insert_before_last_whitespace_str.
58+
// The text passed in contains a multi-byte UTF-8 character (é = 2 bytes),
59+
// so text.len() (byte count) > chars.len() (char count).
60+
// When the repair path calls insert_before_last_whitespace_str with trailing
61+
// whitespace, it initialises `index` from the byte length and then indexes
62+
// into a Vec<char> at that byte-length position, panicking.
63+
let fixture = r#""café "#;
64+
let actual = json_repair::<serde_json::Value>(fixture).unwrap();
65+
let expected = serde_json::json!("café");
66+
assert_eq!(actual, expected);
67+
}
68+
69+
#[test]
70+
fn test_multibyte_unicode_missing_comma_in_object() {
71+
// Triggers index out of bounds in insert_before_last_whitespace_str (line 459).
72+
// parse_string first collects `"é,"` and hits the inner `"test"`. The
73+
// prev_non_whitespace char is `,`, so it retries with stop_at_index=2
74+
// (the comma position). On retry it collects str_content = `"é` (3 bytes,
75+
// 2 chars) and hits stop_at_index, calling insert_before_last_whitespace_str.
76+
// That function sets index = text.len() = 3 (byte count) and then accesses
77+
// chars[index - 1] = chars[2] on a Vec<char> of length 2 — panic.
78+
let fixture = "\"é,\"test\"";
79+
let actual = json_repair::<serde_json::Value>(fixture).unwrap();
80+
let expected = serde_json::json!(["é", "test"]);
81+
assert_eq!(actual, expected);
82+
}
83+
84+
#[test]
85+
fn test_multibyte_unicode_missing_closing_brace() {
86+
// Triggers index out of bounds in insert_before_last_whitespace_str (line 384).
87+
// A string with a multi-byte character followed by trailing whitespace and
88+
// no closing quote hits the "end of text, missing end quote" repair path.
89+
// str_content = `"🎉 ` (6 bytes, 3 chars). insert_before_last_whitespace_str
90+
// sets index = text.len() = 6 and accesses chars[5] on a Vec<char> of
91+
// length 3 — panic.
92+
let fixture = "\"🎉 ";
93+
let actual = json_repair::<serde_json::Value>(fixture).unwrap();
94+
let expected = serde_json::json!("🎉");
95+
assert_eq!(actual, expected);
96+
}

0 commit comments

Comments
 (0)