@@ -196,9 +196,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, ut
196196 } else return 0 ;
197197}
198198
199- /* internal "unsafe" version that does not check whether uc is in range */
200- static utf8proc_ssize_t unsafe_encode_char (utf8proc_int32_t uc , utf8proc_uint8_t * dst ) {
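199+ /* internal encoder for UTF8PROC_CHARBOUND output: the sentinel value -1 (grapheme break) is written as a single 0xFF byte; any other negative value writes nothing and returns 0 */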
200+ static utf8proc_ssize_t charbound_encode_char (utf8proc_int32_t uc , utf8proc_uint8_t * dst ) {
201201 if (uc < 0x00 ) {
202+ if (uc == -1 ) { /* internal value used for grapheme breaks */
203+ dst [0 ] = (utf8proc_uint8_t )0xFF ;
204+ return 1 ;
205+ }
202206 return 0 ;
203207 } else if (uc < 0x80 ) {
204208 dst [0 ] = (utf8proc_uint8_t )uc ;
@@ -207,12 +211,6 @@ static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t
207211 dst [0 ] = (utf8proc_uint8_t )(0xC0 + (uc >> 6 ));
208212 dst [1 ] = (utf8proc_uint8_t )(0x80 + (uc & 0x3F ));
209213 return 2 ;
210- } else if (uc == 0xFFFF ) {
211- dst [0 ] = (utf8proc_uint8_t )0xFF ;
212- return 1 ;
213- } else if (uc == 0xFFFE ) {
214- dst [0 ] = (utf8proc_uint8_t )0xFE ;
215- return 1 ;
216214 } else if (uc < 0x10000 ) {
217215 dst [0 ] = (utf8proc_uint8_t )(0xE0 + (uc >> 12 ));
218216 dst [1 ] = (utf8proc_uint8_t )(0x80 + ((uc >> 6 ) & 0x3F ));
@@ -480,7 +478,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
480478 int tbc = property -> boundclass ;
481479 boundary = grapheme_break_extended (* last_boundclass , tbc , last_boundclass );
482480 if (boundary ) {
483- if (bufsize >= 1 ) dst [0 ] = 0xFFFF ;
481+ if (bufsize >= 1 ) dst [0 ] = -1 ; /* sentinel value for grapheme break */
484482 if (bufsize >= 2 ) dst [1 ] = uc ;
485483 return 2 ;
486484 }
@@ -686,7 +684,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
686684 if (options & UTF8PROC_CHARBOUND ) {
687685 for (rpos = 0 ; rpos < length ; rpos ++ ) {
688686 uc = buffer [rpos ];
689- wpos += unsafe_encode_char (uc , ((utf8proc_uint8_t * )buffer ) + wpos );
687+ wpos += charbound_encode_char (uc , ((utf8proc_uint8_t * )buffer ) + wpos );
690688 }
691689 } else {
692690 for (rpos = 0 ; rpos < length ; rpos ++ ) {
0 commit comments