Skip to content

Commit a3e84cc

Browse files
committed
Merge pull request godotengine#116542 from Ivorforce/char-range-cpp
Move char ranges from `char_range.inc` to `char_range.cpp` (non-constexpr)
2 parents 9a19d32 + 9dfc2c9 commit a3e84cc

4 files changed

Lines changed: 50 additions & 43 deletions

File tree

core/string/char_range.inc → core/string/char_range.cpp

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**************************************************************************/
2-
/* char_range.inc */
2+
/* char_range.cpp */
33
/**************************************************************************/
44
/* This file is part of: */
55
/* GODOT ENGINE */
@@ -30,19 +30,13 @@
3030

3131
// This file was generated using the `misc/scripts/char_range_fetch.py` script.
3232

33-
#pragma once
34-
35-
#include "core/typedefs.h"
33+
#include "core/string/char_utils.h"
3634

3735
// Unicode Derived Core Properties
3836
// Source: https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt
3937

40-
struct CharRange {
41-
char32_t start;
42-
char32_t end;
43-
};
44-
45-
constexpr inline CharRange xid_start[] = {
38+
const int xid_start_size = 692;
39+
const CharRange xid_start[xid_start_size] = {
4640
{ 0x41, 0x5a },
4741
{ 0x5f, 0x5f },
4842
{ 0x61, 0x7a },
@@ -737,7 +731,8 @@ constexpr inline CharRange xid_start[] = {
737731
{ 0x31350, 0x33479 },
738732
};
739733

740-
constexpr inline CharRange xid_continue[] = {
734+
const int xid_continue_size = 806;
735+
const CharRange xid_continue[xid_continue_size] = {
741736
{ 0x30, 0x39 },
742737
{ 0x41, 0x5a },
743738
{ 0x5f, 0x5f },
@@ -1546,7 +1541,8 @@ constexpr inline CharRange xid_continue[] = {
15461541
{ 0xe0100, 0xe01ef },
15471542
};
15481543

1549-
constexpr inline CharRange uppercase_letter[] = {
1544+
const int uppercase_letter_size = 660;
1545+
const CharRange uppercase_letter[uppercase_letter_size] = {
15501546
{ 0x41, 0x5a },
15511547
{ 0xc0, 0xd6 },
15521548
{ 0xd8, 0xde },
@@ -2209,7 +2205,8 @@ constexpr inline CharRange uppercase_letter[] = {
22092205
{ 0x1f170, 0x1f189 },
22102206
};
22112207

2212-
constexpr inline CharRange lowercase_letter[] = {
2208+
const int lowercase_letter_size = 677;
2209+
const CharRange lowercase_letter[lowercase_letter_size] = {
22132210
{ 0x61, 0x7a },
22142211
{ 0xaa, 0xaa },
22152212
{ 0xb5, 0xb5 },
@@ -2889,7 +2886,8 @@ constexpr inline CharRange lowercase_letter[] = {
28892886
{ 0x1e922, 0x1e943 },
28902887
};
28912888

2892-
constexpr inline CharRange unicode_letter[] = {
2889+
const int unicode_letter_size = 761;
2890+
const CharRange unicode_letter[unicode_letter_size] = {
28932891
{ 0x41, 0x5a },
28942892
{ 0x61, 0x7a },
28952893
{ 0xaa, 0xaa },

core/string/char_utils.h

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,28 @@
3232

3333
#include "core/typedefs.h"
3434

35-
#include "char_range.inc"
36-
3735
static constexpr char hex_char_table_upper[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
3836
static constexpr char hex_char_table_lower[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
3937

40-
#define BSEARCH_CHAR_RANGE(m_array) \
38+
struct CharRange {
39+
char32_t start;
40+
char32_t end;
41+
};
42+
43+
extern const CharRange xid_start[];
44+
extern const int xid_start_size;
45+
extern const CharRange xid_continue[];
46+
extern const int xid_continue_size;
47+
extern const CharRange uppercase_letter[];
48+
extern const int uppercase_letter_size;
49+
extern const CharRange lowercase_letter[];
50+
extern const int lowercase_letter_size;
51+
extern const CharRange unicode_letter[];
52+
extern const int unicode_letter_size;
53+
54+
#define BSEARCH_CHAR_RANGE(m_array, m_size) \
4155
int low = 0; \
42-
int high = std_size(m_array) - 1; \
56+
int high = m_size - 1; \
4357
int middle = (low + high) / 2; \
4458
\
4559
while (low <= high) { \
@@ -56,24 +70,24 @@ static constexpr char hex_char_table_lower[16] = { '0', '1', '2', '3', '4', '5',
5670
\
5771
return false
5872

59-
constexpr bool is_unicode_identifier_start(char32_t p_char) {
60-
BSEARCH_CHAR_RANGE(xid_start);
73+
inline bool is_unicode_identifier_start(char32_t p_char) {
74+
BSEARCH_CHAR_RANGE(xid_start, xid_start_size);
6175
}
6276

63-
constexpr bool is_unicode_identifier_continue(char32_t p_char) {
64-
BSEARCH_CHAR_RANGE(xid_continue);
77+
inline bool is_unicode_identifier_continue(char32_t p_char) {
78+
BSEARCH_CHAR_RANGE(xid_continue, xid_continue_size);
6579
}
6680

67-
constexpr bool is_unicode_upper_case(char32_t p_char) {
68-
BSEARCH_CHAR_RANGE(uppercase_letter);
81+
inline bool is_unicode_upper_case(char32_t p_char) {
82+
BSEARCH_CHAR_RANGE(uppercase_letter, uppercase_letter_size);
6983
}
7084

71-
constexpr bool is_unicode_lower_case(char32_t p_char) {
72-
BSEARCH_CHAR_RANGE(lowercase_letter);
85+
inline bool is_unicode_lower_case(char32_t p_char) {
86+
BSEARCH_CHAR_RANGE(lowercase_letter, lowercase_letter_size);
7387
}
7488

75-
constexpr bool is_unicode_letter(char32_t p_char) {
76-
BSEARCH_CHAR_RANGE(unicode_letter);
89+
inline bool is_unicode_letter(char32_t p_char) {
90+
BSEARCH_CHAR_RANGE(unicode_letter, unicode_letter_size);
7791
}
7892

7993
#undef BSEARCH_CHAR_RANGE

misc/scripts/char_range_fetch.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python3
22

33
# Script used to dump char ranges for specific properties from
4-
# the Unicode Character Database to the `char_range.inc` file.
4+
# the Unicode Character Database to the `char_range.cpp` file.
55
# NOTE: This script is deliberately not integrated into the build system;
66
# you should run it manually whenever you want to update the data.
77
from __future__ import annotations
@@ -89,7 +89,8 @@ def parse_unicode_data() -> None:
8989

9090

9191
def make_array(array_name: str, range_list: list[tuple[int, int]]) -> str:
92-
result: str = f"\n\nconstexpr inline CharRange {array_name}[] = {{\n"
92+
result: str = f"\n\nconst int {array_name}_size = {len(range_list)};\n"
93+
result += f"const CharRange {array_name}[{array_name}_size] = {{\n"
9394

9495
for start, end in range_list:
9596
result += f"\t{{ 0x{start:x}, 0x{end:x} }},\n"
@@ -102,22 +103,16 @@ def make_array(array_name: str, range_list: list[tuple[int, int]]) -> str:
102103
def generate_char_range_inc() -> None:
103104
parse_unicode_data()
104105

105-
source: str = generate_copyright_header("char_range.inc")
106+
source: str = generate_copyright_header("char_range.cpp")
106107

107108
source += f"""
108109
// This file was generated using the `misc/scripts/char_range_fetch.py` script.
109110
110-
#pragma once
111-
112-
#include "core/typedefs.h"
111+
#include "core/string/char_utils.h"
113112
114113
// Unicode Derived Core Properties
115-
// Source: {URL}
116-
117-
struct CharRange {{
118-
\tchar32_t start;
119-
\tchar32_t end;
120-
}};"""
114+
// Source: {URL}\
115+
"""
121116

122117
source += make_array("xid_start", xid_start)
123118
source += make_array("xid_continue", xid_continue)
@@ -127,11 +122,11 @@ def generate_char_range_inc() -> None:
127122

128123
source += "\n"
129124

130-
char_range_path: str = os.path.join(os.path.dirname(__file__), "../../core/string/char_range.inc")
125+
char_range_path: str = os.path.join(os.path.dirname(__file__), "../../core/string/char_range.cpp")
131126
with open(char_range_path, "w", newline="\n") as f:
132127
f.write(source)
133128

134-
print("`char_range.inc` generated successfully.")
129+
print("`char_range.cpp` generated successfully.")
135130

136131

137132
if __name__ == "__main__":

misc/scripts/unicode_ranges_fetch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python3
22

33
# Script used to dump char ranges from
4-
# the Unicode Character Database to the `char_range.inc` file.
4+
# the Unicode Character Database to the `unicode_ranges.inc` file.
55
# NOTE: This script is deliberately not integrated into the build system;
66
# you should run it manually whenever you want to update the data.
77
from __future__ import annotations

0 commit comments

Comments
 (0)