Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ find_package(pugixml CONFIG REQUIRED)
find_package(ZLIB REQUIRED)
find_package(httplib CONFIG REQUIRED)
find_package(fmt CONFIG REQUIRED)
find_package(Boost REQUIRED COMPONENTS locale)

if(APPLE)
# Required for Physfs
Expand Down Expand Up @@ -522,6 +523,7 @@ if(MSVC)
winmm.lib
pugixml::pugixml
fmt::fmt-header-only
Boost::locale
)
elseif(ANDROID)
target_include_directories(${PROJECT_NAME}
Expand Down Expand Up @@ -570,6 +572,7 @@ elseif(ANDROID)
log
pugixml::pugixml
fmt::fmt-header-only
Boost::locale
)

elseif(WASM)
Expand Down Expand Up @@ -624,6 +627,7 @@ elseif(WASM)
Ogg::ogg
Vorbis::vorbisfile
Vorbis::vorbis
Boost::locale
)


Expand Down Expand Up @@ -707,6 +711,7 @@ else() # Linux
Ogg::ogg
Vorbis::vorbisfile
Vorbis::vorbis
Boost::locale
)

if(CMAKE_BUILD_TYPE STREQUAL "Debug")
Expand Down
103 changes: 49 additions & 54 deletions src/framework/stdext/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include <ranges>
#include <vector>
#include <charconv>
#include <boost/locale/encoding.hpp>
#include <boost/locale/encoding_utf.hpp>

#include "exception.h"
#include "types.h"
Expand Down Expand Up @@ -79,81 +81,74 @@ namespace stdext
}

// Returns true when `src` is well-formed UTF-8.
//
// Every sequence is decoded completely, so the check rejects:
//   - stray continuation bytes and invalid lead bytes (0x80-0xBF, 0xF8-0xFF),
//   - truncated sequences and lead bytes followed by non-continuation bytes,
//   - overlong encodings (e.g. C0 AF),
//   - UTF-16 surrogates (U+D800..U+DFFF) and values above U+10FFFF.
// An empty view is valid.
[[nodiscard]] bool is_valid_utf8(std::string_view src) {
    const std::size_t size = src.size();
    std::size_t pos = 0;

    while (pos < size) {
        const auto lead = static_cast<unsigned char>(src[pos]);
        if (lead < 0x80) { // plain ASCII
            ++pos;
            continue;
        }

        std::size_t extra = 0;      // number of continuation bytes expected
        std::uint32_t codepoint = 0; // payload bits collected so far
        std::uint32_t lowest = 0;    // smallest value this length may encode
        if ((lead & 0xE0) == 0xC0) {
            extra = 1;
            codepoint = lead & 0x1Fu;
            lowest = 0x80;
        } else if ((lead & 0xF0) == 0xE0) {
            extra = 2;
            codepoint = lead & 0x0Fu;
            lowest = 0x800;
        } else if ((lead & 0xF8) == 0xF0) {
            extra = 3;
            codepoint = lead & 0x07u;
            lowest = 0x10000;
        } else {
            return false; // continuation byte in lead position, or 0xF8..0xFF
        }

        if (size - pos <= extra)
            return false; // sequence runs past the end of the input

        for (std::size_t k = 1; k <= extra; ++k) {
            const auto next = static_cast<unsigned char>(src[pos + k]);
            if ((next & 0xC0) != 0x80)
                return false;
            codepoint = (codepoint << 6) | (next & 0x3Fu);
        }

        const bool overlong = codepoint < lowest;
        const bool surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
        if (overlong || surrogate || codepoint > 0x10FFFF)
            return false;

        pos += extra + 1;
    }
    return true;
}

// Converts UTF-8 text to ISO-8859-1 (Latin-1).
//
// Code points U+0000..U+00FF map to the byte of the same value. Everything
// else is skipped rather than reported: code points above U+00FF, malformed
// or truncated sequences, overlong encodings and stray continuation bytes.
// A malformed sequence never consumes the byte that follows it, so
// "\xC3" "A" yields "A".
[[nodiscard]] std::string utf8_to_latin1(std::string_view src) {
    std::string out;
    out.reserve(src.size()); // output is never longer than the input

    const std::size_t size = src.size();
    std::size_t pos = 0;
    while (pos < size) {
        const auto lead = static_cast<unsigned char>(src[pos]);
        if (lead < 0x80) { // ASCII is identical in both encodings
            out.push_back(static_cast<char>(lead));
            ++pos;
            continue;
        }

        std::size_t extra = 0; // continuation bytes announced by the lead byte
        if ((lead & 0xE0) == 0xC0)
            extra = 1;
        else if ((lead & 0xF0) == 0xE0)
            extra = 2;
        else if ((lead & 0xF8) == 0xF0)
            extra = 3;
        else {
            ++pos; // stray continuation byte or invalid lead: drop it
            continue;
        }

        // Consume only the continuation bytes that are really there.
        std::uint32_t codepoint = lead & (0x3Fu >> extra);
        std::size_t taken = 0;
        while (taken < extra && pos + 1 + taken < size) {
            const auto next = static_cast<unsigned char>(src[pos + 1 + taken]);
            if ((next & 0xC0) != 0x80)
                break;
            codepoint = (codepoint << 6) | (next & 0x3Fu);
            ++taken;
        }
        pos += 1 + taken;

        // Only complete, non-overlong two-byte sequences can land in
        // U+0080..U+00FF; longer forms encoding such values are overlong.
        if (extra == 1 && taken == 1 && codepoint >= 0x80 && codepoint <= 0xFF)
            out.push_back(static_cast<char>(codepoint));
    }
    return out;
}

// Converts ISO-8859-1 (Latin-1) text to UTF-8.
//
// Every Latin-1 byte is the Unicode code point of the same value, so bytes
// below 0x80 (control characters included) are copied unchanged and bytes
// 0x80..0xFF become the two-byte sequence for U+0080..U+00FF. The conversion
// cannot fail.
[[nodiscard]] std::string latin1_to_utf8(std::string_view src) {
    std::string out;
    out.reserve(src.size() * 2); // worst case: every byte expands to two

    for (const char ch : src) {
        const auto byte = static_cast<unsigned char>(ch);
        if (byte < 0x80) {
            out.push_back(ch);
        } else {
            out.push_back(static_cast<char>(0xC0 | (byte >> 6)));   // 0xC2 or 0xC3
            out.push_back(static_cast<char>(0x80 | (byte & 0x3F)));
        }
    }
    return out;
}

#ifdef WIN32
#include <winsock2.h>
#include <windows.h>

// Converts UTF-8 text to a wide string.
//
// With a 16-bit wchar_t the result is UTF-16, and code points above U+FFFF
// become surrogate pairs. With a wider wchar_t each element holds one code
// point. Only the bytes inside `src` are read (the view need not be
// NUL-terminated) and there is no limit on the input length.
//
// Returns an empty string when `src` is not well-formed UTF-8: truncated or
// overlong sequences, encoded surrogates, values above U+10FFFF.
std::wstring utf8_to_utf16(const std::string_view src)
{
    constexpr bool wide_is_utf16 = sizeof(wchar_t) == 2;

    std::wstring out;
    out.reserve(src.size()); // never more wide units than input bytes

    const std::size_t size = src.size();
    std::size_t pos = 0;
    while (pos < size) {
        const auto lead = static_cast<unsigned char>(src[pos]);

        std::size_t extra = 0;          // continuation bytes expected
        std::uint32_t codepoint = lead; // payload bits collected so far
        std::uint32_t lowest = 0;       // smallest value this length may encode
        if (lead < 0x80) {
            // ASCII: nothing more to decode.
        } else if ((lead & 0xE0) == 0xC0) {
            extra = 1;
            codepoint = lead & 0x1Fu;
            lowest = 0x80;
        } else if ((lead & 0xF0) == 0xE0) {
            extra = 2;
            codepoint = lead & 0x0Fu;
            lowest = 0x800;
        } else if ((lead & 0xF8) == 0xF0) {
            extra = 3;
            codepoint = lead & 0x07u;
            lowest = 0x10000;
        } else {
            return {};
        }

        if (size - pos <= extra)
            return {}; // sequence runs past the end of the input

        for (std::size_t k = 1; k <= extra; ++k) {
            const auto next = static_cast<unsigned char>(src[pos + k]);
            if ((next & 0xC0) != 0x80)
                return {};
            codepoint = (codepoint << 6) | (next & 0x3Fu);
        }

        const bool surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
        if (codepoint < lowest || surrogate || codepoint > 0x10FFFF)
            return {};
        pos += extra + 1;

        if (wide_is_utf16 && codepoint >= 0x10000) {
            const std::uint32_t offset = codepoint - 0x10000;
            out.push_back(static_cast<wchar_t>(0xD800 + (offset >> 10)));
            out.push_back(static_cast<wchar_t>(0xDC00 + (offset & 0x3FF)));
        } else {
            out.push_back(static_cast<wchar_t>(codepoint));
        }
    }
    return out;
}

// Converts a wide string to UTF-8.
//
// With a 16-bit wchar_t the input is treated as UTF-16 and surrogate pairs
// are combined. With a wider wchar_t each element is taken as one code
// point. Only the elements inside `src` are read (the view need not be
// NUL-terminated) and there is no limit on the input length.
//
// Returns an empty string when the input is malformed: an unpaired
// surrogate, or a value above U+10FFFF.
std::string utf16_to_utf8(const std::wstring_view src)
{
    constexpr bool wide_is_utf16 = sizeof(wchar_t) == 2;

    std::string out;
    out.reserve(src.size());

    const std::size_t size = src.size();
    for (std::size_t pos = 0; pos < size; ++pos) {
        auto codepoint = static_cast<std::uint32_t>(src[pos]);

        if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
            // High surrogate: must be followed by a low surrogate.
            if (!wide_is_utf16 || pos + 1 >= size)
                return {};
            const auto low = static_cast<std::uint32_t>(src[pos + 1]);
            if (low < 0xDC00 || low > 0xDFFF)
                return {};
            codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (low - 0xDC00);
            ++pos;
        } else if ((codepoint >= 0xDC00 && codepoint <= 0xDFFF) || codepoint > 0x10FFFF) {
            return {}; // lone low surrogate or out-of-range value
        }

        if (codepoint < 0x80) {
            out.push_back(static_cast<char>(codepoint));
        } else if (codepoint < 0x800) {
            out.push_back(static_cast<char>(0xC0 | (codepoint >> 6)));
            out.push_back(static_cast<char>(0x80 | (codepoint & 0x3F)));
        } else if (codepoint < 0x10000) {
            out.push_back(static_cast<char>(0xE0 | (codepoint >> 12)));
            out.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | (codepoint & 0x3F)));
        } else {
            out.push_back(static_cast<char>(0xF0 | (codepoint >> 18)));
            out.push_back(static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | (codepoint & 0x3F)));
        }
    }
    return out;
}

// Converts ISO-8859-1 (Latin-1) text to a wide string.
//
// Every Latin-1 byte is the Unicode code point of the same value and fits in
// a single wide unit, so each byte is simply widened. The cast through
// unsigned char keeps bytes 0x80..0xFF from being sign-extended. The
// conversion cannot fail.
std::wstring latin1_to_utf16(const std::string_view src)
{
    std::wstring out;
    out.reserve(src.size());
    for (const char ch : src)
        out.push_back(static_cast<wchar_t>(static_cast<unsigned char>(ch)));
    return out;
}

// Converts a wide string to ISO-8859-1 (Latin-1).
//
// Units with a value of 0xFF or less map to the byte of the same value.
// Every other unit is skipped; that covers code points above U+00FF and both
// halves of any surrogate pair, since surrogates are all above 0xFF.
std::string utf16_to_latin1(const std::wstring_view src)
{
    std::string out;
    out.reserve(src.size()); // output is never longer than the input
    for (const wchar_t unit : src) {
        const auto value = static_cast<std::uint32_t>(unit);
        if (value <= 0xFF)
            out.push_back(static_cast<char>(value));
    }
    return out;
}
#endif

// Lower-cases `str` in place using the C library's ::tolower.
// Each char goes through unsigned char first: calling ::tolower with a
// negative value (any byte >= 0x80 where char is signed) is undefined.
void tolower(std::string& str)
{
    for (char& ch : str)
        ch = static_cast<char>(::tolower(static_cast<unsigned char>(ch)));
}
Expand Down
1 change: 1 addition & 0 deletions vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"stduuid",
"zlib",
"bshoshany-thread-pool",
"boost-locale",
"fmt",
{
"name": "luajit",
Expand Down
Loading