Skip to content

Commit d2177c3

Browse files
Danny Su and facebook-github-bot
authored and committed
btoa implementation (#1255)
Summary: Pull Request resolved: #1255. Implement the [btoa](https://html.spec.whatwg.org/multipage/webappapis.html#atob) utility function for encoding a string to base64. This implementation doesn't follow the HTML spec 100%: in error cases, the code doesn't throw a DOMException. Existing alternatives people use with Hermes simply throw Error, which is what this code throws as well. Reviewed By: avp. Differential Revision: D51876325. fbshipit-source-id: 085aa069a761d093fd9e504c0478ee18a36e8d34
1 parent 2db2d91 commit d2177c3

File tree

10 files changed

+293
-0
lines changed

10 files changed

+293
-0
lines changed
include/hermes/VM/JSLib/Base64Util.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
#ifndef HERMES_VM_JSLIB_BASE64UTIL_H
9+
#define HERMES_VM_JSLIB_BASE64UTIL_H
10+
11+
#include "hermes/VM/Runtime.h"
12+
13+
namespace hermes {
14+
namespace vm {
15+
16+
/// Encode \p str to base64 characters and store the output in \p builder.
/// Each element of \p str is treated as a single byte, so every element must
/// be no greater than 0xFF. The output is padded with '=' when the input
/// length is not a multiple of 3.
17+
/// \return true if successful, false otherwise
/// (false is returned when an element of \p str is greater than 0xFF; in that
/// case \p builder may already hold a partially encoded prefix).
18+
template <typename T>
19+
bool base64Encode(llvh::ArrayRef<T> str, StringBuilder &builder);
20+
21+
} // namespace vm
22+
} // namespace hermes
23+
24+
#endif // HERMES_VM_JSLIB_BASE64UTIL_H

include/hermes/VM/NativeFunctions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ NATIVE_FUNCTION(bigintPrototypeValueOf)
6767
NATIVE_FUNCTION(booleanConstructor)
6868
NATIVE_FUNCTION(booleanPrototypeToString)
6969
NATIVE_FUNCTION(booleanPrototypeValueOf)
70+
NATIVE_FUNCTION(btoa)
7071
NATIVE_FUNCTION(callSitePrototypeGetFunctionName)
7172
NATIVE_FUNCTION(callSitePrototypeGetFileName)
7273
NATIVE_FUNCTION(callSitePrototypeGetLineNumber)

include/hermes/VM/PredefinedStrings.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ STR(isNaN, "isNaN")
5252
STR(isFinite, "isFinite")
5353
STR(escape, "escape")
5454
STR(unescape, "unescape")
55+
STR(btoa, "btoa")
5556
STR(decodeURI, "decodeURI")
5657
STR(decodeURIComponent, "decodeURIComponent")
5758
STR(encodeURI, "encodeURI")

lib/VM/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ set(source_files
7979
JSLib/ArrayBuffer.cpp
8080
JSLib/ArrayIterator.cpp
8181
JSLib/AsyncFunction.cpp
82+
JSLib/Base64.cpp
83+
JSLib/Base64Util.cpp
8284
JSLib/BigInt.cpp
8385
JSLib/CallSite.cpp
8486
JSLib/DataView.cpp

lib/VM/JSLib/Base64.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
#include "JSLibInternal.h"
9+
10+
#include "hermes/ADT/SafeInt.h"
11+
#include "hermes/VM/JSLib/Base64Util.h"
12+
#include "hermes/VM/StringBuilder.h"
13+
14+
namespace hermes {
15+
namespace vm {
16+
17+
/// Create a Base64-encoded ASCII string from an input string expected to have
18+
/// each character in the range of U+0000 to U+00FF. Error is thrown if any
19+
/// character is outside of the expected range.
20+
CallResult<HermesValue> btoa(void *, Runtime &runtime, NativeArgs args) {
21+
GCScope gcScope{runtime};
22+
auto res = toString_RJS(runtime, args.getArgHandle(0));
23+
if (LLVM_UNLIKELY(res == ExecutionStatus::EXCEPTION)) {
24+
return ExecutionStatus::EXCEPTION;
25+
}
26+
27+
auto string = runtime.makeHandle(std::move(*res));
28+
29+
// Figure out the expected encoded length
30+
uint64_t expectedLength = ((string->getStringLength() + 2) / 3) * 4;
31+
bool overflow = expectedLength > std::numeric_limits<uint32_t>::max();
32+
if (overflow) {
33+
return runtime.raiseError("String length to convert to base64 is too long");
34+
}
35+
SafeUInt32 outputLength{static_cast<uint32_t>(expectedLength)};
36+
CallResult<StringBuilder> builder =
37+
StringBuilder::createStringBuilder(runtime, outputLength, true);
38+
if (LLVM_UNLIKELY(builder == ExecutionStatus::EXCEPTION)) {
39+
return ExecutionStatus::EXCEPTION;
40+
}
41+
42+
bool success = string->isASCII()
43+
? base64Encode(string->getStringRef<char>(), *builder)
44+
: base64Encode(string->getStringRef<char16_t>(), *builder);
45+
if (!success) {
46+
return runtime.raiseError(
47+
"Found invalid character when converting to base64");
48+
}
49+
50+
return builder->getStringPrimitive().getHermesValue();
51+
}
52+
53+
} // namespace vm
54+
} // namespace hermes

lib/VM/JSLib/Base64Util.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
#include "hermes/VM/JSLib/Base64Util.h"
9+
10+
#include "hermes/VM/StringBuilder.h"
11+
12+
namespace hermes {
13+
namespace vm {
14+
15+
namespace {
16+
/// Lookup table mapping a 6-bit value (0..63) to its base64 output character:
/// 'A'-'Z', 'a'-'z', '0'-'9', then '+' and '/'.
constexpr const std::array<char, 64> Base64Chars = {
17+
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
18+
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
19+
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
20+
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
21+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
22+
} // namespace
23+
24+
template <typename T>
25+
bool base64Encode(llvh::ArrayRef<T> str, StringBuilder &builder) {
26+
uint64_t strLength = str.size();
27+
28+
// An implementation of the algorithm at
29+
// https://www.rfc-editor.org/rfc/rfc4648#section-4
30+
// Adapted from folly's base64Encode implementation.
31+
uint32_t i = 0;
32+
while ((strLength - i) >= 3) {
33+
if (str[i] > 0xFF || str[i + 1] > 0xFF || str[i + 2] > 0xFF) {
34+
return false;
35+
}
36+
37+
uint8_t aaab = str[i];
38+
uint8_t bbcc = str[i + 1];
39+
uint8_t cddd = str[i + 2];
40+
41+
uint8_t aaa = aaab >> 2;
42+
uint8_t bbb = ((aaab << 4) | (bbcc >> 4)) & 0x3f;
43+
uint8_t ccc = ((bbcc << 2) | (cddd >> 6)) & 0x3f;
44+
uint8_t ddd = cddd & 0x3f;
45+
46+
builder.appendCharacter(Base64Chars[aaa]);
47+
builder.appendCharacter(Base64Chars[bbb]);
48+
builder.appendCharacter(Base64Chars[ccc]);
49+
builder.appendCharacter(Base64Chars[ddd]);
50+
51+
i += 3;
52+
}
53+
54+
if (i == strLength) {
55+
return true;
56+
}
57+
58+
if (str[i] > 0xFF) {
59+
return false;
60+
}
61+
uint8_t aaab = str[i];
62+
uint8_t aaa = aaab >> 2;
63+
builder.appendCharacter(Base64Chars[aaa]);
64+
65+
// Duplicating some tail handling to try to do less jumps.
66+
if (strLength - i == 1) {
67+
uint8_t b00 = aaab << 4 & 0x3f;
68+
builder.appendCharacter(Base64Chars[b00]);
69+
builder.appendCharacter('=');
70+
builder.appendCharacter('=');
71+
return true;
72+
}
73+
74+
// When there are 2 characters left.
75+
assert(strLength - i == 2);
76+
if (str[i + 1] > 0xFF) {
77+
return false;
78+
}
79+
uint8_t bbcc = str[i + 1];
80+
uint8_t bbb = ((aaab << 4) | (bbcc >> 4)) & 0x3f;
81+
uint8_t cc0 = (bbcc << 2) & 0x3f;
82+
builder.appendCharacter(Base64Chars[bbb]);
83+
builder.appendCharacter(Base64Chars[cc0]);
84+
builder.appendCharacter('=');
85+
return true;
86+
}
87+
88+
// Explicit instantiations for the two element types passed by btoa() (char
// and char16_t). The header only declares the template, so these are needed
// for other translation units to link against it.
template bool base64Encode(llvh::ArrayRef<char> str, StringBuilder &builder);
89+
template bool base64Encode(
90+
llvh::ArrayRef<char16_t> str,
91+
StringBuilder &builder);
92+
93+
} // namespace vm
94+
} // namespace hermes

lib/VM/JSLib/GlobalObject.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,9 @@ void initGlobalObject(Runtime &runtime, const JSLibFlags &jsLibFlags) {
740740
// Define the 'unescape' function.
741741
defineGlobalFunc(Predefined::getSymbolID(Predefined::unescape), unescape, 1);
742742

743+
// Define the 'btoa' function.
744+
defineGlobalFunc(Predefined::getSymbolID(Predefined::btoa), btoa, 1);
745+
743746
// Define the 'decodeURI' function.
744747
defineGlobalFunc(
745748
Predefined::getSymbolID(Predefined::decodeURI), decodeURI, 1);

test/hermes/btoa.js

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/**
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
// RUN: LC_ALL=en_US.UTF-8 %hermes -O -target=HBC %s | %FileCheck --match-full-lines %s
9+
"use strict";
10+
11+
// Label line that anchors the FileCheck directives below.
print('btoa');
12+
// CHECK-LABEL: btoa
13+
// Three input characters encode to four output characters with no padding.
print(btoa('123'));
14+
// CHECK-NEXT: MTIz
15+
// U+03A9 is above U+00FF, so btoa is expected to throw; the message of the
// thrown error is printed and checked.
try {
16+
btoa('\u03A9');
17+
} catch (e) {
18+
print(e.message);
19+
// CHECK-NEXT: Found invalid character when converting to base64
20+
}
unittests/VMRuntime/Base64UtilTest.cpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
#include "TestHelpers.h"
9+
10+
#include "hermes/ADT/SafeInt.h"
11+
#include "hermes/VM/JSLib/Base64Util.h"
12+
#include "hermes/VM/StringBuilder.h"
13+
14+
using namespace hermes::vm;
15+
16+
namespace {
17+
18+
using Base64UtilTest = RuntimeTestFixture;
19+
20+
/// Encode \p original with base64Encode() into a freshly created
/// StringBuilder sized for the expected output, and expect the call to
/// succeed and the result to equal the ASCII string \p expected.
/// \p original is expanded more than once (for size() and for the encode
/// call), and a variable named `runtime` must be in scope at the use site.
#define EXPECT_ENCODED(original, expected) \
21+
{ \
22+
uint64_t expectedLength = ((original.size() + 2) / 3) * 4; \
23+
EXPECT_LE(expectedLength, std::numeric_limits<uint32_t>::max()); \
24+
hermes::SafeUInt32 outputLength{static_cast<uint32_t>(expectedLength)}; \
25+
CallResult<StringBuilder> builder = \
26+
StringBuilder::createStringBuilder(runtime, outputLength, true); \
27+
EXPECT_NE(builder, ExecutionStatus::EXCEPTION); \
28+
\
29+
bool success = base64Encode(original, *builder); \
30+
EXPECT_TRUE(success); \
31+
EXPECT_EQ( \
32+
builder->getStringPrimitive()->getStringRef<char>(), \
33+
createASCIIRef(expected)); \
34+
}
35+
36+
/// Run EXPECT_ENCODED twice for the same text: once on \p original as an
/// ASCIIRef, and once on a char16_t copy of the same characters, so both
/// base64Encode instantiations are checked against \p expected.
/// The char16_t copy gets a trailing '\0' because it is handed to
/// createUTF16Ref() as a bare pointer.
#define EXPECT_ENCODED_ASCII_AND_UTF16(original, expected) \
37+
{ \
38+
ASCIIRef asciiRef = createASCIIRef(original); \
39+
EXPECT_ENCODED(asciiRef, expected); \
40+
\
41+
std::vector<char16_t> converted(asciiRef.size() + 1); \
42+
uint32_t i = 0; \
43+
for (i = 0; i < asciiRef.size(); i++) { \
44+
converted[i] = asciiRef[i]; \
45+
} \
46+
converted[i] = '\0'; \
47+
EXPECT_ENCODED(createUTF16Ref(converted.data()), expected); \
48+
}
49+
50+
// Empty input must encode to empty output for both element types.
TEST_F(Base64UtilTest, EdgeCases) {
51+
EXPECT_ENCODED_ASCII_AND_UTF16("", "");
52+
}
53+
54+
// Inputs whose length is not a multiple of 3, so the output ends in one or
// two '=' padding characters.
TEST_F(Base64UtilTest, EncodePaddingRequired) {
55+
EXPECT_ENCODED_ASCII_AND_UTF16("a", "YQ==");
56+
EXPECT_ENCODED_ASCII_AND_UTF16("ab", "YWI=");
57+
EXPECT_ENCODED_ASCII_AND_UTF16("abcd", "YWJjZA==");
58+
EXPECT_ENCODED_ASCII_AND_UTF16("abcde", "YWJjZGU=");
59+
EXPECT_ENCODED_ASCII_AND_UTF16(
60+
"less is more than more", "bGVzcyBpcyBtb3JlIHRoYW4gbW9yZQ==");
61+
EXPECT_ENCODED_ASCII_AND_UTF16("<>?su", "PD4/c3U=");
62+
63+
// Arrays are used here so the input can contain the values 1 and 0
// (including an embedded zero element) with an explicit length.
EXPECT_ENCODED(UTF16Ref(std::array<char16_t, 1>{1}), "AQ==");
64+
EXPECT_ENCODED(ASCIIRef(std::array<char, 1>{1}), "AQ==");
65+
EXPECT_ENCODED(UTF16Ref(std::array<char16_t, 2>{1, 0}), "AQA=");
66+
EXPECT_ENCODED(ASCIIRef(std::array<char, 2>{1, 0}), "AQA=");
67+
}
68+
69+
// Inputs whose length is a multiple of 3, so the output has no '=' padding.
TEST_F(Base64UtilTest, EncodePaddingNotNeeded) {
70+
EXPECT_ENCODED_ASCII_AND_UTF16("abc", "YWJj");
71+
EXPECT_ENCODED_ASCII_AND_UTF16("abcdef", "YWJjZGVm");
72+
73+
// Inputs made of zero-valued elements, given as arrays with explicit length.
EXPECT_ENCODED(UTF16Ref(std::array<char16_t, 3>{0, 0, 0}), "AAAA");
74+
EXPECT_ENCODED(ASCIIRef(std::array<char, 3>{0, 0, 0}), "AAAA");
75+
EXPECT_ENCODED(UTF16Ref(std::array<char16_t, 3>{1, 0, 0}), "AQAA");
76+
EXPECT_ENCODED(ASCIIRef(std::array<char, 3>{1, 0, 0}), "AQAA");
77+
}
78+
79+
// Inputs containing U+1F600, which is stored in UTF-16 as two surrogate code
// units that are both above 0xFF, must be rejected wherever they appear.
TEST_F(Base64UtilTest, EncodeInvalid) {
80+
// Just a long enough buffer. All calls in this function are expected to fail.
81+
hermes::SafeUInt32 outputLength{20};
82+
CallResult<StringBuilder> builder =
83+
StringBuilder::createStringBuilder(runtime, outputLength, true);
84+
EXPECT_NE(builder, ExecutionStatus::EXCEPTION);
85+
// The same builder is reused for every call below; only the return value of
// base64Encode is checked.
EXPECT_FALSE(base64Encode(createUTF16Ref(u"\U0001F600"), *builder));
86+
EXPECT_FALSE(base64Encode(createUTF16Ref(u"a\U0001F600"), *builder));
87+
EXPECT_FALSE(base64Encode(createUTF16Ref(u"ab\U0001F600"), *builder));
88+
EXPECT_FALSE(base64Encode(createUTF16Ref(u"abc\U0001F600"), *builder));
89+
EXPECT_FALSE(base64Encode(createUTF16Ref(u"\U0001F600xyz"), *builder));
90+
EXPECT_FALSE(base64Encode(createUTF16Ref(u"abc\U0001F600xyz"), *builder));
91+
}
92+
93+
} // end anonymous namespace

unittests/VMRuntime/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ set(RTSources
1414
AlignedStorageTest.cpp
1515
ArrayTest.cpp
1616
ArrayStorageTest.cpp
17+
Base64UtilTest.cpp
1718
BigIntPrimitiveTest.cpp
1819
BytecodeProviderTest.cpp
1920
CallResultTest.cpp

0 commit comments

Comments
 (0)