Fix useSTD3ASCIIRules regression, with tests

yandrushchak · web-flow · commit 09cd52e4a87b · 2025-04-17T09:46:23.000+09:00
Closes #69.
diff --git a/index.js b/index.js
@@ -132,7 +132,7 @@ function validateLabel(label, {
     // be a lowercase letter (a-z), a digit (0-9), or a hyphen-minus (U+002D). (Note: This excludes uppercase ASCII
     // A-Z which are mapped in UTS #46 and disallowed in IDNA2008.)"
     if (useSTD3ASCIIRules && codePoint <= 0x7F) {
-      if (!/^[a-z][0-9]-$/u.test(ch)) {
+      if (!/^(?:[a-z]|[0-9]|-)$/u.test(ch)) {
         return false;
       }
     }
diff --git a/test/std3ASCIIRules.js b/test/std3ASCIIRules.js
@@ -0,0 +1,99 @@
+"use strict";
+const { describe, test } = require("node:test");
+const assert = require("assert");
+const tr46 = require("../index.js");
+
+function testToASCIIWithSTD3ASCIIRules(testCase) { // checks tr46.toASCII(testCase.input) against testCase.output
+  const result = tr46.toASCII(testCase.input, {
+    checkBidi: false,
+    checkHyphens: false,
+    checkJoiners: false,
+    useSTD3ASCIIRules: true, // the only option set to true in this call
+    verifyDNSLength: false
+  });
+  // Strict equality: the test cases in this file use null as the expected output for rejected input.
+  assert.strictEqual(result, testCase.output);
+}
+
+describe("ToASCII with useSTD3ASCIIRules = true", () => {
+  const testCases = []; // entries are { input, output, comment }; one test is registered per entry at the end
+
+  // Valid single characters: each of a-z, 0-9 and "-" is expected to come back unchanged
+  const alphabet = "abcdefghijklmnopqrstuvwxyz";
+  const digits = "0123456789";
+  const hyphen = "-";
+  const allowedChars = alphabet + digits + hyphen;
+  for (const char of allowedChars) {
+    testCases.push({
+      input: char,
+      output: char,
+      comment: "STD3 range"
+    });
+  }
+
+  // Negative cases: every remaining ASCII character (code units 0-127) is expected to yield null
+  for (let i = 0; i < 128; i++) {
+    const char = String.fromCharCode(i);
+    if (allowedChars.includes(char)) {
+      continue;
+    }
+
+    // Upper-case letters are converted to lower case, so we exclude them from the negative test cases
+    if (allowedChars.includes(char.toLowerCase())) {
+      continue;
+    }
+
+    // Dot is excluded from the negative test cases, since it is allowed in domain names
+    if (char === ".") {
+      continue;
+    }
+
+    testCases.push({
+      input: char,
+      output: null,
+      comment: "Outside of STD3 range"
+    });
+  }
+
+  // Add a non-ASCII character, since it should be converted to Punycode
+  testCases.push({
+    input: "é",
+    output: "xn--9ca",
+    comment: "Unicode"
+  });
+
+  // Additional multi-character labels, mixing valid and invalid characters
+  testCases.push({
+    input: "inv@alid",
+    output: null,
+    comment: "Invalid label"
+  });
+  testCases.push({
+    input: "valid",
+    output: "valid",
+    comment: "Valid label"
+  });
+  testCases.push({
+    input: "unicodé",
+    output: "xn--unicod-gva",
+    comment: "Valid uncode label"
+  });
+  testCases.push({
+    input: "uni!codé",
+    output: null,
+    comment: "Invalid uncode label"
+  });
+
+  for (const testCase of testCases) { // register one test per entry, named "<input> (<comment>)"
+    let description = testCase.input;
+
+    if (testCase.comment) {
+      description += ` (${testCase.comment})`;
+    }
+
+    test(description, () => {
+      testToASCIIWithSTD3ASCIIRules(testCase);
+    });
+  }
+});
+

Original file line number	Diff line number	Diff line change
`@@ -132,7 +132,7 @@ function validateLabel(label, {`
`132`	`132`	`// be a lowercase letter (a-z), a digit (0-9), or a hyphen-minus (U+002D). (Note: This excludes uppercase ASCII`
`133`	`133`	`// A-Z which are mapped in UTS #46 and disallowed in IDNA2008.)"`
`134`	`134`	`if (useSTD3ASCIIRules && codePoint <= 0x7F) {`
`135`		`- if (!/^[a-z][0-9]-$/u.test(ch)) {`
	`135`	`+ if (!/^(?:[a-z]\|[0-9]\|-)$/u.test(ch)) {`
`136`	`136`	`return false;`
`137`	`137`	`}`
`138`	`138`	`}`