Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
GOLANG_CI_LINT_VER:=v1.50.0
COVER_PACKAGES=${shell go list ./... | grep -Ev 'test|generator' | tr '\n' ','}
COVER_PACKAGES=${shell go list ./... | grep -Ev 'test|cmd' | tr '\n' ','}

all: lint test
.PHONY: all
Expand Down Expand Up @@ -34,4 +34,4 @@ bin/golangci-lint:
| sh -s $(GOLANG_CI_LINT_VER)

build:
go build -o ./bin/generate/rex ./cmd/generator/main.go
go build -o ./bin/rex ./cmd/generator/main.go
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ re := rex.New(
- `rex.Common` helpers can be deprecated, but not removed.
- `rex.Group` some methods can be deprecated.
- `rex.Helper` can be changed with breaking changes due to specification complexities.
- The test coverage should be `~100%` without covering [test helpers](internal/test/test.go).
- The test coverage should be `~100%`, excluding [test helpers](internal/test/test.go) and [cmd](cmd).
- Any breaking change will be prevented as much as possible.

_All of the above may not be respected when upgrading the major version._
Expand Down
10 changes: 4 additions & 6 deletions cmd/generator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,19 @@ import (
)

func main() {
args := os.Args

if len(args) != 2 {
if len(os.Args) != 2 {
log.Fatalln("wrong amount of arguments")
}

if len(args[1]) == 0 {
if len(os.Args[1]) == 0 {
log.Fatalln("given regex is empty")
}

regex := args[1]
regex := os.Args[1]

result, err := generator.GenerateCode(regex)
if err != nil {
log.Fatal(err)
log.Fatalln(err)
}

os.Stdout.WriteString(result + "\n")
Expand Down
115 changes: 65 additions & 50 deletions internal/generator/generator.go
Original file line number Diff line number Diff line change
@@ -1,74 +1,89 @@
package generator

import (
"errors"
"fmt"
"io"
"regexp/syntax"
"strings"
)

var errUnmachedBraces = errors.New("braces are unmatched")

// GenerateCode returns rex code for a given regex.
//
// The pattern is parsed with syntax.Parse using the syntax.Perl flags. On a
// parse failure it returns an empty string and the parse error wrapped with
// "failed to parse regexp". On success it returns the text "rex.New(\n",
// followed by whatever writeRegexp emits for the parsed tree at indent 1,
// followed by ")".
//
// NOTE(review): this span looks like a rendered diff — statements of the
// earlier rune-scanning implementation (bracesCounter, afterLastBrace, ...)
// are interleaved with the parser-based one described above; confirm which
// lines belong to the current file before relying on it.
func GenerateCode(regex string) (generatedCode string, err error) {
var (
beforeCurrentBrace string
previousRune rune
result strings.Builder
afterLastBrace = regex
indentations int
currentOpenBraceI int
bracesCounter int
)

result.WriteString("rex.New(\n")

for i, runeValue := range regex {
if runeValue == '(' && previousRune != '\\' {
bracesCounter++

afterLastBrace = ""
beforeCurrentBrace = regex[currentOpenBraceI:i]

addRawExpressionIfNeeded(beforeCurrentBrace, &result, indentations)

currentOpenBraceI = i + 1
indentations++
result.WriteString(strings.Repeat("\t", indentations) + "rex.Group.Define(\n")
}
regExpr, err := syntax.Parse(regex, syntax.Perl)
if err != nil {
return "", fmt.Errorf("failed to parse regexp: %w", err)
}

if runeValue == ')' && previousRune != '\\' {
bracesCounter--
var strBuilder strings.Builder

beforeCurrentBrace = regex[currentOpenBraceI:i]
currentOpenBraceI = i + 1
strBuilder.Grow(len(regex))
_, _ = strBuilder.WriteString("rex.New(\n")
writeRegexp(&strBuilder, regExpr, 1)
_, _ = strBuilder.WriteString(")")

return strBuilder.String(), nil
}

addRawExpressionIfNeeded(beforeCurrentBrace, &result, indentations)
// writeRegexp dispatches on the operator of regExpr: concatenations go to
// writeConcat, capture groups go to writeCapture, and every other node is
// emitted as a single raw expression through writeRaw. The indent value is
// passed through unchanged to whichever writer is chosen.
func writeRegexp(w io.StringWriter, regExpr *syntax.Regexp, indent int) {
	op := regExpr.Op

	if op == syntax.OpConcat {
		writeConcat(w, regExpr, indent)

		return
	}

	if op == syntax.OpCapture {
		writeCapture(w, regExpr, indent)

		return
	}

	// Anything that is neither a concatenation nor a capture is written raw.
	writeRaw(w, []*syntax.Regexp{regExpr}, indent)
}

if i < len(regex)-1 && bracesCounter == 0 {
afterLastBrace = regex[(i + 1):]
}
// writeCapture writes a capture group as a rex.Group.Define( ... ) call.
// The opening and closing lines are indented with indent tabs; the captured
// sub-expression (regExpr.Sub[0]) is written through writeRegexp one level
// deeper. Errors returned by w.WriteString are discarded.
//
// NOTE(review): some statements in this span reference names that are not
// declared here (result, indentations, bracesCounter, ...); they look like
// leftovers of a rendered diff — confirm against the real file.
func writeCapture(w io.StringWriter, regExpr *syntax.Regexp, indent int) {
strIndent := strings.Repeat("\t", indent)

result.WriteString(strings.Repeat("\t", indentations) + "),\n")
indentations--
}
_, _ = w.WriteString(strIndent + "rex.Group.Define(\n")

previousRune = runeValue
// Skip the body when the captured sub-expression is an empty match.
if regExpr.Sub[0].Op != syntax.OpEmptyMatch {
writeRegexp(w, regExpr.Sub[0], indent+1)
}

if bracesCounter != 0 {
err = errUnmachedBraces
// A named group closes with .WithName(...); the name is quoted with %q.
if regExpr.Name != "" {
_, _ = w.WriteString(fmt.Sprintf("%s).WithName(%q),\n", strIndent, regExpr.Name))
} else {
_, _ = w.WriteString(strIndent + "),\n")
}
}

if len(afterLastBrace) != 0 {
result.WriteString("\trex.Common.Raw(`" + afterLastBrace + "`),\n")
}
// writeConcat writes the sub-expressions of a concatenation node.
// Consecutive non-capture subs are collected in rawExprs and emitted together
// through writeRaw; each capture sub first flushes the pending batch and is
// then written through writeRegexp at the same indent. Whatever is still
// pending after the loop is flushed at the end.
//
// NOTE(review): the statements using `result` in this span are not declared
// here and look like leftovers of a rendered diff — confirm against the real
// file.
func writeConcat(w io.StringWriter, regExpr *syntax.Regexp, indent int) {
rawExprs := make([]*syntax.Regexp, 0, len(regExpr.Sub))

result.WriteRune(')')
for _, sub := range regExpr.Sub {
//nolint: exhaustive // All cases captured in default.
switch sub.Op {
case syntax.OpCapture:
// Flush the raw expressions gathered so far before writing the group.
writeRaw(w, rawExprs, indent)

return result.String(), err
// Truncate to length zero so the backing array is reused for the next batch.
rawExprs = rawExprs[:0]

writeRegexp(w, sub, indent)
default:
rawExprs = append(rawExprs, sub)
}
}

writeRaw(w, rawExprs, indent)
}

func addRawExpressionIfNeeded(beforeCurrentBrace string, result *(strings.Builder), indentations int) {
if len(beforeCurrentBrace) != 0 {
(*result).WriteString(strings.Repeat("\t", indentations+1) + "rex.Common.Raw(`" + beforeCurrentBrace + "`),\n")
// writeRaw writes one rex.Common.Raw(...) line, indented with indent tabs,
// whose pattern is the concatenation of the String() forms of regExprs.
// Nothing is written when regExprs is empty. Errors returned by
// w.WriteString are discarded.
//
// The pattern is normally emitted as a back-quoted raw string literal. A raw
// string literal cannot contain a backtick, so when the pattern includes one
// it is emitted as a double-quoted, escaped literal instead; otherwise the
// generated code would not compile.
func writeRaw(w io.StringWriter, regExprs []*syntax.Regexp, indent int) {
	if len(regExprs) == 0 {
		return
	}

	var pattern strings.Builder

	for _, re := range regExprs {
		_, _ = pattern.WriteString(re.String())
	}

	raw := pattern.String()

	literal := "`" + raw + "`"
	if strings.Contains(raw, "`") {
		// %q yields an interpreted string literal with Go escaping applied.
		literal = fmt.Sprintf("%q", raw)
	}

	_, _ = w.WriteString(strings.Repeat("\t", indent) + "rex.Common.Raw(" + literal + "),\n")
}
159 changes: 90 additions & 69 deletions internal/generator/generator_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package generator_test

import (
"log"
"testing"

"github.com/hedhyw/rex/internal/generator"
Expand All @@ -13,86 +12,108 @@ type generatorTestCase struct {
result string
}

// TestGenerateCodeOK runs generator.GenerateCode for every case returned by
// getSuccessGroupTestCases as a parallel subtest and compares the generated
// code with the expected result of the case.
//
// NOTE(review): this span contains two function headers and duplicated
// statements (old and new variants side by side); it looks like a rendered
// diff — confirm against the real file.
func TestGenerateCode(t *testing.T) {
func TestGenerateCodeOK(t *testing.T) {
t.Parallel()

var actual, expected, givenRegex string
testCases := getSuccessGroupTestCases()

var err error

tests := getGroupTestCases()
for _, testCaseNotInParallel := range tests {
for _, testCaseNotInParallel := range testCases {
// Copy the loop variable so the parallel subtest closure keeps its own case.
testCase := testCaseNotInParallel
expected = testCase.result
givenRegex = testCase.regex

t.Run(testCase.name, func(t *testing.T) {
t.Parallel()
actual, err = generator.GenerateCode(givenRegex)

actual, err := generator.GenerateCode(testCase.regex)
if err != nil {
log.Fatal(err)
t.Fatal(err)
}
if actual != expected {
t.Errorf("Expected:\n %s, \nGot:\n %s", expected, actual)

if actual != testCase.result {
t.Errorf("Expected:\n%s\nGot:\n%s", testCase.result, actual)
}
})
}
}

func getGroupTestCases() []generatorTestCase {
return []generatorTestCase{
{
name: "one letter regex", regex: "a",
result: "rex.New(\n" +
" rex.Common.Raw(`a`),\n" +
")",
},
{
name: "simple regex", regex: "a((\\d+)([a-z]+\\()))",
result: "rex.New(\n" +
" rex.Common.Raw(`a`),\n" +
" rex.Group.Define(\n" +
" rex.Group.Define(\n" +
" rex.Common.Raw(`\\d+`),\n" +
" ),\n" +
" rex.Group.Define(\n" +
" rex.Common.Raw(`[a-z]+\\(`),\n" +
" ),\n" +
" ),\n" +
")",
},
{
name: "long regex",
regex: "((\\d+)([a-z]+))a((\\d+)([a-z]+))a" +
"((\\d+)([a-z]+))a",
result: "rex.New(\n" +
" rex.Group.Define(\n" +
" rex.Group.Define(\n" +
" rex.Common.Raw(`\\d+`),\n" +
" ),\n" +
" rex.Group.Define(\n" +
" rex.Common.Raw(`[a-z]+`),\n" +
" ),\n" +
" ),\n" +
" rex.Common.Raw(`a`),\n" +
" rex.Group.Define(\n" +
" rex.Group.Define(\n" +
" rex.Common.Raw(`\\d+`),\n" +
" ),\n" +
" rex.Group.Define(\n" +
" rex.Common.Raw(`[a-z]+`),\n" +
" ),\n" +
" ),\n" +
" rex.Common.Raw(`a`),\n" +
" rex.Group.Define(\n" +
" rex.Group.Define(\n" +
" rex.Common.Raw(`\\d+`),\n" +
" ),\n" +
" rex.Group.Define(\n" +
" rex.Common.Raw(`[a-z]+`),\n" +
" ),\n" +
" ),\n" +
" rex.Common.Raw(`a`),\n" +
")",
},
// TestGenerateCodeInvalidRegexpr checks that generator.GenerateCode reports
// an error, and returns no code, for a pattern that cannot be parsed (an
// unclosed group).
func TestGenerateCodeInvalidRegexpr(t *testing.T) {
	t.Parallel()

	const invalidRegex = "("

	code, err := generator.GenerateCode(invalidRegex)
	if err == nil {
		// err is nil on this path, so printing it would only show "<nil>";
		// report what was actually returned instead.
		t.Fatalf("GenerateCode(%q) = %q with nil error, want a parse error", invalidRegex, code)
	}

	if code != "" {
		t.Errorf("GenerateCode(%q) returned code %q alongside error %v, want empty code", invalidRegex, code, err)
	}
}

// getSuccessGroupTestCases returns the regex inputs that must be converted
// without an error, together with the exact code expected for each of them.
//
// Indentation inside the expected code is spelled with explicit \t escapes,
// one per nesting level, so it matches the tab indentation the generator
// emits and does not depend on how whitespace in this file is rendered.
//
//nolint: funlen // test cases.
func getSuccessGroupTestCases() []generatorTestCase {
	return []generatorTestCase{{
		name:  "one_letter_regex",
		regex: `a`,
		result: "rex.New(\n" +
			"\trex.Common.Raw(`a`),\n" +
			")",
	}, {
		// A named capture group gets a .WithName(...) suffix.
		name:  "named_group",
		regex: `(?P<name>1234)`,
		result: "rex.New(\n" +
			"\trex.Group.Define(\n" +
			"\t\trex.Common.Raw(`1234`),\n" +
			"\t).WithName(\"name\"),\n" +
			")",
	}, {
		// The raw text is whatever the parsed alternation prints back as.
		name:  "alternation",
		regex: `(1|12|123)`,
		result: "rex.New(\n" +
			"\trex.Group.Define(\n" +
			"\t\trex.Common.Raw(`1(?:)|2(?:(?:)|3)`),\n" +
			"\t),\n" +
			")",
	}, {
		name:  "simple_regex",
		regex: `a((\d+)([a-z]+\())`,
		result: "rex.New(\n" +
			"\trex.Common.Raw(`a`),\n" +
			"\trex.Group.Define(\n" +
			"\t\trex.Group.Define(\n" +
			"\t\t\trex.Common.Raw(`[0-9]+`),\n" +
			"\t\t),\n" +
			"\t\trex.Group.Define(\n" +
			"\t\t\trex.Common.Raw(`[a-z]+\\(`),\n" +
			"\t\t),\n" +
			"\t),\n" +
			")",
	}, {
		name: "long_regex",
		regex: "(([0-9]+)([a-z]+))a(([0-9]+)([a-z]+))a" +
			"(([0-9]+)([a-z]+))a",
		result: "rex.New(\n" +
			"\trex.Group.Define(\n" +
			"\t\trex.Group.Define(\n" +
			"\t\t\trex.Common.Raw(`[0-9]+`),\n" +
			"\t\t),\n" +
			"\t\trex.Group.Define(\n" +
			"\t\t\trex.Common.Raw(`[a-z]+`),\n" +
			"\t\t),\n" +
			"\t),\n" +
			"\trex.Common.Raw(`a`),\n" +
			"\trex.Group.Define(\n" +
			"\t\trex.Group.Define(\n" +
			"\t\t\trex.Common.Raw(`[0-9]+`),\n" +
			"\t\t),\n" +
			"\t\trex.Group.Define(\n" +
			"\t\t\trex.Common.Raw(`[a-z]+`),\n" +
			"\t\t),\n" +
			"\t),\n" +
			"\trex.Common.Raw(`a`),\n" +
			"\trex.Group.Define(\n" +
			"\t\trex.Group.Define(\n" +
			"\t\t\trex.Common.Raw(`[0-9]+`),\n" +
			"\t\t),\n" +
			"\t\trex.Group.Define(\n" +
			"\t\t\trex.Common.Raw(`[a-z]+`),\n" +
			"\t\t),\n" +
			"\t),\n" +
			"\trex.Common.Raw(`a`),\n" +
			")",
	}}
}