From 5318e38ba90807e9bd57579e4707ed9634af90b6 Mon Sep 17 00:00:00 2001 From: Armin Date: Thu, 28 Jun 2018 10:27:50 +0200 Subject: [PATCH 1/3] Be more lenient with fieldnames in Grok Processor (#21745) --- .../src/main/java/org/elasticsearch/grok/Grok.java | 6 +++--- .../test/java/org/elasticsearch/grok/GrokTests.java | 13 +++++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java index 02388d838bc2a..6c68710c6d8bd 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/Grok.java @@ -52,7 +52,7 @@ public final class Grok { "%\\{" + "(?" + "(?[A-z0-9]+)" + - "(?::(?[A-z0-9_:.-]+))?" + + "(?::(?[[:alnum:]@\\[\\]_:.-]+))?" + ")" + "(?:=(?" + "(?:" + @@ -81,11 +81,11 @@ public final class Grok { public Grok(Map patternBank, String grokPattern) { this(patternBank, grokPattern, true, ThreadWatchdog.noop()); } - + public Grok(Map patternBank, String grokPattern, ThreadWatchdog threadWatchdog) { this(patternBank, grokPattern, true, threadWatchdog); } - + Grok(Map patternBank, String grokPattern, boolean namedCaptures) { this(patternBank, grokPattern, namedCaptures, ThreadWatchdog.noop()); } diff --git a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java index 8d79aa290ebff..85289a404395a 100644 --- a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java +++ b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java @@ -412,10 +412,10 @@ public void testMultipleNamedCapturesWithSameName() { expected.put("num", "1"); assertThat(grok.captures("12"), equalTo(expected)); } - + public void testExponentialExpressions() { AtomicBoolean run = new AtomicBoolean(true); // to avoid a lingering thread when test has completed - + String grokPattern = "Bonsuche mit folgender Anfrage: 
Belegart->\\[%{WORD:param2},(?(\\s*%{NOTSPACE})*)\\] " + "Zustand->ABGESCHLOSSEN Kassennummer->%{WORD:param9} Bonnummer->%{WORD:param10} Datum->%{DATESTAMP_OTHER:param11}"; String logLine = "Bonsuche mit folgender Anfrage: Belegart->[EINGESCHRAENKTER_VERKAUF, VERKAUF, NACHERFASSUNG] " + @@ -439,4 +439,13 @@ public void testExponentialExpressions() { run.set(false); assertThat(e.getMessage(), equalTo("grok pattern matching was interrupted after [200] ms")); } + + public void testUnicodeFieldnames() { + for(String fieldName : Arrays.asList("@metadata", "@metädata", "@metädat[a]")) { + String line = "foo"; + Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}"); + Map matches = grok.captures(line); + assertEquals("foo", matches.get(fieldName)); + } + } } From 125933b5756cf8a58c2baacf8818dbd94f86d359 Mon Sep 17 00:00:00 2001 From: Armin Date: Thu, 28 Jun 2018 16:47:23 +0200 Subject: [PATCH 2/3] Be more lenient with fieldnames in Grok Processor (#21745) --- libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java index 85289a404395a..4b431caf5b1d7 100644 --- a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java +++ b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java @@ -441,7 +441,9 @@ public void testExponentialExpressions() { } public void testUnicodeFieldnames() { - for(String fieldName : Arrays.asList("@metadata", "@metädata", "@metädat[a]")) { + for (String fieldName : Arrays.asList("@metadata", "@metädata", "@metädat[a]", + randomAlphaOfLengthBetween(1, 5), String.valueOf(randomIntBetween(0, 100)), + randomAlphaOfLengthBetween(1, 5) + randomIntBetween(0, 100))) { String line = "foo"; Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}"); Map matches = grok.captures(line); From 0ef98d60e584e8a9665e0dc8a4ca373b21dc83d9 Mon Sep 
17 00:00:00 2001 From: Armin Date: Thu, 28 Jun 2018 19:18:03 +0200 Subject: [PATCH 3/3] Be more lenient with fieldnames in Grok Processor (#21745) --- .../org/elasticsearch/grok/GrokTests.java | 53 +++++++++++++++---- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java index 4b431caf5b1d7..d30cf3d6fa21b 100644 --- a/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java +++ b/libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java @@ -440,14 +440,49 @@ public void testExponentialExpressions() { assertThat(e.getMessage(), equalTo("grok pattern matching was interrupted after [200] ms")); } - public void testUnicodeFieldnames() { - for (String fieldName : Arrays.asList("@metadata", "@metädata", "@metädat[a]", - randomAlphaOfLengthBetween(1, 5), String.valueOf(randomIntBetween(0, 100)), - randomAlphaOfLengthBetween(1, 5) + randomIntBetween(0, 100))) { - String line = "foo"; - Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}"); - Map matches = grok.captures(line); - assertEquals("foo", matches.get(fieldName)); - } + public void testAtInFieldName() { + assertGrokedField("@metadata"); + } + + public void testNonAsciiLetterInFieldName() { + assertGrokedField("metädata"); + } + + public void testSquareBracketInFieldName() { + assertGrokedField("metadat[a]"); + assertGrokedField("metad[a]ta"); + assertGrokedField("[m]etadata"); + } + + public void testUnderscoreInFieldName() { + assertGrokedField("meta_data"); + } + + public void testDotInFieldName() { + assertGrokedField("meta.data"); + } + + public void testMinusInFieldName() { + assertGrokedField("meta-data"); + } + + public void testAlphanumericFieldName() { + assertGrokedField(randomAlphaOfLengthBetween(1, 5)); + assertGrokedField(randomAlphaOfLengthBetween(1, 5) + randomIntBetween(0, 100)); + assertGrokedField(randomIntBetween(0, 100) + 
randomAlphaOfLengthBetween(1, 5)); + assertGrokedField(String.valueOf(randomIntBetween(0, 100))); + } + + public void testUnsupportedBracketsInFieldName() { + Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}"); + Map matches = grok.captures("line"); + assertNull(matches); + } + + private void assertGrokedField(String fieldName) { + String line = "foo"; + Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}"); + Map matches = grok.captures(line); + assertEquals(line, matches.get(fieldName)); } }