diff --git a/CHANGELOG.md b/CHANGELOG.md index bde9b3f..8f09bb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,14 @@ # Event Query Language - Changelog The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). + +# Version 0.9.12 +_Released 2021-XX-XX_ + +### Added +* Support for `?` [optional field syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/eql-syntax.html#eql-syntax-optional-fields) + + # Version 0.9.11 _Released 2021-09-22_ diff --git a/eql/__init__.py b/eql/__init__.py index c3dcce0..466dccf 100644 --- a/eql/__init__.py +++ b/eql/__init__.py @@ -66,7 +66,7 @@ Walker, ) -__version__ = '0.9.11' +__version__ = '0.9.12' __all__ = ( "__version__", "AnalyticOutput", diff --git a/eql/etc/eql.g b/eql/etc/eql.g index f58fed3..92b414d 100644 --- a/eql/etc/eql.g +++ b/eql/etc/eql.g @@ -77,6 +77,7 @@ function_call: (INSENSITIVE_NAME | NAME) "(" [expressions] ")" | base_field base_field: name | escaped_name field: FIELD + | OPTIONAL_FIELD literal: number | boolean | null @@ -102,12 +103,13 @@ escaped_name: ESCAPED_NAME // sequence by pid [1] [true] looks identical to: // sequence by pid[1] [true] FIELD: FIELD_IDENT (ATTR | INDEX)+ +OPTIONAL_FIELD: "?" FIELD_IDENT (ATTR | INDEX)* ATTR: "." WHITESPACE? FIELD_IDENT INDEX: "[" WHITESPACE? UNSIGNED_INTEGER WHITESPACE? "]" FIELD_IDENT: NAME | ESCAPED_NAME // create a non-conflicting helper rule to deconstruct -field_parts: field_ident ("." field_ident | "[" array_index "]")+ +field_parts: field_ident ("." field_ident | "[" array_index "]")* !array_index: UNSIGNED_INTEGER !field_ident: NAME | ESCAPED_NAME diff --git a/eql/parser.py b/eql/parser.py index 9e15ef9..612e4f9 100644 --- a/eql/parser.py +++ b/eql/parser.py @@ -56,6 +56,7 @@ non_nullable_fields = ParserConfig(strict_fields=True) allow_enum_fields = ParserConfig(enable_enum=True) elasticsearch_syntax = ParserConfig(elasticsearch_syntax=True) +elasticsearch_validate_optional_fields = ParserConfig(elasticsearch_syntax=True, validate_optional_fields=True) elastic_endpoint_syntax = ParserConfig(elasticsearch_syntax=True, dollar_var=True) keywords = ("and", "by", "const", "false", "in", "join", "macro", @@ -135,6 +136,7 @@ def __init__(self, text): self._strict_fields = ParserConfig.read_stack("strict_fields", False) self._elasticsearch_syntax = ParserConfig.read_stack("elasticsearch_syntax", False) self._dollar_var = ParserConfig.read_stack("dollar_var", False) + self._validate_optional_fields = ParserConfig.read_stack("validate_optional_fields", False) self._allow_enum = ParserConfig.read_stack("enable_enum", False) self._count_keys = [] self._pipe_schemas = [] @@ -385,9 +387,9 @@ def time_range(self, node): return ast.TimeRange(quantity, unit) # fields - def _update_field_info(self, node_info): + def _update_field_info(self, node_info, optional_syntax=False): type_hint = None - allow_missing = self._schema.allow_missing + allow_missing = self._schema.allow_missing or (optional_syntax and not self._validate_optional_fields) field = node_info.node schema = None schema_hint = None @@ -562,17 +564,25 @@ def varpath(self, node): if node["base_field"]: path = [to_unicode(node["base_field"]["name"])] else: - path = self._field_path(node["field"]) + _, path = self._field_path(node["field"]) field = ast.Field(path[0], path[1:], as_var=True) return NodeInfo(field, source=node, type_info=TypeHint.Unknown) - def _field_path(self, node): + def _field_path(self, node, allow_optional=False): full_path = [] # to get around parser ambiguities, we had to create a token to mash all of the parts together # but we have a separate rule "field_parts" that can safely re-parse and separate out the tokens. # we can walk through each token, and build the field path accordingly - for part in lark_parser.parse(node.children[-1], "field_parts").children: + text = node.children[-1] + optional_syntax = text.startswith("?") + if optional_syntax: + if not allow_optional: + raise self._error(node, "Optional fields are not supported.", cls=EqlSyntaxError, width=1) + + text = text[1:] + + for part in lark_parser.parse(text, "field_parts").children: if part["NAME"]: name = to_unicode(part["NAME"]) full_path.append(name) @@ -586,11 +596,11 @@ def _field_path(self, node): else: raise self._error(node, "Unable to parse field", cls=EqlSyntaxError) - return full_path + return optional_syntax, full_path def field(self, node): """Callback function to walk the AST.""" - full_path = self._field_path(node) + optional_syntax, full_path = self._field_path(node, allow_optional=self._elasticsearch_syntax) base, path = full_path[0], full_path[1:] # if get_variable: @@ -602,7 +612,7 @@ def field(self, node): # # This can be overridden by the parent function that is parsing it # return self._add_variable(node.base) field = ast.Field(base, path) - return self._update_field_info(NodeInfo(field, source=node)) + return self._update_field_info(NodeInfo(field, source=node), optional_syntax=optional_syntax) def string_predicate(self, node): """Callback function to walk the AST.""" diff --git a/setup.py b/setup.py index 0ec37ca..fa527d8 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,8 @@ "pep257==0.7.0", "coverage==4.5.3", "flake8-pep257==1.0.5", - "PyYAML", + "PyYAML<6.0; python_version<'3.4'", + "PyYAML; python_version>='3.4'", "toml~=0.10", "pluggy==1.0.0-dev0; python_version<'3.4'", "configparser<5.0; python_version<'3.4'", diff --git a/tests/test_parser.py b/tests/test_parser.py index d20b444..f2300ff 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -11,7 +11,7 @@ from eql.errors import EqlSyntaxError, EqlSemanticError, EqlParseError from eql.parser import ( parse_query, parse_expression, parse_definitions, ignore_missing_functions, parse_field, parse_literal, - extract_query_terms, keywords, elasticsearch_syntax, elastic_endpoint_syntax + extract_query_terms, keywords, elasticsearch_syntax, elastic_endpoint_syntax, elasticsearch_validate_optional_fields ) from eql.walkers import DepthFirstWalker from eql.pipes import * # noqa: F403 @@ -533,6 +533,7 @@ def test_elasticsearch_flag(self): "pid": "number", "string_array": ["string"], "obj_array": ["string"], + "process": {"name": "string"} } }) @@ -574,6 +575,22 @@ def test_elasticsearch_flag(self): self.assertRaises(EqlSyntaxError, parse_query, "process where process_name == ?'cmd.exe'") self.assertRaises(EqlSyntaxError, parse_query, "process where process_name == ?\"cmd.exe\"") + # optional fields in the schema + parse_query('process where ?process.name : "cmd.exe"') + parse_query('process where ?process_name : "cmd.exe"') + + # optional fields not in the schema + parse_query('process where ?unknown_field : "cmd.exe"') + parse_query('process where ?unknown.field : "cmd.exe"') + + with elasticsearch_validate_optional_fields: + self.assertRaises(EqlSemanticError, parse_query, 'process where ?unknown_field : "cmd.exe"') + self.assertRaises(EqlSemanticError, parse_query, 'process where ?unknown.field : "cmd.exe"') + + # optional fields in the schema + parse_query('process where ?process.name : "cmd.exe"') + parse_query('process where ?process_name : "cmd.exe"') + with schema: parse_query("process where process_name == 'cmd.exe'") parse_query("process where process_name == ?'cmd.exe'") @@ -604,6 +621,14 @@ def test_elasticsearch_flag(self): self.assertRaises(EqlSyntaxError, parse_query, "process where startsWith~(process_name, \"cmd.exe\")") + # optional fields in the schema + self.assertRaises(EqlSyntaxError, parse_query, 'process where ?process.name : "cmd.exe"') + self.assertRaises(EqlSyntaxError, parse_query, 'process where ?process_name : "cmd.exe"') + + # optional fields not in the schema + self.assertRaises(EqlSyntaxError, parse_query, 'process where ?unknown_field : "cmd.exe"') + self.assertRaises(EqlSyntaxError, parse_query, 'process where ?unknown.field : "cmd.exe"') + with elastic_endpoint_syntax, schema, ignore_missing_functions: # check elasticsearch-isms parse_query('process where process_name : ("cmd*.exe", "foo*.exe")')