Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# Event Query Language - Changelog
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).


# Version 0.9.12
_Released 2021-XX-XX_

### Added
* Support for `?` [optional field syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/eql-syntax.html#eql-syntax-optional-fields) when parsing with Elasticsearch syntax enabled


# Version 0.9.11
_Released 2021-09-22_

Expand Down
2 changes: 1 addition & 1 deletion eql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
Walker,
)

__version__ = '0.9.11'
__version__ = '0.9.12'
__all__ = (
"__version__",
"AnalyticOutput",
Expand Down
4 changes: 3 additions & 1 deletion eql/etc/eql.g
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ function_call: (INSENSITIVE_NAME | NAME) "(" [expressions] ")"
| base_field
base_field: name | escaped_name
field: FIELD
| OPTIONAL_FIELD
literal: number
| boolean
| null
Expand All @@ -102,12 +103,13 @@ escaped_name: ESCAPED_NAME
// sequence by pid [1] [true] looks identical to:
// sequence by pid[1] [true]
FIELD: FIELD_IDENT (ATTR | INDEX)+
OPTIONAL_FIELD: "?" FIELD_IDENT (ATTR | INDEX)*
ATTR: "." WHITESPACE? FIELD_IDENT
INDEX: "[" WHITESPACE? UNSIGNED_INTEGER WHITESPACE? "]"
FIELD_IDENT: NAME | ESCAPED_NAME

// create a non-conflicting helper rule to deconstruct a field token into its identifiers and array indexes
field_parts: field_ident ("." field_ident | "[" array_index "]")+
field_parts: field_ident ("." field_ident | "[" array_index "]")*
!array_index: UNSIGNED_INTEGER
!field_ident: NAME | ESCAPED_NAME

Expand Down
26 changes: 18 additions & 8 deletions eql/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
non_nullable_fields = ParserConfig(strict_fields=True)
allow_enum_fields = ParserConfig(enable_enum=True)
elasticsearch_syntax = ParserConfig(elasticsearch_syntax=True)
elasticsearch_validate_optional_fields = ParserConfig(elasticsearch_syntax=True, validate_optional_fields=True)
elastic_endpoint_syntax = ParserConfig(elasticsearch_syntax=True, dollar_var=True)

keywords = ("and", "by", "const", "false", "in", "join", "macro",
Expand Down Expand Up @@ -135,6 +136,7 @@ def __init__(self, text):
self._strict_fields = ParserConfig.read_stack("strict_fields", False)
self._elasticsearch_syntax = ParserConfig.read_stack("elasticsearch_syntax", False)
self._dollar_var = ParserConfig.read_stack("dollar_var", False)
self._validate_optional_fields = ParserConfig.read_stack("validate_optional_fields", False)
self._allow_enum = ParserConfig.read_stack("enable_enum", False)
self._count_keys = []
self._pipe_schemas = []
Expand Down Expand Up @@ -385,9 +387,9 @@ def time_range(self, node):
return ast.TimeRange(quantity, unit)

# fields
def _update_field_info(self, node_info):
def _update_field_info(self, node_info, optional_syntax=False):
type_hint = None
allow_missing = self._schema.allow_missing
allow_missing = self._schema.allow_missing or (optional_syntax and not self._validate_optional_fields)
field = node_info.node
schema = None
schema_hint = None
Expand Down Expand Up @@ -562,17 +564,25 @@ def varpath(self, node):
if node["base_field"]:
path = [to_unicode(node["base_field"]["name"])]
else:
path = self._field_path(node["field"])
_, path = self._field_path(node["field"])

field = ast.Field(path[0], path[1:], as_var=True)
return NodeInfo(field, source=node, type_info=TypeHint.Unknown)

def _field_path(self, node):
def _field_path(self, node, allow_optional=False):
full_path = []
# To get around parser ambiguities, the grammar mashes all of the parts of a field together into a single token.
# A separate rule, "field_parts", can safely re-parse that token and separate out the individual pieces,
# so we walk through each parsed part and build the field path accordingly.
for part in lark_parser.parse(node.children[-1], "field_parts").children:
text = node.children[-1]
optional_syntax = text.startswith("?")
if optional_syntax:
if not allow_optional:
raise self._error(node, "Optional fields are not supported.", cls=EqlSyntaxError, width=1)

text = text[1:]

for part in lark_parser.parse(text, "field_parts").children:
if part["NAME"]:
name = to_unicode(part["NAME"])
full_path.append(name)
Expand All @@ -586,11 +596,11 @@ def _field_path(self, node):
else:
raise self._error(node, "Unable to parse field", cls=EqlSyntaxError)

return full_path
return optional_syntax, full_path

def field(self, node):
"""Callback function to walk the AST."""
full_path = self._field_path(node)
optional_syntax, full_path = self._field_path(node, allow_optional=self._elasticsearch_syntax)
base, path = full_path[0], full_path[1:]

# if get_variable:
Expand All @@ -602,7 +612,7 @@ def field(self, node):
# # This can be overridden by the parent function that is parsing it
# return self._add_variable(node.base)
field = ast.Field(base, path)
return self._update_field_info(NodeInfo(field, source=node))
return self._update_field_info(NodeInfo(field, source=node), optional_syntax=optional_syntax)

def string_predicate(self, node):
"""Callback function to walk the AST."""
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
"pep257==0.7.0",
"coverage==4.5.3",
"flake8-pep257==1.0.5",
"PyYAML",
"PyYAML<6.0; python_version<'3.4'",
"PyYAML; python_version>='3.4'",
"toml~=0.10",
"pluggy==1.0.0-dev0; python_version<'3.4'",
"configparser<5.0; python_version<'3.4'",
Expand Down
27 changes: 26 additions & 1 deletion tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from eql.errors import EqlSyntaxError, EqlSemanticError, EqlParseError
from eql.parser import (
parse_query, parse_expression, parse_definitions, ignore_missing_functions, parse_field, parse_literal,
extract_query_terms, keywords, elasticsearch_syntax, elastic_endpoint_syntax
extract_query_terms, keywords, elasticsearch_syntax, elastic_endpoint_syntax, elasticsearch_validate_optional_fields
)
from eql.walkers import DepthFirstWalker
from eql.pipes import * # noqa: F403
Expand Down Expand Up @@ -533,6 +533,7 @@ def test_elasticsearch_flag(self):
"pid": "number",
"string_array": ["string"],
"obj_array": ["string"],
"process": {"name": "string"}
}
})

Expand Down Expand Up @@ -574,6 +575,22 @@ def test_elasticsearch_flag(self):
self.assertRaises(EqlSyntaxError, parse_query, "process where process_name == ?'cmd.exe'")
self.assertRaises(EqlSyntaxError, parse_query, "process where process_name == ?\"cmd.exe\"")

# optional fields in the schema
parse_query('process where ?process.name : "cmd.exe"')
parse_query('process where ?process_name : "cmd.exe"')

# optional fields not in the schema
parse_query('process where ?unknown_field : "cmd.exe"')
parse_query('process where ?unknown.field : "cmd.exe"')

with elasticsearch_validate_optional_fields:
self.assertRaises(EqlSemanticError, parse_query, 'process where ?unknown_field : "cmd.exe"')
self.assertRaises(EqlSemanticError, parse_query, 'process where ?unknown.field : "cmd.exe"')

# optional fields in the schema
parse_query('process where ?process.name : "cmd.exe"')
parse_query('process where ?process_name : "cmd.exe"')

with schema:
parse_query("process where process_name == 'cmd.exe'")
parse_query("process where process_name == ?'cmd.exe'")
Expand Down Expand Up @@ -604,6 +621,14 @@ def test_elasticsearch_flag(self):

self.assertRaises(EqlSyntaxError, parse_query, "process where startsWith~(process_name, \"cmd.exe\")")

# optional fields in the schema
self.assertRaises(EqlSyntaxError, parse_query, 'process where ?process.name : "cmd.exe"')
self.assertRaises(EqlSyntaxError, parse_query, 'process where ?process_name : "cmd.exe"')

# optional fields not in the schema
self.assertRaises(EqlSyntaxError, parse_query, 'process where ?unknown_field : "cmd.exe"')
self.assertRaises(EqlSyntaxError, parse_query, 'process where ?unknown.field : "cmd.exe"')

with elastic_endpoint_syntax, schema, ignore_missing_functions:
# check elasticsearch-isms
parse_query('process where process_name : ("cmd*.exe", "foo*.exe")')
Expand Down