Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# Event Query Language - Changelog
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).


# Version 0.9.12
_Released 2021-XX-XX_

_Released 2022-03-15_

### Added
* Support for `?` [optional field syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/eql-syntax.html#eql-syntax-optional-fields)

* Support for `?` [optional field syntax](https://www.elastic.co/guide/en/elasticsearch/reference/current/eql-syntax.html#eql-syntax-optional-fields) if the Elasticsearch flag is enabled
* Support for [repeated sequences](https://www.elastic.co/guide/en/elasticsearch/reference/current/eql-syntax.html#eql-with-runs-statement) if the Elasticsearch flag is enabled

# Version 0.9.11
_Released 2021-09-22_
Expand Down Expand Up @@ -182,7 +183,7 @@ _Released 2019-11-01_
* Mathematical operators `+`, `-`, `*`, `/`, `%`
* Documentation for `match()` function
* `between()` function for extracting the first substring between two strings
* `cidrMatch()` function and several helper methods for subnet matching and regex building
* `cidrMatch()` function and several helper methods for subnet matching and regex building
* `extract_query_terms` to extract the original text for each event in a query

### Changed
Expand Down
5 changes: 3 additions & 2 deletions eql/etc/eql.g
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,19 @@ base_query: sequence
| join
| event_query
event_query: [name "where"] expr
sequence: "sequence" [join_values with_params? | with_params join_values?] subquery_by subquery_by+ [until_subquery_by]
sequence: "sequence" [join_values with_params? | with_params join_values?] subquery_by+ [until_subquery_by]
join: "join" join_values? subquery_by subquery_by+ until_subquery_by?
until_subquery_by.2: "until" subquery_by
pipes: pipe+
pipe: "|" name [single_atom single_atom+ | expressions]

join_values.2: "by" expressions
?with_params.2: "with" "maxspan" EQUALS time_range
repeated_sequence.2: "with" "runs" EQUALS UNSIGNED_INTEGER
time_range: number name?


subquery_by: subquery fork_param? join_values?
subquery_by: subquery fork_param? join_values? repeated_sequence?
subquery: "[" event_query "]"
fork_param: "fork" (EQUALS boolean)?

Expand Down
55 changes: 41 additions & 14 deletions eql/parser.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
"""Python parser functions for EQL syntax."""
from __future__ import unicode_literals

import contextlib
import datetime
import re
import sys
from collections import defaultdict
import contextlib

from lark import Lark, Tree, Token
from lark.visitors import Interpreter
from lark import Lark, Token, Tree
from lark.exceptions import LarkError
from lark.visitors import Interpreter

from . import ast
from . import pipes
from .types import TypeHint, NodeInfo, TypeFoldCheck
from .errors import EqlSyntaxError, EqlSemanticError, EqlSchemaError, EqlTypeMismatchError, EqlError
from . import ast, pipes
from .errors import (EqlError, EqlSchemaError, EqlSemanticError,
EqlSyntaxError, EqlTypeMismatchError)
from .etc import get_etc_file
from .functions import get_function, list_functions
from .optimizer import Optimizer
from .schema import EVENT_TYPE_ANY, EVENT_TYPE_GENERIC, Schema
from .utils import to_unicode, load_extensions, ParserConfig, is_string
from .types import NodeInfo, TypeFoldCheck, TypeHint
from .utils import ParserConfig, is_string, load_extensions, to_unicode

__all__ = (
"allow_enum_fields",
Expand Down Expand Up @@ -1002,7 +1002,7 @@ def named_subquery(self, node):
query = self.visit(node["subquery"]["event_query"])
return NodeInfo(ast.NamedSubquery(name, query), TypeHint.Boolean, source=node)

def subquery_by(self, node, num_values=None, position=None, close=None, allow_fork=False):
def subquery_by(self, node, num_values=None, position=None, close=None, allow_fork=False, allow_runs=False):
"""Callback function to walk the AST."""
if not self._subqueries_enabled:
raise self._error(node, "Subqueries not supported")
Expand All @@ -1023,6 +1023,20 @@ def subquery_by(self, node, num_values=None, position=None, close=None, allow_fo
raise self._error(error_node, message, num=num_values, end=end)

kwargs = {}

repeated_sequence = node["repeated_sequence"]

runs_count = 1
if repeated_sequence is not None:
runs_count = int(node["repeated_sequence"]["UNSIGNED_INTEGER"].value)

if allow_runs is False:
raise self._error(repeated_sequence, "Unsupported usage of repeated syntax", cls=EqlSyntaxError)

if runs_count <= 1:
raise self._error(repeated_sequence, "Repeated sequence runs must be greater than 1",
cls=EqlSemanticError)

fork_param = node["fork_param"]

if fork_param is not None:
Expand All @@ -1040,7 +1054,8 @@ def subquery_by(self, node, num_values=None, position=None, close=None, allow_fo
else:
join_values = []

return NodeInfo(ast.SubqueryBy(query, [v.node for v in join_values], **kwargs), source=node), join_values
node_info = NodeInfo(ast.SubqueryBy(query, [v.node for v in join_values], **kwargs), source=node)
return node_info, join_values, runs_count

def join_values(self, node):
"""Return all of the expressions."""
Expand All @@ -1051,15 +1066,16 @@ def join(self, node):
queries, close = self._get_subqueries_and_close(node)
return ast.Join(queries, close)

def _get_subqueries_and_close(self, node, allow_fork=False):
def _get_subqueries_and_close(self, node, allow_fork=False, allow_runs=False):
"""Helper function used by join and sequence to avoid duplicate code."""
if not self._subqueries_enabled:
# Raise the error earlier (instead of waiting until subquery_by) so that it's more meaningful
raise self._error(node, "Subqueries not supported")

# Count the join (`by`) fields of the first subquery; every other subquery is checked against that count
subquery_nodes = node.get_list("subquery_by")
first, first_info = self.subquery_by(subquery_nodes[0], allow_fork=allow_fork, position=0)
first, first_info, _ = self.subquery_by(subquery_nodes[0], allow_fork=allow_fork,
position=0, allow_runs=allow_runs)
num_values = len(first_info)
subqueries = [(first, first_info)]

Expand All @@ -1068,10 +1084,16 @@ def _get_subqueries_and_close(self, node, allow_fork=False):
close = None

if until_node:
repeated_sequence = until_node["subquery_by"]["repeated_sequence"]
if repeated_sequence:
raise self._error(repeated_sequence, "Unsupported usage of repeated syntax", cls=EqlSyntaxError)
subquery_nodes.append(until_node["subquery_by"])

for pos, subquery in enumerate(subquery_nodes[1:], 1):
subqueries.append(self.subquery_by(subquery, num_values=num_values, allow_fork=allow_fork, position=pos))
subquery, join_values, runs_count = self.subquery_by(subquery, num_values=num_values, allow_fork=allow_fork,
position=pos, allow_runs=allow_runs)
multiple_subqueries = [(subquery, join_values)] * runs_count
subqueries.extend(multiple_subqueries)

# Validate that each field has matching types
default_hint = TypeHint.primitives()
Expand Down Expand Up @@ -1162,7 +1184,12 @@ def sequence(self, node):
if node['with_params']:
params = self.time_range(node['with_params']['time_range'])

queries, close = self._get_subqueries_and_close(node, allow_fork=True)
allow_runs = self._elasticsearch_syntax

queries, close = self._get_subqueries_and_close(node, allow_fork=True, allow_runs=allow_runs)
if len(queries) <= 1 and not self._elasticsearch_syntax:
raise self._error(node, "Only one item in the sequence",
cls=EqlSemanticError if self._elasticsearch_syntax else EqlSyntaxError)
return ast.Sequence(queries, params, close)

def definitions(self, node):
Expand Down
31 changes: 30 additions & 1 deletion tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,11 @@ def test_invalid_queries(self):
'sequence [process where pid == pid] []',
'sequence with maxspan=false [process where true] [process where true]',
'sequence with badparam=100 [process where true] [process where true]',
'sequence [process where opcode == 1] by unique_pid [file where opcode == 0] by unique_pid with runs=2',
# invalid placements of `with runs`, followed by checks that the same number of BYs are in every subquery
'sequence with runs=2 [file where opcode == 0] [file where opcode == 0]',
'sequence [file where true] [process where true] until [network where true] with runs=2',
'join [file where opcode == 0] [file where opcode == 0] with runs=1',
'sequence [file where true] [process where true] by field1',
'sequence [file where true] by field [file where true] by field1 until [file where true]',
'sequence by a,b,c [file where true] by field [file where true] by field1 until [file where true]',
Expand Down Expand Up @@ -533,10 +537,29 @@ def test_elasticsearch_flag(self):
"pid": "number",
"string_array": ["string"],
"obj_array": ["string"],
"process": {"name": "string"}
"opcode": "number",
"process": {"name": "string"},
"unique_pid": "string"
},
"file": {
"opcode": "number",
"unique_pid": "string"
}
})

with elasticsearch_syntax:
subquery1 = '[process where opcode == 1] by unique_pid'
runs = [2, 10, 30]
for run in runs:
subquery2_runs = '[file where opcode == 0] by unique_pid with runs={}'.format(run)
parse_query('sequence {} {}'.format(subquery1, subquery2_runs))

self.assertRaises(EqlSyntaxError, parse_query,
'sequence [file where true] by field until [file where true] with runs=2')
self.assertRaises(EqlSemanticError, parse_query, 'sequence [process where opcode == 1] with runs=0')
self.assertRaises(EqlSyntaxError, parse_query, 'sequence [process where opcode == 1] with runs=-1')
self.assertRaises(EqlSemanticError, parse_query, 'sequence [process where opcode == 1] with runs=1')

with elasticsearch_syntax, schema:
parse_query('process where process_name : "cmd.exe"')
parse_query('process where process_name : """cmd.exe"""')
Expand Down Expand Up @@ -629,6 +652,12 @@ def test_elasticsearch_flag(self):
self.assertRaises(EqlSyntaxError, parse_query, 'process where ?unknown_field : "cmd.exe"')
self.assertRaises(EqlSyntaxError, parse_query, 'process where ?unknown.field : "cmd.exe"')

subquery1 = '[process where opcode == 1] by unique_pid'
runs = [-1, 0, 1, 2, 10, 30]
for run in runs:
subquery2_runs = '[file where opcode == 0] by unique_pid with runs={}'.format(run)
self.assertRaises(EqlSyntaxError, parse_query, 'sequence {} {}'.format(subquery1, subquery2_runs))

with elastic_endpoint_syntax, schema, ignore_missing_functions:
# check elasticsearch-isms
parse_query('process where process_name : ("cmd*.exe", "foo*.exe")')
Expand Down