From a79c1f102b5625e751e1ce29d0f7228b3db9870c Mon Sep 17 00:00:00 2001 From: Dimitrios Liappis Date: Thu, 20 Apr 2023 14:13:20 +0300 Subject: [PATCH 1/2] Allow specifying branch together with timestamp in revision With this commit, we add support for an optional branch when a timestamp is specified in --revision. This is intended primarily for Elasticsearch nightly benchmarks. --- docs/command_line_reference.rst | 2 +- esrally/mechanic/supplier.py | 75 ++++++++++++++++++++++++--------- esrally/rally.py | 3 +- tests/mechanic/supplier_test.py | 43 +++++++++++++++---- 4 files changed, 93 insertions(+), 30 deletions(-) diff --git a/docs/command_line_reference.rst b/docs/command_line_reference.rst index b4595de87..b86dcb7f7 100644 --- a/docs/command_line_reference.rst +++ b/docs/command_line_reference.rst @@ -603,7 +603,7 @@ You can specify the revision in different formats: * ``--revision=current``: Use the current revision (i.e. don't alter the local source tree). * ``--revision=abc123``: Where ``abc123`` is some git revision hash. * ``--revision=v8.4.0``: Where ``v8.4.0`` is some git tag. -* ``--revision=@2013-07-27T10:37:00Z``: Determines the revision that is closest to the provided date. Rally logs to which git revision hash the date has been resolved and if you use Elasticsearch as metrics store (instead of the default in-memory one), :doc:`each metric record will contain the git revision hash also in the meta-data section `. +* ``--revision=@2013-07-27T10:37:00Z``: Determines the revision that is closest to the provided date. By default, assumes ``main`` is the branch, but it can be override with ``branchname@timestamp``. Rally logs to which git revision hash the date has been resolved and if you use Elasticsearch as metrics store (instead of the default in-memory one), :doc:`each metric record will contain the git revision hash also in the meta-data section `. * ``--revision=my-branch-name``: Where ``my-branch-name`` is the name of an existing branch name. If you are using a :ref:`remote source repository `, Rally will fetch and checkout the remote branch. Supported date format: If you specify a date, it has to be ISO-8601 conformant and must start with an ``@`` sign to make it easier for Rally to determine that you actually mean a date. diff --git a/esrally/mechanic/supplier.py b/esrally/mechanic/supplier.py index 344c44453..f3ce39e68 100644 --- a/esrally/mechanic/supplier.py +++ b/esrally/mechanic/supplier.py @@ -21,7 +21,6 @@ import grp import logging import os -import re import shutil import urllib.error @@ -30,8 +29,8 @@ from esrally.exceptions import BuildError, SystemSetupError from esrally.utils import console, convert, git, io, jvm, net, process, sysstats -# e.g. my-plugin:current - we cannot simply use String#split(":") as this would not work for timestamp-based revisions -REVISION_PATTERN = r"(\w.*?):(.*)" +DEFAULT_ELASTICSEARCH_BRANCH = "main" +DEFAULT_PLUGIN_BRANCH = "main" def create(cfg, sources, distribution, car, plugins=None): @@ -400,7 +399,7 @@ def __init__(self, revision, es_src_dir, remote_url, car, builder, template_rend self.template_renderer = template_renderer def fetch(self): - return SourceRepository("Elasticsearch", self.remote_url, self.src_dir, branch="main").fetch(self.revision) + return SourceRepository("Elasticsearch", self.remote_url, self.src_dir, branch=DEFAULT_ELASTICSEARCH_BRANCH).fetch(self.revision) def prepare(self): if self.builder: @@ -559,7 +558,7 @@ def can_handle(plugin): def fetch(self): # Just retrieve the current revision *number* and assume that Elasticsearch has prepared the source tree. - return SourceRepository("Elasticsearch", None, self.es_src_dir, branch="main").fetch(revision="current") + return SourceRepository("Elasticsearch", None, self.es_src_dir, branch=DEFAULT_PLUGIN_BRANCH).fetch(revision="current") def prepare(self): if self.builder: @@ -649,13 +648,12 @@ def _config_value(src_config, key): def _extract_revisions(revision): revisions = revision.split(",") if revision else [] if len(revisions) == 1: - r = revisions[0] - if r.startswith("elasticsearch:"): + c, r = _component_from_revision(revisions[0]) + if c.startswith("elasticsearch:"): r = r[len("elasticsearch:") :] - # may as well be just a single plugin - m = re.match(REVISION_PATTERN, r) - if m: - return {m.group(1): m.group(2)} + # elasticsearch or single plugin + if c: + return {c: r} else: return { "elasticsearch": r, @@ -664,15 +662,47 @@ def _extract_revisions(revision): } else: results = {} - for r in revisions: - m = re.match(REVISION_PATTERN, r) - if m: - results[m.group(1)] = m.group(2) + for rev in revisions: + c, r = _component_from_revision(rev) + if c: + results[c] = r else: raise exceptions.SystemSetupError("Revision [%s] does not match expected format [name:revision]." % r) return results +def _branch_from_revision_with_ts(revision, default_branch): + """ + Extracts the branch and revision from a `revision` that uses @timestamp. + If a branch can't be found in `revision`, default_branch is used. + """ + + # ":" separator maybe used in both the timestamp and the component + # e.g. elasticsearch:@TS + _, r = _component_from_revision(revision) + branch, git_ts_revision = r.split("@") + if not branch: + branch = default_branch + return branch, git_ts_revision + + +def _component_from_revision(revision): + """Extracts the (optional) component and remaining data from `revision`""" + + component = "" + r = revision + if "@" not in revision and ":" in revision: + # e.g. @2023-04-20T01:00:00Z + component, r = revision.split(":") + elif "@" in revision and ":" in revision: + # e.g. "elasticsearch:@2023-04-20T01:00:00Z" + revision_without_ts = revision[: revision.find("@")] + if ":" in revision_without_ts: + component = revision_without_ts.split(":")[0] + r = revision[revision.find(":", 1) + 1 :] + return component, r + + class SourceRepository: """ Supplier fetches the benchmark candidate source tree from the remote repository. @@ -709,11 +739,16 @@ def _update(self, revision): git.pull(self.src_dir, remote="origin", branch=self.branch) elif revision == "current": self.logger.info("Skip fetching sources for %s.", self.name) - elif self.has_remote() and revision.startswith("@"): - # convert timestamp annotated for Rally to something git understands -> we strip leading and trailing " and the @. - git_ts_revision = revision[1:] - self.logger.info("Fetching from remote and checking out revision with timestamp [%s] for %s.", git_ts_revision, self.name) - git.pull_ts(self.src_dir, git_ts_revision, remote="origin", branch=self.branch) + # revision contains a timestamp + elif self.has_remote() and "@" in revision: + branch, git_ts_revision = _branch_from_revision_with_ts(revision, self.branch) + self.logger.info( + "Fetching from remote and checking out revision with timestamp [%s] from branch %s for %s.", + git_ts_revision, + branch, + self.name, + ) + git.pull_ts(self.src_dir, git_ts_revision, remote="origin", branch=branch) elif self.has_remote(): # we can have either a commit hash, branch name, or tag git.fetch(self.src_dir, remote="origin") if git.is_branch(self.src_dir, identifier=revision): diff --git a/esrally/rally.py b/esrally/rally.py index 9c79f4a84..95ae67bb2 100644 --- a/esrally/rally.py +++ b/esrally/rally.py @@ -292,7 +292,8 @@ def add_track_source(subparser): help="Define the source code revision for building the benchmark candidate. 'current' uses the source tree as is," " 'latest' fetches the latest version on the main branch. It is also possible to specify a commit id or a timestamp." ' The timestamp must be specified as: "@ts" where "ts" must be a valid ISO 8601 timestamp, ' - 'e.g. "@2013-07-27T10:37:00Z" (default: current).', + 'e.g. "@2013-07-27T10:37:00Z" (default: current). A combination of branch and timestamp is also possible,' + 'e.g. "@feature-branch@2023-04-06T14:52:31Z".', default="current", ) # optimized for local usage, don't fetch sources build_parser.add_argument( diff --git a/tests/mechanic/supplier_test.py b/tests/mechanic/supplier_test.py index a142814e2..082572f83 100644 --- a/tests/mechanic/supplier_test.py +++ b/tests/mechanic/supplier_test.py @@ -38,16 +38,20 @@ def test_single_revision(self): "elasticsearch": "current", "all": "current", } - assert supplier._extract_revisions("@2015-01-01-01:00:00") == { - "elasticsearch": "@2015-01-01-01:00:00", - "all": "@2015-01-01-01:00:00", + assert supplier._extract_revisions("@2022-12-12T11:12:13Z") == { + "elasticsearch": "@2022-12-12T11:12:13Z", + "all": "@2022-12-12T11:12:13Z", + } + assert supplier._extract_revisions("feature/branch@2024-04-20T08:00:00Z") == { + "elasticsearch": "feature/branch@2024-04-20T08:00:00Z", + "all": "feature/branch@2024-04-20T08:00:00Z", } def test_multiple_revisions(self): - assert supplier._extract_revisions("elasticsearch:67c2f42,x-pack:@2015-01-01-01:00:00,some-plugin:current") == { + assert supplier._extract_revisions("elasticsearch:67c2f42,one-plugin:@2023-04-04T12:34:56,another-plugin:current") == { "elasticsearch": "67c2f42", - "x-pack": "@2015-01-01-01:00:00", - "some-plugin": "current", + "one-plugin": "@2023-04-04T12:34:56", + "another-plugin": "current", } def test_invalid_revisions(self): @@ -56,6 +60,29 @@ def test_invalid_revisions(self): assert exc.value.args[0] == "Revision [elasticsearch 67c2f42] does not match expected format [name:revision]." +class TestComponentFromRevision: + def test_revision_sha(self): + assert supplier._component_from_revision("e8d4211de73ad498df865e7df935feb890808eb9") == ( + "", + "e8d4211de73ad498df865e7df935feb890808eb9", + ) + + def test_revision_ts_only(self): + assert supplier._component_from_revision("@2023-04-03T03:04:05Z") == ("", "@2023-04-03T03:04:05Z") + + def test_revision_component_and_sha(self): + assert supplier._component_from_revision("elasticsearch:latest") == ("elasticsearch", "latest") + + def test_revision_component_and_ts(self): + assert supplier._component_from_revision("elasticsearch:@2023-04-03T03:04:05Z") == ("elasticsearch", "@2023-04-03T03:04:05Z") + + def test_revision_component_branch_and_ts(self): + assert supplier._component_from_revision("elasticsearch:feature/branch@2023-04-03T03:04:05Z") == ( + "elasticsearch", + "feature/branch@2023-04-03T03:04:05Z", + ) + + class TestSourceRepository: @mock.patch("esrally.utils.git.head_revision", autospec=True) @mock.patch("esrally.utils.git.pull", autospec=True) @@ -117,10 +144,10 @@ def test_checkout_ts(self, mock_is_working_copy, mock_pull_ts, mock_head_revisio mock_head_revision.return_value = "HEAD" s = supplier.SourceRepository(name="Elasticsearch", remote_url="some-github-url", src_dir="/src", branch="main") - s.fetch("@2015-01-01-01:00:00") + s.fetch("@2023-04-20T11:09:12Z") mock_is_working_copy.assert_called_with("/src") - mock_pull_ts.assert_called_with("/src", "2015-01-01-01:00:00", remote="origin", branch="main") + mock_pull_ts.assert_called_with("/src", "2023-04-20T11:09:12Z", remote="origin", branch="main") mock_head_revision.assert_called_with("/src") @mock.patch("esrally.utils.git.fetch", autospec=True) From 9e1832c4be48393440df4cd12426f59a7ed4ee06 Mon Sep 17 00:00:00 2001 From: Dimitrios Liappis Date: Fri, 21 Apr 2023 09:47:05 +0300 Subject: [PATCH 2/2] Update docs/command_line_reference.rst Co-authored-by: Brad Deam <54515790+b-deam@users.noreply.github.com> --- docs/command_line_reference.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/command_line_reference.rst b/docs/command_line_reference.rst index b86dcb7f7..f5bfe8a79 100644 --- a/docs/command_line_reference.rst +++ b/docs/command_line_reference.rst @@ -603,7 +603,7 @@ You can specify the revision in different formats: * ``--revision=current``: Use the current revision (i.e. don't alter the local source tree). * ``--revision=abc123``: Where ``abc123`` is some git revision hash. * ``--revision=v8.4.0``: Where ``v8.4.0`` is some git tag. -* ``--revision=@2013-07-27T10:37:00Z``: Determines the revision that is closest to the provided date. By default, assumes ``main`` is the branch, but it can be override with ``branchname@timestamp``. Rally logs to which git revision hash the date has been resolved and if you use Elasticsearch as metrics store (instead of the default in-memory one), :doc:`each metric record will contain the git revision hash also in the meta-data section `. +* ``--revision=@2013-07-27T10:37:00Z``: Determines the revision that is closest to the provided date. By default, assumes ``main`` is the branch, but it can be overriden with ``branchname@timestamp``. Rally logs to which git revision hash the date has been resolved and if you use Elasticsearch as metrics store (instead of the default in-memory one), :doc:`each metric record will contain the git revision hash also in the meta-data section `. * ``--revision=my-branch-name``: Where ``my-branch-name`` is the name of an existing branch name. If you are using a :ref:`remote source repository `, Rally will fetch and checkout the remote branch. Supported date format: If you specify a date, it has to be ISO-8601 conformant and must start with an ``@`` sign to make it easier for Rally to determine that you actually mean a date.