Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/v/cloud_storage/recovery_errors.cc
Comment thread
oleiman marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@

namespace cloud_storage {

const char* recovery_error_category::name() const noexcept {
const char* recovery_error_category_type::name() const noexcept {
return "recovery_error_code";
}

std::string recovery_error_category::message(int c) const {
std::string recovery_error_category_type::message(int c) const {
switch (static_cast<recovery_error_code>(c)) {
case recovery_error_code::success:
return "recovery successful";
Expand All @@ -37,13 +37,13 @@ std::string recovery_error_category::message(int c) const {
}
}

const std::error_category& error_category() noexcept {
static const recovery_error_category e;
const std::error_category& recovery_error_category() noexcept {
static const recovery_error_category_type e;
Comment on lines -40 to +41
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Lazin maybe we can do something like this in cloud topics?

return e;
}

std::error_code make_error_code(recovery_error_code e) noexcept {
return {static_cast<int>(e), error_category()};
return {static_cast<int>(e), recovery_error_category()};
}

recovery_error_ctx
Expand Down
4 changes: 2 additions & 2 deletions src/v/cloud_storage/recovery_errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ enum class recovery_error_code {
unknown_error,
};

class recovery_error_category : public std::error_category {
class recovery_error_category_type : public std::error_category {
public:
const char* name() const noexcept final;
std::string message(int c) const final;
};

const std::error_category& error_category() noexcept;
const std::error_category& recovery_error_category() noexcept;

std::error_code make_error_code(recovery_error_code e) noexcept;

Expand Down
21 changes: 20 additions & 1 deletion src/v/cluster/archival/ntp_archiver_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3018,6 +3018,23 @@ ss::future<> ntp_archiver::apply_spillover() {
const auto manifest_upload_timeout = _conf->manifest_upload_timeout();
const auto manifest_upload_backoff = _conf->cloud_storage_initial_backoff();

// Check the spillover invariant.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why isn't the fix to demote the archival_metadata_stm error to a warning? The issue is benign at this point, and expected due to races, right?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it better to eliminate the race?

// The start_offset of the manifest must be equal to the begin_offset of
// the first segment in the manifest.
if (auto so = manifest().get_start_offset();
so.has_value() && !manifest().empty()) {
auto fo = manifest().begin()->base_offset;
if (fo != so.value()) {
vlog(
_rtclog.error,
Comment thread
oleiman marked this conversation as resolved.
"Spillover invariant violated: manifest start_offset {}, first "
"segment base_offset {}",
so.value(),
fo);
co_return;
}
}

if (manifest_size_limit.has_value()) {
vlog(
_rtclog.debug,
Expand Down Expand Up @@ -3314,9 +3331,11 @@ ss::future<> ntp_archiver::apply_retention() {
if (error != cluster::errc::success) {
vlog(
_rtclog.warn,
"Failed to update archival metadata STM start offest according "
"Failed to update archival metadata STM start offset according "
"to retention policy: {}",
error);
throw std::runtime_error(fmt_with_ctx(
fmt::format, "Failed to update start offset: {}", error));
}
} else {
vlog(
Expand Down
4 changes: 4 additions & 0 deletions tests/rptest/tests/cloud_storage_scrubber_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@
# manifest will refuse to apply the command and log the error below.
r"cloud_storage - .* New replacement segment does not line up with previous segment",
r"cluster - .* Can't add segment:",
# The same could happen with spillover. Spillover assumes that the start offset is
# aligned with the first segment in the manifest but this may not be the case after
# after the manifest reset.
r"archival - .* Spillover invariant violated",
Comment thread
oleiman marked this conversation as resolved.
]


Expand Down