Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 55 additions & 18 deletions src/sentry/api/endpoints/organization_trace_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.organizations.services.organization import RpcOrganization
from sentry.search.eap.occurrences.common_queries import count_occurrences_grouped_by_trace_ids
from sentry.search.eap.occurrences.rollout_utils import EAPOccurrencesComparator
from sentry.search.eap.types import EAPResponse, SearchResolverConfig
from sentry.search.events.builder.discover import DiscoverQueryBuilder
from sentry.search.events.types import SnubaData, SnubaParams
from sentry.snuba.dataset import Dataset
from sentry.snuba.occurrences_rpc import OccurrenceCategory
from sentry.snuba.ourlogs import OurLogs
from sentry.snuba.referrer import Referrer
from sentry.snuba.rpc_dataset_common import RPCBase, TableQuery
Expand Down Expand Up @@ -52,6 +55,49 @@ def extract_uptime_count(uptime_result: list[TraceItemTableResponse]) -> int:
return len(first_column.results) if first_column.results else 0


def run_errors_query(trace_id: str, snuba_params: SnubaParams) -> int:
    """Count non-transaction error events belonging to a single trace.

    Always runs the legacy snuba (Discover) query. When the EAP occurrences
    experiment is enabled for this callsite, additionally runs the EAP count
    and defers to the comparator to decide which value to return.
    """
    builder = DiscoverQueryBuilder(
        dataset=Dataset.Events,
        selected_columns=[
            "count_if(event.type, notEquals, transaction) as errors",
        ],
        params={},
        snuba_params=snuba_params,
        query=f"trace:{trace_id}",
        limit=1,
    )
    result = builder.run_query(Referrer.API_TRACE_VIEW_GET_EVENTS.value)
    rows = result.get("data")
    snuba_count = int(rows[0].get("errors", 0)) if rows else 0

    callsite = "api.trace.count_errors"
    if not EAPOccurrencesComparator.should_check_experiment(callsite):
        # Experiment inactive: the legacy count is authoritative.
        return snuba_count

    # Reuses the grouped-by-trace helper (keyed dict return) so its exception
    # handling is shared rather than duplicated here; missing trace -> 0.
    eap_count = count_occurrences_grouped_by_trace_ids(
        snuba_params=snuba_params,
        trace_ids=[trace_id],
        referrer=Referrer.API_TRACE_VIEW_GET_EVENTS.value,
        occurrence_category=OccurrenceCategory.ERROR,
    ).get(trace_id, 0)

    organization = snuba_params.organization
    # A "reasonable" match allows the EAP count to undercount but never exceed
    # the snuba count; a zero EAP count is treated as a null result.
    return EAPOccurrencesComparator.check_and_choose(
        snuba_count,
        eap_count,
        callsite,
        is_experimental_data_a_null_result=(eap_count == 0),
        reasonable_match_comparator=lambda snuba, eap: eap <= snuba,
        debug_context={
            "trace_id": trace_id,
            "organization_id": organization.id if organization is not None else None,
            "project_ids": [project.id for project in snuba_params.projects],
            "start": snuba_params.start.isoformat() if snuba_params.start else None,
            "end": snuba_params.end.isoformat() if snuba_params.end else None,
        },
    )


@region_silo_endpoint
class OrganizationTraceMetaEndpoint(OrganizationEventsEndpointBase):
publish_status = {
Expand Down Expand Up @@ -160,16 +206,6 @@ def get(self, request: Request, organization: Organization, trace_id: str) -> Ht
# This is a hack, long term EAP will store both errors and performance_issues eventually but is not ready
# currently. But we want to move performance data off the old tables immediately. To keep the code simpler I'm
# parallelizing the queries here, but ideally this parallelization happens by calling run_bulk_table_queries
errors_query = DiscoverQueryBuilder(
dataset=Dataset.Events,
selected_columns=[
"count_if(event.type, notEquals, transaction) as errors",
],
params={},
snuba_params=snuba_params,
query=f"trace:{trace_id}",
limit=1,
)
include_uptime = request.GET.get("include_uptime", "0") == "1"
max_workers = 3 + (1 if include_uptime else 0)
with ThreadPoolExecutor(
Expand All @@ -182,9 +218,7 @@ def get(self, request: Request, organization: Organization, trace_id: str) -> Ht
perf_issues_future = query_thread_pool.submit(
count_performance_issues, trace_id, snuba_params
)
errors_future = query_thread_pool.submit(
errors_query.run_query, Referrer.API_TRACE_VIEW_GET_EVENTS.value
)
errors_future = query_thread_pool.submit(run_errors_query, trace_id, snuba_params)

uptime_future = None
if include_uptime:
Expand All @@ -195,8 +229,7 @@ def get(self, request: Request, organization: Organization, trace_id: str) -> Ht

results = spans_future.result()
perf_issues = perf_issues_future.result()
errors = errors_future.result()
results["errors"] = errors
errors_count = errors_future.result()

uptime_count = None
if uptime_future:
Expand All @@ -205,15 +238,19 @@ def get(self, request: Request, organization: Organization, trace_id: str) -> Ht
uptime_count = extract_uptime_count(uptime_result)
except Exception:
logger.exception("Failed to fetch uptime results")
return Response(self.serialize(results, perf_issues, uptime_count))
return Response(self.serialize(results, errors_count, perf_issues, uptime_count))

def serialize(
self, results: dict[str, EAPResponse], perf_issues: int, uptime_count: int | None = None
self,
results: dict[str, EAPResponse],
errors_count: int,
perf_issues: int,
uptime_count: int | None = None,
) -> SerializedResponse:
response: SerializedResponse = {
# Values can be null if there's no result
"logs": results["logs_meta"]["data"][0].get("count()") or 0,
"errors": results["errors"]["data"][0].get("errors") or 0,
"errors": errors_count,
"performance_issues": perf_issues,
"span_count": results["spans_meta"]["data"][0].get("count()") or 0,
"transaction_child_count_map": results["transaction_children"]["data"],
Expand Down
68 changes: 68 additions & 0 deletions tests/snuba/api/endpoints/test_organization_trace_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,74 @@ def test_with_errors(self) -> None:
assert data["span_count"] == 19
assert data["span_count_map"]["http.server"] == 19

def test_with_errors_eap_eval_not_allowlisted_uses_snuba(self) -> None:
self.load_trace()
self.load_errors(self.gen1_project, self.gen1_span_ids[0])
group = self.create_group(project=self.gen1_project)
self.store_eap_items(
[
self.create_eap_occurrence(
project=self.gen1_project,
group_id=group.id,
trace_id=self.trace_id,
occurrence_type="error",
),
]
)
with self.options(
{
EAPOccurrencesComparator._should_eval_option_name(): True,
EAPOccurrencesComparator._callsite_allowlist_option_name(): [],
}
):
with self.feature(self.FEATURES):
response = self.client.get(
self.url,
data={"project": -1},
format="json",
)
assert response.status_code == 200, response.content
data = response.data
assert data["errors"] == 3
assert data["performance_issues"] == 2
assert data["span_count"] == 19
assert data["span_count_map"]["http.server"] == 19

def test_with_errors_eap_allowlisted_uses_eap(self) -> None:
self.load_trace()
self.load_errors(self.gen1_project, self.gen1_span_ids[0])
group = self.create_group(project=self.gen1_project)
self.store_eap_items(
[
self.create_eap_occurrence(
project=self.gen1_project,
group_id=group.id,
trace_id=self.trace_id,
occurrence_type="error",
),
]
)
with self.options(
{
EAPOccurrencesComparator._should_eval_option_name(): True,
EAPOccurrencesComparator._callsite_allowlist_option_name(): [
"api.trace.count_errors"
],
}
):
with self.feature(self.FEATURES):
response = self.client.get(
self.url,
data={"project": -1},
format="json",
)
assert response.status_code == 200, response.content
data = response.data
assert data["errors"] == 1
assert data["performance_issues"] == 2
assert data["span_count"] == 19
assert data["span_count_map"]["http.server"] == 19

def test_with_default(self) -> None:
self.load_trace()
self.load_default()
Expand Down
Loading