diff --git a/src/sentry/api/utils.py b/src/sentry/api/utils.py index 9e78e5a551a7..702f1e298ee5 100644 --- a/src/sentry/api/utils.py +++ b/src/sentry/api/utils.py @@ -61,6 +61,7 @@ RateLimitExceeded, SchemaValidationError, SnubaError, + SnubaUpstreamRequestTimeout, UnqualifiedQueryError, ) from sentry.utils.snuba_rpc import SnubaRPCError, SnubaRPCRateLimitExceeded @@ -417,6 +418,7 @@ def handle_query_errors() -> Generator[None]: QueryMemoryLimitExceeded, QueryExecutionTimeMaximum, QueryTooManySimultaneous, + SnubaUpstreamRequestTimeout, ), ) or isinstance( arg, diff --git a/src/sentry/utils/snuba.py b/src/sentry/utils/snuba.py index 04ebdae2d347..46e6f215d7de 100644 --- a/src/sentry/utils/snuba.py +++ b/src/sentry/utils/snuba.py @@ -368,6 +368,27 @@ class UnexpectedResponseError(SnubaError): """ +class InvalidSnubaResponseError(SnubaError): + """ + Exception raised when a non-2xx response from the Snuba API cannot be + decoded as JSON. This typically indicates the request never reached + Snuba (e.g. it was terminated by an upstream proxy like envoy). + """ + + def __init__(self, message: str, status: int, body: str) -> None: + super().__init__(message) + self.status = status + self.body = body + + +class SnubaUpstreamRequestTimeout(InvalidSnubaResponseError): + """ + Raised when an upstream proxy (e.g. envoy) returns a 504 Gateway Timeout + before the request reaches Snuba. The default envoy 504 body is the + literal string "upstream request timeout". + """ + + class QueryExecutionError(SnubaError): """ Exception raised when a query failed to execute. @@ -1298,11 +1319,26 @@ def _bulk_snuba_query(snuba_requests: Sequence[SnubaRequest]) -> ResultSet: log_snuba_info("{}.err: {}".format(referrer, body["error"])) except ValueError: if response.status != 200: + body_text = (response.data or b"").decode("utf-8", errors="replace").strip() logger.warning( "snuba.query.invalid-json", - extra={"response.data": response.data}, + extra={ + "status": response.status, + "response.data": response.data, + }, + ) + if response.status == 504 or body_text == "upstream request timeout": + raise SnubaUpstreamRequestTimeout( + f"Upstream proxy returned {response.status}: {body_text!r}", + status=response.status, + body=body_text, + ) + raise InvalidSnubaResponseError( + f"Snuba returned non-JSON response with status " + f"{response.status}: {body_text!r}", + status=response.status, + body=body_text, ) - raise SnubaError("Failed to parse snuba error response") raise UnexpectedResponseError(f"Could not decode JSON response: {response.data!r}") allocation_policy_prefix = "allocation_policy." diff --git a/tests/sentry/api/test_utils.py b/tests/sentry/api/test_utils.py index bb266f79ac66..08ef96156921 100644 --- a/tests/sentry/api/test_utils.py +++ b/tests/sentry/api/test_utils.py @@ -10,6 +10,7 @@ from sentry.api.utils import ( MAX_STATS_PERIOD, + TimeoutException, clamp_date_range, get_date_range_from_params, handle_query_errors, @@ -31,6 +32,7 @@ QueryTooManySimultaneous, SchemaValidationError, SnubaError, + SnubaUpstreamRequestTimeout, UnqualifiedQueryError, ) @@ -196,6 +198,17 @@ def test_handle_query_errors(self, mock_parse_error: MagicMock) -> None: except Exception as e: assert isinstance(e, (FooBarError, APIException)) + def test_handle_snuba_upstream_request_timeout(self) -> None: + try: + with handle_query_errors(): + raise SnubaUpstreamRequestTimeout( + "Upstream proxy returned 504: 'upstream request timeout'", + status=504, + body="upstream request timeout", + ) + except Exception as e: + assert isinstance(e, TimeoutException) + def test_handle_postgres_timeout(self) -> None: class TimeoutError(OperationalError): def __str__(self) -> str: diff --git a/tests/sentry/utils/test_snuba.py b/tests/sentry/utils/test_snuba.py index a275d59d35e5..2cb19032c953 100644 --- a/tests/sentry/utils/test_snuba.py +++ b/tests/sentry/utils/test_snuba.py @@ -18,10 +18,12 @@ from sentry.utils import json from sentry.utils.snuba import ( ROUND_UP, + InvalidSnubaResponseError, RateLimitExceeded, RetrySkipTimeout, SnubaQueryParams, SnubaRequest, + SnubaUpstreamRequestTimeout, UnqualifiedQueryError, _bulk_snuba_query, _prepare_query_params, @@ -628,3 +630,83 @@ def test_rate_limit_error_handling_with_stats_but_no_quota_details( assert ( str(exc_info.value) == "Query on could not be run due to allocation policies, info: ..." ) + + +class SnubaInvalidJsonResponseTest(TestCase): + def setUp(self) -> None: + mock_request = Request( + dataset="events", + app_id="test", + query=Query( + match=Entity("events"), + select=[Function("count", parameters=[], alias="count")], + where=[ + Condition(Column("project_id"), Op.EQ, self.project.id), + Condition(Column("timestamp"), Op.GTE, datetime.now() - timedelta(hours=1)), + Condition(Column("timestamp"), Op.LT, datetime.now()), + ], + ), + ) + self.snuba_request = SnubaRequest( + request=mock_request, + referrer="test_referrer", + forward=lambda x: x, + reverse=lambda x: x, + ) + + @mock.patch("sentry.utils.snuba._snuba_query") + def test_envoy_504_upstream_request_timeout(self, mock_snuba_query) -> None: + """ + Envoy returns a 504 with the literal body "upstream request timeout" + when the upstream snuba-api request times out before reaching snuba. + This should surface as SnubaUpstreamRequestTimeout, not as the generic + "Failed to parse snuba error response" SnubaError. + """ + mock_response = mock.Mock(spec=HTTPResponse) + mock_response.status = 504 + mock_response.data = b"upstream request timeout" + + mock_snuba_query.return_value = ("test_referrer", mock_response, lambda x: x, lambda x: x) + + with pytest.raises(SnubaUpstreamRequestTimeout) as exc_info: + _bulk_snuba_query([self.snuba_request]) + + assert exc_info.value.status == 504 + assert exc_info.value.body == "upstream request timeout" + + @mock.patch("sentry.utils.snuba._snuba_query") + def test_upstream_request_timeout_body_with_non_504_status(self, mock_snuba_query) -> None: + """ + Some proxies may return the "upstream request timeout" body with a + different status code; classify these as upstream timeouts as well. + """ + mock_response = mock.Mock(spec=HTTPResponse) + mock_response.status = 503 + mock_response.data = b"upstream request timeout\n" + + mock_snuba_query.return_value = ("test_referrer", mock_response, lambda x: x, lambda x: x) + + with pytest.raises(SnubaUpstreamRequestTimeout) as exc_info: + _bulk_snuba_query([self.snuba_request]) + + assert exc_info.value.status == 503 + assert exc_info.value.body == "upstream request timeout" + + @mock.patch("sentry.utils.snuba._snuba_query") + def test_invalid_json_other_5xx(self, mock_snuba_query) -> None: + """ + Other non-JSON 5xx responses (e.g. 502 from envoy) should surface as + InvalidSnubaResponseError with the status and body preserved. + """ + mock_response = mock.Mock(spec=HTTPResponse) + mock_response.status = 502 + mock_response.data = b"no healthy upstream" + + mock_snuba_query.return_value = ("test_referrer", mock_response, lambda x: x, lambda x: x) + + with pytest.raises(InvalidSnubaResponseError) as exc_info: + _bulk_snuba_query([self.snuba_request]) + + assert not isinstance(exc_info.value, SnubaUpstreamRequestTimeout) + assert exc_info.value.status == 502 + assert exc_info.value.body == "no healthy upstream"