diff --git a/CHANGELOG.md b/CHANGELOG.md index 90f7e56e4c6..d0274459e02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ It is recommend to upgrade the storage components first (Receive, Store, etc.) a - [#8714](https://github.com/thanos-io/thanos/pull/8714): Tracing: Fix `tls_config` fields (`ca_file`, `cert_file`, `key_file`) being silently ignored when using the OTLP gRPC exporter. Previously, deployments using a private CA or mTLS client certificates had to work around this via `OTEL_EXPORTER_OTLP_CERTIFICATE` and related environment variables. - [#8128](https://github.com/thanos-io/thanos/issues/8128): Query-Frontend: Fix panic in `AnalyzesMerge` caused by indexing the wrong slice variable, leading to an out-of-range access when merging more than two query analyses. - [#8720](https://github.com/thanos-io/thanos/issues/8720): Receive: Fix 503 errors during restarts in some cases. +- [#8762](https://github.com/thanos-io/thanos/pull/8762): Query-Frontend: Fix trace ID missing from slow query logs, regression from #8618. ### Added diff --git a/internal/cortex/frontend/transport/handler.go b/internal/cortex/frontend/transport/handler.go index f58bf01fc03..00fcefb3162 100644 --- a/internal/cortex/frontend/transport/handler.go +++ b/internal/cortex/frontend/transport/handler.go @@ -127,7 +127,7 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // Ensure we log slow queries and query statistics regardless of how the request completes. defer func() { - f.reportQueryStatsAndSlowQueries(r, resp, buf, startTime, stats) + f.reportQueryStatsAndSlowQueries(r, w.Header(), buf, startTime, stats) }() var err error @@ -153,7 +153,7 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } // reportQueryStatsAndSlowQueries handles logging of slow queries and query statistics. -func (f *Handler) reportQueryStatsAndSlowQueries(r *http.Request, resp *http.Response, buf bytes.Buffer, startTime time.Time, stats *querier_stats.Stats) { +func (f *Handler) reportQueryStatsAndSlowQueries(r *http.Request, responseHeaders http.Header, buf bytes.Buffer, startTime time.Time, stats *querier_stats.Stats) { queryResponseTime := time.Since(startTime) shouldReportSlowQuery := f.cfg.LogQueriesLongerThan != 0 && @@ -164,10 +164,6 @@ func (f *Handler) reportQueryStatsAndSlowQueries(r *http.Request, resp *http.Res queryString := f.parseRequestQueryString(r, buf) if shouldReportSlowQuery { - var responseHeaders http.Header - if resp != nil { - responseHeaders = resp.Header - } f.reportSlowQuery(r, responseHeaders, queryString, queryResponseTime, stats) } if f.cfg.QueryStatsEnabled { diff --git a/internal/cortex/frontend/transport/handler_test.go b/internal/cortex/frontend/transport/handler_test.go index 85ad403e194..f06fd1f0bd6 100644 --- a/internal/cortex/frontend/transport/handler_test.go +++ b/internal/cortex/frontend/transport/handler_test.go @@ -50,6 +50,7 @@ func TestHandler_SlowQueryLog(t *testing.T) { "param_query=absent(up)", "param_start=1714262400", "param_end=1714266000", + "trace_id=test-trace-id-123", }, }, { @@ -114,7 +115,9 @@ func TestHandler_SlowQueryLog(t *testing.T) { handler := NewHandler(cfg, fakeRoundTripper, logger, prometheus.NewRegistry()) - handler.ServeHTTP(httptest.NewRecorder(), httptest.NewRequest("GET", tt.url, nil)) + recorder := httptest.NewRecorder() + recorder.Header().Set("X-Thanos-Trace-Id", "test-trace-id-123") + handler.ServeHTTP(recorder, httptest.NewRequest("GET", tt.url, nil)) for _, part := range tt.logParts { require.Contains(t, logWriter.String(), part) @@ -141,11 +144,19 @@ func TestHandler_SlowQueryLogOnError(t *testing.T) { logger := log.NewLogfmtLogger(log.NewSyncWriter(logWriter)) handler := NewHandler(cfg, fakeRT, logger, prometheus.NewRegistry()) - handler.ServeHTTP(httptest.NewRecorder(), httptest.NewRequest("GET", "/api/v1/query?query=up", nil)) + + // Simulate tracing middleware setting the trace ID on the ResponseWriter + // before the handler runs. The slow query log must include this trace ID + // even when the round-tripper returns an error (resp is nil). + recorder := httptest.NewRecorder() + recorder.Header().Set("X-Thanos-Trace-Id", "test-trace-abc123") + + handler.ServeHTTP(recorder, httptest.NewRequest("GET", "/api/v1/query?query=up", nil)) // Verify slow query is logged even when round-tripper returns an error require.Contains(t, logWriter.String(), "slow query detected") require.Contains(t, logWriter.String(), "time_taken=") require.Contains(t, logWriter.String(), "path=/api/v1/query") require.Contains(t, logWriter.String(), "param_query=up") + require.Contains(t, logWriter.String(), "test-trace-abc123") }