From 52ba5e91db775ec737c47fd9b01c865650da5aa5 Mon Sep 17 00:00:00 2001 From: Oleksandr Sanin Date: Fri, 29 May 2026 17:23:55 +0000 Subject: [PATCH] fix(detectors): skip GitLab v1 candidates with no digits to reduce false positives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v1 GitLab detector uses PrefixRegex, which searches up to 40 chars ahead of a "gitlab" keyword, crossing newlines. In Dockerfiles like: ARG GITLAB_ACCESS_TOKEN_TYPE=Private-Token ARG GITLAB_ACCESS_TOKEN ARG MAVEN_SETTINGS_PROFILE=test "MAVEN_SETTINGS_PROFILE" (22 chars, all [a-zA-Z0-9_]) is within 40 characters of the second GITLAB keyword and passes the Shannon entropy check (~4.1 > 3.6) because its letters are varied. It is then reported as a GitLab secret — a false positive. Real GitLab personal access tokens are randomly generated and almost always contain at least one digit. Variable names like MAVEN_SETTINGS_PROFILE never do. Add a KeyIsRandom guard (already used elsewhere in the codebase) to discard digit-free candidates before verification. Closes #4756 Signed-off-by: Oleksandr Sanin --- pkg/detectors/gitlab/v1/gitlab.go | 8 ++++++++ pkg/detectors/gitlab/v1/gitlab_v1_test.go | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/pkg/detectors/gitlab/v1/gitlab.go b/pkg/detectors/gitlab/v1/gitlab.go index d08cdb421eaa..977b8411c43e 100644 --- a/pkg/detectors/gitlab/v1/gitlab.go +++ b/pkg/detectors/gitlab/v1/gitlab.go @@ -79,6 +79,14 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result continue } + // real tokens are random and almost always contain at least one digit; + // variable names like MAVEN_SETTINGS_PROFILE have no digits and + // are a common source of false positives when they appear within + // 40 characters of a "gitlab" keyword on a preceding line. 
+ if !detectors.KeyIsRandom(resMatch) { + continue + } + for _, endpoint := range s.Endpoints() { s1 := detectors.Result{ DetectorType: detector_typepb.DetectorType_Gitlab, diff --git a/pkg/detectors/gitlab/v1/gitlab_v1_test.go b/pkg/detectors/gitlab/v1/gitlab_v1_test.go index b5d479bf76f2..8bdcf1cb5ef4 100644 --- a/pkg/detectors/gitlab/v1/gitlab_v1_test.go +++ b/pkg/detectors/gitlab/v1/gitlab_v1_test.go @@ -45,6 +45,16 @@ func TestGitLab_Pattern(t *testing.T) { input: "GITLAB_TOKEN=ABc123456789dEFghIJK", want: []string{"ABc123456789dEFghIJKhttps://gitlab.com"}, }, + { + // Regression test for https://github.com/trufflesecurity/trufflehog/issues/4756 + // ARG variable names that appear after GITLAB_* args in a Dockerfile must not be + // flagged as secrets because they contain no digits (KeyIsRandom check). + name: "no false positive for Dockerfile ARG variable name after GITLAB_ACCESS_TOKEN", + input: `ARG GITLAB_ACCESS_TOKEN_TYPE=Private-Token +ARG GITLAB_ACCESS_TOKEN +ARG MAVEN_SETTINGS_PROFILE=test`, + want: []string{}, + }, } for _, test := range tests {