Skip to content

Commit 05529c9

Browse files
aThorp96pipelines-as-code[bot]
authored andcommitted
fix(gitlab): workaround the gitlab diff API limit when necessary
When the Gitlab Merge Request exceeds the Diff API limit[1] the merge request diff API will not return the full list of changed files. To circumvent this limitation, we can use the Get Merge Request API to detect that the diff exceeds the limit and then switch to the Compare API to get the full diff. We do not use the Compare API in all instances, since it does not support paging and would increase the memory used by PaC during merge request event processing; by only using the Compare API when we know the limit is exceeded, PaC's memory usage will be more consistent and only spike when such a merge request is made. [1] - https://docs.gitlab.com/administration/instance_limits/#diff-limits
1 parent 88f59ae commit 05529c9

File tree

2 files changed

+198
-33
lines changed

2 files changed

+198
-33
lines changed

pkg/provider/gitlab/gitlab.go

Lines changed: 97 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"path"
1010
"path/filepath"
1111
"regexp"
12+
"strconv"
1213
"strings"
1314

1415
"github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/v1alpha1"
@@ -596,24 +597,22 @@ func (v *Provider) fetchChangedFiles(_ context.Context, runevent *info.Event) (c
596597

597598
switch runevent.TriggerTarget {
598599
case triggertype.PullRequest:
599-
opt := &gitlab.ListMergeRequestDiffsOptions{
600-
ListOptions: gitlab.ListOptions{
601-
OrderBy: "id",
602-
Pagination: "keyset",
603-
PerPage: defaultGitlabListOptions.PerPage,
604-
Sort: "asc",
605-
},
600+
var err error
601+
changedFiles, err = v.mergeRequestFilesChanged(runevent)
602+
if err != nil {
603+
return changedfiles.ChangedFiles{}, err
606604
}
607-
options := []gitlab.RequestOptionFunc{}
605+
case triggertype.Push:
606+
options := gitlab.GetCommitDiffOptions{ListOptions: defaultGitlabListOptions}
607+
pageOpts := []gitlab.RequestOptionFunc{}
608608

609609
for {
610-
mrchanges, resp, err := v.Client().MergeRequests.ListMergeRequestDiffs(v.targetProjectID, int64(runevent.PullRequestNumber), opt, options...)
610+
pushChanges, resp, err := v.Client().Commits.GetCommitDiff(v.sourceProjectID, runevent.SHA, &options, pageOpts...)
611611
if err != nil {
612-
// TODO: Should this return the files found so far?
613612
return changedfiles.ChangedFiles{}, err
614613
}
615614

616-
for _, change := range mrchanges {
615+
for _, change := range pushChanges {
617616
changedFiles.All = append(changedFiles.All, change.NewPath)
618617
if change.NewFile {
619618
changedFiles.Added = append(changedFiles.Added, change.NewPath)
@@ -629,27 +628,81 @@ func (v *Provider) fetchChangedFiles(_ context.Context, runevent *info.Event) (c
629628
}
630629
}
631630

632-
// Exit the loop when we've seen all pages.
633631
if resp.NextLink == "" {
632+
// Exit the loop when we've seen all pages.
634633
break
635634
}
636-
637635
// Otherwise, set param to query the next page
638-
options = []gitlab.RequestOptionFunc{
636+
pageOpts = []gitlab.RequestOptionFunc{
639637
gitlab.WithKeysetPaginationParameters(resp.NextLink),
640638
}
641639
}
642-
case triggertype.Push:
643-
options := gitlab.GetCommitDiffOptions{ListOptions: defaultGitlabListOptions}
644-
pageOpts := []gitlab.RequestOptionFunc{}
640+
default:
641+
// No action necessary
642+
}
643+
return changedFiles, nil
644+
}
645645

646+
func (v *Provider) mergeRequestFilesChanged(runevent *info.Event) (changedfiles.ChangedFiles, error) {
647+
diffTruncated, changeCount, err := v.isMergeRequestDiffTruncated(v.targetProjectID, int64(runevent.PullRequestNumber))
648+
if err != nil {
649+
return changedfiles.ChangedFiles{}, err
650+
}
651+
652+
changedFiles := changedfiles.ChangedFiles{
653+
All: make([]string, 0, changeCount),
654+
Added: []string{},
655+
Deleted: []string{},
656+
Renamed: []string{},
657+
}
658+
659+
options := []gitlab.RequestOptionFunc{}
660+
661+
// Only use the repository/compare API if the standard merge_request/diff API endpoint will
662+
// return a truncated set of changes. The repository/compare API returns the entire set of
663+
// changes without paging, so it can have a significantly heavier memory footprint if used
664+
// in all cases.
665+
if diffTruncated {
666+
compareOpts := &gitlab.CompareOptions{
667+
From: &runevent.BaseBranch,
668+
To: &runevent.SHA,
669+
}
670+
comparison, _, err := v.Client().Repositories.Compare(v.targetProjectID, compareOpts, options...)
671+
if err != nil {
672+
return changedfiles.ChangedFiles{}, err
673+
}
674+
675+
for _, change := range comparison.Diffs {
676+
changedFiles.All = append(changedFiles.All, change.NewPath)
677+
if change.NewFile {
678+
changedFiles.Added = append(changedFiles.Added, change.NewPath)
679+
}
680+
if change.DeletedFile {
681+
changedFiles.Deleted = append(changedFiles.Deleted, change.NewPath)
682+
}
683+
if !change.RenamedFile && !change.DeletedFile && !change.NewFile {
684+
changedFiles.Modified = append(changedFiles.Modified, change.NewPath)
685+
}
686+
if change.RenamedFile {
687+
changedFiles.Renamed = append(changedFiles.Renamed, change.NewPath)
688+
}
689+
}
690+
} else {
691+
diffOpts := &gitlab.ListMergeRequestDiffsOptions{
692+
ListOptions: gitlab.ListOptions{
693+
OrderBy: "id",
694+
Pagination: "keyset",
695+
PerPage: defaultGitlabListOptions.PerPage,
696+
Sort: "asc",
697+
},
698+
}
646699
for {
647-
pushChanges, resp, err := v.Client().Commits.GetCommitDiff(v.sourceProjectID, runevent.SHA, &options, pageOpts...)
700+
mrchanges, resp, err := v.Client().MergeRequests.ListMergeRequestDiffs(v.targetProjectID, int64(runevent.PullRequestNumber), diffOpts, options...)
648701
if err != nil {
702+
// TODO: Should this return the files found so far?
649703
return changedfiles.ChangedFiles{}, err
650704
}
651-
652-
for _, change := range pushChanges {
705+
for _, change := range mrchanges {
653706
changedFiles.All = append(changedFiles.All, change.NewPath)
654707
if change.NewFile {
655708
changedFiles.Added = append(changedFiles.Added, change.NewPath)
@@ -665,21 +718,41 @@ func (v *Provider) fetchChangedFiles(_ context.Context, runevent *info.Event) (c
665718
}
666719
}
667720

721+
// Exit the loop when we've seen all pages.
668722
if resp.NextLink == "" {
669-
// Exit the loop when we've seen all pages.
670723
break
671724
}
725+
672726
// Otherwise, set param to query the next page
673-
pageOpts = []gitlab.RequestOptionFunc{
727+
options = []gitlab.RequestOptionFunc{
674728
gitlab.WithKeysetPaginationParameters(resp.NextLink),
675729
}
676730
}
677-
default:
678-
// No action necessary
679731
}
680732
return changedFiles, nil
681733
}
682734

735+
// isMergeRequestDiffTruncated checks if the merge request is affected by the Gitlab API's Diff Limits.
736+
// This is determined by the Get Merge Request API's returning a ChangeCount number with a "+" suffix.
737+
// See also: https://docs.gitlab.com/administration/diff_limits/
738+
// Returns (bool: isTruncated, int: changeCount, err: error).
739+
func (v *Provider) isMergeRequestDiffTruncated(projectID, mergeRequestID int64) (bool, int, error) {
740+
out, _, err := v.Client().MergeRequests.GetMergeRequest(projectID, mergeRequestID, &gitlab.GetMergeRequestsOptions{})
741+
if err != nil {
742+
return false, 0, fmt.Errorf("error getting merge request %d: %w", mergeRequestID, err)
743+
}
744+
fileCount := 0
745+
truncated := strings.HasSuffix(out.ChangesCount, "+")
746+
if out.ChangesCount != "" {
747+
countStr := strings.TrimSuffix(out.ChangesCount, "+")
748+
fileCount, err = strconv.Atoi(countStr)
749+
if err != nil {
750+
return false, 0, err
751+
}
752+
}
753+
return truncated, fileCount, nil
754+
}
755+
683756
func (v *Provider) CreateToken(_ context.Context, _ []string, _ *info.Event) (string, error) {
684757
return "", nil
685758
}

pkg/provider/gitlab/gitlab_test.go

Lines changed: 101 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"net/http"
88
"os"
9+
"strconv"
910
"strings"
1011
"testing"
1112
"time"
@@ -1234,13 +1235,15 @@ func TestGetFiles(t *testing.T) {
12341235
name string
12351236
event *info.Event
12361237
mrchanges []*gitlab.MergeRequestDiff
1238+
diffAPILimitExceeded bool
12371239
pushChanges []*gitlab.Diff
12381240
wantAddedFilesCount int
12391241
wantDeletedFilesCount int
12401242
wantModifiedFilesCount int
12411243
wantRenamedFilesCount int
12421244
sourceProjectID, targetProjectID int
12431245
wantError bool
1246+
apiCallCount int64
12441247
}{
12451248
{
12461249
name: "pull-request",
@@ -1249,6 +1252,9 @@ func TestGetFiles(t *testing.T) {
12491252
Organization: "pullrequestowner",
12501253
Repository: "pullrequestrepository",
12511254
PullRequestNumber: 10,
1255+
BaseBranch: "main",
1256+
HeadBranch: "feature",
1257+
SHA: "abc123",
12521258
},
12531259
mrchanges: []*gitlab.MergeRequestDiff{
12541260
{
@@ -1272,6 +1278,44 @@ func TestGetFiles(t *testing.T) {
12721278
wantModifiedFilesCount: 1,
12731279
wantRenamedFilesCount: 1,
12741280
targetProjectID: 10,
1281+
apiCallCount: 2,
1282+
},
1283+
{
1284+
name: "merge request exceeding gitlab Diff API",
1285+
event: &info.Event{
1286+
TriggerTarget: "pull_request",
1287+
Organization: "pullrequestowner",
1288+
Repository: "pullrequestrepository",
1289+
PullRequestNumber: 10,
1290+
BaseBranch: "main",
1291+
HeadBranch: "feature",
1292+
SHA: "abc123",
1293+
},
1294+
mrchanges: []*gitlab.MergeRequestDiff{
1295+
{
1296+
NewPath: "modified.yaml",
1297+
},
1298+
{
1299+
NewPath: "added.doc",
1300+
NewFile: true,
1301+
},
1302+
{
1303+
NewPath: "removed.yaml",
1304+
DeletedFile: true,
1305+
},
1306+
{
1307+
NewPath: "renamed.doc",
1308+
RenamedFile: true,
1309+
},
1310+
},
1311+
wantAddedFilesCount: 1,
1312+
wantDeletedFilesCount: 1,
1313+
wantModifiedFilesCount: 1,
1314+
wantRenamedFilesCount: 1,
1315+
targetProjectID: 10,
1316+
wantError: false,
1317+
apiCallCount: 2,
1318+
diffAPILimitExceeded: true,
12751319
},
12761320
{
12771321
name: "pull-request with wrong project ID",
@@ -1304,6 +1348,7 @@ func TestGetFiles(t *testing.T) {
13041348
wantRenamedFilesCount: 0,
13051349
targetProjectID: 12,
13061350
wantError: true,
1351+
apiCallCount: 1,
13071352
},
13081353
{
13091354
name: "push",
@@ -1335,6 +1380,7 @@ func TestGetFiles(t *testing.T) {
13351380
wantModifiedFilesCount: 1,
13361381
wantRenamedFilesCount: 1,
13371382
sourceProjectID: 0,
1383+
apiCallCount: 1,
13381384
},
13391385
}
13401386
for _, tt := range tests {
@@ -1343,10 +1389,43 @@ func TestGetFiles(t *testing.T) {
13431389
metricsutils.ResetMetrics()
13441390
fakeclient, mux, teardown := thelp.Setup(t)
13451391
defer teardown()
1392+
13461393
if tt.event.TriggerTarget == "pull_request" {
1394+
mux.HandleFunc(fmt.Sprintf("/projects/10/merge_requests/%d", tt.event.PullRequestNumber),
1395+
func(rw http.ResponseWriter, _ *http.Request) {
1396+
resp := gitlab.MergeRequest{
1397+
ChangesCount: strconv.Itoa(len(tt.mrchanges)),
1398+
}
1399+
if tt.diffAPILimitExceeded {
1400+
resp.ChangesCount = strconv.Itoa(len(tt.mrchanges)-1) + "+"
1401+
}
1402+
jeez, err := json.Marshal(resp)
1403+
assert.NilError(t, err)
1404+
_, _ = rw.Write(jeez)
1405+
})
13471406
mux.HandleFunc(fmt.Sprintf("/projects/10/merge_requests/%d/diffs",
13481407
tt.event.PullRequestNumber), func(rw http.ResponseWriter, _ *http.Request) {
1349-
jeez, err := json.Marshal(tt.mrchanges)
1408+
diffAPIChanges := tt.mrchanges
1409+
if tt.diffAPILimitExceeded {
1410+
// The Diff API will not return the full list of files
1411+
diffAPIChanges = diffAPIChanges[:len(tt.mrchanges)-1]
1412+
}
1413+
1414+
jeez, err := json.Marshal(diffAPIChanges)
1415+
assert.NilError(t, err)
1416+
_, _ = rw.Write(jeez)
1417+
})
1418+
mux.HandleFunc("/projects/10/repository/compare", func(rw http.ResponseWriter, req *http.Request) {
1419+
if req.URL.Query().Get("from") != tt.event.BaseBranch || req.URL.Query().Get("to") != tt.event.SHA {
1420+
t.Errorf("expecting compare API call with 'from=%s&to=%s', got %s", tt.event.BaseBranch, tt.event.SHA, req.URL.Query().Encode())
1421+
}
1422+
1423+
// always return the full list, not subject to the Diff API size limitations
1424+
diffs := []*gitlab.Diff{}
1425+
for _, d := range tt.mrchanges {
1426+
diffs = append(diffs, &gitlab.Diff{NewPath: d.NewPath, NewFile: d.NewFile, RenamedFile: d.RenamedFile, DeletedFile: d.DeletedFile})
1427+
}
1428+
jeez, err := json.Marshal(gitlab.Compare{Diffs: diffs})
13501429
assert.NilError(t, err)
13511430
_, _ = rw.Write(jeez)
13521431
})
@@ -1385,14 +1464,14 @@ func TestGetFiles(t *testing.T) {
13851464
}
13861465

13871466
// Check caching
1388-
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 1)
1467+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, tt.apiCallCount)
13891468
_, _ = providerInfo.GetFiles(ctx, tt.event)
13901469
if tt.wantError {
13911470
// No caching on error
1392-
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 2)
1471+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, tt.apiCallCount*2)
13931472
} else {
13941473
// Cache API results on success
1395-
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 1)
1474+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, tt.apiCallCount)
13961475
}
13971476
})
13981477
}
@@ -1428,10 +1507,23 @@ func TestGetFilesPaging(t *testing.T) {
14281507
fakeclient, mux, teardown := thelp.Setup(t)
14291508
defer teardown()
14301509

1510+
changeCount := 5
1511+
apiCallCount := changeCount
14311512
if tt.event.TriggerTarget == "pull_request" {
1513+
// Extra request made to check the diff API limit
1514+
apiCallCount++
1515+
}
1516+
1517+
if tt.event.TriggerTarget == "pull_request" {
1518+
mux.HandleFunc(fmt.Sprintf("/projects/0/merge_requests/%d",
1519+
tt.event.PullRequestNumber), func(rw http.ResponseWriter, _ *http.Request) {
1520+
jeez, err := json.Marshal(gitlab.MergeRequest{ChangesCount: strconv.Itoa(changeCount)})
1521+
assert.NilError(t, err)
1522+
_, _ = rw.Write(jeez)
1523+
})
14321524
mux.HandleFunc(fmt.Sprintf("/projects/0/merge_requests/%d/diffs",
14331525
tt.event.PullRequestNumber), func(rw http.ResponseWriter, req *http.Request) {
1434-
pageCount := thelp.SetPagingHeader(t, rw, req, 5)
1526+
pageCount := thelp.SetPagingHeader(t, rw, req, changeCount)
14351527
jeez, err := json.Marshal([]*gitlab.MergeRequestDiff{{NewPath: fmt.Sprintf("change-%d.txt", pageCount)}})
14361528
assert.NilError(t, err)
14371529
_, _ = rw.Write(jeez)
@@ -1440,7 +1532,7 @@ func TestGetFilesPaging(t *testing.T) {
14401532
if tt.event.TriggerTarget == "push" {
14411533
mux.HandleFunc(fmt.Sprintf("/projects/0/repository/commits/%s/diff",
14421534
tt.event.SHA), func(rw http.ResponseWriter, req *http.Request) {
1443-
pageCount := thelp.SetPagingHeader(t, rw, req, 5)
1535+
pageCount := thelp.SetPagingHeader(t, rw, req, changeCount)
14441536
jeez, err := json.Marshal([]*gitlab.Diff{{NewPath: fmt.Sprintf("change-%d.txt", pageCount)}})
14451537
assert.NilError(t, err)
14461538
_, _ = rw.Write(jeez)
@@ -1454,12 +1546,12 @@ func TestGetFilesPaging(t *testing.T) {
14541546
changedFiles, err := providerInfo.GetFiles(ctx, tt.event)
14551547
assert.NilError(t, err, nil)
14561548
assert.DeepEqual(t, changedFiles.All, []string{"change-1.txt", "change-2.txt", "change-3.txt", "change-4.txt", "change-5.txt"})
1457-
assert.Equal(t, len(changedFiles.Modified), 5)
1549+
assert.Equal(t, len(changedFiles.Modified), changeCount)
14581550

14591551
// Check caching
1460-
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 5)
1552+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, int64(apiCallCount))
14611553
_, _ = providerInfo.GetFiles(ctx, tt.event)
1462-
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 5)
1554+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, int64(apiCallCount))
14631555
})
14641556
}
14651557
}

0 commit comments

Comments
 (0)