|
| 1 | +From a8afee1089ec2ae9ab5837b438d07338aefb3bc4 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Samuel Attard <sam@electronjs.org> |
| 3 | +Date: Wed, 22 Apr 2026 16:27:51 -0700 |
| 4 | +Subject: [PATCH] siso: retry transient ERROR_INVALID_PARAMETER when opening |
| 5 | + ninja files on Windows |
| 6 | + |
| 7 | +ManifestParser.Load fans out across all subninja files (~90k in a |
| 8 | +Chromium build) at NumCPU parallelism. On Windows builders where out/ |
| 9 | +is served through a filesystem filter driver (e.g. bindflt/wcifs for |
| 10 | +container bind mounts), CreateFileW can intermittently return |
| 11 | +ERROR_INVALID_PARAMETER under this concurrent open burst. The previous |
| 12 | +patch removes the redundant per-chunk re-open, but the single remaining |
| 13 | +open per file can still hit the race; without a retry a single transient |
| 14 | +failure aborts the entire manifest load. |
| 15 | + |
| 16 | +Wrap the remaining os.Open call in readFile in a small Windows-only |
| 17 | +retry for ERROR_INVALID_PARAMETER (5 attempts, 5-80ms backoff). Each |
| 18 | +retry is logged via clog.Warningf and also written to stderr so it is |
| 19 | +visible in CI step output where glog warnings are file-only by default. |
| 20 | +Other platforms keep the direct os.Open path. |
| 21 | +--- |
| 22 | + siso/toolsupport/ninjautil/file_parser.go | 3 +- |
| 23 | + siso/toolsupport/ninjautil/openfile_other.go | 18 +++++++ |
| 24 | + .../toolsupport/ninjautil/openfile_windows.go | 50 +++++++++++++++++++ |
| 25 | + 3 files changed, 69 insertions(+), 2 deletions(-) |
| 26 | + create mode 100644 siso/toolsupport/ninjautil/openfile_other.go |
| 27 | + create mode 100644 siso/toolsupport/ninjautil/openfile_windows.go |
| 28 | + |
| 29 | +diff --git a/siso/toolsupport/ninjautil/file_parser.go b/siso/toolsupport/ninjautil/file_parser.go |
| 30 | +index 6311666..324528d 100644 |
| 31 | +--- a/siso/toolsupport/ninjautil/file_parser.go |
| 32 | ++++ b/siso/toolsupport/ninjautil/file_parser.go |
| 33 | +@@ -7,7 +7,6 @@ package ninjautil |
| 34 | + import ( |
| 35 | + "context" |
| 36 | + "fmt" |
| 37 | +- "os" |
| 38 | + "runtime/trace" |
| 39 | + "sync" |
| 40 | + "time" |
| 41 | +@@ -91,7 +90,7 @@ func (p *fileParser) parseFile(ctx context.Context, fname string) error { |
| 42 | + // readFile reads a file of fname in parallel. |
| 43 | + func (p *fileParser) readFile(ctx context.Context, fname string) ([]byte, error) { |
| 44 | + defer trace.StartRegion(ctx, "ninja.read").End() |
| 45 | +- f, err := os.Open(fname) |
| 46 | ++ f, err := openFile(ctx, fname) |
| 47 | + if err != nil { |
| 48 | + return nil, err |
| 49 | + } |
| 50 | +diff --git a/siso/toolsupport/ninjautil/openfile_other.go b/siso/toolsupport/ninjautil/openfile_other.go |
| 51 | +new file mode 100644 |
| 52 | +index 0000000..9fca690 |
| 53 | +--- /dev/null |
| 54 | ++++ b/siso/toolsupport/ninjautil/openfile_other.go |
| 55 | +@@ -0,0 +1,18 @@ |
| 56 | ++// Copyright 2026 The Chromium Authors |
| 57 | ++// Use of this source code is governed by a BSD-style license that can be |
| 58 | ++// found in the LICENSE file. |
| 59 | ++ |
| 60 | ++//go:build !windows |
| 61 | ++ |
| 62 | ++package ninjautil |
| 63 | ++ |
| 64 | ++import ( |
| 65 | ++ "context" |
| 66 | ++ "os" |
| 67 | ++) |
| 68 | ++ |
| 69 | ++// openFile opens fname for reading. |
| 70 | ++// See openfile_windows.go for the Windows variant with transient-error retry. |
| 71 | ++func openFile(ctx context.Context, fname string) (*os.File, error) { |
| 72 | ++ return os.Open(fname) |
| 73 | ++} |
| 74 | +diff --git a/siso/toolsupport/ninjautil/openfile_windows.go b/siso/toolsupport/ninjautil/openfile_windows.go |
| 75 | +new file mode 100644 |
| 76 | +index 0000000..f9d8e9d |
| 77 | +--- /dev/null |
| 78 | ++++ b/siso/toolsupport/ninjautil/openfile_windows.go |
| 79 | +@@ -0,0 +1,50 @@ |
| 80 | ++// Copyright 2026 The Chromium Authors |
| 81 | ++// Use of this source code is governed by a BSD-style license that can be |
| 82 | ++// found in the LICENSE file. |
| 83 | ++ |
| 84 | ++//go:build windows |
| 85 | ++ |
| 86 | ++package ninjautil |
| 87 | ++ |
| 88 | ++import ( |
| 89 | ++ "context" |
| 90 | ++ "errors" |
| 91 | ++ "fmt" |
| 92 | ++ "os" |
| 93 | ++ "time" |
| 94 | ++ |
| 95 | ++ "golang.org/x/sys/windows" |
| 96 | ++ |
| 97 | ++ "go.chromium.org/build/siso/o11y/clog" |
| 98 | ++) |
| 99 | ++ |
| 100 | ++// openFile opens fname for reading, retrying transient |
| 101 | ++// ERROR_INVALID_PARAMETER failures. |
| 102 | ++// |
| 103 | ++// On Windows, CreateFileW can intermittently return |
| 104 | ++// ERROR_INVALID_PARAMETER when the target lives behind a filesystem |
| 105 | ++// filter driver (e.g. bindflt/wcifs for container bind mounts) under |
| 106 | ++// highly concurrent opens. loadFile fans out across ~90k subninja |
| 107 | ++// files at NumCPU parallelism, so a single transient failure would |
| 108 | ++// otherwise abort the whole manifest load. |
| 109 | ++func openFile(ctx context.Context, fname string) (*os.File, error) { |
| 110 | ++ const maxAttempts = 5 |
| 111 | ++ delay := 5 * time.Millisecond |
| 112 | ++ for i := 0; ; i++ { |
| 113 | ++ f, err := os.Open(fname) |
| 114 | ++ if err == nil { |
| 115 | ++ return f, nil |
| 116 | ++ } |
| 117 | ++ if i+1 >= maxAttempts || !errors.Is(err, windows.ERROR_INVALID_PARAMETER) { |
| 118 | ++ return nil, err |
| 119 | ++ } |
| 120 | ++ clog.Warningf(ctx, "open %s: %v; retrying (%d/%d) after %s", fname, err, i+1, maxAttempts, delay) |
| 121 | ++ fmt.Fprintf(os.Stderr, "siso: open %s: %v; retrying (%d/%d) after %s\n", fname, err, i+1, maxAttempts, delay) |
| 122 | ++ select { |
| 123 | ++ case <-time.After(delay): |
| 124 | ++ case <-ctx.Done(): |
| 125 | ++ return nil, context.Cause(ctx) |
| 126 | ++ } |
| 127 | ++ delay *= 2 |
| 128 | ++ } |
| 129 | ++} |
| 130 | +-- |
| 131 | +2.53.0 |
| 132 | + |
0 commit comments