Skip to content

Commit 6ebdcc0

Browse files
authored
Make strict dropCR behavior optional via ParseOptions (#78)
1 parent 005b930 commit 6ebdcc0

6 files changed

Lines changed: 336 additions & 20 deletions

File tree

diff/diff.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,18 @@ import (
55
"time"
66
)
77

8+
// ParseOptions specifies options for parsing diffs.
9+
type ParseOptions struct {
10+
// KeepCR specifies whether to keep trailing carriage return characters (\r) in lines.
11+
KeepCR bool
12+
}
13+
814
// A FileDiff represents a unified diff for a single file.
915
//
1016
// A file unified diff has a header that resembles the following:
1117
//
12-
// --- oldname 2009-10-11 15:12:20.000000000 -0700
13-
// +++ newname 2009-10-11 15:12:30.000000000 -0700
18+
// --- oldname 2009-10-11 15:12:20.000000000 -0700
19+
// +++ newname 2009-10-11 15:12:30.000000000 -0700
1420
type FileDiff struct {
1521
// the original name of the file
1622
OrigName string

diff/diff_test.go

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,14 @@ package diff
22

33
import (
44
"bytes"
5+
"github.com/google/go-cmp/cmp"
56
"io"
67
"io/ioutil"
78
"path/filepath"
89
"reflect"
910
"strings"
1011
"testing"
1112
"time"
12-
13-
"github.com/google/go-cmp/cmp"
1413
)
1514

1615
func unix(sec int64) *time.Time {
@@ -1123,3 +1122,99 @@ func TestFileDiff_Stat(t *testing.T) {
11231122
}
11241123
}
11251124
}
1125+
1126+
func TestParseMultiFileDiff_Comprehensive(t *testing.T) {
1127+
diffData, err := ioutil.ReadFile(filepath.Join("testdata", "sample_multi_file.diff"))
1128+
if err != nil {
1129+
t.Fatal(err)
1130+
}
1131+
1132+
fds, err := ParseMultiFileDiff(diffData)
1133+
if err != nil {
1134+
t.Fatalf("ParseMultiFileDiff failed: %v", err)
1135+
}
1136+
1137+
if len(fds) != 2 {
1138+
t.Fatalf("Expected 2 file diffs, got %d", len(fds))
1139+
}
1140+
1141+
// Verify first file
1142+
fd1 := fds[0]
1143+
if fd1.OrigName != "oldname1" || fd1.NewName != "newname1" {
1144+
t.Errorf("Unexpected names for file 1: %q -> %q", fd1.OrigName, fd1.NewName)
1145+
}
1146+
if len(fd1.Extended) != 3 {
1147+
t.Errorf("Expected 3 extended headers for file 1, got %d", len(fd1.Extended))
1148+
}
1149+
1150+
if len(fd1.Hunks) != 2 {
1151+
t.Fatalf("Expected 2 hunks for file 1, got %d", len(fd1.Hunks))
1152+
}
1153+
1154+
h1 := fd1.Hunks[0]
1155+
if h1.OrigStartLine != 1 || h1.OrigLines != 3 || h1.NewStartLine != 1 || h1.NewLines != 9 {
1156+
t.Errorf("Unexpected dimensions for file 1, hunk 1: Orig(%d, %d) New(%d, %d)", h1.OrigStartLine, h1.OrigLines, h1.NewStartLine, h1.NewLines)
1157+
}
1158+
1159+
h2 := fd1.Hunks[1]
1160+
if h2.OrigStartLine != 5 || h2.OrigLines != 16 || h2.NewStartLine != 11 || h2.NewLines != 10 {
1161+
t.Errorf("Unexpected dimensions for file 1, hunk 2: Orig(%d, %d) New(%d, %d)", h2.OrigStartLine, h2.OrigLines, h2.NewStartLine, h2.NewLines)
1162+
}
1163+
1164+
// Verify second file
1165+
fd2 := fds[1]
1166+
if fd2.OrigName != "oldname2" || fd2.NewName != "newname2" {
1167+
t.Errorf("Unexpected names for file 2: %q -> %q", fd2.OrigName, fd2.NewName)
1168+
}
1169+
if len(fd2.Hunks) != 2 {
1170+
t.Fatalf("Expected 2 hunks for file 2, got %d", len(fd2.Hunks))
1171+
}
1172+
1173+
h3 := fd2.Hunks[0]
1174+
if h3.OrigStartLine != 1 || h3.OrigLines != 3 || h3.NewStartLine != 1 || h3.NewLines != 9 {
1175+
t.Errorf("Unexpected dimensions for file 2, hunk 1: Orig(%d, %d) New(%d, %d)", h3.OrigStartLine, h3.OrigLines, h3.NewStartLine, h3.NewLines)
1176+
}
1177+
}
1178+
1179+
func TestParseMultiFileDiff_KeepCR_E2E(t *testing.T) {
1180+
// A full multi-file diff with CRLF line endings
1181+
input := "--- file1\r\n" +
1182+
"+++ file1\r\n" +
1183+
"@@ -1,3 +1,3 @@\r\n" +
1184+
" line1\r\n" +
1185+
"-line2\r\n" +
1186+
"+line2_new\r\n" +
1187+
" line3\r\n" +
1188+
"diff --git a/file2 b/file2\r\n" +
1189+
"new file mode 100644\r\n" +
1190+
"index 0000000..e69de29\r\n"
1191+
1192+
opts := ParseOptions{KeepCR: true}
1193+
fds, err := ParseMultiFileDiffOptions([]byte(input), opts)
1194+
if err != nil {
1195+
t.Fatalf("ParseMultiFileDiffOptions failed: %v", err)
1196+
}
1197+
1198+
if len(fds) != 2 {
1199+
t.Fatalf("Expected 2 file diffs, got %d", len(fds))
1200+
}
1201+
1202+
// File 1 Verify Hunks contain \r
1203+
fd1 := fds[0]
1204+
if len(fd1.Hunks) != 1 {
1205+
t.Fatalf("Expected 1 hunk for file 1, got %d", len(fd1.Hunks))
1206+
}
1207+
h1 := fd1.Hunks[0]
1208+
if !strings.Contains(string(h1.Body), "\r\n") {
1209+
t.Errorf("Expected Hunk body to contain CRLF, got: %q", string(h1.Body))
1210+
}
1211+
1212+
// File 2 Verify filename is NOT "file2\r"
1213+
fd2 := fds[1]
1214+
if fd2.NewName != "b/file2" {
1215+
t.Errorf("Expected NewName 'b/file2', got %q", fd2.NewName)
1216+
}
1217+
if len(fd2.Extended) != 3 {
1218+
t.Errorf("Expected 3 extended headers for file 2, got %d", len(fd2.Extended))
1219+
}
1220+
}

diff/parse.go

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package diff
22

33
import (
4-
"bufio"
54
"bytes"
65
"errors"
76
"fmt"
@@ -17,13 +16,24 @@ import (
1716
// case of per-file errors. If it cannot detect when the diff of the next file
1817
// begins, the hunks are added to the FileDiff of the previous file.
1918
func ParseMultiFileDiff(diff []byte) ([]*FileDiff, error) {
20-
return NewMultiFileDiffReader(bytes.NewReader(diff)).ReadAllFiles()
19+
return ParseMultiFileDiffOptions(diff, ParseOptions{})
20+
}
21+
22+
// ParseMultiFileDiffOptions parses a multi-file unified diff with the given options.
23+
func ParseMultiFileDiffOptions(diff []byte, opts ParseOptions) ([]*FileDiff, error) {
24+
return NewMultiFileDiffReaderOptions(bytes.NewReader(diff), opts).ReadAllFiles()
2125
}
2226

2327
// NewMultiFileDiffReader returns a new MultiFileDiffReader that reads
2428
// a multi-file unified diff from r.
2529
func NewMultiFileDiffReader(r io.Reader) *MultiFileDiffReader {
26-
return &MultiFileDiffReader{reader: newLineReader(r)}
30+
return NewMultiFileDiffReaderOptions(r, ParseOptions{})
31+
}
32+
33+
// NewMultiFileDiffReaderOptions returns a new MultiFileDiffReader that reads
34+
// a multi-file unified diff from r with the given options.
35+
func NewMultiFileDiffReaderOptions(r io.Reader, opts ParseOptions) *MultiFileDiffReader {
36+
return &MultiFileDiffReader{reader: newLineReaderOptions(r, opts)}
2737
}
2838

2939
// MultiFileDiffReader reads a multi-file unified diff.
@@ -153,13 +163,24 @@ func (r *MultiFileDiffReader) ReadAllFiles() ([]*FileDiff, error) {
153163

154164
// ParseFileDiff parses a file unified diff.
155165
func ParseFileDiff(diff []byte) (*FileDiff, error) {
156-
return NewFileDiffReader(bytes.NewReader(diff)).Read()
166+
return ParseFileDiffOptions(diff, ParseOptions{})
167+
}
168+
169+
// ParseFileDiffOptions parses a file unified diff with the given options.
170+
func ParseFileDiffOptions(diff []byte, opts ParseOptions) (*FileDiff, error) {
171+
return NewFileDiffReaderOptions(bytes.NewReader(diff), opts).Read()
157172
}
158173

159174
// NewFileDiffReader returns a new FileDiffReader that reads a file
160175
// unified diff.
161176
func NewFileDiffReader(r io.Reader) *FileDiffReader {
162-
return &FileDiffReader{reader: &lineReader{reader: bufio.NewReader(r)}}
177+
return NewFileDiffReaderOptions(r, ParseOptions{})
178+
}
179+
180+
// NewFileDiffReaderOptions returns a new FileDiffReader that reads a file
181+
// unified diff with the given options.
182+
func NewFileDiffReaderOptions(r io.Reader, opts ParseOptions) *FileDiffReader {
183+
return &FileDiffReader{reader: newLineReaderOptions(r, opts)}
163184
}
164185

165186
// FileDiffReader reads a unified file diff.
@@ -405,6 +426,7 @@ func readQuotedFilename(text string) (value string, remainder string, err error)
405426
// valid syntax, it may be impossible to extract filenames; if so, the
406427
// function returns ("", "", true).
407428
func parseDiffGitArgs(diffArgs string) (string, string, bool) {
429+
diffArgs = strings.TrimSuffix(diffArgs, "\r")
408430
length := len(diffArgs)
409431
if length < 3 {
410432
return "", "", false
@@ -540,6 +562,7 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {
540562
return
541563
}
542564
rawFilename := header[len(prefix):]
565+
rawFilename = strings.TrimSuffix(rawFilename, "\r")
543566

544567
// extract the filename prefix (e.g. "a/") from the 'diff --git' line.
545568
var prefixLetterIndex int
@@ -586,7 +609,12 @@ var (
586609
// only of hunks and not include a file header; if it has a file
587610
// header, use ParseFileDiff.
588611
func ParseHunks(diff []byte) ([]*Hunk, error) {
589-
r := NewHunksReader(bytes.NewReader(diff))
612+
return ParseHunksOptions(diff, ParseOptions{})
613+
}
614+
615+
// ParseHunksOptions parses hunks from a unified diff with the given options.
616+
func ParseHunksOptions(diff []byte, opts ParseOptions) ([]*Hunk, error) {
617+
r := NewHunksReaderOptions(bytes.NewReader(diff), opts)
590618
hunks, err := r.ReadAllHunks()
591619
if err != nil {
592620
return nil, err
@@ -597,7 +625,13 @@ func ParseHunks(diff []byte) ([]*Hunk, error) {
597625
// NewHunksReader returns a new HunksReader that reads unified diff hunks
598626
// from r.
599627
func NewHunksReader(r io.Reader) *HunksReader {
600-
return &HunksReader{reader: &lineReader{reader: bufio.NewReader(r)}}
628+
return NewHunksReaderOptions(r, ParseOptions{})
629+
}
630+
631+
// NewHunksReaderOptions returns a new HunksReader that reads unified diff hunks
632+
// from r with the given options.
633+
func NewHunksReaderOptions(r io.Reader, opts ParseOptions) *HunksReader {
634+
return &HunksReader{reader: newLineReaderOptions(r, opts)}
601635
}
602636

603637
// A HunksReader reads hunks from a unified diff.
@@ -701,7 +735,7 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) {
701735
// handle that case.
702736
return r.hunk, &ParseError{r.line, r.offset, &ErrBadHunkLine{Line: line}}
703737
}
704-
if bytes.Equal(line, []byte(noNewlineMessage)) {
738+
if bytes.Equal(bytes.TrimSuffix(line, []byte("\r")), []byte(noNewlineMessage)) {
705739
if lastLineFromOrig {
706740
// Retain the newline in the body (otherwise the
707741
// diff line would be like "-a+b", where "+b" is
@@ -755,6 +789,7 @@ func linePrefix(c byte) bool {
755789
// if its value is 1. normalizeHeader returns an error if the header
756790
// is not in the correct format.
757791
func normalizeHeader(header string) (string, string, error) {
792+
header = strings.TrimSuffix(header, "\r")
758793
// Split the header into five parts: the first '@@', the two
759794
// ranges, the last '@@', and the optional section.
760795
pieces := strings.SplitN(header, " ", 5)
@@ -815,7 +850,8 @@ func parseOnlyInMessage(line []byte) (bool, []byte, []byte) {
815850
if idx < 0 {
816851
return false, nil, nil
817852
}
818-
return true, line[:idx], line[idx+2:]
853+
filename := bytes.TrimSuffix(line[idx+2:], []byte("\r"))
854+
return true, line[:idx], filename
819855
}
820856

821857
// A ParseError is a description of a unified diff syntax error.

diff/reader_util.go

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,29 @@ func newLineReader(r io.Reader) *lineReader {
1313
return &lineReader{reader: bufio.NewReader(r)}
1414
}
1515

16+
func newLineReaderOptions(r io.Reader, opts ParseOptions) *lineReader {
17+
return &lineReader{
18+
reader: bufio.NewReader(r),
19+
keepCR: opts.KeepCR,
20+
}
21+
}
22+
1623
// lineReader is a wrapper around a bufio.Reader that caches the next line to
1724
// provide lookahead functionality for the next two lines.
1825
type lineReader struct {
1926
reader *bufio.Reader
2027

2128
cachedNextLine []byte
2229
cachedNextLineErr error
30+
31+
keepCR bool
2332
}
2433

2534
// readLine returns the next unconsumed line and advances the internal cache of
2635
// the lineReader.
2736
func (l *lineReader) readLine() ([]byte, error) {
2837
if l.cachedNextLine == nil && l.cachedNextLineErr == nil {
29-
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
38+
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR)
3039
}
3140

3241
if l.cachedNextLineErr != nil {
@@ -35,7 +44,7 @@ func (l *lineReader) readLine() ([]byte, error) {
3544

3645
next := l.cachedNextLine
3746

38-
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
47+
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR)
3948

4049
return next, nil
4150
}
@@ -47,7 +56,7 @@ func (l *lineReader) readLine() ([]byte, error) {
4756
// be used when at the end of the file.
4857
func (l *lineReader) nextLineStartsWith(prefix string) (bool, error) {
4958
if l.cachedNextLine == nil && l.cachedNextLineErr == nil {
50-
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
59+
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR)
5160
}
5261

5362
return l.lineHasPrefix(l.cachedNextLine, prefix, l.cachedNextLineErr)
@@ -64,7 +73,7 @@ func (l *lineReader) nextLineStartsWith(prefix string) (bool, error) {
6473
// returned.
6574
func (l *lineReader) nextNextLineStartsWith(prefix string) (bool, error) {
6675
if l.cachedNextLine == nil && l.cachedNextLineErr == nil {
67-
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
76+
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR)
6877
}
6978

7079
next, err := l.reader.Peek(len(prefix))
@@ -93,7 +102,7 @@ func (l *lineReader) lineHasPrefix(line []byte, prefix string, readErr error) (b
93102
// the next line in the Reader with the trailing newline stripped. It will return an
94103
// io.EOF error when there is nothing left to read (at the start of the function call). It
95104
// will return any other errors it receives from the underlying call to ReadBytes.
96-
func readLine(r *bufio.Reader) ([]byte, error) {
105+
func readLine(r *bufio.Reader, keepCR bool) ([]byte, error) {
97106
line_, err := r.ReadBytes('\n')
98107
if err == io.EOF {
99108
if len(line_) == 0 {
@@ -103,12 +112,18 @@ func readLine(r *bufio.Reader) ([]byte, error) {
103112
// ReadBytes returned io.EOF, because it didn't find another newline, but there is
104113
// still the remainder of the file to return as a line.
105114
line := line_
115+
if !keepCR {
116+
return dropCR(line), nil
117+
}
106118
return line, nil
107119
} else if err != nil {
108120
return nil, err
109121
}
110122
line := line_[0 : len(line_)-1]
111-
return dropCR(line), nil
123+
if !keepCR {
124+
return dropCR(line), nil
125+
}
126+
return line, nil
112127
}
113128

114129
// dropCR drops a terminal \r from the data.

diff/reader_util_test.go

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ index 0000000..3be2928`,
5151
in := bufio.NewReader(strings.NewReader(test.input))
5252
out := []string{}
5353
for {
54-
l, err := readLine(in)
54+
l, err := readLine(in, false)
5555
if err == io.EOF {
5656
break
5757
}
@@ -207,3 +207,24 @@ ccc rest of line`
207207

208208
}
209209
}
210+
211+
func TestReadLine_KeepCR(t *testing.T) {
212+
input := "line1\r\nline2\r\n"
213+
in := bufio.NewReader(strings.NewReader(input))
214+
215+
l, err := readLine(in, true)
216+
if err != nil {
217+
t.Fatal(err)
218+
}
219+
if string(l) != "line1\r" {
220+
t.Errorf("expected line1\\r, got %q", string(l))
221+
}
222+
223+
l, err = readLine(in, true)
224+
if err != nil {
225+
t.Fatal(err)
226+
}
227+
if string(l) != "line2\r" {
228+
t.Errorf("expected line2\\r, got %q", string(l))
229+
}
230+
}

0 commit comments

Comments
 (0)