Skip to content

Commit 01ddf6c

Browse files
authored
pkg/importsdk: fix wildcard generation for subdir csv files (pingcap#67472)
close pingcap#67471
1 parent d66a662 commit 01ddf6c

File tree

2 files changed

+100
-3
lines changed

2 files changed

+100
-3
lines changed

pkg/importsdk/pattern.go

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,18 +158,61 @@ func longestCommonSuffix(strs []string, prefixLen int) string {
158158
return suffix
159159
}
160160

161-
// generatePrefixSuffixPattern returns a wildcard pattern that matches all and only the given paths
162-
// by finding the longest common prefix and suffix among them, and placing a '*' wildcard in between.
163-
func generatePrefixSuffixPattern(paths []string) string {
161+
func generateFlatPrefixSuffixPattern(paths []string) string {
164162
if len(paths) == 0 {
165163
return ""
166164
}
167165
if len(paths) == 1 {
168166
return paths[0]
169167
}
168+
allSame := true
169+
for _, p := range paths[1:] {
170+
if p != paths[0] {
171+
allSame = false
172+
break
173+
}
174+
}
175+
if allSame {
176+
return paths[0]
177+
}
170178

171179
prefix := longestCommonPrefix(paths)
172180
suffix := longestCommonSuffix(paths, len(prefix))
173181

174182
return prefix + "*" + suffix
175183
}
184+
185+
// generatePrefixSuffixPattern returns a wildcard pattern that matches all and only the given paths.
186+
// When all paths have the same number of '/'-separated components, it generates the wildcard
187+
// component by component so every '*' stays within a single path segment, which is required by
188+
// filepath.Match.
189+
func generatePrefixSuffixPattern(paths []string) string {
190+
if len(paths) <= 1 {
191+
return generateFlatPrefixSuffixPattern(paths)
192+
}
193+
194+
componentCount := -1
195+
pathComponents := make([][]string, 0, len(paths))
196+
for _, p := range paths {
197+
components := strings.Split(p, "/")
198+
if componentCount == -1 {
199+
componentCount = len(components)
200+
} else if len(components) != componentCount {
201+
return generateFlatPrefixSuffixPattern(paths)
202+
}
203+
pathComponents = append(pathComponents, components)
204+
}
205+
if componentCount <= 1 {
206+
return generateFlatPrefixSuffixPattern(paths)
207+
}
208+
209+
componentPatterns := make([]string, 0, componentCount)
210+
for componentIdx := range componentCount {
211+
componentValues := make([]string, 0, len(pathComponents))
212+
for _, components := range pathComponents {
213+
componentValues = append(componentValues, components[componentIdx])
214+
}
215+
componentPatterns = append(componentPatterns, generateFlatPrefixSuffixPattern(componentValues))
216+
}
217+
return strings.Join(componentPatterns, "/")
218+
}

pkg/importsdk/pattern_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,16 @@ func TestValidatePattern(t *testing.T) {
131131
// If pattern doesn't match our table's file, it's also invalid
132132
require.False(t, isValidPattern("*.csv", tableFiles, smallAll))
133133

134+
subdirTableFiles := map[string]struct{}{
135+
"dir/subdir1/a.csv": {},
136+
"dir/subdir2/b.csv": {},
137+
}
138+
subdirAll := map[string]mydump.FileInfo{
139+
"dir/subdir1/a.csv": {},
140+
"dir/subdir2/b.csv": {},
141+
}
142+
require.True(t, isValidPattern("dir/subdir*/*.csv", subdirTableFiles, subdirAll))
143+
134144
// empty pattern => invalid
135145
require.False(t, isValidPattern("", tableFiles, smallAll))
136146
}
@@ -205,4 +215,48 @@ func TestGenerateWildcardPath(t *testing.T) {
205215
_, err = generateWildcardPath(files4, allFiles4)
206216
require.Error(t, err)
207217
require.Contains(t, err.Error(), "cannot generate a unique wildcard pattern")
218+
219+
files5 := []mydump.FileInfo{
220+
{
221+
TableName: filter.Table{Schema: "db", Name: "tb"},
222+
FileMeta: mydump.SourceFileMeta{
223+
Path: "dir/subdir1/a.csv",
224+
Type: mydump.SourceTypeCSV,
225+
Compression: mydump.CompressionNone,
226+
},
227+
},
228+
{
229+
TableName: filter.Table{Schema: "db", Name: "tb"},
230+
FileMeta: mydump.SourceFileMeta{
231+
Path: "dir/subdir1/b.csv",
232+
Type: mydump.SourceTypeCSV,
233+
Compression: mydump.CompressionNone,
234+
},
235+
},
236+
{
237+
TableName: filter.Table{Schema: "db", Name: "tb"},
238+
FileMeta: mydump.SourceFileMeta{
239+
Path: "dir/subdir2/c.csv",
240+
Type: mydump.SourceTypeCSV,
241+
Compression: mydump.CompressionNone,
242+
},
243+
},
244+
{
245+
TableName: filter.Table{Schema: "db", Name: "tb"},
246+
FileMeta: mydump.SourceFileMeta{
247+
Path: "dir/subdir2/d.csv",
248+
Type: mydump.SourceTypeCSV,
249+
Compression: mydump.CompressionNone,
250+
},
251+
},
252+
}
253+
allFiles5 := map[string]mydump.FileInfo{
254+
files5[0].FileMeta.Path: files5[0],
255+
files5[1].FileMeta.Path: files5[1],
256+
files5[2].FileMeta.Path: files5[2],
257+
files5[3].FileMeta.Path: files5[3],
258+
}
259+
path5, err := generateWildcardPath(files5, allFiles5)
260+
require.NoError(t, err)
261+
require.Equal(t, "dir/subdir*/*.csv", path5)
208262
}

0 commit comments

Comments
 (0)