Skip to content

Commit f358c66

Browse files
knutwannhedenMBoegerstimtebeekgithub-actions[bot]
authored
Reduce the size of the match string in TextMatches data table (#4874)
* Reduce the size of the match string in `TextMatches` data table When a pattern is matched against a very long line, the result in the data table is now truncated to include at most 50 characters before and 50 characters after the match of that line. * Polish * make context size configurable * fix test setup * fix test setup of DeclarativeRecipeTest * Apply suggestions from code review Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * rework the truncate logic to be simpler --------- Co-authored-by: Merlin Bögershausen <merlin.boegershausen@rwth-aachen.de> Co-authored-by: Tim te Beek <tim@moderne.io> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 9c74556 commit f358c66

4 files changed

Lines changed: 166 additions & 23 deletions

File tree

rewrite-core/src/main/java/org/openrewrite/text/Find.java

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,14 @@ public String getDescription() {
9797
@Nullable
9898
Boolean description;
9999

100+
@Option(displayName = "Context size for Datatable",
101+
description = "The number of characters to include in the datatable before and after the match. Default `0`, " +
102+
"`-1` indicates that the whole text should be used.",
103+
required = false,
104+
example = "50")
105+
@Nullable
106+
Integer contextSize;
107+
100108
@Override
101109
public String getInstanceName() {
102110
return String.format("Find text `%s`", find);
@@ -106,6 +114,7 @@ public String getInstanceName() {
106114
public TreeVisitor<?, ExecutionContext> getVisitor() {
107115

108116
TreeVisitor<?, ExecutionContext> visitor = new TreeVisitor<Tree, ExecutionContext>() {
117+
109118
@Override
110119
public Tree visit(@Nullable Tree tree, ExecutionContext ctx) {
111120
SourceFile sourceFile = (SourceFile) requireNonNull(tree);
@@ -164,24 +173,41 @@ public Tree visit(@Nullable Tree tree, ExecutionContext ctx) {
164173
}
165174

166175
int startLine = lastNewLineIndex + 1;
167-
int endLine = rawText.indexOf('\n', matcher.end());
176+
int endLine = nextNewLineIndex > matcher.end() ? nextNewLineIndex : rawText.indexOf('\n', matcher.end());
168177
if (endLine == -1) {
169178
endLine = rawText.length();
170179
}
171180

172-
//noinspection StringBufferReplaceableByString
173-
textMatches.insertRow(ctx, new TextMatches.Row(
174-
sourceFilePath,
175-
new StringBuilder(endLine - startLine + 3)
176-
.append(rawText, startLine, matchStart)
177-
.append("~~>")
178-
.append(rawText, matchStart, endLine)
179-
.toString()
180-
));
181+
String context = truncateContext(endLine, startLine, matcher, matchStart, rawText);
182+
183+
textMatches.insertRow(ctx, new TextMatches.Row(sourceFilePath, context));
181184
} while (matcher.find());
182185
snippets.add(snippet(rawText.substring(previousEnd)));
183186
return plainText.withText("").withSnippets(snippets);
184187
}
188+
189+
private String truncateContext(int endLine, int startLine, Matcher matcher, int matchStart, String rawText) {
190+
String matchText = matcher.group();
191+
int contextLength = contextSize == null ? 0 : contextSize;
192+
int contextStart = contextLength == -1 ? startLine : matchStart - contextLength;
193+
int contextEnd = contextLength == -1 ? endLine : matchStart + matchText.length() + contextLength;
194+
195+
StringBuilder sb = new StringBuilder();
196+
197+
if (contextStart > startLine) {
198+
sb.append("...");
199+
}
200+
201+
sb.append(rawText, Math.max(contextStart, startLine), matchStart)
202+
.append("~~>")
203+
.append(rawText, matchStart, Math.min(contextEnd, endLine));
204+
205+
if (contextEnd < endLine) {
206+
sb.append("...");
207+
}
208+
209+
return sb.toString();
210+
}
185211
};
186212
if (filePattern != null) {
187213
visitor = Preconditions.check(new FindSourceFiles(filePattern), visitor);

rewrite-core/src/test/java/org/openrewrite/config/DeclarativeRecipeTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ void preconditionOnNestedDeclarative() {
336336
void exposesUnderlyingDataTables() {
337337
DeclarativeRecipe dr = new DeclarativeRecipe("org.openrewrite.DeclarativeDataTable", "declarative with data table",
338338
"test", emptySet(), null, URI.create("dummy"), true, Collections.emptyList());
339-
dr.addUninitialized(new Find("sam", null, null, null, null, null, null));
339+
dr.addUninitialized(new Find("sam", null, null, null, null, null, null, null));
340340
dr.initialize(List.of(), Map.of());
341341
assertThat(dr.getDataTableDescriptors()).anyMatch(it -> "org.openrewrite.table.TextMatches".equals(it.getName()));
342342
}

rewrite-core/src/test/java/org/openrewrite/rpc/RewriteRpcTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ void runRecipe() {
135135
.validateRecipeSerialization(false)
136136
.dataTable(TextMatches.Row.class, rows -> {
137137
assertThat(rows).contains(new TextMatches.Row(
138-
"hello.txt", "~~>Hello Jon!"));
138+
"hello.txt", "~~>Hello..."));
139139
latch.countDown();
140140
}),
141141
text(

rewrite-core/src/test/java/org/openrewrite/text/FindTest.java

Lines changed: 128 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class FindTest implements RewriteTest {
3030
@Test
3131
void regex() {
3232
rewriteRun(
33-
spec -> spec.recipe(new Find("[T\\s]", true, true, null, null, null, null)),
33+
spec -> spec.recipe(new Find("[T\\s]", true, true, null, null, null, null, null)),
3434
text(
3535
"""
3636
This is\ttext.
@@ -42,10 +42,30 @@ void regex() {
4242
);
4343
}
4444

45+
@Test
46+
void isFullMatch() {
47+
rewriteRun(
48+
spec -> spec.recipe(new Find("This is text.", true, true, null, null, null, null, null))
49+
.dataTable(TextMatches.Row.class, rows -> {
50+
assertThat(rows)
51+
.hasSize(1)
52+
.allSatisfy(r -> assertThat(r.getMatch()).isEqualTo("~~>This is text."));
53+
}),
54+
text(
55+
"""
56+
This is text.
57+
""",
58+
"""
59+
~~>This is text.
60+
"""
61+
)
62+
);
63+
}
64+
4565
@Test
4666
void dataTable() {
4767
rewriteRun(
48-
spec -> spec.recipe(new Find("text", null, null, null, null, null, null))
68+
spec -> spec.recipe(new Find("text", null, null, null, null, null, null, 50))
4969
.dataTable(TextMatches.Row.class, rows -> {
5070
assertThat(rows).hasSize(1);
5171
assertThat(rows.get(0).getMatch()).isEqualTo("This is ~~>text.");
@@ -68,7 +88,7 @@ void dataTable() {
6888
@Test
6989
void plainText() {
7090
rewriteRun(
71-
spec -> spec.recipe(new Find("\\s", null, null, null, null, null, null)),
91+
spec -> spec.recipe(new Find("\\s", null, null, null, null, null, null, 50)),
7292
text(
7393
"""
7494
This i\\s text.
@@ -83,7 +103,7 @@ void plainText() {
83103
@Test
84104
void caseInsensitive() {
85105
rewriteRun(
86-
spec -> spec.recipe(new Find("text", null, null, null, null, "**/foo/**;**/baz/**", null)),
106+
spec -> spec.recipe(new Find("text", null, null, null, null, "**/foo/**;**/baz/**", null, 50)),
87107
dir("foo",
88108
text(
89109
"""
@@ -115,7 +135,7 @@ void caseInsensitive() {
115135
@Test
116136
void regexBasicMultiLine() {
117137
rewriteRun(
118-
spec -> spec.recipe(new Find("[T\\s]", true, true, true, null, null, null)),
138+
spec -> spec.recipe(new Find("[T\\s]", true, true, true, null, null, null, 50)),
119139
text(
120140
"""
121141
This is\ttext.
@@ -132,7 +152,7 @@ void regexBasicMultiLine() {
132152
@Test
133153
void regexWithoutMultilineAndDotall() {
134154
rewriteRun(
135-
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, false, false, null, null)),
155+
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, false, false, null, null, 50)),
136156
text(
137157
"""
138158
This is text.
@@ -148,7 +168,7 @@ void regexWithoutMultilineAndDotall() {
148168
@Test
149169
void regexMatchingWhitespaceWithoutMultilineWithDotall() {
150170
rewriteRun(
151-
spec -> spec.recipe(new Find("One.Two$", true, true, false, true, null, null)),
171+
spec -> spec.recipe(new Find("One.Two$", true, true, false, true, null, null, 50)),
152172
//language=csv
153173
text( // the `.` above matches the space character on the same line
154174
"""
@@ -163,7 +183,7 @@ void regexMatchingWhitespaceWithoutMultilineWithDotall() {
163183
@Test
164184
void regexWithoutMultilineAndWithDotAll() {
165185
rewriteRun(
166-
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, false, true, null, null)),
186+
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, false, true, null, null, 50)),
167187
text(
168188
"""
169189
This is text.
@@ -186,7 +206,7 @@ void regexWithoutMultilineAndWithDotAll() {
186206
@Test
187207
void regexWithMultilineAndWithoutDotall() {
188208
rewriteRun(
189-
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, true, false, null, null)),
209+
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, true, false, null, null, 50)),
190210
text(
191211
"""
192212
This is text.
@@ -209,7 +229,7 @@ void regexWithMultilineAndWithoutDotall() {
209229
@Test
210230
void regexWithBothMultilineAndDotAll() {
211231
rewriteRun(
212-
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, true, true, null, null)),
232+
spec -> spec.recipe(new Find("^This.*below\\.$", true, true, true, true, null, null, 50)),
213233
text(
214234
"""
215235
The first line.
@@ -232,7 +252,7 @@ void regexWithBothMultilineAndDotAll() {
232252
@Test
233253
void description() {
234254
rewriteRun(
235-
spec -> spec.recipe(new Find("text", null, null, null, null, null, true)),
255+
spec -> spec.recipe(new Find("text", null, null, null, null, null, true, 50)),
236256
text(
237257
"""
238258
This is text.
@@ -243,4 +263,101 @@ void description() {
243263
)
244264
);
245265
}
266+
267+
@Test
268+
void longLine() {
269+
rewriteRun(
270+
spec -> spec.recipe(new Find("very", null, null, null, null, null, null, 50))
271+
.dataTable(TextMatches.Row.class, rows -> {
272+
assertThat(rows).hasSize(18);
273+
assertThat(rows).satisfiesExactly(
274+
r -> assertThat(r.getMatch()).isEqualTo("This is a ~~>very, very, very, very, very, very, very, very, very, ..."),
275+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, ~~>very, very, very, very, very, very, very, very, very, ..."),
276+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, ~~>very, very, very, very, very, very, very, very, very, ..."),
277+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, ~~>very, very, very, very, very, very, very, very, very, ..."),
278+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, ~~>very, very, very, very, very, very, very, very, very, ..."),
279+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, ~~>very, very, very, very, very, very, very, very, very, ..."),
280+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, ~~>very, very, very, very, very, very, very, very, very, ..."),
281+
r -> assertThat(r.getMatch()).isEqualTo("...is is a very, very, very, very, very, very, very, ~~>very, very, very, very, very, very, very, very, very, ..."),
282+
r -> assertThat(r.getMatch()).isEqualTo("...a very, very, very, very, very, very, very, very, ~~>very, very, very, very, very, very, very, very, very, ..."),
283+
r -> assertThat(r.getMatch()).isEqualTo("..., very, very, very, very, very, very, very, very, ~~>very, very, very, very, very, very, very, very, very l..."),
284+
r -> assertThat(r.getMatch()).isEqualTo("..., very, very, very, very, very, very, very, very, ~~>very, very, very, very, very, very, very, very long li..."),
285+
r -> assertThat(r.getMatch()).isEqualTo("..., very, very, very, very, very, very, very, very, ~~>very, very, very, very, very, very, very long line."),
286+
r -> assertThat(r.getMatch()).isEqualTo("..., very, very, very, very, very, very, very, very, ~~>very, very, very, very, very, very long line."),
287+
r -> assertThat(r.getMatch()).isEqualTo("..., very, very, very, very, very, very, very, very, ~~>very, very, very, very, very long line."),
288+
r -> assertThat(r.getMatch()).isEqualTo("..., very, very, very, very, very, very, very, very, ~~>very, very, very, very long line."),
289+
r -> assertThat(r.getMatch()).isEqualTo("..., very, very, very, very, very, very, very, very, ~~>very, very, very long line."),
290+
r -> assertThat(r.getMatch()).isEqualTo("..., very, very, very, very, very, very, very, very, ~~>very, very long line."),
291+
r -> assertThat(r.getMatch()).isEqualTo("..., very, very, very, very, very, very, very, very, ~~>very long line.")
292+
);
293+
}),
294+
text(
295+
"""
296+
This is a very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very long line.
297+
""",
298+
"""
299+
This is a ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very long line.
300+
"""
301+
)
302+
);
303+
}
304+
305+
@Test
306+
void justMatch() {
307+
rewriteRun(
308+
spec -> spec.recipe(new Find("very", null, null, null, null, null, null, null))
309+
.dataTable(TextMatches.Row.class, rows -> {
310+
assertThat(rows).hasSize(18);
311+
assertThat(rows).allSatisfy(
312+
r -> assertThat(r.getMatch()).isEqualTo("...~~>very...")
313+
);
314+
}),
315+
text(
316+
"""
317+
This is a very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very long line.
318+
""",
319+
"""
320+
This is a ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very long line.
321+
"""
322+
)
323+
);
324+
}
325+
326+
@Test
327+
void noTruncate() {
328+
rewriteRun(
329+
spec -> spec.recipe(new Find("very", null, null, null, null, null, null, -1))
330+
.dataTable(TextMatches.Row.class, rows -> {
331+
assertThat(rows).hasSize(18);
332+
assertThat(rows).satisfiesExactly(
333+
r -> assertThat(r.getMatch()).isEqualTo("This is a ~~>very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very long line."),
334+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, ~~>very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very long line."),
335+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, ~~>very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very long line."),
336+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, ~~>very, very, very, very, very, very, very, very, very, very, very, very, very, very, very long line."),
337+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, ~~>very, very, very, very, very, very, very, very, very, very, very, very, very, very long line."),
338+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, ~~>very, very, very, very, very, very, very, very, very, very, very, very, very long line."),
339+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, ~~>very, very, very, very, very, very, very, very, very, very, very, very long line."),
340+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, ~~>very, very, very, very, very, very, very, very, very, very, very long line."),
341+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, very, ~~>very, very, very, very, very, very, very, very, very, very long line."),
342+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, very, very, ~~>very, very, very, very, very, very, very, very, very long line."),
343+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, very, very, very, ~~>very, very, very, very, very, very, very, very long line."),
344+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, very, very, very, very, ~~>very, very, very, very, very, very, very long line."),
345+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, very, very, very, very, very, ~~>very, very, very, very, very, very long line."),
346+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, very, very, very, very, very, very, ~~>very, very, very, very, very long line."),
347+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, very, very, very, very, very, very, very, ~~>very, very, very, very long line."),
348+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, ~~>very, very, very long line."),
349+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, ~~>very, very long line."),
350+
r -> assertThat(r.getMatch()).isEqualTo("This is a very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, ~~>very long line.")
351+
);
352+
}),
353+
text(
354+
"""
355+
This is a very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very, very long line.
356+
""",
357+
"""
358+
This is a ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very, ~~>very long line.
359+
"""
360+
)
361+
);
362+
}
246363
}

0 commit comments

Comments
 (0)