Skip to content

Commit fb04160

Browse files
authored
Add support for including keys based on regex patterns for select entries processor (opensearch-project#6094)
Signed-off-by: Taylor Gray <tylgry@amazon.com>
1 parent 4071ed0 commit fb04160

File tree

4 files changed

+272
-11
lines changed

4 files changed

+272
-11
lines changed

data-prepper-plugins/mutate-event-processors/src/main/java/org/opensearch/dataprepper/plugins/processor/mutateevent/SelectEntriesProcessor.java

Lines changed: 68 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,18 @@
1616
import org.opensearch.dataprepper.model.record.Record;
1717

1818
import java.util.Collection;
19+
import java.util.Collections;
1920
import java.util.HashMap;
2021
import java.util.List;
2122
import java.util.Map;
2223
import java.util.Objects;
24+
import java.util.regex.Pattern;
2325

2426
@DataPrepperPlugin(name = "select_entries", pluginType = Processor.class, pluginConfigurationType = SelectEntriesProcessorConfig.class)
2527
public class SelectEntriesProcessor extends AbstractProcessor<Record<Event>, Record<Event>> {
2628
private final List<String> keysToInclude;
29+
private final List<Pattern> includeKeysRegex;
30+
private final String includeKeysRegexPointer;
2731
private final String selectWhen;
2832

2933
private final ExpressionEvaluator expressionEvaluator;
@@ -39,6 +43,8 @@ public SelectEntriesProcessor(final PluginMetrics pluginMetrics, final SelectEnt
3943
"See https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/ for valid expression syntax.", selectWhen));
4044
}
4145
this.keysToInclude = config.getIncludeKeys();
46+
this.includeKeysRegex = config.getIncludeKeysRegex();
47+
this.includeKeysRegexPointer = config.getIncludeKeysRegexPointer();
4248
this.expressionEvaluator = expressionEvaluator;
4349
}
4450

@@ -52,19 +58,20 @@ public Collection<Record<Event>> doExecute(final Collection<Record<Event>> recor
5258
}
5359
// To handle nested case, just get the values and store
5460
// in a temporary map.
55-
Map<String, Object> outMap = new HashMap<>();
56-
for (String keyToInclude: keysToInclude) {
57-
Object value = recordEvent.get(keyToInclude, Object.class);
58-
if (value != null) {
59-
outMap.put(keyToInclude, value);
60-
}
61-
}
61+
Map<String, Object> outMap = getIncludeKeysOutputMap(recordEvent);
62+
63+
Map<String, Object> regexOutMap = getIncludeKeysRegexOutputMap(recordEvent);
64+
6265
recordEvent.clear();
6366

6467
// add back only the keys selected
6568
for (Map.Entry<String, Object> entry: outMap.entrySet()) {
6669
recordEvent.put(entry.getKey(), entry.getValue());
6770
}
71+
72+
for (Map.Entry<String, Object> entry: regexOutMap.entrySet()) {
73+
recordEvent.put(entry.getKey(), entry.getValue());
74+
}
6875
}
6976

7077
return records;
@@ -82,5 +89,59 @@ public boolean isReadyForShutdown() {
8289
@Override
8390
public void shutdown() {
8491
}
92+
93+
private Map<String, Object> getIncludeKeysOutputMap(final Event event) {
94+
Map<String, Object> outMap = new HashMap<>();
95+
if (keysToInclude != null) {
96+
for (String keyToInclude: keysToInclude) {
97+
Object value = event.get(keyToInclude, Object.class);
98+
if (value != null) {
99+
outMap.put(keyToInclude, value);
100+
}
101+
}
102+
}
103+
104+
return outMap;
105+
}
106+
107+
private Map<String, Object> getIncludeKeysRegexOutputMap(final Event event) {
108+
if (includeKeysRegex == null || includeKeysRegex.isEmpty()) {
109+
return Collections.emptyMap();
110+
}
111+
112+
final Map<String, Object> outputMap = new HashMap<>();
113+
114+
Map<String, Object> eventMap;
115+
116+
if (includeKeysRegexPointer != null && !includeKeysRegexPointer.equals("/")) {
117+
if (!event.containsKey(includeKeysRegexPointer)) {
118+
return Collections.emptyMap();
119+
}
120+
121+
eventMap = event.get(includeKeysRegexPointer, Map.class);
122+
} else {
123+
eventMap = event.toMap();
124+
}
125+
126+
for (final Map.Entry<String, Object> entry : eventMap.entrySet()) {
127+
if (keysToInclude != null && keysToInclude.contains(entry.getKey())) {
128+
continue;
129+
}
130+
131+
for (final Pattern includeKeyRegex : includeKeysRegex) {
132+
if (includeKeyRegex.matcher(entry.getKey()).matches()) {
133+
final String fullKey = getFullKey(entry.getKey(), includeKeysRegexPointer);
134+
outputMap.put(fullKey, entry.getValue());
135+
break;
136+
}
137+
}
138+
}
139+
140+
return outputMap;
141+
}
142+
143+
private String getFullKey(final String key, final String includeKeysRegexPointer) {
144+
return includeKeysRegexPointer != null ? includeKeysRegexPointer + "/" + key : key;
145+
}
85146
}
86147

data-prepper-plugins/mutate-event-processors/src/main/java/org/opensearch/dataprepper/plugins/processor/mutateevent/SelectEntriesProcessorConfig.java

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,38 @@
66
package org.opensearch.dataprepper.plugins.processor.mutateevent;
77

88
import com.fasterxml.jackson.annotation.JsonClassDescription;
9+
import com.fasterxml.jackson.annotation.JsonIgnore;
910
import com.fasterxml.jackson.annotation.JsonProperty;
1011
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
1112
import com.fasterxml.jackson.annotation.JsonPropertyDescription;
12-
import jakarta.validation.constraints.NotEmpty;
13-
import jakarta.validation.constraints.NotNull;
13+
import jakarta.validation.constraints.AssertTrue;
1414
import org.opensearch.dataprepper.model.annotations.ExampleValues;
1515
import org.opensearch.dataprepper.model.annotations.ExampleValues.Example;
1616

1717
import java.util.List;
18+
import java.util.regex.Pattern;
19+
import java.util.regex.PatternSyntaxException;
20+
import java.util.stream.Collectors;
1821

1922
@JsonPropertyOrder
2023
@JsonClassDescription("The <code>select_entries</code> processor selects entries from an event.")
2124
public class SelectEntriesProcessorConfig {
22-
@NotEmpty
23-
@NotNull
25+
2426
@JsonProperty("include_keys")
2527
@JsonPropertyDescription("A list of keys to be selected from an event.")
2628
private List<String> includeKeys;
2729

30+
@JsonProperty("include_keys_regex")
31+
@JsonPropertyDescription("A list of regex patterns to match keys be selected from an event.")
32+
private List<String> includeKeysRegex;
33+
34+
@JsonIgnore
35+
private List<Pattern> includeKeysRegexPatterns;
36+
37+
// The processor is implemented to support this, but can be made configurable when there is a feature request
38+
@JsonIgnore
39+
private String includeKeysRegexPointer;
40+
2841
@JsonProperty("select_when")
2942
@JsonPropertyDescription("A <a href=\"https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/\">conditional expression</a>, " +
3043
"such as <code>/some-key == \"test\"</code>, that will be evaluated to determine whether the processor will be " +
@@ -38,8 +51,39 @@ public List<String> getIncludeKeys() {
3851
return includeKeys;
3952
}
4053

54+
public List<Pattern> getIncludeKeysRegex() {
55+
return includeKeysRegexPatterns;
56+
}
57+
58+
public String getIncludeKeysRegexPointer() {
59+
return includeKeysRegexPointer;
60+
}
61+
4162
public String getSelectWhen() {
4263
return selectWhen;
4364
}
65+
66+
private void setIncludeKeysRegex() {
67+
includeKeysRegexPatterns = includeKeysRegex.stream().map(Pattern::compile).collect(Collectors.toList());
68+
}
69+
70+
71+
@AssertTrue(message = "At least one of include_keys and include_keys_regex is required.")
72+
boolean isValidIncludeKeys() {
73+
return (includeKeys != null && !includeKeys.isEmpty()) || (includeKeysRegex != null && !includeKeysRegex.isEmpty());
74+
}
75+
76+
@AssertTrue(message = "Invalid regex pattern found in include_keys_regex.")
77+
boolean isValidIncludeKeysRegex() {
78+
if (includeKeysRegex != null && !includeKeysRegex.isEmpty()) {
79+
try {
80+
setIncludeKeysRegex();
81+
} catch (final PatternSyntaxException e) {
82+
return false;
83+
}
84+
}
85+
86+
return true;
87+
}
4488
}
4589

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package org.opensearch.dataprepper.plugins.processor.mutateevent;
2+
3+
import org.junit.jupiter.api.Test;
4+
import org.opensearch.dataprepper.test.helper.ReflectivelySetField;
5+
6+
import java.util.List;
7+
8+
import static org.hamcrest.CoreMatchers.equalTo;
9+
import static org.hamcrest.MatcherAssert.assertThat;
10+
11+
public class SelectEntriesProcessorConfigTests {
12+
13+
@Test
14+
void testIsValidKeysRegexPatterns_with_valid_pattern() throws NoSuchFieldException, IllegalAccessException {
15+
final SelectEntriesProcessorConfig objectUnderTest = new SelectEntriesProcessorConfig();
16+
final List<String> validPatterns = List.of("test.*");
17+
ReflectivelySetField.setField(SelectEntriesProcessorConfig.class, objectUnderTest, "includeKeysRegex", validPatterns);
18+
19+
assertThat(objectUnderTest.isValidIncludeKeysRegex(), equalTo(true));
20+
21+
}
22+
23+
@Test
24+
void testIsValidKeysRegexPatterns_with_invalid_pattern() throws NoSuchFieldException, IllegalAccessException {
25+
final SelectEntriesProcessorConfig objectUnderTest = new SelectEntriesProcessorConfig();
26+
final List<String> validPatterns = List.of("(abc");
27+
ReflectivelySetField.setField(SelectEntriesProcessorConfig.class, objectUnderTest, "includeKeysRegex", validPatterns);
28+
29+
assertThat(objectUnderTest.isValidIncludeKeysRegex(), equalTo(false));
30+
31+
}
32+
}

data-prepper-plugins/mutate-event-processors/src/test/java/org/opensearch/dataprepper/plugins/processor/mutateevent/SelectEntriesProcessorTests.java

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.List;
2323
import java.util.Map;
2424
import java.util.UUID;
25+
import java.util.regex.Pattern;
2526

2627
import static org.hamcrest.CoreMatchers.is;
2728
import static org.hamcrest.CoreMatchers.equalTo;
@@ -138,6 +139,129 @@ public void testNestedSelectEntriesProcessor() {
138139
assertThat(editedRecords.get(0).getData().get("nested/nested2/key2", String.class), equalTo(value2));
139140
}
140141

142+
@Test
143+
public void testSelectEntriesProcessorWithIncludeKeysRegex() {
144+
when(mockConfig.getIncludeKeys()).thenReturn(null);
145+
when(mockConfig.getIncludeKeysRegex()).thenReturn(List.of(Pattern.compile("include.*"), Pattern.compile("other.*")));
146+
when(mockConfig.getSelectWhen()).thenReturn(null);
147+
final String value1 = UUID.randomUUID().toString();
148+
final String value2 = UUID.randomUUID().toString();
149+
final SelectEntriesProcessor processor = createObjectUnderTest();
150+
final Record<Event> record = getEvent("thisisamessage");
151+
record.getData().put("include-key", value1);
152+
record.getData().put("include-key2", value2);
153+
record.getData().put("other-key1", value2);
154+
record.getData().put("exclude-key", value1);
155+
record.getData().put("exclude-key2", value2);
156+
final List<Record<Event>> editedRecords = (List<Record<Event>>) processor.doExecute(Collections.singletonList(record));
157+
assertThat(editedRecords.get(0).getData().containsKey("include-key"), is(true));
158+
assertThat(editedRecords.get(0).getData().containsKey("include-key2"), is(true));
159+
assertThat(editedRecords.get(0).getData().containsKey("other-key1"), is(true));
160+
assertThat(editedRecords.get(0).getData().containsKey("message"), is(false));
161+
assertThat(editedRecords.get(0).getData().containsKey("exclude-key"), is(false));
162+
assertThat(editedRecords.get(0).getData().containsKey("exclude-key2"), is(false));
163+
164+
assertThat(editedRecords.get(0).getData().get("include-key", String.class), equalTo(value1));
165+
assertThat(editedRecords.get(0).getData().get("include-key2", String.class), equalTo(value2));
166+
}
167+
168+
@Test
169+
public void testSelectEntriesProcessorWithIncludeKeys_and_IncludeKeysRegex() {
170+
when(mockConfig.getIncludeKeys()).thenReturn(List.of("exclude-key", "message"));
171+
when(mockConfig.getIncludeKeysRegex()).thenReturn(List.of(Pattern.compile("include.*")));
172+
when(mockConfig.getSelectWhen()).thenReturn(null);
173+
final String value1 = UUID.randomUUID().toString();
174+
final String value2 = UUID.randomUUID().toString();
175+
final SelectEntriesProcessor processor = createObjectUnderTest();
176+
final Record<Event> record = getEvent("thisisamessage");
177+
record.getData().put("include-key", value1);
178+
record.getData().put("include-key2", value2);
179+
record.getData().put("exclude-key", value1);
180+
record.getData().put("exclude-key2", value2);
181+
final List<Record<Event>> editedRecords = (List<Record<Event>>) processor.doExecute(Collections.singletonList(record));
182+
assertThat(editedRecords.get(0).getData().containsKey("include-key"), is(true));
183+
assertThat(editedRecords.get(0).getData().containsKey("include-key2"), is(true));
184+
assertThat(editedRecords.get(0).getData().containsKey("message"), is(true));
185+
assertThat(editedRecords.get(0).getData().containsKey("exclude-key"), is(true));
186+
assertThat(editedRecords.get(0).getData().containsKey("exclude-key2"), is(false));
187+
188+
assertThat(editedRecords.get(0).getData().get("include-key", String.class), equalTo(value1));
189+
assertThat(editedRecords.get(0).getData().get("include-key2", String.class), equalTo(value2));
190+
assertThat(editedRecords.get(0).getData().get("exclude-key", String.class), equalTo(value1));
191+
}
192+
193+
@Test
194+
public void testSelectEntriesProcessorWithIncludeKeysRegexPointerThatDoesNotExist() {
195+
when(mockConfig.getIncludeKeys()).thenReturn(List.of("message"));
196+
when(mockConfig.getIncludeKeysRegex()).thenReturn(List.of(Pattern.compile("include.*")));
197+
when(mockConfig.getSelectWhen()).thenReturn(null);
198+
when(mockConfig.getIncludeKeysRegexPointer()).thenReturn("/nested");
199+
final String value1 = UUID.randomUUID().toString();
200+
final String value2 = UUID.randomUUID().toString();
201+
final SelectEntriesProcessor processor = createObjectUnderTest();
202+
final Record<Event> record = getEvent("thisisamessage");
203+
record.getData().put("include-key", value1);
204+
record.getData().put("include-key2", value2);
205+
record.getData().put("exclude-key", value1);
206+
record.getData().put("exclude-key2", value2);
207+
final List<Record<Event>> editedRecords = (List<Record<Event>>) processor.doExecute(Collections.singletonList(record));
208+
assertThat(editedRecords.get(0).getData().containsKey("include-key"), is(false));
209+
assertThat(editedRecords.get(0).getData().containsKey("include-key2"), is(false));
210+
assertThat(editedRecords.get(0).getData().containsKey("message"), is(true));
211+
assertThat(editedRecords.get(0).getData().containsKey("exclude-key"), is(false));
212+
assertThat(editedRecords.get(0).getData().containsKey("exclude-key2"), is(false));
213+
}
214+
215+
@Test
216+
public void testSelectEntriesProcessorWithIncludeKeysRegexPointerThatDoesExist() {
217+
when(mockConfig.getIncludeKeys()).thenReturn(Collections.emptyList());
218+
when(mockConfig.getIncludeKeysRegex()).thenReturn(List.of(Pattern.compile("include.*")));
219+
when(mockConfig.getSelectWhen()).thenReturn(null);
220+
when(mockConfig.getIncludeKeysRegexPointer()).thenReturn("/nested");
221+
final String value1 = UUID.randomUUID().toString();
222+
final String value2 = UUID.randomUUID().toString();
223+
final SelectEntriesProcessor processor = createObjectUnderTest();
224+
final Record<Event> record = getEvent("thisisamessage");
225+
final Map<String, Object> nestedData = new HashMap<>();
226+
nestedData.put("include-key", value1);
227+
nestedData.put("include-key2", value2);
228+
nestedData.put("exclude-key", value1);
229+
nestedData.put("exclude-key2", value2);
230+
231+
record.getData().put("/nested", nestedData);
232+
final List<Record<Event>> editedRecords = (List<Record<Event>>) processor.doExecute(Collections.singletonList(record));
233+
assertThat(editedRecords.get(0).getData().containsKey("/nested/include-key"), is(true));
234+
assertThat(editedRecords.get(0).getData().containsKey("/nested/include-key2"), is(true));
235+
assertThat(editedRecords.get(0).getData().containsKey("message"), is(false));
236+
assertThat(editedRecords.get(0).getData().containsKey("/nested/exclude-key"), is(false));
237+
assertThat(editedRecords.get(0).getData().containsKey("/nested/exclude-key2"), is(false));
238+
}
239+
240+
@Test
241+
public void testSelectEntriesProcessorWithIncludeKeysRegexPointerThatDoesExist_and_include_keys() {
242+
when(mockConfig.getIncludeKeys()).thenReturn(List.of("message", "/nested/exclude-key"));
243+
when(mockConfig.getIncludeKeysRegex()).thenReturn(List.of(Pattern.compile("include.*")));
244+
when(mockConfig.getSelectWhen()).thenReturn(null);
245+
when(mockConfig.getIncludeKeysRegexPointer()).thenReturn("/nested");
246+
final String value1 = UUID.randomUUID().toString();
247+
final String value2 = UUID.randomUUID().toString();
248+
final SelectEntriesProcessor processor = createObjectUnderTest();
249+
final Record<Event> record = getEvent("thisisamessage");
250+
final Map<String, Object> nestedData = new HashMap<>();
251+
nestedData.put("include-key", value1);
252+
nestedData.put("include-key2", value2);
253+
nestedData.put("exclude-key", value1);
254+
nestedData.put("exclude-key2", value2);
255+
256+
record.getData().put("/nested", nestedData);
257+
final List<Record<Event>> editedRecords = (List<Record<Event>>) processor.doExecute(Collections.singletonList(record));
258+
assertThat(editedRecords.get(0).getData().containsKey("/nested/include-key"), is(true));
259+
assertThat(editedRecords.get(0).getData().containsKey("/nested/include-key2"), is(true));
260+
assertThat(editedRecords.get(0).getData().containsKey("message"), is(true));
261+
assertThat(editedRecords.get(0).getData().containsKey("/nested/exclude-key"), is(true));
262+
assertThat(editedRecords.get(0).getData().containsKey("exclude-key2"), is(false));
263+
}
264+
141265

142266
private SelectEntriesProcessor createObjectUnderTest() {
143267
return new SelectEntriesProcessor(pluginMetrics, mockConfig, expressionEvaluator);

0 commit comments

Comments
 (0)