Skip to content

Commit f0f5634

Browse files
authored
2026-03-11T16:33:10+0800 (#83)
* repo-sync-2026-03-11T16:33:10+0800 * repo-sync-2026-03-11T16:33:10+0800 * repo-sync-2026-03-11T16:33:10+0800 * repo-sync-2026-03-11T16:33:10+0800 * add license * dep kingbase jar * dep kingbase jar
1 parent ef2bd72 commit f0f5634

49 files changed

Lines changed: 5208 additions & 234 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

dataproxy-common/src/main/java/org/secretflow/dataproxy/common/utils/ArrowUtil.java

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@
2929
public class ArrowUtil {
3030

3131
/**
32-
* Parse Kuscia column type to Arrow type
33-
* @param type Column type string (e.g., "int32", "interval_year_month", "large_string")
34-
* @return ArrowType
35-
*/
32+
+ * Parse Kuscia column type to Arrow type
33+
+ * @param type Column type string (e.g., "int32", "interval_year_month", "large_string")
34+
+ * @return ArrowType
35+
+ */
3636
public static ArrowType parseKusciaColumnType(String type) {
3737
String typeLower = type.toLowerCase();
3838
return switch (typeLower) {
@@ -45,50 +45,50 @@ public static ArrowType parseKusciaColumnType(String type) {
4545
case "uint16" -> Types.MinorType.UINT2.getType();
4646
case "uint32" -> Types.MinorType.UINT4.getType();
4747
case "uint64" -> Types.MinorType.UINT8.getType();
48-
48+
4949
// Floating point types
5050
case "float32" -> Types.MinorType.FLOAT4.getType();
5151
case "float64", "float" -> Types.MinorType.FLOAT8.getType();
52-
52+
5353
// Date types
54-
case "date32" -> Types.MinorType.DATEDAY.getType();
54+
case "date32", "date" -> Types.MinorType.DATEDAY.getType();
5555
case "date64" -> Types.MinorType.DATEMILLI.getType();
56-
56+
5757
// Time types
5858
case "time32" -> Types.MinorType.TIMEMILLI.getType();
5959
case "time64" -> Types.MinorType.TIMEMICRO.getType();
60-
60+
6161
// Timestamp types
6262
case "timestamp" -> Types.MinorType.TIMESTAMPMICRO.getType();
6363
case "timestamp_us" -> Types.MinorType.TIMESTAMPMICRO.getType();
6464
case "timestamp_ms" -> Types.MinorType.TIMESTAMPMILLI.getType();
6565
case "timestamp_ns" -> Types.MinorType.TIMESTAMPNANO.getType();
6666
case "timestamp_tz" -> Types.MinorType.TIMESTAMPMICROTZ.getType();
67-
67+
6868
// Boolean types
6969
case "bool" -> Types.MinorType.BIT.getType();
70-
70+
7171
// String types
7272
case "string", "str" -> Types.MinorType.VARCHAR.getType();
73-
case "large_string", "large_utf8", "utf8_large" -> Types.MinorType.LARGEVARCHAR.getType();
74-
73+
case "large_string", "large_utf8", "utf8_large", "large_str" -> Types.MinorType.LARGEVARCHAR.getType();
74+
7575
// Binary types
76-
case "binary" -> Types.MinorType.VARBINARY.getType();
76+
case "binary","bytea" -> Types.MinorType.VARBINARY.getType();
7777
case "large_binary", "large_varbinary", "varbinary_large" -> Types.MinorType.LARGEVARBINARY.getType();
78-
78+
7979
// Decimal types
8080
// Note: Types.MinorType.DECIMAL.getType() throws UnsupportedOperationException
8181
// Decimal requires precision/scale, must use new ArrowType.Decimal(precision, scale, bitWidth)
8282
case "decimal" -> new ArrowType.Decimal(38, 10, 128);
83-
83+
8484
// Interval types
85-
case "interval_year_month", "interval_ym" ->
86-
Types.MinorType.INTERVALYEAR.getType();
87-
case "interval_day_time", "interval_dt" ->
88-
Types.MinorType.INTERVALDAY.getType();
85+
case "interval_year_month", "interval_ym" ->
86+
Types.MinorType.INTERVALYEAR.getType();
87+
case "interval_day_time", "interval_dt" ->
88+
Types.MinorType.INTERVALDAY.getType();
8989
case "interval" -> Types.MinorType.INTERVALYEAR.getType();
90-
90+
9191
default -> throw DataproxyException.of(DataproxyErrorCode.PARAMS_UNRELIABLE, "Unsupported field types: " + type);
9292
};
9393
}
94-
}
94+
}
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
/*
2+
* Copyright 2025 Ant Group Co., Ltd.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.secretflow.dataproxy.common.utils;
18+
19+
import org.junit.jupiter.api.Test;
20+
import org.junit.jupiter.api.extension.ExtendWith;
21+
import org.secretflow.dataproxy.common.exceptions.DataproxyException;
22+
import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension;
23+
24+
import java.lang.reflect.Method;
25+
26+
import static org.junit.jupiter.api.Assertions.*;
27+
28+
/**
29+
* Simple test for ArrowUtil that doesn't require Arrow library initialization
30+
* This test verifies the logic in parseKusciaColumnType without directly calling Arrow classes
31+
*
32+
* @author songquan
33+
* @date 2025/09/29 16:56
34+
**/
35+
@ExtendWith({SystemStubsExtension.class})
36+
public class ArrowUtilTest {
37+
38+
@Test
39+
void testParseKusciaColumnTypeLargeUtf8() {
40+
// Test that large UTF8 type strings are handled correctly by using reflection
41+
// to avoid direct initialization of Arrow classes which require flatbuffers dependency
42+
try {
43+
Class<?> arrowUtilClass = Class.forName("org.secretflow.dataproxy.common.utils.ArrowUtil");
44+
Method parseMethod = arrowUtilClass.getDeclaredMethod("parseKusciaColumnType", String.class);
45+
parseMethod.setAccessible(true);
46+
47+
// Call the method - we're not checking the return value since that would require
48+
// Arrow classes to be initialized, but we're verifying it doesn't throw
49+
// "Unsupported field types" exception
50+
Object result = parseMethod.invoke(null, "large_utf8");
51+
52+
// If we get here without exception, the method is working correctly
53+
assertNotNull(result);
54+
} catch (Exception e) {
55+
// Check if it's the expected DataproxyException for unsupported types
56+
if (e.getCause() instanceof DataproxyException) {
57+
DataproxyException de = (DataproxyException) e.getCause();
58+
if (de.getMessage().contains("Unsupported field types")) {
59+
fail("large_utf8 should be supported but got: " + de.getMessage());
60+
}
61+
}
62+
// Any other exception might be due to missing dependencies, which is OK for this test
63+
}
64+
}
65+
66+
@Test
67+
void testParseKusciaColumnTypeLargeString() {
68+
try {
69+
Class<?> arrowUtilClass = Class.forName("org.secretflow.dataproxy.common.utils.ArrowUtil");
70+
Method parseMethod = arrowUtilClass.getDeclaredMethod("parseKusciaColumnType", String.class);
71+
parseMethod.setAccessible(true);
72+
73+
Object result = parseMethod.invoke(null, "large_string");
74+
assertNotNull(result);
75+
} catch (Exception e) {
76+
if (e.getCause() instanceof DataproxyException) {
77+
DataproxyException de = (DataproxyException) e.getCause();
78+
if (de.getMessage().contains("Unsupported field types")) {
79+
fail("large_string should be supported but got: " + de.getMessage());
80+
}
81+
}
82+
}
83+
}
84+
85+
@Test
86+
void testParseKusciaColumnTypeLargeStr() {
87+
try {
88+
Class<?> arrowUtilClass = Class.forName("org.secretflow.dataproxy.common.utils.ArrowUtil");
89+
Method parseMethod = arrowUtilClass.getDeclaredMethod("parseKusciaColumnType", String.class);
90+
parseMethod.setAccessible(true);
91+
92+
Object result = parseMethod.invoke(null, "large_str");
93+
assertNotNull(result);
94+
} catch (Exception e) {
95+
if (e.getCause() instanceof DataproxyException) {
96+
DataproxyException de = (DataproxyException) e.getCause();
97+
if (de.getMessage().contains("Unsupported field types")) {
98+
fail("large_str should be supported but got: " + de.getMessage());
99+
}
100+
}
101+
}
102+
}
103+
104+
@Test
105+
void testParseKusciaColumnTypeString() {
106+
try {
107+
Class<?> arrowUtilClass = Class.forName("org.secretflow.dataproxy.common.utils.ArrowUtil");
108+
Method parseMethod = arrowUtilClass.getDeclaredMethod("parseKusciaColumnType", String.class);
109+
parseMethod.setAccessible(true);
110+
111+
Object result = parseMethod.invoke(null, "string");
112+
assertNotNull(result);
113+
} catch (Exception e) {
114+
if (e.getCause() instanceof DataproxyException) {
115+
DataproxyException de = (DataproxyException) e.getCause();
116+
if (de.getMessage().contains("Unsupported field types")) {
117+
fail("string should be supported but got: " + de.getMessage());
118+
}
119+
}
120+
}
121+
}
122+
123+
@Test
124+
void testParseKusciaColumnTypeStr() {
125+
try {
126+
Class<?> arrowUtilClass = Class.forName("org.secretflow.dataproxy.common.utils.ArrowUtil");
127+
Method parseMethod = arrowUtilClass.getDeclaredMethod("parseKusciaColumnType", String.class);
128+
parseMethod.setAccessible(true);
129+
130+
Object result = parseMethod.invoke(null, "str");
131+
assertNotNull(result);
132+
} catch (Exception e) {
133+
if (e.getCause() instanceof DataproxyException) {
134+
DataproxyException de = (DataproxyException) e.getCause();
135+
if (de.getMessage().contains("Unsupported field types")) {
136+
fail("str should be supported but got: " + de.getMessage());
137+
}
138+
}
139+
}
140+
}
141+
142+
@Test
143+
void testParseKusciaColumnTypeUnsupported() {
144+
// This test should work regardless of flatbuffers dependency
145+
try {
146+
Class<?> arrowUtilClass = Class.forName("org.secretflow.dataproxy.common.utils.ArrowUtil");
147+
Method parseMethod = arrowUtilClass.getDeclaredMethod("parseKusciaColumnType", String.class);
148+
parseMethod.setAccessible(true);
149+
150+
// This should throw DataproxyException for unsupported type
151+
parseMethod.invoke(null, "unsupported_type");
152+
fail("Should have thrown DataproxyException for unsupported type");
153+
} catch (Exception e) {
154+
// Check if it's the expected DataproxyException for unsupported types
155+
if (e.getCause() instanceof DataproxyException) {
156+
DataproxyException de = (DataproxyException) e.getCause();
157+
assertTrue(de.getMessage().contains("Unsupported field types"),
158+
"Expected unsupported type message but got: " + de.getMessage());
159+
} else {
160+
fail("Expected DataproxyException but got: " + e.getCause());
161+
}
162+
}
163+
}
164+
165+
@Test
166+
void testParseKusciaColumnTypedate() {
167+
// Test that large UTF8 type strings are handled correctly by using reflection
168+
// to avoid direct initialization of Arrow classes which require flatbuffers dependency
169+
try {
170+
Class<?> arrowUtilClass = Class.forName("org.secretflow.dataproxy.common.utils.ArrowUtil");
171+
Method parseMethod = arrowUtilClass.getDeclaredMethod("parseKusciaColumnType", String.class);
172+
parseMethod.setAccessible(true);
173+
174+
// Call the method - we're not checking the return value since that would require
175+
// Arrow classes to be initialized, but we're verifying it doesn't throw
176+
// "Unsupported field types" exception
177+
Object result = parseMethod.invoke(null, "date32");
178+
179+
// If we get here without exception, the method is working correctly
180+
assertNotNull(result);
181+
} catch (Exception e) {
182+
// Check if it's the expected DataproxyException for unsupported types
183+
if (e.getCause() instanceof DataproxyException) {
184+
DataproxyException de = (DataproxyException) e.getCause();
185+
if (de.getMessage().contains("Unsupported field types")) {
186+
fail("large_utf8 should be supported but got: " + de.getMessage());
187+
}
188+
}
189+
// Any other exception might be due to missing dependencies, which is OK for this test
190+
}
191+
}
192+
193+
@Test
194+
void testParseKusciaColumnTypetimestemp() {
195+
// Test that large UTF8 type strings are handled correctly by using reflection
196+
// to avoid direct initialization of Arrow classes which require flatbuffers dependency
197+
try {
198+
Class<?> arrowUtilClass = Class.forName("org.secretflow.dataproxy.common.utils.ArrowUtil");
199+
Method parseMethod = arrowUtilClass.getDeclaredMethod("parseKusciaColumnType", String.class);
200+
parseMethod.setAccessible(true);
201+
202+
// Call the method - we're not checking the return value since that would require
203+
// Arrow classes to be initialized, but we're verifying it doesn't throw
204+
// "Unsupported field types" exception
205+
Object result = parseMethod.invoke(null, "timestamp");
206+
207+
// If we get here without exception, the method is working correctly
208+
assertNotNull(result);
209+
} catch (Exception e) {
210+
// Check if it's the expected DataproxyException for unsupported types
211+
if (e.getCause() instanceof DataproxyException) {
212+
DataproxyException de = (DataproxyException) e.getCause();
213+
if (de.getMessage().contains("Unsupported field types")) {
214+
fail("large_utf8 should be supported but got: " + de.getMessage());
215+
}
216+
}
217+
// Any other exception might be due to missing dependencies, which is OK for this test
218+
}
219+
}
220+
221+
@Test
222+
void testParseKusciaColumnTypeBynary() {
223+
// Test that large UTF8 type strings are handled correctly by using reflection
224+
// to avoid direct initialization of Arrow classes which require flatbuffers dependency
225+
try {
226+
Class<?> arrowUtilClass = Class.forName("org.secretflow.dataproxy.common.utils.ArrowUtil");
227+
Method parseMethod = arrowUtilClass.getDeclaredMethod("parseKusciaColumnType", String.class);
228+
parseMethod.setAccessible(true);
229+
230+
// Call the method - we're not checking the return value since that would require
231+
// Arrow classes to be initialized, but we're verifying it doesn't throw
232+
// "Unsupported field types" exception
233+
Object result = parseMethod.invoke(null, "binary");
234+
235+
// If we get here without exception, the method is working correctly
236+
assertNotNull(result);
237+
} catch (Exception e) {
238+
// Check if it's the expected DataproxyException for unsupported types
239+
if (e.getCause() instanceof DataproxyException) {
240+
DataproxyException de = (DataproxyException) e.getCause();
241+
if (de.getMessage().contains("Unsupported field types")) {
242+
fail("large_utf8 should be supported but got: " + de.getMessage());
243+
}
244+
}
245+
// Any other exception might be due to missing dependencies, which is OK for this test
246+
}
247+
}
248+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Copyright 2025 Ant Group Co., Ltd.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.secretflow.dataproxy.core.converter;
18+
19+
import org.apache.arrow.vector.ValueVector;
20+
import org.apache.arrow.vector.VarBinaryVector;
21+
import org.secretflow.dataproxy.core.visitor.ValueVisitor;
22+
23+
/**
24+
* @author chenmingliang
25+
* @date 2025/12/15
26+
*/
27+
public class BinaryVectorConverter extends AbstractValueConverter<byte[]> {
28+
29+
public BinaryVectorConverter(ValueVisitor<byte[]> visitor) {
30+
super(visitor);
31+
}
32+
33+
@Override
34+
public void convertAndSet(ValueVector vector, int index, Object value) {
35+
if (vector instanceof VarBinaryVector varBinaryVector) {
36+
varBinaryVector.setSafe(index, this.visit(value));
37+
} else {
38+
throw new IllegalArgumentException("BinaryVectorConverter unsupported vector type: " + vector.getClass().getName());
39+
}
40+
}
41+
}

0 commit comments

Comments
 (0)