Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,28 @@ public OneForOneBlockFetcher(
* @return whether the array contains only shuffle block IDs
*/
private boolean areShuffleBlocksOrChunks(String[] blockIds) {
if (Arrays.stream(blockIds).anyMatch(blockId -> !blockId.startsWith(SHUFFLE_BLOCK_PREFIX))) {
if (isAnyBlockNotStartWithShuffleBlockPrefix(blockIds)) {
// It comes here because there is a blockId which doesn't have "shuffle_" prefix so we
// check if all the block ids are shuffle chunk Ids.
return Arrays.stream(blockIds).allMatch(blockId -> blockId.startsWith(SHUFFLE_CHUNK_PREFIX));
return isAllBlocksStartWithShuffleChunkPrefix(blockIds);
}
return true;
}

private boolean isAnyBlockNotStartWithShuffleBlockPrefix(String[] blockIds) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can these be static methods?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I change all of them to static, or only the ones in OneForOneBlockFetcher?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

9c979c4 changed all possible

for (String blockId : blockIds) {
if (!blockId.startsWith(SHUFFLE_BLOCK_PREFIX)) {
return true;
}
}
return false;
}

/**
 * Checks whether every given block id starts with {@code SHUFFLE_CHUNK_PREFIX}.
 *
 * <p>Declared {@code static} because the check reads no instance state; it only
 * compares each id against the prefix constant (presumably a static constant of
 * the enclosing class -- confirm before merging).
 *
 * @param blockIds the block ids to inspect
 * @return {@code false} as soon as one id lacks the prefix; {@code true} otherwise,
 *         including for an empty array
 */
private static boolean isAllBlocksStartWithShuffleChunkPrefix(String[] blockIds) {
  for (String blockId : blockIds) {
    // Early exit: a single non-chunk id is enough to answer "not all".
    if (!blockId.startsWith(SHUFFLE_CHUNK_PREFIX)) {
      return false;
    }
  }
  return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,20 @@ class FilteredObjectInputStream extends ObjectInputStream {
protected Class<?> resolveClass(ObjectStreamClass desc)
throws IOException, ClassNotFoundException {

boolean isValid = ALLOWED_PACKAGES.stream().anyMatch(p -> desc.getName().startsWith(p));
boolean isValid = isValid(desc);
if (!isValid) {
throw new IllegalArgumentException(
String.format("Unexpected class in stream: %s", desc.getName()));
}
return super.resolveClass(desc);
}

/**
 * Tells whether the class described by {@code desc} is in an allowed package.
 *
 * @param desc descriptor of the class being resolved
 * @return {@code true} if the class name starts with any entry of
 *         {@code ALLOWED_PACKAGES}; {@code false} otherwise
 */
private boolean isValid(ObjectStreamClass desc) {
  // The class name is the same for every comparison, so look it up once
  // instead of calling getName() on each iteration.
  String className = desc.getName();
  for (String allowedPackage : ALLOWED_PACKAGES) {
    if (className.startsWith(allowedPackage)) {
      return true;
    }
  }
  return false;
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given this is not a perf critical path, I would recommend to keep this as is.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.connector.expressions;

import java.util.Arrays;
import java.util.*;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I think we've avoided wildcard imports, just enumerate them?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


import org.apache.spark.annotation.Evolving;

Expand All @@ -44,7 +44,16 @@ public interface Expression {
* List of fields or columns that are referenced by this expression.
*/
default NamedReference[] references() {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compare

// Stream-based variant under test: maps each TestObj to its values array, flattens
// the arrays into one stream, drops duplicates with distinct(), and collects the
// result into a TestValue[].
public static TestValue[] distinctUseStreamApi(TestObj[] input) {
      return Arrays.stream(input).map(s -> s.values)
        .flatMap(Arrays::stream).distinct().toArray(TestValue[]::new);
    }

and

 // Loop-based variant under test: walks every TestObj's values array and keeps the
 // first occurrence of each value, returning them as a TestValue[].
 public static TestValue[] distinctUseLoopApi(TestObj[] input) {
        // list holds the result in first-seen order; uniqueValues is the membership check.
        List<TestValue> list = new ArrayList<>();
        Set<TestValue> uniqueValues = new HashSet<>();
        for (TestObj s : input) {
            TestValue[] values = s.values;
            for (TestValue testValue : values) {
                // Set.add returns true only when the value was not already present.
                if (uniqueValues.add(testValue)) {
                    list.add(testValue);
                }
            }
        }
        return list.toArray(new TestValue[0]);
    }

TestValue and TestObj are defined as follows:

// Benchmark fixture: a holder for an array of TestValue instances.
public static class TestObj {
        TestValue[] values;

        // Creates `size` TestValue elements, each wrapping the result of
        // RandomUtils.nextInt(0, range).
        public TestObj(int size, int range) {
            values = new TestValue[size];
            for (int i = 0; i < values.length; i++) {
                values[i] = new TestValue(RandomUtils.nextInt(0, range));
            }
        }
    }

    // Benchmark fixture: wraps a single int and defines equality and hashing on it,
    // so instances can be de-duplicated by distinct() or a HashSet.
    public static class TestValue {
        private int value;

        public TestValue(int value) {
            this.value = value;
        }

        // Two TestValue objects are equal when they are the same class and wrap the same int.
        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            TestValue testValue = (TestValue) o;
            return value == testValue.value;
        }

        // Hash is derived from the wrapped int only, consistent with equals.
        @Override
        public int hashCode() {
            return Objects.hashCode(value);
        }
    }

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the following method to build the test objects

 // Builds the benchmark input: `length` TestObj instances, each constructed with the
 // given `size` and `range`.
 public static TestObj[] objs(int length, int size, int range) {
        TestObj[] objects = new TestObj[length];
        for (int i = 0; i < length; i++) {
            objects[i] = new TestObj(size, range);
        }
        return objects;
    }

and test length, size, range:

  • 1, 5, 100

  • 5, 5, 100
  • 10, 5, 100
  • 20, 5, 100
  • 50, 5, 100
  • 100, 5, 100
  • 500, 5, 100
  • 1000, 5, 100

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Java 8

OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 1:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                 35             35           1          2.8         351.9       1.0X
Use Loop api                                         18             18           0          5.5         180.6       1.9X

OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 5:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                129            130           1          0.8        1288.7       1.0X
Use Loop api                                         82             83           1          1.2         824.4       1.6X

OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 10:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                228            229           1          0.4        2280.0       1.0X
Use Loop api                                        160            161           1          0.6        1599.7       1.4X

OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 20:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                430            431           1          0.2        4301.0       1.0X
Use Loop api                                        311            312           1          0.3        3109.9       1.4X

OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 50:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                860            862           2          0.1        8597.6       1.0X
Use Loop api                                        701            702           1          0.1        7013.1       1.2X

OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 100:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                               1454           1456           3          0.1       14540.1       1.0X
Use Loop api                                       1317           1318           2          0.1       13168.9       1.1X

OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 500:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                               5584           5586           2          0.0       55841.2       1.0X
Use Loop api                                       5784           5786           3          0.0       57839.1       1.0X

OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 1000:   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                              10727          10728           2          0.0      107266.4       1.0X
Use Loop api                                      10534          10535           1          0.0      105342.5       1.0X

Java 11

OpenJDK 64-Bit Server VM 11.0.16+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 1:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                 41             42           1          2.4         408.5       1.0X
Use Loop api                                         22             23           1          4.5         224.4       1.8X

OpenJDK 64-Bit Server VM 11.0.16+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 5:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                159            160           1          0.6        1594.5       1.0X
Use Loop api                                         86             87           0          1.2         864.7       1.8X

OpenJDK 64-Bit Server VM 11.0.16+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 10:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                275            276           2          0.4        2748.0       1.0X
Use Loop api                                        167            169           3          0.6        1673.5       1.6X

OpenJDK 64-Bit Server VM 11.0.16+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 20:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                511            513           2          0.2        5113.5       1.0X
Use Loop api                                        315            317           2          0.3        3151.8       1.6X

OpenJDK 64-Bit Server VM 11.0.16+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 50:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                               1012           1014           2          0.1       10118.2       1.0X
Use Loop api                                        675            677           2          0.1        6747.0       1.5X

OpenJDK 64-Bit Server VM 11.0.16+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 100:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                               1665           1667           3          0.1       16645.2       1.0X
Use Loop api                                       1253           1254           1          0.1       12528.3       1.3X

OpenJDK 64-Bit Server VM 11.0.16+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 500:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                               6305           6308           5          0.0       63046.3       1.0X
Use Loop api                                       5375           5376           1          0.0       53751.0       1.2X

OpenJDK 64-Bit Server VM 11.0.16+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz
Test for distinct with input size 1000:   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                              12081          12083           3          0.0      120806.6       1.0X
Use Loop api                                      10463          10467           5          0.0      104634.7       1.2X

Java 17

OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
Test for distinct with input size 1:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                 33             36           2          3.1         325.2       1.0X
Use Loop api                                         16             18           2          6.1         164.4       2.0X

OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
Test for distinct with input size 5:      Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                103            111           5          1.0        1032.9       1.0X
Use Loop api                                         75             80           3          1.3         746.4       1.4X

OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
Test for distinct with input size 10:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                202            210           5          0.5        2022.3       1.0X
Use Loop api                                        152            164           8          0.7        1522.6       1.3X

OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
Test for distinct with input size 20:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                345            362          14          0.3        3446.2       1.0X
Use Loop api                                        283            299          15          0.4        2827.3       1.2X

OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
Test for distinct with input size 50:     Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                                729            767          33          0.1        7295.0       1.0X
Use Loop api                                        581            598          12          0.2        5811.8       1.3X

OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
Test for distinct with input size 100:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                               1370           1381          16          0.1       13700.8       1.0X
Use Loop api                                       1107           1114          10          0.1       11070.0       1.2X

OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
Test for distinct with input size 500:    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                               6541           6545           7          0.0       65405.0       1.0X
Use Loop api                                       4694           4782         124          0.0       46939.4       1.4X

OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Linux 5.15.0-1019-azure
Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
Test for distinct with input size 1000:   Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------------------------------
Use Arrays.steam api                              11999          12185         263          0.0      119990.3       1.0X
Use Loop api                                       9282           9366         118          0.0       92822.1       1.3X

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For Java 11 and 17, using a loop looks better:

return Arrays.stream(children()).map(e -> e.references())
.flatMap(Arrays::stream).distinct().toArray(NamedReference[]::new);
List<NamedReference> list = new ArrayList<>();
Set<NamedReference> uniqueValues = new HashSet<>();
for (Expression e : children()) {
NamedReference[] references = e.references();
for (NamedReference reference : references) {
if (uniqueValues.add(reference)) {
list.add(reference);
}
}
}
return list.toArray(new NamedReference[0]);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need to build the List too? why not just .toArray on the Set, because ordering is important? LinkedHashSet could help there

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me check this.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1ecf017 changes this to use LinkedHashSet; let me check the performance and wait for CI.

Copy link
Copy Markdown
Contributor Author

@LuciferYang LuciferYang Sep 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@srowen 8ff3b77 revert to the previous version, because from the local test, using LinkedHashSet is slower than using ArrayList + HashSet, which may be because LinkedHashSet.toArray is much slower than ArrayList.toArray.

Let's wait for the bench results from GA to verify multiple Java versions

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK that's fine, thanks for checking. Whatever seems most efficient

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Friendly ping @huaxingao: could you help confirm whether the ordering is important for the result of the Expression#references method?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The order shouldn't matter. Thanks for checking with me @LuciferYang

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @huaxingao

Copy link
Copy Markdown
Contributor Author

@LuciferYang LuciferYang Sep 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mridulm @srowen 46d4a57 changes this to use HashSet because the result order is not important. Waiting for CI, and I will double-check the bench result.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok to use HashSet if the result order is not important

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ public abstract class CustomAvgMetric implements CustomMetric {
@Override
public String aggregateTaskMetrics(long[] taskMetrics) {
if (taskMetrics.length > 0) {
double average = ((double)Arrays.stream(taskMetrics).sum()) / taskMetrics.length;
long sum = 0L;
for (long taskMetric : taskMetrics) {
sum += taskMetric;
}
double average = ((double) sum) / taskMetrics.length;
return new DecimalFormat("#0.000").format(average);
} else {
return "0";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
public abstract class CustomSumMetric implements CustomMetric {
@Override
public String aggregateTaskMetrics(long[] taskMetrics) {
return String.valueOf(Arrays.stream(taskMetrics).sum());
long sum = 0L;
for (long taskMetric : taskMetrics) {
sum += taskMetric;
}
return String.valueOf(sum);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@

package org.apache.spark.sql.connector.util;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.StringJoiner;
import java.util.stream.Collectors;

import org.apache.spark.sql.connector.expressions.Cast;
import org.apache.spark.sql.connector.expressions.Expression;
Expand Down Expand Up @@ -62,8 +61,7 @@ public String build(Expression expr) {
String name = e.name();
switch (name) {
case "IN": {
List<String> children =
Arrays.stream(e.children()).map(c -> build(c)).collect(Collectors.toList());
List<String> children = expressionsToStringList(e.children());
Copy link
Copy Markdown
Contributor

@mridulm mridulm Sep 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Given this is called only here, why not avoid the subList? (Give start offset and len params to expressionsToStringList.)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is e00330f ok ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the .subList only wraps a SubList object and does not trigger operations such as memory copy, so the performance gap may be small

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, subList is fairly optimal ... but it can be avoided here if possible. It is a nit comment actually :-)

return visitIn(children.get(0), children.subList(1, children.size()));
}
case "IS_NULL":
Expand Down Expand Up @@ -159,63 +157,56 @@ public String build(Expression expr) {
case "BIT_LENGTH":
case "CHAR_LENGTH":
case "CONCAT":
return visitSQLFunction(name,
Arrays.stream(e.children()).map(c -> build(c)).toArray(String[]::new));
return visitSQLFunction(name, expressionsToStringArray(e.children()));
case "CASE_WHEN": {
List<String> children =
Arrays.stream(e.children()).map(c -> build(c)).collect(Collectors.toList());
return visitCaseWhen(children.toArray(new String[e.children().length]));
return visitCaseWhen(expressionsToStringArray(e.children()));
}
case "TRIM":
return visitTrim("BOTH",
Arrays.stream(e.children()).map(c -> build(c)).toArray(String[]::new));
return visitTrim("BOTH", expressionsToStringArray(e.children()));
case "LTRIM":
return visitTrim("LEADING",
Arrays.stream(e.children()).map(c -> build(c)).toArray(String[]::new));
return visitTrim("LEADING", expressionsToStringArray(e.children()));
case "RTRIM":
return visitTrim("TRAILING",
Arrays.stream(e.children()).map(c -> build(c)).toArray(String[]::new));
return visitTrim("TRAILING", expressionsToStringArray(e.children()));
case "OVERLAY":
return visitOverlay(
Arrays.stream(e.children()).map(c -> build(c)).toArray(String[]::new));
return visitOverlay(expressionsToStringArray(e.children()));
// TODO supports other expressions
default:
return visitUnexpectedExpr(expr);
}
} else if (expr instanceof Min) {
Min min = (Min) expr;
return visitAggregateFunction("MIN", false,
Arrays.stream(min.children()).map(c -> build(c)).toArray(String[]::new));
expressionsToStringArray(min.children()));
} else if (expr instanceof Max) {
Max max = (Max) expr;
return visitAggregateFunction("MAX", false,
Arrays.stream(max.children()).map(c -> build(c)).toArray(String[]::new));
expressionsToStringArray(max.children()));
} else if (expr instanceof Count) {
Count count = (Count) expr;
return visitAggregateFunction("COUNT", count.isDistinct(),
Arrays.stream(count.children()).map(c -> build(c)).toArray(String[]::new));
expressionsToStringArray(count.children()));
} else if (expr instanceof Sum) {
Sum sum = (Sum) expr;
return visitAggregateFunction("SUM", sum.isDistinct(),
Arrays.stream(sum.children()).map(c -> build(c)).toArray(String[]::new));
expressionsToStringArray(sum.children()));
} else if (expr instanceof CountStar) {
return visitAggregateFunction("COUNT", false, new String[]{"*"});
} else if (expr instanceof Avg) {
Avg avg = (Avg) expr;
return visitAggregateFunction("AVG", avg.isDistinct(),
Arrays.stream(avg.children()).map(c -> build(c)).toArray(String[]::new));
expressionsToStringArray(avg.children()));
} else if (expr instanceof GeneralAggregateFunc) {
GeneralAggregateFunc f = (GeneralAggregateFunc) expr;
return visitAggregateFunction(f.name(), f.isDistinct(),
Arrays.stream(f.children()).map(c -> build(c)).toArray(String[]::new));
expressionsToStringArray(f.children()));
} else if (expr instanceof UserDefinedScalarFunc) {
UserDefinedScalarFunc f = (UserDefinedScalarFunc) expr;
return visitUserDefinedScalarFunction(f.name(), f.canonicalName(),
Arrays.stream(f.children()).map(c -> build(c)).toArray(String[]::new));
expressionsToStringArray(f.children()));
} else if (expr instanceof UserDefinedAggregateFunc) {
UserDefinedAggregateFunc f = (UserDefinedAggregateFunc) expr;
return visitUserDefinedAggregateFunction(f.name(), f.canonicalName(), f.isDistinct(),
Arrays.stream(f.children()).map(c -> build(c)).toArray(String[]::new));
expressionsToStringArray(f.children()));
} else {
return visitUnexpectedExpr(expr);
}
Expand Down Expand Up @@ -393,4 +384,20 @@ private String joinListToString(
}
return joiner.toString();
}

/**
 * Builds the string form of each expression via {@code build}, in input order.
 *
 * @param expressions the expressions to convert
 * @return a new array of the same length holding the built strings
 */
private String[] expressionsToStringArray(Expression[] expressions) {
  final int count = expressions.length;
  final String[] built = new String[count];
  int slot = 0;
  for (Expression expression : expressions) {
    built[slot++] = build(expression);
  }
  return built;
}

/**
 * Builds the string form of each expression via {@code build}, in input order.
 *
 * @param expressions the expressions to convert
 * @return a new mutable list, pre-sized to the input length, holding the built strings
 */
private List<String> expressionsToStringList(Expression[] expressions) {
  final int count = expressions.length;
  final List<String> built = new ArrayList<>(count);
  for (int i = 0; i < count; i++) {
    built.add(build(expressions[i]));
  }
  return built;
}
}