Skip to content

Commit 15ad2b2

Browse files
authored
Make everything 1.5% faster by calling leaf visitors immediately [NFC] (#8581)
Continuing #8571, use a constexpr check to detect when we are about to visit an expression that has no children (a leaf). In that case we don't need to push a task for it and pop it later; we can just do the visit inline.
1 parent 98ad697 commit 15ad2b2

File tree

3 files changed

+106
-2
lines changed

3 files changed

+106
-2
lines changed

src/wasm-traversal.h

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,27 @@ struct Walker : public VisitorType {
343343
Module* currModule = nullptr; // current module being processed
344344
};
345345

346+
// Define which expression classes are leaves, that is, classes that have no
// children. We can handle them more optimally below: a leaf's doVisit* can be
347+
// called immediately instead of being pushed as a task and popped later.
//
// IsLeaf<T> is false by default; each specialization below opts one class in.
// The accuracy of this list is tested in leaves.cpp, which checks IsLeaf for
// every expression class against whether that class declares any child
// fields, so a mismatch in either direction fails that test.
348+
template<typename T> struct IsLeaf : std::false_type {};
349+
350+
template<> struct IsLeaf<LocalGet> : std::true_type {};
351+
template<> struct IsLeaf<GlobalGet> : std::true_type {};
352+
template<> struct IsLeaf<AtomicFence> : std::true_type {};
353+
template<> struct IsLeaf<Pause> : std::true_type {};
354+
template<> struct IsLeaf<DataDrop> : std::true_type {};
355+
template<> struct IsLeaf<Const> : std::true_type {};
356+
template<> struct IsLeaf<MemorySize> : std::true_type {};
357+
template<> struct IsLeaf<RefNull> : std::true_type {};
358+
template<> struct IsLeaf<RefFunc> : std::true_type {};
359+
template<> struct IsLeaf<TableSize> : std::true_type {};
360+
template<> struct IsLeaf<ElemDrop> : std::true_type {};
361+
template<> struct IsLeaf<Rethrow> : std::true_type {};
362+
template<> struct IsLeaf<Nop> : std::true_type {};
363+
template<> struct IsLeaf<Unreachable> : std::true_type {};
364+
template<> struct IsLeaf<Pop> : std::true_type {};
365+
template<> struct IsLeaf<StringConst> : std::true_type {};
366+
346367
// Walks in post-order, i.e., children first. When there isn't an obvious
347368
// order to operands, we follow them in order of execution.
348369

@@ -369,6 +390,10 @@ struct PostWalker : public Walker<SubType, VisitorType> {
369390
// Note that even if this ends up being a runtime check, it should be faster
370391
// than pushing empty tasks, as the check is much faster than the push/pop/
371392
// call, and a large number of our calls (most, perhaps) are not overridden.
393+
//
394+
// If we do *not* have an empty visitor, we can still optimize in the case
395+
// of a leaf: leaves have no children, so we can just call doVisit* rather
396+
// than push that task, pop it later, and call that.
372397
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ <= 11
373398
#define DELEGATE_START(id) \
374399
if (&SubType::visit##id != \
@@ -377,17 +402,22 @@ struct PostWalker : public Walker<SubType, VisitorType> {
377402
self->pushTask(SubType::doVisit##id, currp); \
378403
} \
379404
[[maybe_unused]] auto* cast = curr->cast<id>();
380-
#else
405+
#else // constexpr
381406
// Compile-time variant of DELEGATE_START. Nothing is scheduled unless SubType
// replaces visit##id or doVisit##id. When one of them is replaced, and id is
// a leaf (IsLeaf<id>) and SubType::scan is PostWalker's own scan, doVisit##id
// is called right away and the enclosing function returns; otherwise the
// visit is pushed as a task as before. In all non-returning cases `cast` is
// declared for use by the field macros.
// NOTE(review): the scan comparison presumably protects subclasses whose own
// scan relies on the visit task being pushed - confirm.
#define DELEGATE_START(id) \
382407
if constexpr (&SubType::visit##id != \
383408
&Visitor<SubType, \
384409
typename SubType::ReturnType>::visit##id || \
385410
&SubType::doVisit##id != \
386411
&Walker<SubType, VisitorType>::doVisit##id) { \
412+
if constexpr (IsLeaf<id>::value && \
413+
&SubType::scan == &PostWalker<SubType, VisitorType>::scan) { \
414+
SubType::doVisit##id(self, currp); \
415+
return; \
416+
} \
387417
self->pushTask(SubType::doVisit##id, currp); \
388418
} \
389419
[[maybe_unused]] auto* cast = curr->cast<id>();
390-
#endif
420+
#endif // constexpr
391421

392422
#define DELEGATE_GET_FIELD(id, field) cast->field
393423

test/gtest/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ set(unittest_SOURCES
1212
dataflow.cpp
1313
dfa_minimization.cpp
1414
disjoint_sets.cpp
15+
leaves.cpp
1516
glbs.cpp
1617
interpreter.cpp
1718
intervals.cpp

test/gtest/leaves.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#include "wasm-traversal.h"
2+
#include "wasm.h"
3+
4+
#include "gtest/gtest.h"
5+
6+
using LeavesTest = ::testing::Test;
7+
8+
using namespace wasm;
9+
10+
TEST_F(LeavesTest, Manual) {
11+
// Verify some interesting cases manually: one leaf, and one non-leaf for
// each way a class can have children (a single child, an optional child, and
// a vector of children).
12+
13+
// LocalGet is a leaf.
14+
EXPECT_TRUE(IsLeaf<LocalGet>::value);
15+
// GlobalSet is not a leaf due to a child.
16+
EXPECT_FALSE(IsLeaf<GlobalSet>::value);
17+
// Return is not a leaf due to an optional child.
18+
EXPECT_FALSE(IsLeaf<Return>::value);
19+
// Call is not a leaf due to a vector of children.
20+
EXPECT_FALSE(IsLeaf<Call>::value);
21+
}
22+
23+
TEST_F(LeavesTest, Automatic) {
24+
// Verify them all automatically, by defining the field-delegation macros so
// that each class computes whether it has children and compares that against
// IsLeaf.
25+
26+
// Count total expression classes and total with children.
27+
size_t total = 0, totalWithChildren = 0;
28+
29+
// Each class opens its own scope with a flag that starts out false. The
// scope is closed by DELEGATE_FIELD_CASE_END below.
#define DELEGATE_FIELD_CASE_START(id) \
30+
{ \
31+
bool hasChildren = false;
32+
33+
// Any of the three child field kinds marks the class as having children.
#define DELEGATE_FIELD_CHILD(id, field) hasChildren = true;
34+
35+
#define DELEGATE_FIELD_OPTIONAL_CHILD(id, field) hasChildren = true;
36+
37+
#define DELEGATE_FIELD_CHILD_VECTOR(id, field) hasChildren = true;
38+
39+
// Verify that IsLeaf has the right value, then update the counters and close
// the scope opened by DELEGATE_FIELD_CASE_START.
40+
#define DELEGATE_FIELD_CASE_END(id) \
41+
EXPECT_EQ(IsLeaf<id>::value, !hasChildren); \
42+
total++; \
43+
if (hasChildren) { \
44+
totalWithChildren++; \
45+
} \
46+
}
47+
48+
// All other field kinds expand to nothing, so they never set hasChildren.
#define DELEGATE_FIELD_INT(id, field)
49+
#define DELEGATE_FIELD_LITERAL(id, field)
50+
#define DELEGATE_FIELD_NAME(id, field)
51+
#define DELEGATE_FIELD_SCOPE_NAME_DEF(id, field)
52+
#define DELEGATE_FIELD_SCOPE_NAME_USE(id, field)
53+
#define DELEGATE_FIELD_TYPE(id, field)
54+
#define DELEGATE_FIELD_HEAPTYPE(id, field)
55+
#define DELEGATE_FIELD_ADDRESS(id, field)
56+
#define DELEGATE_FIELD_INT_ARRAY(id, field)
57+
#define DELEGATE_FIELD_INT_VECTOR(id, field)
58+
#define DELEGATE_FIELD_NAME_VECTOR(id, field)
59+
#define DELEGATE_FIELD_NAME_USE_VECTOR(id, field)
60+
#define DELEGATE_FIELD_TYPE_VECTOR(id, field)
61+
#define DELEGATE_FIELD_SCOPE_NAME_USE_VECTOR(id, field)
62+
63+
// The main start/end hooks are not needed here, so they are empty as well.
#define DELEGATE_FIELD_MAIN_START
64+
#define DELEGATE_FIELD_MAIN_END
65+
66+
// Expand the macros defined above.
// NOTE(review): presumably the .def file invokes CASE_START/CASE_END once per
// expression class and undefines these macros afterwards - confirm.
#include "wasm-delegations-fields.def"
67+
68+
// Not all have children (this just verifies the macros are actually doing
69+
// something).
70+
EXPECT_LT(totalWithChildren, total);
71+
// NOTE(review): the counters are size_t while the literal 0 is an int; this
// may trigger -Wsign-compare inside gtest's comparison helpers, in which case
// 0u would avoid it - confirm against the project's warning flags.
EXPECT_GT(totalWithChildren, 0);
72+
EXPECT_GT(total, 0);
73+
}

0 commit comments

Comments
 (0)