Skip to content

Commit 4301eae

Browse files
authored
Use fewer scratch locals in IRBuilder (#8608)
When popping children for an expression, greedily pop none-typed expressions below the last value-producing expression in the stack, packaging all the popped instructions into a new block. This avoids leaving the none-typed expressions on top of the stack, which is good because having them on top of the stack would force the use of a scratch local when the next operand is popped. Besides producing better IR with fewer scratch locals, this also has the benefit of better round-tripping IR through binaries. The binary writer has an optimization where it will elide unnamed blocks because they cannot possibly be branch targets, but this could create "stacky" code that would previously introduce a scratch local when it was parsed back into IR. Now IRBuilder just recreates exactly the unnamed block that had been present in the IR in first place. While we don't generally guarantee that we can perfectly round-trip IR through binaries, this reduces the number of cases where round-trips lead to increased code size. Fixes #8413.
1 parent ea98200 commit 4301eae

File tree

7 files changed

+136
-201
lines changed

7 files changed

+136
-201
lines changed

src/wasm-ir-builder.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -713,15 +713,20 @@ class IRBuilder : public UnifiedExpressionVisitor<IRBuilder, Result<>> {
713713
Result<Index> addScratchLocal(Type);
714714

715715
struct HoistedVal {
716-
// The index in the stack of the original value-producing expression.
717-
Index valIndex;
716+
// The index in the stack of the deepest expression to be popped. This can
717+
// be the original value-producing expression, or if we are popping
718+
// greedily, it might be the deepest none-typed expression under the
719+
// value-producing expression.
720+
Index hoistIndex;
718721
// The local.get placed on the stack, if any.
719722
LocalGet* get;
720723
};
721724

722725
// Find the last value-producing expression, if any, and hoist its value to
723-
// the top of the stack using a scratch local if necessary.
724-
MaybeResult<HoistedVal> hoistLastValue();
726+
// the top of the stack using a scratch local if necessary. If `greedy`, then
727+
// also include none-typed expressions and the value-producing expression in
728+
// the hoisted range of expressions.
729+
MaybeResult<HoistedVal> hoistLastValue(bool greedy = false);
725730
// Transform the stack as necessary such that the original producer of the
726731
// hoisted value will be popped along with the final expression that produces
727732
// the value, if they are different. May only be called directly after

src/wasm/wasm-ir-builder.cpp

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -61,29 +61,37 @@ Result<Index> IRBuilder::addScratchLocal(Type type) {
6161
return Builder::addVar(func, name, type);
6262
}
6363

64-
MaybeResult<IRBuilder::HoistedVal> IRBuilder::hoistLastValue() {
64+
MaybeResult<IRBuilder::HoistedVal> IRBuilder::hoistLastValue(bool greedy) {
6565
auto& stack = getScope().exprStack;
66-
int index = stack.size() - 1;
67-
for (; index >= 0; --index) {
68-
if (stack[index]->type != Type::none) {
66+
int valIndex = stack.size() - 1;
67+
for (; valIndex >= 0; --valIndex) {
68+
if (stack[valIndex]->type != Type::none) {
6969
break;
7070
}
7171
}
72-
if (index < 0) {
72+
if (valIndex < 0) {
7373
// There is no value-producing or unreachable expression.
7474
return {};
7575
}
76-
if (unsigned(index) == stack.size() - 1) {
76+
77+
int hoistIndex = valIndex;
78+
if (greedy) {
79+
while (hoistIndex > 0 && stack[hoistIndex - 1]->type == Type::none) {
80+
--hoistIndex;
81+
}
82+
}
83+
84+
if (unsigned(valIndex) == stack.size() - 1) {
7785
// Value-producing expression already on top of the stack.
78-
return HoistedVal{Index(index), nullptr};
86+
return HoistedVal{Index(hoistIndex), nullptr};
7987
}
80-
auto*& expr = stack[index];
88+
auto*& expr = stack[valIndex];
8189
if (expr->type == Type::unreachable) {
8290
// Make sure the top of the stack also has an unreachable expression.
8391
if (stack.back()->type != Type::unreachable) {
8492
pushSynthetic(builder.makeUnreachable());
8593
}
86-
return HoistedVal{Index(index), nullptr};
94+
return HoistedVal{Index(hoistIndex), nullptr};
8795
}
8896
// Hoist with a scratch local. Normally the scratch local is the same type as
8997
// the hoisted expression, but we may need to adjust it given the enabled
@@ -99,7 +107,7 @@ MaybeResult<IRBuilder::HoistedVal> IRBuilder::hoistLastValue() {
99107
expr = builder.makeLocalSet(*scratchIdx, expr);
100108
auto* get = builder.makeLocalGet(*scratchIdx, type);
101109
pushSynthetic(get);
102-
return HoistedVal{Index(index), get};
110+
return HoistedVal{Index(hoistIndex), get};
103111
}
104112

105113
Result<> IRBuilder::packageHoistedValue(const HoistedVal& hoisted,
@@ -113,17 +121,17 @@ Result<> IRBuilder::packageHoistedValue(const HoistedVal& hoisted,
113121
// we are synthesizing a block to help us determine later whether we need to
114122
// run the nested pop fixup.
115123
scopeStack[0].noteSyntheticBlock();
116-
std::vector<Expression*> exprs(scope.exprStack.begin() + hoisted.valIndex,
124+
std::vector<Expression*> exprs(scope.exprStack.begin() + hoisted.hoistIndex,
117125
scope.exprStack.end());
118126
auto* block = builder.makeBlock(exprs, type);
119-
scope.exprStack.resize(hoisted.valIndex);
127+
scope.exprStack.resize(hoisted.hoistIndex);
120128
pushSynthetic(block);
121129
};
122130

123131
auto type = scope.exprStack.back()->type;
124132

125133
if (type.size() == sizeHint || type.size() <= 1) {
126-
if (hoisted.get) {
134+
if (hoisted.hoistIndex < scope.exprStack.size() - 1) {
127135
packageAsBlock(type);
128136
}
129137
return Ok{};
@@ -379,8 +387,10 @@ struct IRBuilder::ChildPopper
379387
continue;
380388
}
381389

382-
// Pop a child normally.
383-
auto val = pop(children[i].constraint.size());
390+
// Pop a child normally. Pop greedily for children other than the first
391+
// (i.e. the last to be popped).
392+
bool greedy = i > 0;
393+
auto val = pop(children[i].constraint.size(), greedy);
384394
CHECK_ERR(val);
385395
*children[i].childp = *val;
386396
}
@@ -458,12 +468,22 @@ struct IRBuilder::ChildPopper
458468
return false;
459469
}
460470

461-
Result<Expression*> pop(size_t size) {
471+
// If `greedy`, then we will pop additional none-typed expressions that come
472+
// before the value-producing expression. The additional expressions will be
473+
// packaged into a block with the value-producing expression. This is better
474+
// than leaving them on top of the stack, where they will force the use of a
475+
// scratch local when the next operand is popped. `greedy` should be used when
476+
// popping all children of an expression except the first (i.e. the
477+
// last child to be popped). Not being greedy for the last popped child defers
478+
// the creation of a block to hold its none-typed predecessors. It may turn
479+
// out that such a block is not necessary, for example when the none-typed
480+
// expressions can be included directly into a parent block scope.
481+
Result<Expression*> pop(size_t size, bool greedy = false) {
462482
assert(size >= 1);
463483
auto& scope = builder.getScope();
464484

465485
// Find the suffix of expressions that do not produce values.
466-
auto hoisted = builder.hoistLastValue();
486+
auto hoisted = builder.hoistLastValue(greedy);
467487
CHECK_ERR(hoisted);
468488
if (!hoisted) {
469489
// There are no expressions that produce values.
@@ -489,7 +509,7 @@ struct IRBuilder::ChildPopper
489509
std::vector<Expression*> elems;
490510
elems.resize(size);
491511
for (int i = size - 1; i >= 0; --i) {
492-
auto elem = pop(1);
512+
auto elem = pop(1, greedy || i > 0);
493513
CHECK_ERR(elem);
494514
elems[i] = *elem;
495515
}

test/lit/basic/extra-branch-values.wast

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,17 +2383,15 @@
23832383
;; CHECK-NEXT: (local $scratch_6 i32)
23842384
;; CHECK-NEXT: (local $scratch_7 i32)
23852385
;; CHECK-NEXT: (local $scratch_8 (ref any))
2386-
;; CHECK-NEXT: (local $scratch_9 i32)
2387-
;; CHECK-NEXT: (local $scratch_10 anyref)
2388-
;; CHECK-NEXT: (local $scratch_11 i32)
2389-
;; CHECK-NEXT: (local $scratch_12 (ref any))
2390-
;; CHECK-NEXT: (local $scratch_13 i32)
2391-
;; CHECK-NEXT: (local $scratch_14 eqref)
2386+
;; CHECK-NEXT: (local $scratch_9 anyref)
2387+
;; CHECK-NEXT: (local $scratch_10 i32)
2388+
;; CHECK-NEXT: (local $scratch_11 (ref any))
2389+
;; CHECK-NEXT: (local $scratch_12 eqref)
23922390
;; CHECK-NEXT: (local.set $scratch
23932391
;; CHECK-NEXT: (local.get $0)
23942392
;; CHECK-NEXT: )
23952393
;; CHECK-NEXT: (block $label (type $0) (result i32 eqref)
2396-
;; CHECK-NEXT: (local.set $scratch_14
2394+
;; CHECK-NEXT: (local.set $scratch_12
23972395
;; CHECK-NEXT: (block $label0 (result eqref)
23982396
;; CHECK-NEXT: (br $label
23992397
;; CHECK-NEXT: (if (type $0) (result i32 eqref)
@@ -2416,46 +2414,40 @@
24162414
;; CHECK-NEXT: )
24172415
;; CHECK-NEXT: )
24182416
;; CHECK-NEXT: (tuple.make 2
2419-
;; CHECK-NEXT: (block (result i32)
2420-
;; CHECK-NEXT: (local.set $scratch_9
2421-
;; CHECK-NEXT: (local.get $scratch_6)
2422-
;; CHECK-NEXT: )
2417+
;; CHECK-NEXT: (local.get $scratch_6)
2418+
;; CHECK-NEXT: (block (result (ref eq))
24232419
;; CHECK-NEXT: (global.set $any
24242420
;; CHECK-NEXT: (local.get $scratch_8)
24252421
;; CHECK-NEXT: )
2426-
;; CHECK-NEXT: (local.get $scratch_9)
2422+
;; CHECK-NEXT: (global.get $eq)
24272423
;; CHECK-NEXT: )
2428-
;; CHECK-NEXT: (global.get $eq)
24292424
;; CHECK-NEXT: )
24302425
;; CHECK-NEXT: )
24312426
;; CHECK-NEXT: (else
24322427
;; CHECK-NEXT: (local.set $scratch_6
24332428
;; CHECK-NEXT: (block (result i32)
2434-
;; CHECK-NEXT: (local.set $scratch_11
2429+
;; CHECK-NEXT: (local.set $scratch_10
24352430
;; CHECK-NEXT: (local.get $scratch)
24362431
;; CHECK-NEXT: )
2437-
;; CHECK-NEXT: (local.set $scratch_10
2432+
;; CHECK-NEXT: (local.set $scratch_9
24382433
;; CHECK-NEXT: (local.get $3)
24392434
;; CHECK-NEXT: )
2440-
;; CHECK-NEXT: (local.get $scratch_11)
2435+
;; CHECK-NEXT: (local.get $scratch_10)
24412436
;; CHECK-NEXT: )
24422437
;; CHECK-NEXT: )
2443-
;; CHECK-NEXT: (local.set $scratch_12
2438+
;; CHECK-NEXT: (local.set $scratch_11
24442439
;; CHECK-NEXT: (br_on_cast $label0 anyref eqref
2445-
;; CHECK-NEXT: (local.get $scratch_10)
2440+
;; CHECK-NEXT: (local.get $scratch_9)
24462441
;; CHECK-NEXT: )
24472442
;; CHECK-NEXT: )
24482443
;; CHECK-NEXT: (tuple.make 2
2449-
;; CHECK-NEXT: (block (result i32)
2450-
;; CHECK-NEXT: (local.set $scratch_13
2451-
;; CHECK-NEXT: (local.get $scratch_6)
2452-
;; CHECK-NEXT: )
2444+
;; CHECK-NEXT: (local.get $scratch_6)
2445+
;; CHECK-NEXT: (block (result eqref)
24532446
;; CHECK-NEXT: (global.set $any
2454-
;; CHECK-NEXT: (local.get $scratch_12)
2447+
;; CHECK-NEXT: (local.get $scratch_11)
24552448
;; CHECK-NEXT: )
2456-
;; CHECK-NEXT: (local.get $scratch_13)
2449+
;; CHECK-NEXT: (global.get $eqref)
24572450
;; CHECK-NEXT: )
2458-
;; CHECK-NEXT: (global.get $eqref)
24592451
;; CHECK-NEXT: )
24602452
;; CHECK-NEXT: )
24612453
;; CHECK-NEXT: )
@@ -2464,7 +2456,7 @@
24642456
;; CHECK-NEXT: )
24652457
;; CHECK-NEXT: (tuple.make 2
24662458
;; CHECK-NEXT: (local.get $scratch_6)
2467-
;; CHECK-NEXT: (local.get $scratch_14)
2459+
;; CHECK-NEXT: (local.get $scratch_12)
24682460
;; CHECK-NEXT: )
24692461
;; CHECK-NEXT: )
24702462
;; CHECK-NEXT: )

test/lit/passes/roundtrip-gc.wast

Lines changed: 0 additions & 44 deletions
This file was deleted.

0 commit comments

Comments
 (0)