-
Notifications
You must be signed in to change notification settings - Fork 850
perf: avoid O(N^2) exiting-branch checks in CodeFolding #8599
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,6 +63,7 @@ | |
| #include "ir/effects.h" | ||
| #include "ir/eh-utils.h" | ||
| #include "ir/find_all.h" | ||
| #include "ir/iteration.h" | ||
| #include "ir/label-utils.h" | ||
| #include "ir/utils.h" | ||
| #include "pass.h" | ||
|
|
@@ -135,6 +136,11 @@ struct CodeFolding | |
| modifieds; // modified code should not be processed | ||
| // again, wait for next pass | ||
|
|
||
| // Cache of expressions that have branches exiting to targets defined | ||
| // outside them. Populated lazily on first access via PostWalker. | ||
| std::unordered_set<Expression*> exitingBranchCache_; | ||
|
Changqing-JING marked this conversation as resolved.
Outdated
|
||
| bool exitingBranchCachePopulated_ = false; | ||
|
|
||
| // walking | ||
|
|
||
| void visitExpression(Expression* curr) { | ||
|
|
@@ -299,13 +305,76 @@ struct CodeFolding | |
| returnTails.clear(); | ||
| unoptimizables.clear(); | ||
| modifieds.clear(); | ||
| exitingBranchCache_.clear(); | ||
| exitingBranchCachePopulated_ = false; | ||
| if (needEHFixups) { | ||
| EHUtils::handleBlockNestedPops(func, *getModule()); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| private: | ||
| // Check if an expression has branches that exit to targets defined outside | ||
| // it. The cache is populated lazily on first call using a PostWalker for | ||
| // efficient bottom-up traversal. | ||
| bool hasExitingBranches(Expression* expr) { | ||
| if (!exitingBranchCachePopulated_) { | ||
| populateExitingBranchCache(getFunction()->body); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like this still scans the entire function. I suggest that we only scan expr itself. That will still avoid re-computing things, but avoid scanning things that we never need to look at. This does require that the cache store a bool, so we know if we scanned or not, and if we did, if we found branches out or not. But I think that is worth it - usually we will scan very few things.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The per-expression cache would still be O(N^2) in the nested block case. AssemblyScript GC emits __visit_members with deeply nested blocks + br_table, where the nesting level equals the number of classes (4000+ in real apps). Each nested block gets queried by optimizeTerminatingTails, and each query walks its overlapping subtree independently, giving O(N + (N-1) + ... + 1) = O(N^2) total work even with the cache. We also cannot reuse a child's cached bool to compute a parent's result, because knowing "child has exiting branches" does not tell us which names exit -- the parent may define/resolve some of them. To compose results bottom-up, we would need to store the full set of unresolved names per expression. I benchmarked that approach (storing unordered_map<Expression*, unordered_set> and propagating name sets upward), but the per-node set allocation overhead on millions of nodes made -Oz significantly slower than the baseline (~13min vs ~5min). The whole-function scan avoids both issues by computing all results in a single O(N) pass using only integer counters, with no per-node name storage.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
What is the baseline here? (is it before this PR, or the PR's current state)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Current main costs 9min
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, thanks. Ok, it might really make sense to scan the whole function then, in a fast way, rather than less code in a slower way. |
||
| exitingBranchCachePopulated_ = true; | ||
| } | ||
| return exitingBranchCache_.count(expr); | ||
| } | ||
|
|
||
| // Pre-populate the exiting branch cache for all sub-expressions of root | ||
| // in a single O(N) bottom-up walk. After this, exitingBranchCache_ | ||
| // lookups are O(1). | ||
| void populateExitingBranchCache(Expression* root) { | ||
| struct CachePopulator | ||
| : public PostWalker<CachePopulator, | ||
| UnifiedExpressionVisitor<CachePopulator>> { | ||
| std::unordered_set<Expression*>& cache; | ||
| // Track unresolved branch targets at each node. We propagate children's | ||
| // targets upward: add uses, remove defs. If any remain, the expression | ||
| // has exiting branches. | ||
| std::unordered_map<Expression*, std::unordered_set<Name>> targetSets; | ||
|
|
||
| CachePopulator(std::unordered_set<Expression*>& cache) : cache(cache) {} | ||
|
|
||
| void visitExpression(Expression* curr) { | ||
| std::unordered_set<Name> targets; | ||
| // Merge children's target sets into ours (move to avoid copies) | ||
| ChildIterator children(curr); | ||
| for (auto* child : children) { | ||
| auto it = targetSets.find(child); | ||
| if (it != targetSets.end()) { | ||
| if (targets.empty()) { | ||
| targets = std::move(it->second); | ||
| } else { | ||
| targets.merge(it->second); | ||
| } | ||
| targetSets.erase(it); | ||
| } | ||
| } | ||
| // Add branch uses (names this expression branches to) | ||
| BranchUtils::operateOnScopeNameUses( | ||
| curr, [&](Name& name) { targets.insert(name); }); | ||
| // Remove branch defs (names this expression defines as targets) | ||
| BranchUtils::operateOnScopeNameDefs(curr, [&](Name& name) { | ||
| if (name.is()) { | ||
| targets.erase(name); | ||
| } | ||
| }); | ||
| bool hasExiting = !targets.empty(); | ||
| if (hasExiting) { | ||
| cache.insert(curr); | ||
| targetSets[curr] = std::move(targets); | ||
| } | ||
| } | ||
| }; | ||
| CachePopulator populator(exitingBranchCache_); | ||
| populator.walk(root); | ||
| } | ||
|
|
||
| // check if we can move a list of items out of another item. we can't do so | ||
| // if one of the items has a branch to something inside outOf that is not | ||
| // inside that item | ||
|
|
@@ -637,9 +706,7 @@ struct CodeFolding | |
| // TODO: this should not be a problem in | ||
| // *non*-terminating tails, but | ||
| // double-verify that | ||
| if (EffectAnalyzer( | ||
| getPassOptions(), *getModule(), newItem) | ||
| .hasExternalBreakTargets()) { | ||
| if (hasExitingBranches(newItem)) { | ||
| return true; | ||
| } | ||
| return false; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.