Skip to content

Commit 8f4bc5e

Browse files
Fix performance of Sabre rust<->Python boundary (#10652) (#10659)
* Fix performance of Sabre rust<->Python boundary In #10366 the SabreLayout and SabreSwap passes were refactored to support nested control flow blocks. As part of that refactor a new struct SabreResult was created to store the nested results for each block. This new class however resulted in PyO3 cloning the full swap map on every access. Since we have at least 1 access per gate in the circuit (and another one for each swap) this extra clone() adds a lot of extra overhead for deep circuits. In extreme cases this regression could be quite extreme. To address this the result format is changed to be a tuple (as it was originally), while this is less ergonomic the advantage it provides is that for nested objects it moves the rust object to the pyo3 interface so we avoid a copy as Python owns the object on the return. However, for control flow blocks we're still using the SabreResult class as it simplifies the internal implementation (which is why #10366 introduced the struct). The potential impact of this is mitigated by switching to only a single clone per control flow block, by only accessing the SabreResult object's attribute on each recursive call. However, if it is an issue in the future we can work on flattening the nested structure before returning it to python to avoid any clones. Fixes #10650 * Simplify recursive call logic in _apply_sabre_result This commit simplifies the logic in the recursive call logic in _apply_sabre_result to always use a tuple so we avoid an isinstance check. Co-authored-by: Kevin Hartman <kevin@hart.mn> --------- Co-authored-by: Kevin Hartman <kevin@hart.mn> (cherry picked from commit e6c431e) Co-authored-by: Matthew Treinish <mtreinish@kortar.org>
1 parent ac638d5 commit 8f4bc5e

4 files changed

Lines changed: 42 additions & 12 deletions

File tree

crates/accelerate/src/sabre_layout.rs

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#![allow(clippy::too_many_arguments)]
1313

1414
use ndarray::prelude::*;
15+
use numpy::IntoPyArray;
1516
use numpy::PyReadonlyArray2;
1617
use pyo3::prelude::*;
1718
use pyo3::wrap_pyfunction;
@@ -24,10 +25,12 @@ use crate::getenv_use_multiple_threads;
2425
use crate::nlayout::NLayout;
2526
use crate::sabre_swap::neighbor_table::NeighborTable;
2627
use crate::sabre_swap::sabre_dag::SabreDAG;
27-
use crate::sabre_swap::{build_swap_map_inner, Heuristic, SabreResult};
28+
use crate::sabre_swap::swap_map::SwapMap;
29+
use crate::sabre_swap::{build_swap_map_inner, Heuristic, NodeBlockResults, SabreResult};
2830

2931
#[pyfunction]
3032
pub fn sabre_layout_and_routing(
33+
py: Python,
3134
dag: &SabreDAG,
3235
neighbor_table: &NeighborTable,
3336
distance_matrix: PyReadonlyArray2<f64>,
@@ -36,7 +39,7 @@ pub fn sabre_layout_and_routing(
3639
num_swap_trials: usize,
3740
num_layout_trials: usize,
3841
seed: Option<u64>,
39-
) -> ([NLayout; 2], SabreResult) {
42+
) -> ([NLayout; 2], (SwapMap, PyObject, NodeBlockResults)) {
4043
let run_in_parallel = getenv_use_multiple_threads();
4144
let outer_rng = match seed {
4245
Some(seed) => Pcg64Mcg::seed_from_u64(seed),
@@ -47,7 +50,7 @@ pub fn sabre_layout_and_routing(
4750
.take(num_layout_trials)
4851
.collect();
4952
let dist = distance_matrix.as_array();
50-
if run_in_parallel && num_layout_trials > 1 {
53+
let res = if run_in_parallel && num_layout_trials > 1 {
5154
seed_vec
5255
.into_par_iter()
5356
.enumerate()
@@ -91,7 +94,15 @@ pub fn sabre_layout_and_routing(
9194
})
9295
.min_by_key(|(_, result)| result.map.map.values().map(|x| x.len()).sum::<usize>())
9396
.unwrap()
94-
}
97+
};
98+
(
99+
res.0,
100+
(
101+
res.1.map,
102+
res.1.node_order.into_pyarray(py).into(),
103+
res.1.node_block_results,
104+
),
105+
)
95106
}
96107

97108
fn layout_trial(

crates/accelerate/src/sabre_swap/mod.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -208,12 +208,13 @@ fn cmap_from_neighor_table(neighbor_table: &NeighborTable) -> DiGraph<(), ()> {
208208
/// Run sabre swap on a circuit
209209
///
210210
/// Returns:
211-
/// (SwapMap, gate_order): A tuple where the first element is a mapping of
211+
/// (SwapMap, gate_order, node_block_results): A tuple where the first element is a mapping of
212212
/// DAGCircuit node ids to a list of virtual qubit swaps that should be
213213
/// added before that operation. The second element is a numpy array of
214214
/// node ids that represents the traversal order used by sabre.
215215
#[pyfunction]
216216
pub fn build_swap_map(
217+
py: Python,
217218
num_qubits: usize,
218219
dag: &SabreDAG,
219220
neighbor_table: &NeighborTable,
@@ -223,9 +224,9 @@ pub fn build_swap_map(
223224
num_trials: usize,
224225
seed: Option<u64>,
225226
run_in_parallel: Option<bool>,
226-
) -> SabreResult {
227+
) -> (SwapMap, PyObject, NodeBlockResults) {
227228
let dist = distance_matrix.as_array();
228-
build_swap_map_inner(
229+
let res = build_swap_map_inner(
229230
num_qubits,
230231
dag,
231232
neighbor_table,
@@ -235,6 +236,11 @@ pub fn build_swap_map(
235236
layout,
236237
num_trials,
237238
run_in_parallel,
239+
);
240+
(
241+
res.map,
242+
res.node_order.into_pyarray(py).into(),
243+
res.node_block_results,
238244
)
239245
}
240246

qiskit/transpiler/passes/routing/sabre_swap.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -341,11 +341,14 @@ def empty_dag(node, block):
341341
return out
342342

343343
def apply_inner(out_dag, current_layout, qubit_indices_inner, result, id_to_node):
344-
for node_id in result.node_order:
344+
node_order = result[1]
345+
swap_map = result[0]
346+
node_block_results = result[2]
347+
for node_id in node_order:
345348
node = id_to_node[node_id]
346349
if isinstance(node.op, ControlFlowOp):
347350
# Handle control flow op and continue.
348-
block_results = result.node_block_results[node_id]
351+
block_results = node_block_results[node_id]
349352
mapped_block_dags = []
350353
idle_qubits = set(out_dag.qubits)
351354
for block, block_result in zip(node.op.blocks, block_results):
@@ -360,7 +363,11 @@ def apply_inner(out_dag, current_layout, qubit_indices_inner, result, id_to_node
360363
mapped_block_dag,
361364
mapped_block_layout,
362365
block_qubit_indices,
363-
block_result.result,
366+
(
367+
block_result.result.map,
368+
block_result.result.node_order,
369+
block_result.result.node_block_results,
370+
),
364371
block_id_to_node,
365372
)
366373

@@ -396,9 +403,9 @@ def apply_inner(out_dag, current_layout, qubit_indices_inner, result, id_to_node
396403
continue
397404

398405
# If we get here, the node isn't a control-flow gate.
399-
if node_id in result.map:
406+
if node_id in swap_map:
400407
process_swaps(
401-
result.map[node_id],
408+
swap_map[node_id],
402409
out_dag,
403410
current_layout,
404411
canonical_register,
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
fixes:
3+
- |
4+
Fixed a performance regression in the :class:`~.SabreLayout` and
5+
:class:`~.SabreSwap` transpiler passes.
6+
Fixed `#10650 <https://github.com/Qiskit/qiskit-terra/issues/10650>`__

0 commit comments

Comments
 (0)