Skip to content

Commit 4d160bc

Browse files
committed
Add new multithreaded TwoQubitPeepholeOptimization pass
This commit adds a new transpiler pass for physical optimization, TwoQubitPeepholeOptimization. This replaces the use of Collect2qBlocks, ConsolidateBlocks, and UnitarySynthesis in the optimization stage for a default pass manager setup. The pass logically works the same way where it analyzes the DAG to get a list of 2q runs, calculates the matrix of each run, and then synthesizes the matrix and substitutes it in place. The distinction this pass makes though is it does this all in a single pass and also parallelizes the matrix calculation and synthesis steps because there is no data dependency there. This new pass is not meant to fully replace the Collect2qBlocks, ConsolidateBlocks, or UnitarySynthesis passes as those also run in contexts where we don't have a physical circuit. This is meant instead to replace their usage in the optimization stage only. Accordingly this new pass also changes the logic on how we select the synthesis to use and when to make a substitution. Previously this logic was primarily done via the ConsolidateBlocks pass by only consolidating to a UnitaryGate if the number of basis gates needed based on the Weyl chamber coordinates was less than the number of 2q gates in the block (see #11659 for discussion on this). Since this new pass skips the explicit consolidation stage we go ahead and try all the available synthesizers. Right now this commit has a number of limitations, the largest are: - Only supports the target - It doesn't support any synthesizers besides the TwoQubitBasisDecomposer, because it's the only one in Rust currently. For plugin handling I left the logic as running the three pass series, but I'm not sure this is the behavior we want. We could say keep the synthesis plugins for `UnitarySynthesis` only and then rely on our built-in methods for physical optimization only.
But this also seems less than ideal because the plugin mechanism is how we support synthesizing to custom basis gates, and also more advanced approximate synthesis methods. Both of those are things we need to do as part of the synthesis here. Additionally, this is currently missing tests and documentation and while running it manually "works" as in it returns a circuit that looks valid, I've not done any validation yet. This also likely will need several rounds of performance optimization and tuning. At this point this is just a rough proof of concept and will need a lot of refinement along with larger changes to Qiskit's Rust code before this is ready to merge. Fixes #12007 Fixes #11659
1 parent b258efc commit 4d160bc

10 files changed

Lines changed: 527 additions & 25 deletions

File tree

crates/accelerate/Cargo.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ license.workspace = true
99
name = "qiskit_accelerate"
1010
doctest = false
1111

12+
13+
[features]
14+
cache_pygates = ["qiskit-circuit/cache_pygates"]
15+
1216
[dependencies]
1317
rayon.workspace = true
1418
numpy.workspace = true
@@ -60,6 +64,3 @@ features = ["ndarray"]
6064
[dependencies.pulp]
6165
version = "0.18.22"
6266
features = ["macro"]
63-
64-
[features]
65-
cache_pygates = ["qiskit-circuit/cache_pygates"]

crates/accelerate/src/consolidate_blocks.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ pub(crate) fn consolidate_blocks(
107107
dag.get_qargs(inst.qubits),
108108
) {
109109
all_block_gates.insert(inst_node);
110-
let matrix = match get_matrix_from_inst(py, inst) {
110+
let matrix = match get_matrix_from_inst(inst) {
111111
Ok(mat) => mat,
112112
Err(_) => continue,
113113
};
@@ -198,7 +198,7 @@ pub(crate) fn consolidate_blocks(
198198
*block_qargs.iter().min().unwrap(),
199199
*block_qargs.iter().max().unwrap(),
200200
];
201-
let matrix = blocks_to_matrix(py, dag, &block, block_index_map).ok();
201+
let matrix = blocks_to_matrix(dag, &block, block_index_map).ok();
202202
if let Some(matrix) = matrix {
203203
if force_consolidate
204204
|| decomposer.num_basis_gates_inner(matrix.view()) < basis_count
@@ -252,7 +252,7 @@ pub(crate) fn consolidate_blocks(
252252
first_qubits,
253253
)
254254
{
255-
let matrix = match get_matrix_from_inst(py, first_inst) {
255+
let matrix = match get_matrix_from_inst(first_inst) {
256256
Ok(mat) => mat,
257257
Err(_) => continue,
258258
};
@@ -272,7 +272,7 @@ pub(crate) fn consolidate_blocks(
272272
already_in_block = true;
273273
}
274274
let gate = dag.dag()[*node].unwrap_operation();
275-
let operator = match get_matrix_from_inst(py, gate) {
275+
let operator = match get_matrix_from_inst(gate) {
276276
Ok(mat) => mat,
277277
Err(_) => {
278278
// Set this to skip this run because we can't compute the matrix of the

crates/accelerate/src/convert_2q_block_matrix.rs

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,21 +31,23 @@ use crate::euler_one_qubit_decomposer::matmul_1q;
3131
use crate::QiskitError;
3232

3333
#[inline]
34-
pub fn get_matrix_from_inst(py: Python, inst: &PackedInstruction) -> PyResult<Array2<Complex64>> {
34+
pub fn get_matrix_from_inst(inst: &PackedInstruction) -> PyResult<Array2<Complex64>> {
3535
if let Some(mat) = inst.op.matrix(inst.params_view()) {
3636
Ok(mat)
3737
} else if inst.op.try_standard_gate().is_some() {
3838
Err(QiskitError::new_err(
3939
"Parameterized gates can't be consolidated",
4040
))
4141
} else if let OperationRef::Gate(gate) = inst.op.view() {
42-
Ok(QI_OPERATOR
43-
.get_bound(py)
44-
.call1((gate.gate.clone_ref(py),))?
45-
.getattr(intern!(py, "data"))?
46-
.extract::<PyReadonlyArray2<Complex64>>()?
47-
.as_array()
48-
.to_owned())
42+
Python::with_gil(|py| {
43+
Ok(QI_OPERATOR
44+
.get_bound(py)
45+
.call1((gate.gate.clone_ref(py),))?
46+
.getattr(intern!(py, "data"))?
47+
.extract::<PyReadonlyArray2<Complex64>>()?
48+
.as_array()
49+
.to_owned())
50+
})
4951
} else {
5052
Err(QiskitError::new_err(
5153
"Can't compute matrix of non-unitary op",
@@ -55,7 +57,6 @@ pub fn get_matrix_from_inst(py: Python, inst: &PackedInstruction) -> PyResult<Ar
5557

5658
/// Return the matrix Operator resulting from a block of Instructions.
5759
pub fn blocks_to_matrix(
58-
py: Python,
5960
dag: &DAGCircuit,
6061
op_list: &[NodeIndex],
6162
block_index_map: [Qubit; 2],
@@ -73,7 +74,7 @@ pub fn blocks_to_matrix(
7374
let mut output_matrix: Option<Array2<Complex64>> = None;
7475
for node in op_list {
7576
let inst = dag.dag()[*node].unwrap_operation();
76-
let op_matrix = get_matrix_from_inst(py, inst)?;
77+
let op_matrix = get_matrix_from_inst(inst)?;
7778
match dag
7879
.get_qargs(inst.qubits)
7980
.iter()

crates/accelerate/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ pub mod synthesis;
5656
pub mod target_transpiler;
5757
pub mod twirling;
5858
pub mod two_qubit_decompose;
59+
pub mod two_qubit_peephole;
5960
pub mod uc_gate;
6061
pub mod unitary_synthesis;
6162
pub mod utils;

crates/accelerate/src/two_qubit_decompose.rs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,9 +1244,9 @@ type TwoQubitSequenceVec = Vec<(Option<StandardGate>, SmallVec<[f64; 3]>, SmallV
12441244
#[derive(Clone, Debug)]
12451245
#[pyclass(sequence)]
12461246
pub struct TwoQubitGateSequence {
1247-
gates: TwoQubitSequenceVec,
1247+
pub gates: TwoQubitSequenceVec,
12481248
#[pyo3(get)]
1249-
global_phase: f64,
1249+
pub global_phase: f64,
12501250
}
12511251

12521252
impl TwoQubitGateSequence {
@@ -1709,7 +1709,7 @@ impl TwoQubitBasisDecomposer {
17091709
gate: String,
17101710
gate_matrix: ArrayView2<Complex64>,
17111711
basis_fidelity: f64,
1712-
euler_basis: &str,
1712+
euler_basis: EulerBasis,
17131713
pulse_optimize: Option<bool>,
17141714
) -> PyResult<Self> {
17151715
let ipz: ArrayView2<Complex64> = aview2(&IPZ);
@@ -1817,7 +1817,7 @@ impl TwoQubitBasisDecomposer {
18171817
Ok(TwoQubitBasisDecomposer {
18181818
gate,
18191819
basis_fidelity,
1820-
euler_basis: EulerBasis::__new__(euler_basis)?,
1820+
euler_basis,
18211821
pulse_optimize,
18221822
basis_decomposer,
18231823
super_controlled,
@@ -1986,7 +1986,7 @@ impl TwoQubitBasisDecomposer {
19861986
gate,
19871987
gate_matrix.as_array(),
19881988
basis_fidelity,
1989-
euler_basis,
1989+
EulerBasis::__new__(euler_basis)?,
19901990
pulse_optimize,
19911991
)
19921992
}
@@ -2284,8 +2284,13 @@ fn two_qubit_decompose_up_to_diagonal(
22842284
let (su4, phase) = u4_to_su4(mat_arr);
22852285
let mut real_map = real_trace_transform(su4.view());
22862286
let mapped_su4 = real_map.dot(&su4.view());
2287-
let decomp =
2288-
TwoQubitBasisDecomposer::new_inner("cx".to_string(), aview2(&CX_GATE), 1.0, "U", None)?;
2287+
let decomp = TwoQubitBasisDecomposer::new_inner(
2288+
"cx".to_string(),
2289+
aview2(&CX_GATE),
2290+
1.0,
2291+
EulerBasis::__new__("U")?,
2292+
None,
2293+
)?;
22892294

22902295
let circ_seq = decomp.call_inner(mapped_su4.view(), None, true, None)?;
22912296
let circ = CircuitData::from_standard_gates(

0 commit comments

Comments
 (0)