Skip to content
Merged
2 changes: 2 additions & 0 deletions crates/accelerate/src/circuit_library/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ use pyo3::prelude::*;

mod entanglement;
mod pauli_feature_map;
mod quantum_volume;

pub fn circuit_library(m: &Bound<PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(pauli_feature_map::pauli_feature_map))?;
m.add_wrapped(wrap_pyfunction!(entanglement::get_entangler_map))?;
m.add_wrapped(wrap_pyfunction!(quantum_volume::quantum_volume))?;
Ok(())
}
149 changes: 149 additions & 0 deletions crates/accelerate/src/circuit_library/quantum_volume.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// This code is part of Qiskit.
//
// (C) Copyright IBM 2024
//
// This code is licensed under the Apache License, Version 2.0. You may
// obtain a copy of this license in the LICENSE.txt file in the root directory
// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
//
// Any modifications or derivative works of this code must retain this
// copyright notice, and modified files need to carry a notice indicating
// that they have been altered from the originals.

use std::thread::available_parallelism;

use qiskit_circuit::imports::UNITARY_GATE;

use pyo3::prelude::*;

use crate::getenv_use_multiple_threads;
use faer_ext::{IntoFaerComplex, IntoNdarrayComplex};
use ndarray::prelude::*;
use num_complex::Complex64;
use numpy::IntoPyArray;
use rand::prelude::*;
use rand_distr::Normal;
use rand_pcg::Pcg64Mcg;
use rayon::prelude::*;

use qiskit_circuit::circuit_data::CircuitData;
use qiskit_circuit::operations::Param;
use qiskit_circuit::operations::PyInstruction;
use qiskit_circuit::packed_instruction::PackedOperation;
use qiskit_circuit::Qubit;
use smallvec::smallvec;

#[inline]
Comment thread
Cryoris marked this conversation as resolved.
fn random_unitaries(seed: u64, size: usize) -> impl Iterator<Item = Array2<Complex64>> {
let mut rng = Pcg64Mcg::seed_from_u64(seed);
let dist = Normal::new(0., 1.0).unwrap();

(0..size).map(move |_| {
let mut z: Array2<Complex64> = Array2::from_shape_simple_fn((4, 4), || {
Complex64::new(dist.sample(&mut rng), dist.sample(&mut rng))
});
z.mapv_inplace(|x| x * std::f64::consts::FRAC_1_SQRT_2);
let qr = z.view().into_faer_complex().qr();
let r = qr.compute_r().as_ref().into_ndarray_complex().to_owned();
let mut d = r.into_diag();
d.mapv_inplace(|d| d / d.norm());
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this re-compute the norm of d in every application? If yes would it make sense to pre-compute d.norm()?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does an in-place substitution on the diagonal d so that each value also named d (which is probably why this is confusing) is replaced with d / d.norm() for each element d. I'll rename the diagonal to diag or something to make it clear which is an element and which is the array.

Copy link
Copy Markdown
Member Author

@mtreinish mtreinish Sep 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering if I should rewrite this using fixed size arrays or nalgebra's Matrix4<Complex64> to make this all faster. Right now I'm pretty confident the overhead is mostly dealing with allocations for 2x2 arrays.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did this here: 32b4c1f the code is a lot more verbose now, but I think it's a bit more explicit and it'll be a bit more optimized (although the random unitary construction is not the bottleneck).

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! The bottleneck is the UnitaryGate creation Python-side as you wrote above, right? I think a 10x speedup (maybe more now with the explicit matrix allocation?) is already great, plus we have the circuit as function reaching the goal of #13046 😄 But if you'd like to test with pre-generating the gates I'm also happy to wait 🙂

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we are totally bottlenecked by the UnitaryGate object creation, I ran a profile with the current state of the PR running the test script I was using to generate the data for the graph I shared above. In serial mode (py-spy doesn't handle the multithreading well) and the UnitaryGate object creation is taking ~58% of the time, while the random unitary generation is only ~30% of the runtime (almost all of that is performing the qr factorization).

I'll do a quick test of serial mode dumping the unitaries to a vec first to see if that makes a difference performance wise (I don't expect it to), and will leave a comment here with the results.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

times

I reran the numbers collecting the unitaries into a vec and then iterating off of that instead of passing the iterator directly to the circuit constructor. It looks like it's marginally faster to collect into a vec.

let mut q = qr.compute_q().as_ref().into_ndarray_complex().to_owned();
q.axis_iter_mut(Axis(0)).for_each(|mut row| {
Comment thread
Cryoris marked this conversation as resolved.
row.iter_mut()
.enumerate()
.for_each(|(index, val)| *val *= d.diag()[index])
});
q
})
}

#[pyfunction]
pub fn quantum_volume(
py: Python,
num_qubits: u32,
depth: usize,
seed: Option<u64>,
) -> PyResult<CircuitData> {
let width = num_qubits as usize / 2;
let num_unitaries = width * depth;
let mut permutation: Vec<Qubit> = (0..num_qubits).map(Qubit).collect();

let mut build_instruction = |(unitary_index, unitary_array): (usize, Array2<Complex64>),
rng: &mut Pcg64Mcg| {
let layer_index = unitary_index % width;
if layer_index == 0 {
permutation.shuffle(rng);
}
let unitary = unitary_array.into_pyarray_bound(py);
let unitary_gate = UNITARY_GATE
.get_bound(py)
.call1((unitary.clone(), py.None(), false))
.unwrap();
let instruction = PyInstruction {
qubits: 2,
clbits: 0,
params: 1,
op_name: "unitary".to_string(),
control_flow: false,
instruction: unitary_gate.unbind(),
};
let qubit = layer_index * 2;
(
PackedOperation::from_instruction(Box::new(instruction)),
smallvec![Param::Obj(unitary.unbind().into())],
vec![permutation[qubit], permutation[qubit + 1]],
vec![],
)
};

if getenv_use_multiple_threads() {
let mut per_thread = num_unitaries / available_parallelism().unwrap();
if per_thread == 0 {
if num_unitaries > 10 {
per_thread = 10
} else {
per_thread = num_unitaries
}
}

let mut outer_rng = match seed {
Some(seed) => Pcg64Mcg::seed_from_u64(seed),
None => Pcg64Mcg::from_entropy(),
};
let seed_vec: Vec<u64> = outer_rng
.clone()
.sample_iter(&rand::distributions::Standard)
.take(num_unitaries)
.collect();
let unitaries: Vec<Array2<Complex64>> = seed_vec
.into_par_iter()
.chunks(per_thread)
.flat_map_iter(|seeds| random_unitaries(seeds[0], seeds.len()))
.collect();
CircuitData::from_packed_operations(
py,
num_qubits,
0,
unitaries
.into_iter()
.enumerate()
.map(|x| build_instruction(x, &mut outer_rng)),
Param::Float(0.),
)
} else {
let mut outer_rng = match seed {
Some(seed) => Pcg64Mcg::seed_from_u64(seed),
None => Pcg64Mcg::from_entropy(),
};
let seed: u64 = outer_rng.sample(rand::distributions::Standard);
CircuitData::from_packed_operations(
py,
num_qubits,
0,
random_unitaries(seed, num_unitaries)
.enumerate()
.map(|x| build_instruction(x, &mut outer_rng)),
Param::Float(0.),
)
}
}
3 changes: 2 additions & 1 deletion qiskit/circuit/library/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@
HiddenLinearFunction
IQP
QuantumVolume
quantum_volume
PhaseEstimation
GroverOperator
PhaseOracle
Expand Down Expand Up @@ -564,7 +565,7 @@
StatePreparation,
Initialize,
)
from .quantum_volume import QuantumVolume
from .quantum_volume import QuantumVolume, quantum_volume
from .fourier_checking import FourierChecking
from .graph_state import GraphState
from .hidden_linear_function import HiddenLinearFunction
Expand Down
35 changes: 35 additions & 0 deletions qiskit/circuit/library/quantum_volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import numpy as np
from qiskit.circuit import QuantumCircuit, CircuitInstruction
from qiskit.circuit.library.generalized_gates import PermutationGate, UnitaryGate
from qiskit._accelerate.circuit_library import quantum_volume as qv_rs


class QuantumVolume(QuantumCircuit):
Expand Down Expand Up @@ -113,3 +114,37 @@ def __init__(
base._append(CircuitInstruction(gate, qubits[qubit : qubit + 2]))
if not flatten:
self._append(CircuitInstruction(base.to_instruction(), tuple(self.qubits)))


def quantum_volume(
num_qubits: int,
depth: Optional[int] = None,
seed: Optional[Union[int, np.random.Generator]] = None,
Comment thread
mtreinish marked this conversation as resolved.
Outdated
) -> QuantumCircuit:
"""A quantum volume model circuit.

The model circuits are random instances of circuits used to measure
the Quantum Volume metric, as introduced in [1].

The model circuits consist of layers of Haar random
elements of SU(4) applied between corresponding pairs
of qubits in a random bipartition.

**Reference Circuit:**

.. plot::

from qiskit.circuit.library import quantum_volume
circuit = quantum_volume(5, 6, seed=10)
circuit.draw('mpl')

**References:**

[1] A. Cross et al. Validating quantum computers using
randomized model circuits, Phys. Rev. A 100, 032328 (2019).
[`arXiv:1811.12926 <https://arxiv.org/abs/1811.12926>`_]
Comment thread
mtreinish marked this conversation as resolved.
Outdated
"""
if isinstance(seed, np.random.Generator):
seed = seed.integers(0, dtype=np.uint64)
depth = depth or num_qubits
return QuantumCircuit._from_circuit_data(qv_rs(num_qubits, depth, seed))
11 changes: 11 additions & 0 deletions releasenotes/notes/add-qv-function-a8990e248d5e7e1a.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
features_circuits:
- |
Added a new function :func:`.quantum_volume` for generating a quantum volume
:class:`.QuantumCircuit` object as defined in A. Cross et al. Validating quantum computers
using randomized model circuits, Phys. Rev. A 100, 032328 (2019)
`https://link.aps.org/doi/10.1103/PhysRevA.100.032328 <https://link.aps.org/doi/10.1103/PhysRevA.100.032328>`__.
This new function differs from the existing :class:`.QuantumVolume` class in that it returns
a :class:`.QuantumCircuit` object instead of building a subclass object. The second is
that this new function is multithreaded and implemented in rust so it generates the output
circuit ~10x faster than the :class:`.QuantumVolume` class.
15 changes: 15 additions & 0 deletions test/python/circuit/library/test_quantum_volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from test.utils.base import QiskitTestCase
from qiskit.circuit.library import QuantumVolume
from qiskit.circuit.library.quantum_volume import quantum_volume


class TestQuantumVolumeLibrary(QiskitTestCase):
Expand All @@ -35,6 +36,20 @@ def test_qv_seed_reproducibility(self):
right = QuantumVolume(4, 4, seed=2024, flatten=True)
self.assertEqual(left, right)

def test_qv_function_seed_reproducibility(self):
"""Test qv circuit."""
left = quantum_volume(10, 10, seed=128)
right = quantum_volume(10, 10, seed=128)
self.assertEqual(left, right)

left = quantum_volume(10, 10, seed=256)
right = quantum_volume(10, 10, seed=256)
self.assertEqual(left, right)

left = quantum_volume(10, 10, seed=4196)
right = quantum_volume(10, 10, seed=4196)
self.assertEqual(left, right)


if __name__ == "__main__":
unittest.main()