@@ -215,8 +215,17 @@ class SparsePauliOpGPUComposeBench:
215215 ``_GPU_COMPOSE_THRESHOLD`` (5 000 000), ensuring the CuPy branch is taken.
216216 """
217217
218- # (num_qubits, num_terms): product num_terms^2 * num_qubits must be > 5_000_000
219- params = [[10 , 800 ], [20 , 520 ], [30 , 420 ]]
218+ # "num_qubits,num_terms": product num_terms^2 * num_qubits must be > 5_000_000
219+ # Rows span just-above-threshold (~6M) up to large tensors (~50M) at varying
220+ # qubit counts, so we can see how GPU speedup scales with both tensor size and shape.
221+ params = [
222+ # ~6M elements (just above threshold)
223+ "10,800" , "20,520" , "30,420" ,
224+ # ~20M elements
225+ "10,1500" , "20,1000" , "30,820" , "50,640" ,
226+ # ~50M elements
227+ "10,2300" , "20,1600" , "50,1000" ,
228+ ]
220229 param_names = ["num_qubits,num_terms" ]
221230
222231 def setup (self , num_qubits_num_terms ):
@@ -225,7 +234,7 @@ def setup(self, num_qubits_num_terms):
225234 except ImportError as exc :
226235 raise NotImplementedError ("CuPy not installed" ) from exc
227236
228- num_qubits , num_terms = num_qubits_num_terms
237+ num_qubits , num_terms = map ( int , num_qubits_num_terms . split ( "," ))
229238 self .p1 = SparsePauliOp (
230239 random_pauli_list (num_qubits = num_qubits , size = num_terms , phase = True )
231240 )
@@ -245,3 +254,48 @@ def time_compose_cpu(self, _):
245254
246255 with unittest .mock .patch .object (_spo , "_GPU_COMPOSE_THRESHOLD" , 10 ** 18 ):
247256 self .p1 .compose (self .p2 )
257+
258+
class SparsePauliOpGPUComposeQargsBench:
    """Time ``SparsePauliOp.compose(..., qargs=...)`` on the GPU and CPU paths.

    A ``sub_qubits``-wide operator is composed onto the first ``sub_qubits``
    qubits of a ``total_qubits``-wide operator. Both operators hold
    ``num_terms`` random Pauli terms with phases.

    Each entry of ``params`` is a single comma-separated string,
    ``"total_qubits,sub_qubits,num_terms"``, chosen so that
    ``num_terms**2 * sub_qubits`` exceeds 5_000_000 (the value the GPU
    compose threshold is documented to have elsewhere in this file).
    """

    params = [
        # sub_qubits = 10, 800 terms each
        "20,10,800",
        "30,10,800",
        "50,10,800",
        # sub_qubits = 20, 520 terms each
        "30,20,520",
        "50,20,520",
        # sub_qubits = 30, 420 terms each
        "50,30,420",
    ]
    param_names = ["total_qubits,sub_qubits,num_terms"]

    def setup(self, params):
        """Build the two random operators and the target qubit list.

        Args:
            params: one ``"total_qubits,sub_qubits,num_terms"`` string taken
                from the class-level ``params`` list.

        Raises:
            NotImplementedError: if CuPy cannot be imported. This is raised
                before any operator is built, so no timing runs without CuPy.
        """
        try:
            import cupy  # noqa: F401 pylint: disable=import-outside-toplevel
        except ImportError as exc:
            raise NotImplementedError("CuPy not installed") from exc

        # Exactly three integer fields are expected; anything else fails the
        # unpacking (wrong count) or the int() conversion (non-numeric).
        total_qubits, sub_qubits, num_terms = (
            int(field) for field in params.split(",")
        )

        # p1 is generated before p2, so the random draws happen in a fixed order.
        wide_paulis = random_pauli_list(
            num_qubits=total_qubits, size=num_terms, phase=True
        )
        self.p1 = SparsePauliOp(wide_paulis)

        narrow_paulis = random_pauli_list(
            num_qubits=sub_qubits, size=num_terms, phase=True
        )
        self.p2 = SparsePauliOp(narrow_paulis)

        # Target the lowest-indexed ``sub_qubits`` qubits of the wide operator.
        self.qargs = [*range(sub_qubits)]

    def time_compose_qargs_gpu(self, _):
        """GPU path: compose the smaller operator onto a subset of qubits."""
        self.p1.compose(self.p2, qargs=self.qargs)

    def time_compose_qargs_cpu(self, _):
        """CPU path on the same inputs, for direct comparison with the GPU timing."""
        from qiskit.quantum_info.operators.symplectic import (  # pylint: disable=import-outside-toplevel
            sparse_pauli_op as spo_module,
        )

        # Raise the module threshold to 10**18 for the duration of the call;
        # this is meant to keep these inputs off the GPU branch.
        threshold_patch = unittest.mock.patch.object(
            spo_module, "_GPU_COMPOSE_THRESHOLD", 10**18
        )
        with threshold_patch:
            self.p1.compose(self.p2, qargs=self.qargs)
0 commit comments