@@ -55,74 +55,74 @@ class Buffer
5555
5656 // / Default constructor creates CPU buffer
5757 Buffer ()
58- : impl_(std::make_unique<CpuBufferImpl<T>>())
5958 {
59+ set_impl (std::make_unique<CpuBufferImpl<T, Allocator>>());
6060 }
6161
6262 // / Construct with a backend implementation (set once at construction).
6363 // / This is the only way to set a non-CPU backend; there is no post-
6464 // / construction setter, which avoids race conditions with concurrent reads.
6565 explicit Buffer (std::unique_ptr<BufferImplBase<T>> impl)
66- : impl_(std::move(impl))
6766 {
68- if (!impl_ ) {
67+ if (!impl ) {
6968 throw std::invalid_argument (" Buffer implementation must not be null" );
7069 }
70+ set_impl (std::move (impl));
7171 }
7272
7373 // / Construct with initial size (CPU backend)
7474 explicit Buffer (size_t count)
75- : impl_(std::make_unique<CpuBufferImpl<T>>())
7675 {
77- get_cpu_impl ()->get_storage ().resize (count);
76+ set_impl (std::make_unique<CpuBufferImpl<T, Allocator>>());
77+ cpu_impl_->get_storage ().resize (count);
7878 }
7979
8080 // / Construct with initial size and value (CPU backend)
8181 Buffer (size_t count, const T & value)
82- : impl_(std::make_unique<CpuBufferImpl<T>>())
8382 {
84- get_cpu_impl ()->get_storage ().assign (count, value);
83+ set_impl (std::make_unique<CpuBufferImpl<T, Allocator>>());
84+ cpu_impl_->get_storage ().assign (count, value);
8585 }
8686
8787 // / Construct from std::vector (copy) - for backward compatibility
88- Buffer (const std::vector<T> & vec) // NOLINT(runtime/explicit) - intentionally implicit
89- : impl_(std::make_unique<CpuBufferImpl<T>>())
88+ Buffer (const std::vector<T, Allocator> & vec) // NOLINT(runtime/explicit)
9089 {
91- get_cpu_impl ()->get_storage () = vec;
90+ set_impl (std::make_unique<CpuBufferImpl<T, Allocator>>());
91+ cpu_impl_->get_storage () = vec;
9292 }
9393
9494 // / Construct from std::vector (move) - for backward compatibility
95- Buffer (std::vector<T> && vec) // NOLINT(runtime/explicit) - intentionally implicit
96- : impl_(std::make_unique<CpuBufferImpl<T>>())
95+ Buffer (std::vector<T, Allocator> && vec) // NOLINT(runtime/explicit) - intentionally implicit
9796 {
98- get_cpu_impl ()->get_storage () = std::move (vec);
97+ set_impl (std::make_unique<CpuBufferImpl<T, Allocator>>());
98+ cpu_impl_->get_storage () = std::move (vec);
9999 }
100100
101101 // / Construct from initializer list - for backward compatibility
102102 Buffer (std::initializer_list<T> init)
103- : impl_(std::make_unique<CpuBufferImpl<T>>())
104103 {
105- get_cpu_impl ()->get_storage () = init;
104+ set_impl (std::make_unique<CpuBufferImpl<T, Allocator>>());
105+ cpu_impl_->get_storage () = init;
106106 }
107107
108108 // / Copy constructor (deep copy via clone())
109109 Buffer (const Buffer & other)
110- : impl_(other.impl_->clone ())
111110 {
111+ set_impl (other.impl_ ->clone ());
112112 }
113113
114114 // / Move constructor — the moved-from buffer is left in a valid, empty state.
115115 Buffer (Buffer && other) noexcept
116- : impl_(std::move(other.impl_))
117116 {
118- other.impl_ = std::make_unique<CpuBufferImpl<T>>();
117+ set_impl (std::move (other.impl_ ));
118+ other.set_impl (std::make_unique<CpuBufferImpl<T, Allocator>>());
119119 }
120120
121121 // / Copy assignment (deep copy via clone())
122122 Buffer & operator =(const Buffer & other)
123123 {
124124 if (this != &other) {
125- impl_ = other.impl_ ->clone ();
125+ set_impl ( other.impl_ ->clone () );
126126 }
127127 return *this ;
128128 }
@@ -131,8 +131,8 @@ class Buffer
131131 Buffer & operator =(Buffer && other) noexcept
132132 {
133133 if (this != &other) {
134- impl_ = std::move (other.impl_ );
135- other.impl_ = std::make_unique<CpuBufferImpl<T>>();
134+ set_impl ( std::move (other.impl_ ) );
135+ other.set_impl ( std::make_unique<CpuBufferImpl<T, Allocator >>() );
136136 }
137137 return *this ;
138138 }
@@ -142,29 +142,29 @@ class Buffer
142142 Buffer & operator =(std::initializer_list<T> init)
143143 {
144144 throw_if_not_cpu_backend ();
145- get_cpu_impl () ->get_storage () = init;
145+ cpu_impl_ ->get_storage () = init;
146146 return *this ;
147147 }
148148
149149 // / Assignment from std::vector (copy) - for backward compatibility
150150 // / Uses SFINAE to avoid ambiguity with initializer lists
151- template <typename U = std::vector<T>,
152- typename std::enable_if<std::is_same<U, std::vector<T>>::value, int >::type = 0 >
151+ template <typename U = std::vector<T, Allocator >,
152+ typename std::enable_if<std::is_same<U, std::vector<T, Allocator >>::value, int >::type = 0 >
153153 Buffer & operator =(const U & vec)
154154 {
155155 throw_if_not_cpu_backend ();
156- get_cpu_impl () ->get_storage () = vec;
156+ cpu_impl_ ->get_storage () = vec;
157157 return *this ;
158158 }
159159
160160 // / Assignment from std::vector (move) - for backward compatibility
161161 // / Uses SFINAE to avoid ambiguity with initializer lists
162- template <typename U = std::vector<T>,
163- typename std::enable_if<std::is_same<U, std::vector<T>>::value, int >::type = 0 >
162+ template <typename U = std::vector<T, Allocator >,
163+ typename std::enable_if<std::is_same<U, std::vector<T, Allocator >>::value, int >::type = 0 >
164164 Buffer & operator =(U && vec)
165165 {
166166 throw_if_not_cpu_backend ();
167- get_cpu_impl () ->get_storage () = std::move (vec);
167+ cpu_impl_ ->get_storage () = std::move (vec);
168168 return *this ;
169169 }
170170
@@ -414,32 +414,36 @@ class Buffer
414414
415415 // ========== Conversion Operators ==========
416416
417- // / Implicit conversion to std::vector<T>& (CPU only).
417+ // / Implicit conversion to std::vector<T, Allocator >& (CPU only).
418418 // / Provides backward compatibility with existing code.
419419 // / @throws std::runtime_error if backend is not CPU.
420- operator std::vector<T> &()
420+ operator std::vector<T, Allocator > &()
421421 {
422422 throw_if_not_cpu_backend ();
423- return get_cpu_impl () ->get_storage ();
423+ return cpu_impl_ ->get_storage ();
424424 }
425425
426- operator const std::vector<T> &() const
426+ operator const std::vector<T, Allocator > &() const
427427 {
428428 throw_if_not_cpu_backend ();
429- return get_cpu_impl () ->get_storage ();
429+ return cpu_impl_ ->get_storage ();
430430 }
431431
432- // / Escape hatch: Explicit conversion to std::vector<T> (all backends).
432+ // / Escape hatch: Explicit conversion to std::vector<T, Allocator > (all backends).
433433 // / For non-CPU backends, this triggers a copy to CPU memory.
434434 // / @return A std::vector containing a copy of the buffer data.
435- std::vector<T> to_vector () const
435+ std::vector<T, Allocator > to_vector () const
436436 {
437- if (get_backend_type () == " cpu" ) {
438- return get_cpu_impl ()->get_storage ();
437+ if (cpu_impl_) {
438+ return cpu_impl_->get_storage ();
439+ }
440+ auto cpu_copy = impl_->to_cpu ();
441+ auto * cpu = static_cast <CpuBufferImpl<T> *>(cpu_copy.get ());
442+ if constexpr (std::is_same_v<Allocator, std::allocator<T>>) {
443+ return std::move (cpu->get_storage ());
439444 } else {
440- auto cpu_impl_ptr = impl_->to_cpu ();
441- auto * cpu_impl = static_cast <CpuBufferImpl<T> *>(cpu_impl_ptr.get ());
442- return cpu_impl->get_storage ();
445+ auto & src = cpu->get_storage ();
446+ return std::vector<T, Allocator>(src.begin (), src.end ());
443447 }
444448 }
445449
@@ -450,8 +454,8 @@ class Buffer
450454 if (size () != other.size ()) {
451455 return false ;
452456 }
453- if (get_backend_type () == " cpu " && other.get_backend_type () == " cpu " ) {
454- return get_cpu_impl () ->get_storage () == other.get_cpu_impl () ->get_storage ();
457+ if (cpu_impl_ && other.cpu_impl_ ) {
458+ return cpu_impl_ ->get_storage () == other.cpu_impl_ ->get_storage ();
455459 }
456460 return to_vector () == other.to_vector ();
457461 }
@@ -479,10 +483,10 @@ class Buffer
479483 // / @throws std::runtime_error if backend is not CPU.
480484 void throw_if_not_cpu_backend () const
481485 {
482- const std::string bt = get_backend_type ();
483- if (bt != " cpu" ) {
486+ if (!cpu_impl_) {
484487 throw std::runtime_error (
485- " Operation requires CPU backend. Current backend: " + bt +
488+ " Operation requires CPU backend. Current backend: " +
489+ impl_->get_backend_type () +
486490 " . Use to_vector() for explicit conversion to CPU." );
487491 }
488492 }
@@ -492,10 +496,21 @@ class Buffer
492496 // / The implementation is the sole source of truth for the backend type.
493497 std::unique_ptr<BufferImplBase<T>> impl_;
494498
499+ // / Cached pointer for type-safe CPU backend detection.
500+ // / Set by set_impl() via dynamic_cast; null for non-CPU backends.
501+ CpuBufferImpl<T, Allocator> * cpu_impl_ = nullptr ;
502+
503+ // / Set the implementation and update the cached CPU pointer.
504+ void set_impl (std::unique_ptr<BufferImplBase<T>> impl)
505+ {
506+ impl_ = std::move (impl);
507+ cpu_impl_ = dynamic_cast <CpuBufferImpl<T, Allocator> *>(impl_.get ());
508+ }
509+
495510 // / Get CPU implementation (assumes throw_if_not_cpu_backend() was called)
496- CpuBufferImpl<T> * get_cpu_impl () const
511+ CpuBufferImpl<T, Allocator > * get_cpu_impl () const
497512 {
498- return static_cast <CpuBufferImpl<T> *>(impl_. get ()) ;
513+ return cpu_impl_ ;
499514 }
500515};
501516
0 commit comments