@@ -162,11 +162,11 @@ __global__ void FusedActBias(Functor act,
162162 idx < elem_cnt;
163163 idx += step) {
164164 const int32_t col_idx = idx % cols;
165- phi:: Load<InType, VecSize>(&src[idx], &src_vec);
166- phi:: Load<float , VecSize>(&dequant_out_scale_data[col_idx],
167- &dequant_out_scale_vec);
165+ Load<InType, VecSize>(&src[idx], &src_vec);
166+ Load<float , VecSize>(&dequant_out_scale_data[col_idx],
167+ &dequant_out_scale_vec);
168168 if (bias) {
169- phi:: Load<T, VecSize>(&bias[col_idx], &bias_vec);
169+ Load<T, VecSize>(&bias[col_idx], &bias_vec);
170170 }
171171#pragma unroll
172172 for (int32_t unroll_idx = 0 ; unroll_idx < VecSize; unroll_idx++) {
@@ -194,7 +194,7 @@ __global__ void FusedActBias(Functor act,
194194 }
195195 }
196196 }
197- phi:: Store<OutType, VecSize>(out_vec, &dst[idx]);
197+ Store<OutType, VecSize>(out_vec, &dst[idx]);
198198 }
199199}
200200
@@ -322,17 +322,17 @@ __global__ void FusedDropoutActGrad(Functor act_grad,
322322 LoadT src_vec;
323323 MaskLoadT mask_vec;
324324
325- phi:: Load<T, VecSize>(&dout[i], &dout_vec);
326- phi:: Load<MaskType, VecSize>(&mask[i], &mask_vec);
327- phi:: Load<T, VecSize>(&src[i], &src_vec);
325+ Load<T, VecSize>(&dout[i], &dout_vec);
326+ Load<MaskType, VecSize>(&mask[i], &mask_vec);
327+ Load<T, VecSize>(&src[i], &src_vec);
328328
329329 StoreT dx_vec;
330330#pragma unroll
331331 for (int ii = 0 ; ii < VecSize; ii++) {
332332 T tmp = dout_vec[ii] * static_cast <T>(mask_vec[ii]) * factor;
333333 dx_vec[ii] = tmp * act_grad.UseOut (src_vec[ii]);
334334 }
335- phi:: Store<T, VecSize>(dx_vec, &dx[i]);
335+ Store<T, VecSize>(dx_vec, &dx[i]);
336336 }
337337}
338338
@@ -376,10 +376,10 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void FusedDropoutActBiasGrad(
376376 LoadT bias_vec;
377377 MaskLoadT mask_vec;
378378
379- phi:: Load<T, VecSize>(&dout[index], &dout_vec);
380- phi:: Load<T, VecSize>(&src[index], &src_vec);
381- phi:: Load<MaskType, VecSize>(&mask[index], &mask_vec);
382- phi:: Load<T, VecSize>(&bias[col_id * VecSize], &bias_vec);
379+ Load<T, VecSize>(&dout[index], &dout_vec);
380+ Load<T, VecSize>(&src[index], &src_vec);
381+ Load<MaskType, VecSize>(&mask[index], &mask_vec);
382+ Load<T, VecSize>(&bias[col_id * VecSize], &bias_vec);
383383
384384 StoreT dx_vec;
385385#pragma unroll
@@ -390,7 +390,7 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void FusedDropoutActBiasGrad(
390390 dx_vec[i] = val;
391391 tmp_sum[i] += val;
392392 }
393- phi:: Store<T, VecSize>(dx_vec, &dx[index]);
393+ Store<T, VecSize>(dx_vec, &dx[index]);
394394 }
395395 }
396396
0 commit comments