Skip to content

Commit ba52d1a

Browse files
committed
refine int64 helper variable names
1 parent 3c2cd9c commit ba52d1a

1 file changed

Lines changed: 90 additions & 89 deletions

File tree

paddle/phi/kernels/gpu/depthwise_conv.h

Lines changed: 90 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -1564,45 +1564,45 @@ class DepthwiseConvFunctor<GPUContext, T, fuse_relu_before_conv> {
15641564
DenseTensor* output,
15651565
const DataLayout data_layout = DataLayout::NCHW) {
15661566
const int64_t batch_size = input.dims()[0];
1567-
const int64_t input_channels64 =
1567+
const int64_t input_channels_64 =
15681568
(data_layout != DataLayout::NHWC ? input.dims()[1] : input.dims()[3]);
1569-
const int64_t input_height64 =
1569+
const int64_t input_height_64 =
15701570
(data_layout != DataLayout::NHWC ? input.dims()[2] : input.dims()[1]);
1571-
const int64_t input_width64 =
1571+
const int64_t input_width_64 =
15721572
(data_layout != DataLayout::NHWC ? input.dims()[3] : input.dims()[2]);
1573-
const int64_t output_channels64 =
1573+
const int64_t output_channels_64 =
15741574
(data_layout != DataLayout::NHWC ? output->dims()[1]
15751575
: output->dims()[3]);
1576-
const int64_t output_height64 =
1576+
const int64_t output_height_64 =
15771577
(data_layout != DataLayout::NHWC ? output->dims()[2]
15781578
: output->dims()[1]);
1579-
const int64_t output_width64 =
1579+
const int64_t output_width_64 =
15801580
(data_layout != DataLayout::NHWC ? output->dims()[3]
15811581
: output->dims()[2]);
1582-
const int64_t ksize_height64 = filter.dims()[2];
1583-
const int64_t ksize_width64 = filter.dims()[3];
1582+
const int64_t ksize_height_64 = filter.dims()[2];
1583+
const int64_t ksize_width_64 = filter.dims()[3];
15841584

15851585
PADDLE_ENFORCE_LE_INT_MAX(batch_size, "depthwise conv batch size");
1586-
PADDLE_ENFORCE_LE_INT_MAX(input_channels64,
1586+
PADDLE_ENFORCE_LE_INT_MAX(input_channels_64,
15871587
"depthwise conv input channels");
1588-
PADDLE_ENFORCE_LE_INT_MAX(input_height64, "depthwise conv input height");
1589-
PADDLE_ENFORCE_LE_INT_MAX(input_width64, "depthwise conv input width");
1590-
PADDLE_ENFORCE_LE_INT_MAX(output_channels64,
1588+
PADDLE_ENFORCE_LE_INT_MAX(input_height_64, "depthwise conv input height");
1589+
PADDLE_ENFORCE_LE_INT_MAX(input_width_64, "depthwise conv input width");
1590+
PADDLE_ENFORCE_LE_INT_MAX(output_channels_64,
15911591
"depthwise conv output channels");
1592-
PADDLE_ENFORCE_LE_INT_MAX(output_height64, "depthwise conv output height");
1593-
PADDLE_ENFORCE_LE_INT_MAX(output_width64, "depthwise conv output width");
1594-
PADDLE_ENFORCE_LE_INT_MAX(ksize_height64, "depthwise conv kernel height");
1595-
PADDLE_ENFORCE_LE_INT_MAX(ksize_width64, "depthwise conv kernel width");
1592+
PADDLE_ENFORCE_LE_INT_MAX(output_height_64, "depthwise conv output height");
1593+
PADDLE_ENFORCE_LE_INT_MAX(output_width_64, "depthwise conv output width");
1594+
PADDLE_ENFORCE_LE_INT_MAX(ksize_height_64, "depthwise conv kernel height");
1595+
PADDLE_ENFORCE_LE_INT_MAX(ksize_width_64, "depthwise conv kernel width");
15961596

15971597
const int batch_size_int = static_cast<int>(batch_size);
1598-
const int input_channels = static_cast<int>(input_channels64);
1599-
const int input_height = static_cast<int>(input_height64);
1600-
const int input_width = static_cast<int>(input_width64);
1601-
const int output_channels = static_cast<int>(output_channels64);
1602-
const int output_height = static_cast<int>(output_height64);
1603-
const int output_width = static_cast<int>(output_width64);
1604-
const int ksize_height = static_cast<int>(ksize_height64);
1605-
const int ksize_width = static_cast<int>(ksize_width64);
1598+
const int input_channels = static_cast<int>(input_channels_64);
1599+
const int input_height = static_cast<int>(input_height_64);
1600+
const int input_width = static_cast<int>(input_width_64);
1601+
const int output_channels = static_cast<int>(output_channels_64);
1602+
const int output_height = static_cast<int>(output_height_64);
1603+
const int output_width = static_cast<int>(output_width_64);
1604+
const int ksize_height = static_cast<int>(ksize_height_64);
1605+
const int ksize_width = static_cast<int>(ksize_width_64);
16061606

16071607
const int stride_height = strides[0];
16081608
const int stride_width = strides[1];
@@ -1644,12 +1644,12 @@ class DepthwiseConvFunctor<GPUContext, T, fuse_relu_before_conv> {
16441644
threads = dim3(thread_x, blocks, 1);
16451645
grid = dim3(output_channels, batch_size_int, 1);
16461646
} else {
1647-
const int64_t block_y64 =
1647+
const int64_t block_y_64 =
16481648
((static_cast<int64_t>(output_width) + dilate_width - 1) /
16491649
dilate_width) *
16501650
dilate_width;
16511651
blocks = static_cast<int>(
1652-
std::min<int64_t>(std::max(thread / output_channels, 1), block_y64));
1652+
std::min<int64_t>(std::max(thread / output_channels, 1), block_y_64));
16531653
const int thread_x = std::min(output_channels, thread);
16541654
const int64_t grid_x =
16551655
(static_cast<int64_t>(output_height) + dilate_height - 1) /
@@ -1663,9 +1663,9 @@ class DepthwiseConvFunctor<GPUContext, T, fuse_relu_before_conv> {
16631663
int filter_multiplier = output_channels / input_channels;
16641664
int64_t nums_output = output->numel();
16651665
int block_size = 512;
1666-
int64_t grid_size64 = (nums_output + block_size - 1) / block_size;
1667-
PADDLE_ENFORCE_LE_INT_MAX(grid_size64, "grid_size");
1668-
int grid_size = static_cast<int>(grid_size64);
1666+
int64_t grid_size_64 = (nums_output + block_size - 1) / block_size;
1667+
PADDLE_ENFORCE_LE_INT_MAX(grid_size_64, "grid_size");
1668+
int grid_size = static_cast<int>(grid_size_64);
16691669

16701670
#define check_case(c_filter_multiplier, c_stride, c_filter) \
16711671
if (c_filter_multiplier == 0 || \
@@ -1765,52 +1765,52 @@ class DepthwiseConvInputGradFunctor<GPUContext, T, fuse_relu_before_conv> {
17651765
DenseTensor* input_grad,
17661766
const DataLayout data_layout = DataLayout::NCHW) {
17671767
const int64_t batch_size = input.dims()[0];
1768-
const int64_t input_channels64 =
1768+
const int64_t input_channels_64 =
17691769
(data_layout != DataLayout::NHWC ? input.dims()[1] : input.dims()[3]);
1770-
const int64_t input_height64 =
1770+
const int64_t input_height_64 =
17711771
(data_layout != DataLayout::NHWC ? input.dims()[2] : input.dims()[1]);
1772-
const int64_t input_width64 =
1772+
const int64_t input_width_64 =
17731773
(data_layout != DataLayout::NHWC ? input.dims()[3] : input.dims()[2]);
1774-
const int64_t output_channels64 =
1774+
const int64_t output_channels_64 =
17751775
(data_layout != DataLayout::NHWC ? output_grad.dims()[1]
17761776
: output_grad.dims()[3]);
1777-
const int64_t output_height64 =
1777+
const int64_t output_height_64 =
17781778
(data_layout != DataLayout::NHWC ? output_grad.dims()[2]
17791779
: output_grad.dims()[1]);
1780-
const int64_t output_width64 =
1780+
const int64_t output_width_64 =
17811781
(data_layout != DataLayout::NHWC ? output_grad.dims()[3]
17821782
: output_grad.dims()[2]);
1783-
const int64_t ksize_height64 = filter.dims()[2];
1784-
const int64_t ksize_width64 = filter.dims()[3];
1783+
const int64_t ksize_height_64 = filter.dims()[2];
1784+
const int64_t ksize_width_64 = filter.dims()[3];
17851785

17861786
PADDLE_ENFORCE_LE_INT_MAX(batch_size,
17871787
"depthwise conv input grad batch size");
1788-
PADDLE_ENFORCE_LE_INT_MAX(input_channels64,
1788+
PADDLE_ENFORCE_LE_INT_MAX(input_channels_64,
17891789
"depthwise conv input grad input channels");
1790-
PADDLE_ENFORCE_LE_INT_MAX(input_height64,
1790+
PADDLE_ENFORCE_LE_INT_MAX(input_height_64,
17911791
"depthwise conv input grad input height");
1792-
PADDLE_ENFORCE_LE_INT_MAX(input_width64,
1792+
PADDLE_ENFORCE_LE_INT_MAX(input_width_64,
17931793
"depthwise conv input grad input width");
1794-
PADDLE_ENFORCE_LE_INT_MAX(output_channels64,
1794+
PADDLE_ENFORCE_LE_INT_MAX(output_channels_64,
17951795
"depthwise conv input grad output channels");
1796-
PADDLE_ENFORCE_LE_INT_MAX(output_height64,
1796+
PADDLE_ENFORCE_LE_INT_MAX(output_height_64,
17971797
"depthwise conv input grad output height");
1798-
PADDLE_ENFORCE_LE_INT_MAX(output_width64,
1798+
PADDLE_ENFORCE_LE_INT_MAX(output_width_64,
17991799
"depthwise conv input grad output width");
1800-
PADDLE_ENFORCE_LE_INT_MAX(ksize_height64,
1800+
PADDLE_ENFORCE_LE_INT_MAX(ksize_height_64,
18011801
"depthwise conv input grad kernel height");
1802-
PADDLE_ENFORCE_LE_INT_MAX(ksize_width64,
1802+
PADDLE_ENFORCE_LE_INT_MAX(ksize_width_64,
18031803
"depthwise conv input grad kernel width");
18041804

18051805
const int batch_size_int = static_cast<int>(batch_size);
1806-
const int input_channels = static_cast<int>(input_channels64);
1807-
const int input_height = static_cast<int>(input_height64);
1808-
const int input_width = static_cast<int>(input_width64);
1809-
const int output_channels = static_cast<int>(output_channels64);
1810-
const int output_height = static_cast<int>(output_height64);
1811-
const int output_width = static_cast<int>(output_width64);
1812-
const int ksize_height = static_cast<int>(ksize_height64);
1813-
const int ksize_width = static_cast<int>(ksize_width64);
1806+
const int input_channels = static_cast<int>(input_channels_64);
1807+
const int input_height = static_cast<int>(input_height_64);
1808+
const int input_width = static_cast<int>(input_width_64);
1809+
const int output_channels = static_cast<int>(output_channels_64);
1810+
const int output_height = static_cast<int>(output_height_64);
1811+
const int output_width = static_cast<int>(output_width_64);
1812+
const int ksize_height = static_cast<int>(ksize_height_64);
1813+
const int ksize_width = static_cast<int>(ksize_width_64);
18141814

18151815
const int stride_height = strides[0];
18161816
const int stride_width = strides[1];
@@ -1854,12 +1854,12 @@ class DepthwiseConvInputGradFunctor<GPUContext, T, fuse_relu_before_conv> {
18541854
threads = dim3(thread_x, blocks, 1);
18551855
grid = dim3(input_channels, batch_size_int, 1);
18561856
} else {
1857-
const int64_t block_y64 =
1857+
const int64_t block_y_64 =
18581858
((static_cast<int64_t>(input_width) + dilate_width - 1) /
18591859
dilate_width) *
18601860
dilate_width;
18611861
blocks = static_cast<int>(
1862-
std::min<int64_t>(std::max(thread / input_channels, 1), block_y64));
1862+
std::min<int64_t>(std::max(thread / input_channels, 1), block_y_64));
18631863
const int thread_x = std::min(input_channels, thread);
18641864
const int64_t grid_x =
18651865
(static_cast<int64_t>(input_height) + dilate_height - 1) /
@@ -1874,9 +1874,9 @@ class DepthwiseConvInputGradFunctor<GPUContext, T, fuse_relu_before_conv> {
18741874
int filter_multiplier = output_channels / input_channels;
18751875
int64_t nums_input = input_grad->numel();
18761876
int block_size = 512;
1877-
int64_t grid_size64 = (nums_input + block_size - 1) / block_size;
1878-
PADDLE_ENFORCE_LE_INT_MAX(grid_size64, "grid_size");
1879-
int grid_size = static_cast<int>(grid_size64);
1877+
int64_t grid_size_64 = (nums_input + block_size - 1) / block_size;
1878+
PADDLE_ENFORCE_LE_INT_MAX(grid_size_64, "grid_size");
1879+
int grid_size = static_cast<int>(grid_size_64);
18801880

18811881
#define check_case(c_filter_multiplier, c_stride, c_filter) \
18821882
if (c_filter_multiplier == 0 || \
@@ -1977,52 +1977,52 @@ class DepthwiseConvFilterGradFunctor<GPUContext, T, fuse_relu_before_conv> {
19771977
DenseTensor* filter_grad,
19781978
const DataLayout data_layout = DataLayout::NCHW) {
19791979
const int64_t batch_size = input.dims()[0];
1980-
const int64_t input_channels64 =
1980+
const int64_t input_channels_64 =
19811981
(data_layout != DataLayout::NHWC ? input.dims()[1] : input.dims()[3]);
1982-
const int64_t input_height64 =
1982+
const int64_t input_height_64 =
19831983
(data_layout != DataLayout::NHWC ? input.dims()[2] : input.dims()[1]);
1984-
const int64_t input_width64 =
1984+
const int64_t input_width_64 =
19851985
(data_layout != DataLayout::NHWC ? input.dims()[3] : input.dims()[2]);
1986-
const int64_t output_channels64 =
1986+
const int64_t output_channels_64 =
19871987
(data_layout != DataLayout::NHWC ? output_grad.dims()[1]
19881988
: output_grad.dims()[3]);
1989-
const int64_t output_height64 =
1989+
const int64_t output_height_64 =
19901990
(data_layout != DataLayout::NHWC ? output_grad.dims()[2]
19911991
: output_grad.dims()[1]);
1992-
const int64_t output_width64 =
1992+
const int64_t output_width_64 =
19931993
(data_layout != DataLayout::NHWC ? output_grad.dims()[3]
19941994
: output_grad.dims()[2]);
1995-
const int64_t ksize_height64 = filter_grad->dims()[2];
1996-
const int64_t ksize_width64 = filter_grad->dims()[3];
1995+
const int64_t ksize_height_64 = filter_grad->dims()[2];
1996+
const int64_t ksize_width_64 = filter_grad->dims()[3];
19971997

19981998
PADDLE_ENFORCE_LE_INT_MAX(batch_size,
19991999
"depthwise conv filter grad batch size");
2000-
PADDLE_ENFORCE_LE_INT_MAX(input_channels64,
2000+
PADDLE_ENFORCE_LE_INT_MAX(input_channels_64,
20012001
"depthwise conv filter grad input channels");
2002-
PADDLE_ENFORCE_LE_INT_MAX(input_height64,
2002+
PADDLE_ENFORCE_LE_INT_MAX(input_height_64,
20032003
"depthwise conv filter grad input height");
2004-
PADDLE_ENFORCE_LE_INT_MAX(input_width64,
2004+
PADDLE_ENFORCE_LE_INT_MAX(input_width_64,
20052005
"depthwise conv filter grad input width");
2006-
PADDLE_ENFORCE_LE_INT_MAX(output_channels64,
2006+
PADDLE_ENFORCE_LE_INT_MAX(output_channels_64,
20072007
"depthwise conv filter grad output channels");
2008-
PADDLE_ENFORCE_LE_INT_MAX(output_height64,
2008+
PADDLE_ENFORCE_LE_INT_MAX(output_height_64,
20092009
"depthwise conv filter grad output height");
2010-
PADDLE_ENFORCE_LE_INT_MAX(output_width64,
2010+
PADDLE_ENFORCE_LE_INT_MAX(output_width_64,
20112011
"depthwise conv filter grad output width");
2012-
PADDLE_ENFORCE_LE_INT_MAX(ksize_height64,
2012+
PADDLE_ENFORCE_LE_INT_MAX(ksize_height_64,
20132013
"depthwise conv filter grad kernel height");
2014-
PADDLE_ENFORCE_LE_INT_MAX(ksize_width64,
2014+
PADDLE_ENFORCE_LE_INT_MAX(ksize_width_64,
20152015
"depthwise conv filter grad kernel width");
20162016

20172017
const int batch_size_int = static_cast<int>(batch_size);
2018-
const int input_channels = static_cast<int>(input_channels64);
2019-
const int input_height = static_cast<int>(input_height64);
2020-
const int input_width = static_cast<int>(input_width64);
2021-
const int output_channels = static_cast<int>(output_channels64);
2022-
const int output_height = static_cast<int>(output_height64);
2023-
const int output_width = static_cast<int>(output_width64);
2024-
const int ksize_height = static_cast<int>(ksize_height64);
2025-
const int ksize_width = static_cast<int>(ksize_width64);
2018+
const int input_channels = static_cast<int>(input_channels_64);
2019+
const int input_height = static_cast<int>(input_height_64);
2020+
const int input_width = static_cast<int>(input_width_64);
2021+
const int output_channels = static_cast<int>(output_channels_64);
2022+
const int output_height = static_cast<int>(output_height_64);
2023+
const int output_width = static_cast<int>(output_width_64);
2024+
const int ksize_height = static_cast<int>(ksize_height_64);
2025+
const int ksize_width = static_cast<int>(ksize_width_64);
20262026

20272027
const int stride_height = strides[0];
20282028
const int stride_width = strides[1];
@@ -2051,20 +2051,20 @@ class DepthwiseConvFilterGradFunctor<GPUContext, T, fuse_relu_before_conv> {
20512051
const int64_t output_hw =
20522052
static_cast<int64_t>(output_height) * output_width;
20532053
if (output_hw < WARP_SIZE) {
2054-
const int64_t block_x64 = batch_size * output_hw;
2054+
const int64_t block_x_64 = batch_size * output_hw;
20552055
const int block_x =
2056-
static_cast<int>(std::min<int64_t>(block_size, block_x64));
2056+
static_cast<int>(std::min<int64_t>(block_size, block_x_64));
20572057
threads = dim3(block_x);
20582058
}
20592059
} else {
20602060
// Large block size may cause atomic dependence, reduce block size here.
20612061
block_size = 256;
2062-
const int64_t block_y64 =
2062+
const int64_t block_y_64 =
20632063
((static_cast<int64_t>(output_width) + dilate_width - 1) /
20642064
dilate_width) *
20652065
dilate_width;
20662066
blocks = static_cast<int>(std::min<int64_t>(
2067-
std::max(block_size / output_channels, 1), block_y64));
2067+
std::max(block_size / output_channels, 1), block_y_64));
20682068
const int thread_x = std::min(output_channels, block_size);
20692069
const int64_t grid_x =
20702070
(static_cast<int64_t>(output_height) + dilate_height - 1) /
@@ -2078,10 +2078,11 @@ class DepthwiseConvFilterGradFunctor<GPUContext, T, fuse_relu_before_conv> {
20782078
if (output_channels < SMALL_THRESHOLD) {
20792079
const int64_t hwc_size =
20802080
static_cast<int64_t>(ksize_height) * ksize_width * output_channels;
2081-
const int64_t blocks64 =
2081+
const int64_t blocks_64 =
20822082
(hwc_size + static_cast<int64_t>(block_size) - 1) / block_size;
2083-
PADDLE_ENFORCE_LE_INT_MAX(blocks64, "CUDA launch grid filter_hwc_size");
2084-
grid = dim3(static_cast<int>(blocks64), batch_size, 1);
2083+
PADDLE_ENFORCE_LE_INT_MAX(blocks_64,
2084+
"CUDA launch grid filter_hwc_size");
2085+
grid = dim3(static_cast<int>(blocks_64), batch_size, 1);
20852086
threads = dim3(static_cast<int>(
20862087
std::min(static_cast<int64_t>(block_size), hwc_size)));
20872088
}

0 commit comments

Comments
 (0)