@@ -1564,45 +1564,45 @@ class DepthwiseConvFunctor<GPUContext, T, fuse_relu_before_conv> {
15641564 DenseTensor* output,
15651565 const DataLayout data_layout = DataLayout::NCHW ) {
15661566 const int64_t batch_size = input.dims ()[0 ];
1567- const int64_t input_channels64 =
1567+ const int64_t input_channels_64 =
15681568 (data_layout != DataLayout::NHWC ? input.dims ()[1 ] : input.dims ()[3 ]);
1569- const int64_t input_height64 =
1569+ const int64_t input_height_64 =
15701570 (data_layout != DataLayout::NHWC ? input.dims ()[2 ] : input.dims ()[1 ]);
1571- const int64_t input_width64 =
1571+ const int64_t input_width_64 =
15721572 (data_layout != DataLayout::NHWC ? input.dims ()[3 ] : input.dims ()[2 ]);
1573- const int64_t output_channels64 =
1573+ const int64_t output_channels_64 =
15741574 (data_layout != DataLayout::NHWC ? output->dims ()[1 ]
15751575 : output->dims ()[3 ]);
1576- const int64_t output_height64 =
1576+ const int64_t output_height_64 =
15771577 (data_layout != DataLayout::NHWC ? output->dims ()[2 ]
15781578 : output->dims ()[1 ]);
1579- const int64_t output_width64 =
1579+ const int64_t output_width_64 =
15801580 (data_layout != DataLayout::NHWC ? output->dims ()[3 ]
15811581 : output->dims ()[2 ]);
1582- const int64_t ksize_height64 = filter.dims ()[2 ];
1583- const int64_t ksize_width64 = filter.dims ()[3 ];
1582+ const int64_t ksize_height_64 = filter.dims ()[2 ];
1583+ const int64_t ksize_width_64 = filter.dims ()[3 ];
15841584
15851585 PADDLE_ENFORCE_LE_INT_MAX (batch_size, " depthwise conv batch size" );
1586- PADDLE_ENFORCE_LE_INT_MAX (input_channels64 ,
1586+ PADDLE_ENFORCE_LE_INT_MAX (input_channels_64 ,
15871587 " depthwise conv input channels" );
1588- PADDLE_ENFORCE_LE_INT_MAX (input_height64 , " depthwise conv input height" );
1589- PADDLE_ENFORCE_LE_INT_MAX (input_width64 , " depthwise conv input width" );
1590- PADDLE_ENFORCE_LE_INT_MAX (output_channels64 ,
1588+ PADDLE_ENFORCE_LE_INT_MAX (input_height_64 , " depthwise conv input height" );
1589+ PADDLE_ENFORCE_LE_INT_MAX (input_width_64 , " depthwise conv input width" );
1590+ PADDLE_ENFORCE_LE_INT_MAX (output_channels_64 ,
15911591 " depthwise conv output channels" );
1592- PADDLE_ENFORCE_LE_INT_MAX (output_height64 , " depthwise conv output height" );
1593- PADDLE_ENFORCE_LE_INT_MAX (output_width64 , " depthwise conv output width" );
1594- PADDLE_ENFORCE_LE_INT_MAX (ksize_height64 , " depthwise conv kernel height" );
1595- PADDLE_ENFORCE_LE_INT_MAX (ksize_width64 , " depthwise conv kernel width" );
1592+ PADDLE_ENFORCE_LE_INT_MAX (output_height_64 , " depthwise conv output height" );
1593+ PADDLE_ENFORCE_LE_INT_MAX (output_width_64 , " depthwise conv output width" );
1594+ PADDLE_ENFORCE_LE_INT_MAX (ksize_height_64 , " depthwise conv kernel height" );
1595+ PADDLE_ENFORCE_LE_INT_MAX (ksize_width_64 , " depthwise conv kernel width" );
15961596
15971597 const int batch_size_int = static_cast <int >(batch_size);
1598- const int input_channels = static_cast <int >(input_channels64 );
1599- const int input_height = static_cast <int >(input_height64 );
1600- const int input_width = static_cast <int >(input_width64 );
1601- const int output_channels = static_cast <int >(output_channels64 );
1602- const int output_height = static_cast <int >(output_height64 );
1603- const int output_width = static_cast <int >(output_width64 );
1604- const int ksize_height = static_cast <int >(ksize_height64 );
1605- const int ksize_width = static_cast <int >(ksize_width64 );
1598+ const int input_channels = static_cast <int >(input_channels_64 );
1599+ const int input_height = static_cast <int >(input_height_64 );
1600+ const int input_width = static_cast <int >(input_width_64 );
1601+ const int output_channels = static_cast <int >(output_channels_64 );
1602+ const int output_height = static_cast <int >(output_height_64 );
1603+ const int output_width = static_cast <int >(output_width_64 );
1604+ const int ksize_height = static_cast <int >(ksize_height_64 );
1605+ const int ksize_width = static_cast <int >(ksize_width_64 );
16061606
16071607 const int stride_height = strides[0 ];
16081608 const int stride_width = strides[1 ];
@@ -1644,12 +1644,12 @@ class DepthwiseConvFunctor<GPUContext, T, fuse_relu_before_conv> {
16441644 threads = dim3 (thread_x, blocks, 1 );
16451645 grid = dim3 (output_channels, batch_size_int, 1 );
16461646 } else {
1647- const int64_t block_y64 =
1647+ const int64_t block_y_64 =
16481648 ((static_cast <int64_t >(output_width) + dilate_width - 1 ) /
16491649 dilate_width) *
16501650 dilate_width;
16511651 blocks = static_cast <int >(
1652- std::min<int64_t >(std::max (thread / output_channels, 1 ), block_y64 ));
1652+ std::min<int64_t >(std::max (thread / output_channels, 1 ), block_y_64 ));
16531653 const int thread_x = std::min (output_channels, thread);
16541654 const int64_t grid_x =
16551655 (static_cast <int64_t >(output_height) + dilate_height - 1 ) /
@@ -1663,9 +1663,9 @@ class DepthwiseConvFunctor<GPUContext, T, fuse_relu_before_conv> {
16631663 int filter_multiplier = output_channels / input_channels;
16641664 int64_t nums_output = output->numel ();
16651665 int block_size = 512 ;
1666- int64_t grid_size64 = (nums_output + block_size - 1 ) / block_size;
1667- PADDLE_ENFORCE_LE_INT_MAX (grid_size64 , " grid_size" );
1668- int grid_size = static_cast <int >(grid_size64 );
1666+ int64_t grid_size_64 = (nums_output + block_size - 1 ) / block_size;
1667+ PADDLE_ENFORCE_LE_INT_MAX (grid_size_64 , " grid_size" );
1668+ int grid_size = static_cast <int >(grid_size_64 );
16691669
16701670#define check_case (c_filter_multiplier, c_stride, c_filter ) \
16711671 if (c_filter_multiplier == 0 || \
@@ -1765,52 +1765,52 @@ class DepthwiseConvInputGradFunctor<GPUContext, T, fuse_relu_before_conv> {
17651765 DenseTensor* input_grad,
17661766 const DataLayout data_layout = DataLayout::NCHW ) {
17671767 const int64_t batch_size = input.dims ()[0 ];
1768- const int64_t input_channels64 =
1768+ const int64_t input_channels_64 =
17691769 (data_layout != DataLayout::NHWC ? input.dims ()[1 ] : input.dims ()[3 ]);
1770- const int64_t input_height64 =
1770+ const int64_t input_height_64 =
17711771 (data_layout != DataLayout::NHWC ? input.dims ()[2 ] : input.dims ()[1 ]);
1772- const int64_t input_width64 =
1772+ const int64_t input_width_64 =
17731773 (data_layout != DataLayout::NHWC ? input.dims ()[3 ] : input.dims ()[2 ]);
1774- const int64_t output_channels64 =
1774+ const int64_t output_channels_64 =
17751775 (data_layout != DataLayout::NHWC ? output_grad.dims ()[1 ]
17761776 : output_grad.dims ()[3 ]);
1777- const int64_t output_height64 =
1777+ const int64_t output_height_64 =
17781778 (data_layout != DataLayout::NHWC ? output_grad.dims ()[2 ]
17791779 : output_grad.dims ()[1 ]);
1780- const int64_t output_width64 =
1780+ const int64_t output_width_64 =
17811781 (data_layout != DataLayout::NHWC ? output_grad.dims ()[3 ]
17821782 : output_grad.dims ()[2 ]);
1783- const int64_t ksize_height64 = filter.dims ()[2 ];
1784- const int64_t ksize_width64 = filter.dims ()[3 ];
1783+ const int64_t ksize_height_64 = filter.dims ()[2 ];
1784+ const int64_t ksize_width_64 = filter.dims ()[3 ];
17851785
17861786 PADDLE_ENFORCE_LE_INT_MAX (batch_size,
17871787 " depthwise conv input grad batch size" );
1788- PADDLE_ENFORCE_LE_INT_MAX (input_channels64 ,
1788+ PADDLE_ENFORCE_LE_INT_MAX (input_channels_64 ,
17891789 " depthwise conv input grad input channels" );
1790- PADDLE_ENFORCE_LE_INT_MAX (input_height64 ,
1790+ PADDLE_ENFORCE_LE_INT_MAX (input_height_64 ,
17911791 " depthwise conv input grad input height" );
1792- PADDLE_ENFORCE_LE_INT_MAX (input_width64 ,
1792+ PADDLE_ENFORCE_LE_INT_MAX (input_width_64 ,
17931793 " depthwise conv input grad input width" );
1794- PADDLE_ENFORCE_LE_INT_MAX (output_channels64 ,
1794+ PADDLE_ENFORCE_LE_INT_MAX (output_channels_64 ,
17951795 " depthwise conv input grad output channels" );
1796- PADDLE_ENFORCE_LE_INT_MAX (output_height64 ,
1796+ PADDLE_ENFORCE_LE_INT_MAX (output_height_64 ,
17971797 " depthwise conv input grad output height" );
1798- PADDLE_ENFORCE_LE_INT_MAX (output_width64 ,
1798+ PADDLE_ENFORCE_LE_INT_MAX (output_width_64 ,
17991799 " depthwise conv input grad output width" );
1800- PADDLE_ENFORCE_LE_INT_MAX (ksize_height64 ,
1800+ PADDLE_ENFORCE_LE_INT_MAX (ksize_height_64 ,
18011801 " depthwise conv input grad kernel height" );
1802- PADDLE_ENFORCE_LE_INT_MAX (ksize_width64 ,
1802+ PADDLE_ENFORCE_LE_INT_MAX (ksize_width_64 ,
18031803 " depthwise conv input grad kernel width" );
18041804
18051805 const int batch_size_int = static_cast <int >(batch_size);
1806- const int input_channels = static_cast <int >(input_channels64 );
1807- const int input_height = static_cast <int >(input_height64 );
1808- const int input_width = static_cast <int >(input_width64 );
1809- const int output_channels = static_cast <int >(output_channels64 );
1810- const int output_height = static_cast <int >(output_height64 );
1811- const int output_width = static_cast <int >(output_width64 );
1812- const int ksize_height = static_cast <int >(ksize_height64 );
1813- const int ksize_width = static_cast <int >(ksize_width64 );
1806+ const int input_channels = static_cast <int >(input_channels_64 );
1807+ const int input_height = static_cast <int >(input_height_64 );
1808+ const int input_width = static_cast <int >(input_width_64 );
1809+ const int output_channels = static_cast <int >(output_channels_64 );
1810+ const int output_height = static_cast <int >(output_height_64 );
1811+ const int output_width = static_cast <int >(output_width_64 );
1812+ const int ksize_height = static_cast <int >(ksize_height_64 );
1813+ const int ksize_width = static_cast <int >(ksize_width_64 );
18141814
18151815 const int stride_height = strides[0 ];
18161816 const int stride_width = strides[1 ];
@@ -1854,12 +1854,12 @@ class DepthwiseConvInputGradFunctor<GPUContext, T, fuse_relu_before_conv> {
18541854 threads = dim3 (thread_x, blocks, 1 );
18551855 grid = dim3 (input_channels, batch_size_int, 1 );
18561856 } else {
1857- const int64_t block_y64 =
1857+ const int64_t block_y_64 =
18581858 ((static_cast <int64_t >(input_width) + dilate_width - 1 ) /
18591859 dilate_width) *
18601860 dilate_width;
18611861 blocks = static_cast <int >(
1862- std::min<int64_t >(std::max (thread / input_channels, 1 ), block_y64 ));
1862+ std::min<int64_t >(std::max (thread / input_channels, 1 ), block_y_64 ));
18631863 const int thread_x = std::min (input_channels, thread);
18641864 const int64_t grid_x =
18651865 (static_cast <int64_t >(input_height) + dilate_height - 1 ) /
@@ -1874,9 +1874,9 @@ class DepthwiseConvInputGradFunctor<GPUContext, T, fuse_relu_before_conv> {
18741874 int filter_multiplier = output_channels / input_channels;
18751875 int64_t nums_input = input_grad->numel ();
18761876 int block_size = 512 ;
1877- int64_t grid_size64 = (nums_input + block_size - 1 ) / block_size;
1878- PADDLE_ENFORCE_LE_INT_MAX (grid_size64 , " grid_size" );
1879- int grid_size = static_cast <int >(grid_size64 );
1877+ int64_t grid_size_64 = (nums_input + block_size - 1 ) / block_size;
1878+ PADDLE_ENFORCE_LE_INT_MAX (grid_size_64 , " grid_size" );
1879+ int grid_size = static_cast <int >(grid_size_64 );
18801880
18811881#define check_case (c_filter_multiplier, c_stride, c_filter ) \
18821882 if (c_filter_multiplier == 0 || \
@@ -1977,52 +1977,52 @@ class DepthwiseConvFilterGradFunctor<GPUContext, T, fuse_relu_before_conv> {
19771977 DenseTensor* filter_grad,
19781978 const DataLayout data_layout = DataLayout::NCHW ) {
19791979 const int64_t batch_size = input.dims ()[0 ];
1980- const int64_t input_channels64 =
1980+ const int64_t input_channels_64 =
19811981 (data_layout != DataLayout::NHWC ? input.dims ()[1 ] : input.dims ()[3 ]);
1982- const int64_t input_height64 =
1982+ const int64_t input_height_64 =
19831983 (data_layout != DataLayout::NHWC ? input.dims ()[2 ] : input.dims ()[1 ]);
1984- const int64_t input_width64 =
1984+ const int64_t input_width_64 =
19851985 (data_layout != DataLayout::NHWC ? input.dims ()[3 ] : input.dims ()[2 ]);
1986- const int64_t output_channels64 =
1986+ const int64_t output_channels_64 =
19871987 (data_layout != DataLayout::NHWC ? output_grad.dims ()[1 ]
19881988 : output_grad.dims ()[3 ]);
1989- const int64_t output_height64 =
1989+ const int64_t output_height_64 =
19901990 (data_layout != DataLayout::NHWC ? output_grad.dims ()[2 ]
19911991 : output_grad.dims ()[1 ]);
1992- const int64_t output_width64 =
1992+ const int64_t output_width_64 =
19931993 (data_layout != DataLayout::NHWC ? output_grad.dims ()[3 ]
19941994 : output_grad.dims ()[2 ]);
1995- const int64_t ksize_height64 = filter_grad->dims ()[2 ];
1996- const int64_t ksize_width64 = filter_grad->dims ()[3 ];
1995+ const int64_t ksize_height_64 = filter_grad->dims ()[2 ];
1996+ const int64_t ksize_width_64 = filter_grad->dims ()[3 ];
19971997
19981998 PADDLE_ENFORCE_LE_INT_MAX (batch_size,
19991999 " depthwise conv filter grad batch size" );
2000- PADDLE_ENFORCE_LE_INT_MAX (input_channels64 ,
2000+ PADDLE_ENFORCE_LE_INT_MAX (input_channels_64 ,
20012001 " depthwise conv filter grad input channels" );
2002- PADDLE_ENFORCE_LE_INT_MAX (input_height64 ,
2002+ PADDLE_ENFORCE_LE_INT_MAX (input_height_64 ,
20032003 " depthwise conv filter grad input height" );
2004- PADDLE_ENFORCE_LE_INT_MAX (input_width64 ,
2004+ PADDLE_ENFORCE_LE_INT_MAX (input_width_64 ,
20052005 " depthwise conv filter grad input width" );
2006- PADDLE_ENFORCE_LE_INT_MAX (output_channels64 ,
2006+ PADDLE_ENFORCE_LE_INT_MAX (output_channels_64 ,
20072007 " depthwise conv filter grad output channels" );
2008- PADDLE_ENFORCE_LE_INT_MAX (output_height64 ,
2008+ PADDLE_ENFORCE_LE_INT_MAX (output_height_64 ,
20092009 " depthwise conv filter grad output height" );
2010- PADDLE_ENFORCE_LE_INT_MAX (output_width64 ,
2010+ PADDLE_ENFORCE_LE_INT_MAX (output_width_64 ,
20112011 " depthwise conv filter grad output width" );
2012- PADDLE_ENFORCE_LE_INT_MAX (ksize_height64 ,
2012+ PADDLE_ENFORCE_LE_INT_MAX (ksize_height_64 ,
20132013 " depthwise conv filter grad kernel height" );
2014- PADDLE_ENFORCE_LE_INT_MAX (ksize_width64 ,
2014+ PADDLE_ENFORCE_LE_INT_MAX (ksize_width_64 ,
20152015 " depthwise conv filter grad kernel width" );
20162016
20172017 const int batch_size_int = static_cast <int >(batch_size);
2018- const int input_channels = static_cast <int >(input_channels64 );
2019- const int input_height = static_cast <int >(input_height64 );
2020- const int input_width = static_cast <int >(input_width64 );
2021- const int output_channels = static_cast <int >(output_channels64 );
2022- const int output_height = static_cast <int >(output_height64 );
2023- const int output_width = static_cast <int >(output_width64 );
2024- const int ksize_height = static_cast <int >(ksize_height64 );
2025- const int ksize_width = static_cast <int >(ksize_width64 );
2018+ const int input_channels = static_cast <int >(input_channels_64 );
2019+ const int input_height = static_cast <int >(input_height_64 );
2020+ const int input_width = static_cast <int >(input_width_64 );
2021+ const int output_channels = static_cast <int >(output_channels_64 );
2022+ const int output_height = static_cast <int >(output_height_64 );
2023+ const int output_width = static_cast <int >(output_width_64 );
2024+ const int ksize_height = static_cast <int >(ksize_height_64 );
2025+ const int ksize_width = static_cast <int >(ksize_width_64 );
20262026
20272027 const int stride_height = strides[0 ];
20282028 const int stride_width = strides[1 ];
@@ -2051,20 +2051,20 @@ class DepthwiseConvFilterGradFunctor<GPUContext, T, fuse_relu_before_conv> {
20512051 const int64_t output_hw =
20522052 static_cast <int64_t >(output_height) * output_width;
20532053 if (output_hw < WARP_SIZE ) {
2054- const int64_t block_x64 = batch_size * output_hw;
2054+ const int64_t block_x_64 = batch_size * output_hw;
20552055 const int block_x =
2056- static_cast <int >(std::min<int64_t >(block_size, block_x64 ));
2056+ static_cast <int >(std::min<int64_t >(block_size, block_x_64 ));
20572057 threads = dim3 (block_x);
20582058 }
20592059 } else {
20602060 // Large block size may cause atomic dependence, reduce block size here.
20612061 block_size = 256 ;
2062- const int64_t block_y64 =
2062+ const int64_t block_y_64 =
20632063 ((static_cast <int64_t >(output_width) + dilate_width - 1 ) /
20642064 dilate_width) *
20652065 dilate_width;
20662066 blocks = static_cast <int >(std::min<int64_t >(
2067- std::max (block_size / output_channels, 1 ), block_y64 ));
2067+ std::max (block_size / output_channels, 1 ), block_y_64 ));
20682068 const int thread_x = std::min (output_channels, block_size);
20692069 const int64_t grid_x =
20702070 (static_cast <int64_t >(output_height) + dilate_height - 1 ) /
@@ -2078,10 +2078,11 @@ class DepthwiseConvFilterGradFunctor<GPUContext, T, fuse_relu_before_conv> {
20782078 if (output_channels < SMALL_THRESHOLD ) {
20792079 const int64_t hwc_size =
20802080 static_cast <int64_t >(ksize_height) * ksize_width * output_channels;
2081- const int64_t blocks64 =
2081+ const int64_t blocks_64 =
20822082 (hwc_size + static_cast <int64_t >(block_size) - 1 ) / block_size;
2083- PADDLE_ENFORCE_LE_INT_MAX (blocks64, " CUDA launch grid filter_hwc_size" );
2084- grid = dim3 (static_cast <int >(blocks64), batch_size, 1 );
2083+ PADDLE_ENFORCE_LE_INT_MAX (blocks_64,
2084+ " CUDA launch grid filter_hwc_size" );
2085+ grid = dim3 (static_cast <int >(blocks_64), batch_size, 1 );
20852086 threads = dim3 (static_cast <int >(
20862087 std::min (static_cast <int64_t >(block_size), hwc_size)));
20872088 }
0 commit comments