1818#transform_map5_tid = #rock.transform_map <affine_map <(d0 ) -> (0 , d0 )> by [<Merge {4 , 4 } [" tid" ] at [0 ] -> [" nr_per_bid" , " r" ] at [0 , 1 ]>] bounds = [16 ] -> [4 , 4 ]>
1919#transform_map5_iter = #rock.transform_map <affine_map <(d0 ) -> (d0 , 0 )> by [<Merge {4 , 1 } [" iter" ] at [0 ] -> [" nr_per_bid" , " r" ] at [0 , 1 ]>] bounds = [4 ] -> [4 , 1 ]>
2020
21- func.func @test_dpp_cluster4 (%input : memref <1 x4 x32 xf32 >, %output : memref <1 x1 x32 xf32 >) attributes {arch = " ##TOKEN_ARCH##" , block_size = 16 : i32 , grid_size = 8 : i32 , kernel } {
21+ func.func @test_dpp_cluster4 (%input : memref <1 x4 x32 xf32 >, %output : memref <1 x1 x32 xf32 >) attributes {rock. arch = " ##TOKEN_ARCH##" , block_size = 16 : i32 , grid_size = 8 : i32 , rock. kernel } {
2222 %input_reg = rock.alloc () : memref <4 xf32 , #gpu.address_space <private >>
2323 %output_reg = rock.alloc () : memref <4 xf32 , #gpu.address_space <private >>
2424 %ws_lds_bytes = rock.alloc () : memref <256 xi8 , #gpu.address_space <workgroup >>
@@ -42,7 +42,7 @@ func.func @test_dpp_cluster4(%input : memref<1x4x32xf32>, %output : memref<1x1x3
4242#c8_map5_tid = #rock.transform_map <affine_map <(d0 ) -> (0 , d0 )> by [<Merge {4 , 8 } [" tid" ] at [0 ] -> [" nr_per_bid" , " r" ] at [0 , 1 ]>] bounds = [32 ] -> [4 , 8 ]>
4343#c8_map5_iter = #rock.transform_map <affine_map <(d0 ) -> (d0 , 0 )> by [<Merge {4 , 1 } [" iter" ] at [0 ] -> [" nr_per_bid" , " r" ] at [0 , 1 ]>] bounds = [4 ] -> [4 , 1 ]>
4444
45- func.func @test_dpp_cluster8 (%input : memref <1 x8 x32 xf32 >, %output : memref <1 x1 x32 xf32 >) attributes {arch = " ##TOKEN_ARCH##" , block_size = 32 : i32 , grid_size = 8 : i32 , kernel } {
45+ func.func @test_dpp_cluster8 (%input : memref <1 x8 x32 xf32 >, %output : memref <1 x1 x32 xf32 >) attributes {rock. arch = " ##TOKEN_ARCH##" , block_size = 32 : i32 , grid_size = 8 : i32 , rock. kernel } {
4646 %input_reg = rock.alloc () : memref <4 xf32 , #gpu.address_space <private >>
4747 %output_reg = rock.alloc () : memref <4 xf32 , #gpu.address_space <private >>
4848 %ws_lds_bytes = rock.alloc () : memref <512 xi8 , #gpu.address_space <workgroup >>
@@ -66,7 +66,7 @@ func.func @test_dpp_cluster8(%input : memref<1x8x32xf32>, %output : memref<1x1x3
6666#c16_map5_tid = #rock.transform_map <affine_map <(d0 ) -> (0 , d0 )> by [<Merge {2 , 16 } [" tid" ] at [0 ] -> [" nr_per_bid" , " r" ] at [0 , 1 ]>] bounds = [32 ] -> [2 , 16 ]>
6767#c16_map5_iter = #rock.transform_map <affine_map <(d0 ) -> (d0 , 0 )> by [<Merge {2 , 1 } [" iter" ] at [0 ] -> [" nr_per_bid" , " r" ] at [0 , 1 ]>] bounds = [2 ] -> [2 , 1 ]>
6868
69- func.func @test_dpp_cluster16 (%input : memref <1 x16 x32 xf32 >, %output : memref <1 x1 x32 xf32 >) attributes {arch = " ##TOKEN_ARCH##" , block_size = 32 : i32 , grid_size = 16 : i32 , kernel } {
69+ func.func @test_dpp_cluster16 (%input : memref <1 x16 x32 xf32 >, %output : memref <1 x1 x32 xf32 >) attributes {rock. arch = " ##TOKEN_ARCH##" , block_size = 32 : i32 , grid_size = 16 : i32 , rock. kernel } {
7070 %input_reg = rock.alloc () : memref <2 xf32 , #gpu.address_space <private >>
7171 %output_reg = rock.alloc () : memref <2 xf32 , #gpu.address_space <private >>
7272 %ws_lds_bytes = rock.alloc () : memref <256 xi8 , #gpu.address_space <workgroup >>
@@ -90,7 +90,7 @@ func.func @test_dpp_cluster16(%input : memref<1x16x32xf32>, %output : memref<1x1
9090#c32_map5_tid = #rock.transform_map <affine_map <(d0 ) -> (0 , d0 )> by [<Merge {1 , 32 } [" tid" ] at [0 ] -> [" nr_per_bid" , " r" ] at [0 , 1 ]>] bounds = [32 ] -> [1 , 32 ]>
9191#c32_map5_iter = #rock.transform_map <affine_map <(d0 ) -> (d0 , 0 )> by [<Merge {1 , 1 } [" iter" ] at [0 ] -> [" nr_per_bid" , " r" ] at [0 , 1 ]>] bounds = [1 ] -> [1 , 1 ]>
9292
93- func.func @test_dpp_cluster32 (%input : memref <1 x32 x1 xf32 >, %output : memref <1 x1 x1 xf32 >) attributes {arch = " ##TOKEN_ARCH##" , block_size = 32 : i32 , grid_size = 1 : i32 , kernel } {
93+ func.func @test_dpp_cluster32 (%input : memref <1 x32 x1 xf32 >, %output : memref <1 x1 x1 xf32 >) attributes {rock. arch = " ##TOKEN_ARCH##" , block_size = 32 : i32 , grid_size = 1 : i32 , rock. kernel } {
9494 %input_reg = rock.alloc () : memref <1 xf32 , #gpu.address_space <private >>
9595 %output_reg = rock.alloc () : memref <1 xf32 , #gpu.address_space <private >>
9696 %ws_lds_bytes = rock.alloc () : memref <128 xi8 , #gpu.address_space <workgroup >>
@@ -115,7 +115,7 @@ func.func @test_dpp_cluster32(%input : memref<1x32x1xf32>, %output : memref<1x1x
115115#c64_map5_tid = #rock.transform_map <affine_map <(d0 ) -> (0 , d0 )> by [<Merge {1 , 64 } [" tid" ] at [0 ] -> [" nr_per_bid" , " r" ] at [0 , 1 ]>] bounds = [64 ] -> [1 , 64 ]>
116116#c64_map5_iter = #rock.transform_map <affine_map <(d0 ) -> (d0 , 0 )> by [<Merge {1 , 1 } [" iter" ] at [0 ] -> [" nr_per_bid" , " r" ] at [0 , 1 ]>] bounds = [1 ] -> [1 , 1 ]>
117117
118- func.func @test_dpp_cluster64 (%input : memref <1 x64 x1 xf32 >, %output : memref <1 x1 x1 xf32 >) attributes {arch = " ##TOKEN_ARCH##" , block_size = 64 : i32 , grid_size = 1 : i32 , kernel } {
118+ func.func @test_dpp_cluster64 (%input : memref <1 x64 x1 xf32 >, %output : memref <1 x1 x1 xf32 >) attributes {rock. arch = " ##TOKEN_ARCH##" , block_size = 64 : i32 , grid_size = 1 : i32 , rock. kernel } {
119119 %input_reg = rock.alloc () : memref <1 xf32 , #gpu.address_space <private >>
120120 %output_reg = rock.alloc () : memref <1 xf32 , #gpu.address_space <private >>
121121 %ws_lds_bytes = rock.alloc () : memref <256 xi8 , #gpu.address_space <workgroup >>
0 commit comments