@@ -1892,30 +1892,146 @@ def concat_columns(a: Array, b: Array):
18921892 >>> if __name__ == "__main__":
18931893 >>> x = ds.random_array((8, 4), block_size=(2, 2))
18941894 >>> y = ds.random_array((8, 4), block_size=(2, 2))
1895- >>> result = ds.conc_columns (x, y)
1895+ >>> result = ds.concat_columns (x, y)
18961896 >>> print(result.collect())
18971897 """
18981898 if a ._shape [0 ] != b ._shape [0 ]:
18991899 raise ValueError ("incompatible number of rows "
1900- f"subtract ({ a ._shape [0 ]} != { b ._shape [0 ]} " )
1900+ f" for the concatenation "
1901+ f"({ a ._shape [0 ]} != { b ._shape [0 ]} " )
19011902
19021903 if a ._reg_shape [0 ] != b ._reg_shape [0 ] or a ._reg_shape [1 ] != \
19031904 b ._reg_shape [1 ]:
19041905 raise ValueError ("incorrect block sizes for the requested "
1905- f"subtract ({ a ._reg_shape [0 ], a ._reg_shape [1 ]} "
1906+ f"concatenation ({ a ._reg_shape [0 ], a ._reg_shape [1 ]} "
19061907 f"!= { b ._reg_shape [0 ], b ._reg_shape [1 ]} )" )
19071908
1908- for i in range (len (a ._blocks )):
1909- for j in range (len (b ._blocks [0 ])):
1910- a ._blocks [i ].append (b ._blocks [i ][j ])
1911-
1912- return Array (blocks = a ._blocks ,
1909+ blocks_concatted = [[object () for _ in range (math .ceil (
1910+ (a .shape [1 ]+ b .shape [1 ])/ a ._reg_shape [1 ]))]
1911+ for _ in range (len (a ._blocks ))]
1912+ if a .shape [1 ] % a ._reg_shape [1 ] == 0 :
1913+ for i in range (len (a ._blocks )):
1914+ x = blocks_concatted [i ][:len (a ._blocks [i ])]
1915+ _assign_block_columns (x , a ._blocks [i ])
1916+ blocks_concatted [i ][:len (a ._blocks [i ])] = x
1917+ x = blocks_concatted [i ][len (a ._blocks [i ]):]
1918+ _assign_block_columns (x , b ._blocks [i ])
1919+ blocks_concatted [i ][len (a ._blocks [i ]):] = x
1920+ else :
1921+ for i in range (len (a ._blocks )):
1922+ x = blocks_concatted [i ][:len (a ._blocks [i ]) - 1 ]
1923+ _assign_block_columns (x , a ._blocks [i ][:(len (a ._blocks [i ]) - 1 )])
1924+ blocks_concatted [i ][:len (a ._blocks [i ]) - 1 ] = x
1925+ leftover_data = a ._blocks [i ][- 1 ]
1926+ x = blocks_concatted [i ][len (a ._blocks [i ])- 1 :]
1927+ _assign_block_columns_leftover_data (x , b ._blocks [i ],
1928+ a ._reg_shape [1 ],
1929+ leftover_data )
1930+ blocks_concatted [i ][len (a ._blocks [i ]) - 1 :] = x
1931+ return Array (blocks = blocks_concatted ,
19131932 top_left_shape = (a ._reg_shape [0 ], a ._reg_shape [1 ]),
19141933 reg_shape = (a ._reg_shape [0 ], a ._reg_shape [1 ]),
19151934 shape = (a ._shape [0 ], a ._shape [1 ] + b ._shape [1 ]),
19161935 sparse = a ._sparse )
19171936
19181937
def concat_rows(a, b):
    """ Matrix concatenation by rows.

    Builds a new ds-array whose rows are the rows of `a` followed by the
    rows of `b`. The result uses the regular block shape of `a`; when the
    last row of blocks of `a` is not full, rows of `b` are shifted upwards
    to fill it, so every block of `b` is re-cut accordingly.

    Parameters
    ----------
    a : ds-array
        First matrix.
    b : ds-array
        Second matrix.

    Returns
    -------
    out : ds-array
        The output array.

    Raises
    ------
    ValueError
        If the arrays do not match in the number of columns.
        If the block size is different between the arrays and
        `b._n_blocks[0]` is greater than 1.

    Examples
    --------
    >>> import dislib as ds
    >>>
    >>>
    >>> if __name__ == "__main__":
    >>>     x = ds.random_array((8, 4), block_size=(2, 2))
    >>>     y = ds.random_array((8, 4), block_size=(2, 2))
    >>>     result = ds.concat_rows(x, y)
    >>>     print(result.collect())
    """
    # Row-wise concatenation requires the same number of columns.
    if a._shape[1] != b._shape[1]:
        raise ValueError("incompatible number of columns "
                         f"for the concatenation "
                         f"({a._shape[1]} != {b._shape[1]})")

    if (a._reg_shape[0] != b._reg_shape[0] or a._reg_shape[1] !=
            b._reg_shape[1]) and b._n_blocks[0] > 1:
        raise ValueError("incorrect block sizes for the requested "
                         f"concatenation ({a._reg_shape[0], a._reg_shape[1]} "
                         f"!= {b._reg_shape[0], b._reg_shape[1]})")

    reg_rows = a._reg_shape[0]
    last_a = len(a._blocks) - 1
    last_b = len(b._blocks) - 1

    # Number of rows held by the last row of blocks of each operand; a
    # remainder of zero means that the last row of blocks is full.
    size_last_block_a = a.shape[0] % reg_rows
    if size_last_block_a == 0:
        size_last_block_a = reg_rows
    size_last_block_b = b.shape[0] % b._reg_shape[0]
    if size_last_block_b == 0:
        # NOTE(review): falls back to the block height of `a`, not `b`. The
        # two can only differ when `b._n_blocks[0]` is 1 (see the check
        # above) -- confirm this is intended.
        size_last_block_b = reg_rows

    a_is_aligned = size_last_block_a == reg_rows
    # Rows of `b` consumed to complete the last (partial) row of blocks of
    # `a`. Only meaningful when `a` is not aligned.
    offset = reg_rows - (a.shape[0] % reg_rows)

    # Placeholders that the `_assign_blocks` calls fill in: every row of
    # blocks of `a` and `b` except the last ones, plus as many rows of blocks
    # as the two trailing pieces need once they are put together.
    blocks_a = [[object() for _ in range(len(a._blocks[i]))]
                for i in range(last_a)]
    blocks_b = [[object() for _ in range(len(b._blocks[i]))]
                for i in range(last_b)]
    remaining_blocks = [[object() for _ in range(len(b._blocks[0]))]
                        for _ in range(math.ceil((size_last_block_a +
                                                  size_last_block_b) /
                                                 reg_rows))]
    blocks_concatted = blocks_a + blocks_b + remaining_blocks

    # First pass: the rows of blocks that start with data from `a`.
    for i in range(len(blocks_concatted)):
        if i < last_a:
            _assign_blocks(blocks_concatted[i], a._blocks[i])
        elif i == last_a:
            if a_is_aligned:
                _assign_blocks(blocks_concatted[i], a._blocks[i])
            else:
                # Complete the partial row of blocks with the top of `b`.
                _assign_blocks(blocks_concatted[i], a._blocks[i],
                               b._blocks[0], reg_rows, used_data=0)
            break
    i += 1

    # Second pass: the rows of blocks made only of data from `b`.
    for j in range(len(blocks_concatted) - i):
        target = blocks_concatted[j + i]
        if a_is_aligned:
            # Block boundaries of `b` are preserved: plain copy.
            _assign_blocks(target, b._blocks[j])
        elif j < last_b:
            # Rest of row j of `b` plus the top of row j + 1.
            _assign_blocks(target, b._blocks[j], b._blocks[j + 1],
                           reg_rows, used_data=offset)
        elif size_last_block_b != b._reg_shape[0]:
            # Trailing rows of a partial last row of blocks of `b`; the
            # window is addressed past the previous row of blocks.
            _assign_blocks(target, b._blocks[j - 1], b._blocks[j],
                           reg_rows, used_data=reg_rows + offset)
        else:
            _assign_blocks(target, b._blocks[j], used_data=offset)

    return Array(blocks=blocks_concatted,
                 top_left_shape=(a._reg_shape[0], a._reg_shape[1]),
                 reg_shape=(a._reg_shape[0], a._reg_shape[1]),
                 shape=(a._shape[0] + b._shape[0], a._shape[1]),
                 sparse=a._sparse)
2033+
2034+
19192035def _add_block_groups (hblock , vblock ):
19202036 blocks = []
19212037
@@ -2008,6 +2124,60 @@ def _random_block_wrapper(block_size, r_state):
20082124 return _random_block (block_size , seed )
20092125
20102126
@constraint(computing_units="${ComputingUnits}")
@task(blocks={Type: COLLECTION_OUT, Depth: 1},
      a_blocks={Type: COLLECTION_IN, Depth: 1})
def _assign_block_columns(blocks, a_blocks):
    """Copy each entry of `a_blocks` into the same position of `blocks`.

    `blocks` is declared as an output collection in the task decorator and
    is filled in place; positions of `blocks` beyond `len(a_blocks)` are
    left untouched. Nothing is returned.
    """
    for position, source_block in enumerate(a_blocks):
        blocks[position] = source_block
2133+
2134+
@constraint(computing_units="${ComputingUnits}")
@task(blocks={Type: COLLECTION_OUT, Depth: 1},
      input_block={Type: COLLECTION_IN, Depth: 1})
def _assign_block_columns_leftover_data(blocks, input_block,
                                        block_shape, leftover_data):
    """Re-cut a row of blocks after prepending leftover columns.

    `leftover_data` is placed to the left of the blocks in `input_block`
    and the resulting columns are cut again into pieces that are
    `block_shape` columns wide, written in order into `blocks`.

    Parameters
    ----------
    blocks : list
        Output collection, filled in place.
    input_block : list
        Blocks whose columns follow those of `leftover_data`.
    block_shape : int
        Number of columns of each output block.
    leftover_data : array-like or None
        Columns that precede `input_block`. If None, the blocks are re-cut
        starting from the first input block alone.
    """
    carry = leftover_data
    for idx, block in enumerate(input_block):
        if carry is not None:
            merged = np.concatenate((carry, block), axis=1)
        else:
            merged = block
        blocks[idx] = merged[:, :block_shape]
        # Columns that did not fit are carried over to the next block.
        carry = merged[:, block_shape:]

    # When the output has one more slot than there are input blocks, the
    # columns still carried over after the last input block belong there.
    # Without this step that final block would never be written.
    tail = len(input_block)
    if carry is not None and tail < len(blocks):
        blocks[tail] = carry
2153+
2154+
@constraint(computing_units="${ComputingUnits}")
@task(blocks={Type: COLLECTION_OUT, Depth: 1},
      input_blocks={Type: COLLECTION_IN, Depth: 1},
      input_blocks_b={Type: COLLECTION_IN, Depth: 1})
def _assign_blocks(blocks, input_blocks, input_blocks_b=[None],
                   reg_shape=0, used_data=0):
    """Fill `blocks` from one or two rows of input blocks.

    Three modes, selected by `reg_shape` and `used_data`:

    * `reg_shape != 0`: each output block is the window
      `[used_data:used_data + reg_shape]` of `input_blocks[i]` concatenated
      with `input_blocks_b[i]` along the first axis.
    * `reg_shape == 0` and `used_data == 0`: each output block is
      `input_blocks[i]` as is.
    * `reg_shape == 0` and `used_data != 0`: each output block is
      `input_blocks[i]` without its first `used_data` entries.

    `blocks` is filled in place and nothing is returned. The list default of
    `input_blocks_b` is only ever read here, never modified.
    """
    slots = range(len(blocks))
    if reg_shape != 0:
        window_end = used_data + reg_shape
        for k in slots:
            stacked = np.concatenate((input_blocks[k], input_blocks_b[k]))
            blocks[k] = stacked[used_data:window_end]
    elif used_data == 0:
        for k in slots:
            blocks[k] = input_blocks[k]
    else:
        for k in slots:
            blocks[k] = input_blocks[k][used_data:]
2179+
2180+
20112181@constraint (computing_units = "${ComputingUnits}" )
20122182@task (returns = 1 )
20132183def _get_item (i , j , block ):
0 commit comments