Skip to content

Commit a8b0173

Browse files
author
Cristian Tatu
authored
Merge branch 'master' into gpu-tests
2 parents 8999982 + ffd8a9b commit a8b0173

41 files changed

Lines changed: 8407 additions & 1417 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Dockerfile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,15 @@ MAINTAINER COMPSs Support <support-compss@bsc.es>
33

44
COPY . dislib/
55

6-
ENV PYTHONPATH=$PYTHONPATH:/dislib:/opt/COMPSs/Bindings/python/3/
6+
ENV PYTHONPATH=$PYTHONPATH:/dislib:/opt/COMPSs/Bindings/python/3/:/python-blosc2
77
ENV LC_ALL=C.UTF-8
8-
RUN python3 -m pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org --upgrade -r /dislib/requirements.txt
98
RUN python3 -m pip install flake8 parameterized coverage
9+
RUN git clone https://github.com/Blosc/python-blosc2/ /python-blosc2
10+
RUN python3 -m pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org --upgrade -r /python-blosc2/requirements-build.txt
11+
RUN python3 -m pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org --upgrade -r /python-blosc2/requirements-runtime.txt
12+
RUN cd /python-blosc2 && git submodule update --init --recursive && python3 setup.py build_ext --inplace -- -DDEACTIVATE_AVX2:STRING=ON
13+
RUN python3 -m pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org --upgrade -r /dislib/requirements.txt
14+
1015

1116
ENV COMPSS_LOAD_SOURCE false
1217

codecov.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,13 @@ codecov:
44
coverage:
55
precision: 2
66
round: down
7-
range: "90...100"
7+
range: "90..100"
8+
status:
9+
project:
10+
diff_coverage:
11+
target: 90%
12+
default:
13+
threshold: 1%
814

915
parsers:
1016
gcov:

dislib/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22

33
from dislib.data.array import random_array, apply_along_axis, array, zeros, \
4-
full, identity, eye, matmul
4+
full, identity, eye, matmul, concat_rows, concat_columns
55
from dislib.data.io import load_svmlight_file, load_npy_file, load_txt_file, \
66
load_mdcrd_file, save_txt
77
from dislib.math import kron, svd
@@ -29,8 +29,8 @@
2929

3030
__all__ = ['array', 'random_array', 'zeros', 'full', 'identity', 'eye',
3131
'load_txt_file', 'load_svmlight_file', 'load_npy_file',
32-
'load_mdcrd_file', 'matmul', 'save_txt',
33-
'apply_along_axis', 'kron', 'svd']
32+
'load_mdcrd_file', 'matmul', 'save_txt', 'concat_rows',
33+
'concat_columns', 'apply_along_axis', 'kron', 'svd']
3434

3535
gpu_envar = os.environ.get('DISLIB_GPU_AVAILABLE', 'False')
3636
__gpu_available__ = gpu_envar.lower() == 'true'

dislib/classification/csvm/base.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -226,22 +226,23 @@ def score(self, x, y, collect=False):
226226

227227
def _check_initial_parameters(self):
228228
gamma = self.gamma
229-
assert (gamma == "auto" or type(gamma) == float
230-
or type(float(gamma)) == float), "Invalid gamma"
229+
assert (gamma == "auto" or isinstance(gamma, float)
230+
or isinstance(float(gamma), float)), "Invalid gamma"
231231
kernel = self.kernel
232232
assert (kernel is None or kernel in self._name_to_kernel.keys()), \
233233
"Incorrect kernel value [%s], available kernels are %s" % (
234234
kernel, self._name_to_kernel.keys())
235235
c = self.c
236-
assert (c is None or type(c) == float or type(float(c)) == float), \
236+
assert (c is None or isinstance(c, float) or
237+
isinstance(float(c), float)), \
237238
"Incorrect C type [%s], type : %s" % (c, type(c))
238239
tol = self.tol
239-
assert (type(tol) == float or type(float(tol)) == float), \
240+
assert (isinstance(tol, float) or isinstance(float(tol), float)), \
240241
"Incorrect tol type [%s], type : %s" % (tol, type(tol))
241242
assert self.cascade_arity > 1, "Cascade arity must be greater than 1"
242243
assert self.max_iter > 0, "Max iterations must be greater than 0"
243-
assert type(self.check_convergence) == bool, "Invalid value in " \
244-
"check_convergence"
244+
assert isinstance(self.check_convergence, bool), "Invalid value in " \
245+
"check_convergence"
245246

246247
def _reset_model(self):
247248
self.iterations = 0

dislib/data/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from dislib.data.array import array, random_array, apply_along_axis, zeros, \
2-
full, identity, eye, matmul, matsubtract, matadd, concat_columns
2+
full, identity, eye, matmul, matsubtract, matadd, concat_columns, \
3+
concat_rows
34
from dislib.data.io import load_txt_file, save_npy_file, load_npy_file, \
45
load_svmlight_file, load_mdcrd_file, load_hstack_npy_files, save_txt, \
56
load_npy_files, load_blocks_rechunk
@@ -8,5 +9,6 @@
89
'apply_along_axis', 'save_npy_file', 'load_npy_file',
910
'load_mdcrd_file', 'load_hstack_npy_files', 'load_npy_files',
1011
'load_blocks_rechunk', 'matmul', 'matsubtract', 'save_txt',
11-
'zeros', 'full', 'matadd', 'concat_columns', 'identity', 'eye',
12+
'zeros', 'full', 'matadd', 'concat_columns', 'concat_rows',
13+
'identity', 'eye',
1214
'util']

dislib/data/array.py

Lines changed: 178 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1892,30 +1892,146 @@ def concat_columns(a: Array, b: Array):
18921892
>>> if __name__ == "__main__":
18931893
>>> x = ds.random_array((8, 4), block_size=(2, 2))
18941894
>>> y = ds.random_array((8, 4), block_size=(2, 2))
1895-
>>> result = ds.conc_columns(x, y)
1895+
>>> result = ds.concat_columns(x, y)
18961896
>>> print(result.collect())
18971897
"""
18981898
if a._shape[0] != b._shape[0]:
18991899
raise ValueError("incompatible number of rows "
1900-
f"subtract ({a._shape[0]} != {b._shape[0]}")
1900+
f" for the concatenation "
1901+
f"({a._shape[0]} != {b._shape[0]}")
19011902

19021903
if a._reg_shape[0] != b._reg_shape[0] or a._reg_shape[1] !=\
19031904
b._reg_shape[1]:
19041905
raise ValueError("incorrect block sizes for the requested "
1905-
f"subtract ({a._reg_shape[0], a._reg_shape[1]} "
1906+
f"concatenation ({a._reg_shape[0], a._reg_shape[1]} "
19061907
f"!= {b._reg_shape[0], b._reg_shape[1]})")
19071908

1908-
for i in range(len(a._blocks)):
1909-
for j in range(len(b._blocks[0])):
1910-
a._blocks[i].append(b._blocks[i][j])
1911-
1912-
return Array(blocks=a._blocks,
1909+
blocks_concatted = [[object() for _ in range(math.ceil(
1910+
(a.shape[1]+b.shape[1])/a._reg_shape[1]))]
1911+
for _ in range(len(a._blocks))]
1912+
if a.shape[1] % a._reg_shape[1] == 0:
1913+
for i in range(len(a._blocks)):
1914+
x = blocks_concatted[i][:len(a._blocks[i])]
1915+
_assign_block_columns(x, a._blocks[i])
1916+
blocks_concatted[i][:len(a._blocks[i])] = x
1917+
x = blocks_concatted[i][len(a._blocks[i]):]
1918+
_assign_block_columns(x, b._blocks[i])
1919+
blocks_concatted[i][len(a._blocks[i]):] = x
1920+
else:
1921+
for i in range(len(a._blocks)):
1922+
x = blocks_concatted[i][:len(a._blocks[i]) - 1]
1923+
_assign_block_columns(x, a._blocks[i][:(len(a._blocks[i]) - 1)])
1924+
blocks_concatted[i][:len(a._blocks[i]) - 1] = x
1925+
leftover_data = a._blocks[i][-1]
1926+
x = blocks_concatted[i][len(a._blocks[i])-1:]
1927+
_assign_block_columns_leftover_data(x, b._blocks[i],
1928+
a._reg_shape[1],
1929+
leftover_data)
1930+
blocks_concatted[i][len(a._blocks[i]) - 1:] = x
1931+
return Array(blocks=blocks_concatted,
19131932
top_left_shape=(a._reg_shape[0], a._reg_shape[1]),
19141933
reg_shape=(a._reg_shape[0], a._reg_shape[1]),
19151934
shape=(a._shape[0], a._shape[1] + b._shape[1]),
19161935
sparse=a._sparse)
19171936

19181937

1938+
def concat_rows(a, b):
    """ Matrix concatenation by rows (stacks ``b`` below ``a``).

    Parameters
    ----------
    a : ds-array
        First matrix.
    b : ds-array
        Second matrix.

    Returns
    -------
    out : ds-array
        The output array, with shape
        ``(a.shape[0] + b.shape[0], a.shape[1])``.

    Raises
    ------
    ValueError
        If the arrays do not match in the number of columns.
        If the block size is different between the arrays (only checked
        when ``b`` spans more than one row of blocks).

    Examples
    --------
    >>> import dislib as ds
    >>>
    >>>
    >>> if __name__ == "__main__":
    >>>     x = ds.random_array((8, 4), block_size=(2, 2))
    >>>     y = ds.random_array((8, 4), block_size=(2, 2))
    >>>     result = ds.concat_rows(x, y)
    >>>     print(result.collect())
    """
    # Row concatenation requires matching column counts (_shape[1]).
    if a._shape[1] != b._shape[1]:
        raise ValueError("incompatible number of columns "
                         f"for the concatenation "
                         f"({a._shape[1]} != {b._shape[1]})")

    if (a._reg_shape[0] != b._reg_shape[0] or a._reg_shape[1] !=
            b._reg_shape[1]) and b._n_blocks[0] > 1:
        raise ValueError("incorrect block sizes for the requested "
                         f"concatenation ({a._reg_shape[0], a._reg_shape[1]} "
                         f"!= {b._reg_shape[0], b._reg_shape[1]})")

    # Number of valid rows in the last (possibly partial) block row of each
    # array; a full block when the shape divides evenly.
    size_last_block_a = a.shape[0] % a._reg_shape[0]
    if size_last_block_a == 0:
        size_last_block_a = a._reg_shape[0]
    size_last_block_b = b.shape[0] % b._reg_shape[0]
    if size_last_block_b == 0:
        # NOTE(review): fixed from a._reg_shape[0] — a full last block of b
        # has b._reg_shape[0] rows (they only differ when b has one block).
        size_last_block_b = b._reg_shape[0]

    # Placeholders for the output block grid: the full block rows of a and b
    # plus the block rows needed to hold both partial tails merged together.
    blocks_a = [[object() for _ in range(len(a._blocks[i]))]
                for i in range(len(a._blocks) - 1)]
    blocks_b = [[object() for _ in range(len(b._blocks[i]))]
                for i in range(len(b._blocks) - 1)]
    remaining_blocks = [[object() for _ in range(len(b._blocks[0]))] for i
                        in range(math.ceil((size_last_block_a +
                                            size_last_block_b) /
                                           a._reg_shape[0]))]
    blocks_concatted = blocks_a + blocks_b + remaining_blocks

    # First pass: copy a's block rows. When a's last block row is partial,
    # it is merged with the first block row of b (used_data=0 keeps all of
    # a's rows and tops the block up with rows from b).
    for i in range(len(blocks_concatted)):
        if i < (len(a._blocks) - 1):
            _assign_blocks(blocks_concatted[i], a._blocks[i])
        elif i == (len(a._blocks) - 1):
            if size_last_block_a == a._reg_shape[0]:
                _assign_blocks(blocks_concatted[i], a._blocks[i])
            else:
                _assign_blocks(blocks_concatted[i], a._blocks[i],
                               b._blocks[0], a._reg_shape[0], used_data=0)
            break
    # i leaks from the loop above: index of the first output row not yet
    # filled.
    i += 1

    # Second pass: copy b's block rows, shifting by the rows of b already
    # consumed when a's tail was padded.
    for j in range(len(blocks_concatted) - (i)):
        if size_last_block_a == a._reg_shape[0] and j < (len(b._blocks) - 1):
            _assign_blocks(blocks_concatted[j + i], b._blocks[j])
        elif size_last_block_a != a._reg_shape[0] and j < (len(b._blocks) - 1):
            _assign_blocks(blocks_concatted[j + i], b._blocks[j],
                           b._blocks[j + 1], a._reg_shape[0],
                           used_data=(a._reg_shape[0] -
                                      (a.shape[0] %
                                       a._reg_shape[0])))
        else:
            # Last output block row: depends on whether either tail was
            # partial.
            if size_last_block_a != a._reg_shape[0]:
                if size_last_block_b != b._reg_shape[0]:
                    _assign_blocks(blocks_concatted[j + i], b._blocks[j - 1],
                                   b._blocks[j], a._reg_shape[0],
                                   used_data=(a._reg_shape[0] +
                                              a._reg_shape[0] -
                                              (a.shape[0] %
                                               a._reg_shape[0])))
                else:
                    _assign_blocks(blocks_concatted[j + i], b._blocks[j],
                                   used_data=(a._reg_shape[0] -
                                              (a.shape[0] %
                                               a._reg_shape[0])))
            else:
                _assign_blocks(blocks_concatted[j + i], b._blocks[j])
    return Array(blocks=blocks_concatted,
                 top_left_shape=(a._reg_shape[0], a._reg_shape[1]),
                 reg_shape=(a._reg_shape[0], a._reg_shape[1]),
                 shape=(a._shape[0] + b._shape[0], a._shape[1]),
                 sparse=a._sparse)
2033+
2034+
19192035
def _add_block_groups(hblock, vblock):
19202036
blocks = []
19212037

@@ -2008,6 +2124,60 @@ def _random_block_wrapper(block_size, r_state):
20082124
return _random_block(block_size, seed)
20092125

20102126

2127+
@constraint(computing_units="${ComputingUnits}")
@task(blocks={Type: COLLECTION_OUT, Depth: 1},
      a_blocks={Type: COLLECTION_IN, Depth: 1})
def _assign_block_columns(blocks, a_blocks):
    """Copy each block of ``a_blocks`` into ``blocks``, position by
    position."""
    for idx, block in enumerate(a_blocks):
        blocks[idx] = block
2133+
2134+
2135+
@constraint(computing_units="${ComputingUnits}")
@task(blocks={Type: COLLECTION_OUT, Depth: 1},
      input_block={Type: COLLECTION_IN, Depth: 1})
def _assign_block_columns_leftover_data(blocks, input_block,
                                        block_shape, leftover_data):
    """Re-chunk ``input_block`` column-wise into blocks of width
    ``block_shape``, prepending ``leftover_data`` (columns carried over
    from the previous block, or None) before the first block."""
    # Each iteration prepends the carried-over columns, emits one block of
    # exactly block_shape columns, and carries the remainder forward.
    for idx, block in enumerate(input_block):
        if leftover_data is not None:
            total_data = np.concatenate((leftover_data, block), axis=1)
        else:
            total_data = block
        blocks[idx] = total_data[:, :block_shape]
        leftover_data = total_data[:, block_shape:]
2153+
2154+
2155+
@constraint(computing_units="${ComputingUnits}")
@task(blocks={Type: COLLECTION_OUT, Depth: 1},
      input_blocks={Type: COLLECTION_IN, Depth: 1},
      input_blocks_b={Type: COLLECTION_IN, Depth: 1})
def _assign_blocks(blocks, input_blocks, input_blocks_b=[None],
                   reg_shape=0, used_data=0):
    """Fill ``blocks`` row-wise from ``input_blocks``.

    Three modes, selected by the parameters:
    - ``reg_shape != 0``: vertically concatenate matching blocks of
      ``input_blocks`` and ``input_blocks_b`` and keep the ``reg_shape``
      rows starting at offset ``used_data``.
    - ``reg_shape == 0`` and ``used_data == 0``: plain copy.
    - ``reg_shape == 0`` and ``used_data != 0``: copy, dropping the first
      ``used_data`` rows (already consumed by the previous output block).

    NOTE(review): ``input_blocks_b=[None]`` is a mutable default argument;
    it is never mutated here, but a None default would be safer — confirm
    the COMPSs COLLECTION_IN binding before changing it.
    """
    # The original code duplicated the concatenate branch under both
    # used_data == 0 and used_data != 0; the offset is applied uniformly.
    if reg_shape != 0:
        for i in range(len(blocks)):
            concatted_data = np.concatenate((input_blocks[i],
                                             input_blocks_b[i]))
            blocks[i] = concatted_data[used_data: used_data + reg_shape]
    elif used_data == 0:
        for i in range(len(blocks)):
            blocks[i] = input_blocks[i]
    else:
        for i in range(len(blocks)):
            blocks[i] = input_blocks[i][used_data:]
2179+
2180+
20112181
@constraint(computing_units="${ComputingUnits}")
20122182
@task(returns=1)
20132183
def _get_item(i, j, block):

dislib/data/util/model.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
except ImportError:
99
cbor2 = None
1010

11+
try:
12+
import blosc2
13+
except ImportError:
14+
blosc2 = None
15+
1116

1217
def encoder_helper(obj):
1318
if isinstance(obj, np.generic):

0 commit comments

Comments
 (0)