Skip to content

Commit 4f44c94

Browse files
authored
Merge pull request #311 from melissalinkert/compression-updates
Update compression option handling for zarr-java
2 parents ce46297 + 5ecd26f commit 4f44c94

File tree

5 files changed

+274
-7
lines changed

5 files changed

+274
-7
lines changed

README.md

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,20 +162,37 @@ https://github.com/glencoesoftware/raw2ometiff for more information.
162162
Compression Options
163163
===================
164164

165-
By default, output is compressed with Blosc using the `lz4` codec.
165+
By default, output is compressed with the Blosc codec using the `lz4` compression algorithm with `clevel` set to `5`.
166+
167+
To change the overall compression type, use `--compression <type>`. Supported types depend upon the Zarr/NGFF version being written:
168+
169+
| Zarr/NGFF version | null (uncompressed) | blosc | gzip | zlib | zstd |
170+
|-------------------|---------------------|-------|------|------|------|
171+
| v2/0.4 | yes | yes | no | yes | no |
172+
| v3/0.5 | yes | yes | yes | no | yes |
166173

167-
To change the overall compression type, use `--compression <type>`. Supported types are `blosc`, `zlib`, and `null` (uncompressed).
168174

169175
To change type-specific options, use `--compression-properties <key=value>`.
170176

171177
Supported options for `blosc` are:
172178

173179
* `cname=<codec>`, where the default is `cname=lz4`. `zstd`, `zlib`, `blosclz`, and `lz4hc` are also valid values of `cname`.
174-
* `clevel=<level>`, where the default is `clevel=5`. Valid values are integers from 1 to 9 inclusive.
180+
* `clevel=<level>`, where the default is `clevel=5`. Valid values are integers from 0 to 9 inclusive.
181+
* `blocksize=<blocksize>`, where the default is `blocksize=0`.
182+
* `shuffle=<shuffle type>`, where the default is `shuffle=byteshuffle`. Valid values are `noshuffle`, `shuffle`/`byteshuffle`, and `bitshuffle`.
183+
184+
Supported options for `gzip` are:
185+
186+
* `level=<level>`, where the default is `level=5`. Valid values are integers from 0 to 9 inclusive.
175187

176188
Supported options for `zlib` are:
177189

178-
* `level=<level>`, where the default is `level=1`. Valid values are integers from 1 to 9 inclusive.
190+
* `level=<level>`, where the default is `level=1`. Valid values are integers from 0 to 9 inclusive.
191+
192+
Supported options for `zstd` are:
193+
194+
* `level=<level>`, where the default is `level=5`. Valid values are integers from -7 to 22 inclusive.
195+
* `checksum=<calculate checksum>`, where the default is `checksum=true`. Valid values are `true` or `false`.
179196

180197
There are no supported compression options for type `null`, as this is uncompressed data.
181198

src/main/java/com/glencoesoftware/bioformats2raw/Converter.java

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2566,6 +2566,10 @@ else if (compressionType == ZarrCompression.zlib) {
25662566
compressionProperties.getOrDefault("level", "1").toString());
25672567
builder = builder.withZlibCompressor(level);
25682568
}
2569+
else if (compressionType != ZarrCompression.raw) {
2570+
throw new IllegalArgumentException(
2571+
"Zarr v2 does not support " + compressionType + " compression");
2572+
}
25692573

25702574
dev.zarr.zarrjava.v2.Array.create(handle, builder.build());
25712575
}
@@ -3475,10 +3479,32 @@ private boolean chunkAndShardCompatible(
34753479
*/
34763480
private CodecBuilder applyCompressionType(CodecBuilder builder) {
34773481
if (getCompression() == ZarrCompression.blosc) {
3478-
return builder.withBlosc();
3482+
String cname =
3483+
compressionProperties.getOrDefault("cname", "lz4").toString();
3484+
int clevel = Integer.parseInt(
3485+
compressionProperties.getOrDefault("clevel", "5").toString());
3486+
int blocksize = Integer.parseInt(
3487+
compressionProperties.getOrDefault("blocksize", "0").toString());
3488+
String shuffle = compressionProperties.getOrDefault(
3489+
"shuffle", "byteshuffle").toString();
3490+
3491+
return builder.withBlosc(cname, shuffle, clevel, blocksize);
3492+
}
3493+
else if (getCompression() == ZarrCompression.gzip) {
3494+
int level = Integer.parseInt(
3495+
compressionProperties.getOrDefault("level", "5").toString());
3496+
return builder.withGzip(level);
3497+
}
3498+
else if (getCompression() == ZarrCompression.zstd) {
3499+
int level = Integer.parseInt(
3500+
compressionProperties.getOrDefault("level", "5").toString());
3501+
boolean checksum = Boolean.parseBoolean(
3502+
compressionProperties.getOrDefault("checksum", "true").toString());
3503+
return builder.withZstd(level, checksum);
34793504
}
34803505
else if (getCompression() != ZarrCompression.raw) {
3481-
LOGGER.warn("Skipping unsupported compression: {}", getCompression());
3506+
throw new IllegalArgumentException(
3507+
"Zarr v3 does not support " + getCompression() + " compression");
34823508
}
34833509
return builder;
34843510
}

src/main/java/com/glencoesoftware/bioformats2raw/ZarrCompression.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
public enum ZarrCompression {
1212
raw("null"),
1313
zlib("zlib"),
14-
blosc("blosc");
14+
blosc("blosc"),
15+
zstd("zstd"),
16+
gzip("gzip");
1517

1618
private final String value;
1719

src/test/java/com/glencoesoftware/bioformats2raw/test/ZarrTest.java

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.util.Map;
2020
import java.util.stream.Stream;
2121

22+
import com.scalableminds.bloscjava.Blosc;
2223
import dev.zarr.zarrjava.ZarrException;
2324
import dev.zarr.zarrjava.core.Attributes;
2425
import dev.zarr.zarrjava.core.chunkkeyencoding.Separator;
@@ -27,6 +28,9 @@
2728
import dev.zarr.zarrjava.v2.Array;
2829
import dev.zarr.zarrjava.v2.DataType;
2930
import dev.zarr.zarrjava.v2.Group;
31+
import dev.zarr.zarrjava.v2.codec.Codec;
32+
import dev.zarr.zarrjava.v2.codec.core.BloscCodec;
33+
import dev.zarr.zarrjava.v2.codec.core.ZlibCodec;
3034

3135
import com.fasterxml.jackson.databind.JsonNode;
3236
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -896,6 +900,96 @@ static Stream<Arguments> getDimensions() {
896900
);
897901
}
898902

903+
/**
904+
* @return compression settings
905+
*/
906+
static Stream<Arguments> getCompressionSettings() {
907+
return Stream.of(
908+
Arguments.of((Object) new String[] {"-c", "zlib"}),
909+
Arguments.of((Object) new String[] {"-c", "zlib",
910+
"--compression-properties", "level=1"}),
911+
Arguments.of((Object) new String[] {"-c", "blosc",
912+
"--compression-properties", "cname=zlib",
913+
"--compression-properties", "blocksize=8"}),
914+
Arguments.of((Object) new String[] {"-c", "blosc",
915+
"--compression-properties", "clevel=1",
916+
"--compression-properties", "shuffle=noshuffle"})
917+
);
918+
}
919+
920+
/**
921+
* @return compression settings expected to throw an exception
922+
*/
923+
static Stream<Arguments> getBadCompressionSettings() {
924+
return Stream.of(
925+
Arguments.of("gzip"),
926+
Arguments.of("zstd")
927+
);
928+
}
929+
930+
/**
931+
* Test different compression options.
932+
*
933+
* @param options compression type and properties passed directly to converter
934+
*/
935+
@ParameterizedTest
936+
@MethodSource("getCompressionSettings")
937+
public void testCompressionOptions(String[] options) throws Exception {
938+
input = fake();
939+
assertTool(options);
940+
941+
Array array = Array.open(store.resolve("0", "0"));
942+
Codec codec = array.metadata().compressor;
943+
if (options[1].equals("zlib")) {
944+
assertEquals(codec.getClass(), ZlibCodec.class);
945+
if (options.length == 4) {
946+
ZlibCodec c = (ZlibCodec) codec;
947+
String[] option = options[3].split("=");
948+
assertEquals(option[0], "level");
949+
assertEquals(Integer.parseInt(option[1]), c.level);
950+
}
951+
}
952+
else if (options[1].equals("blosc")) {
953+
assertEquals(codec.getClass(), BloscCodec.class);
954+
if (options.length > 3) {
955+
BloscCodec c = (BloscCodec) codec;
956+
for (int i=2; i<options.length; i+=2) {
957+
assertEquals(options[i], "--compression-properties");
958+
String[] option = options[i + 1].split("=");
959+
if (option[0].equals("clevel")) {
960+
assertEquals(Integer.parseInt(option[1]), c.clevel);
961+
}
962+
else if (option[0].equals("cname")) {
963+
assertEquals(option[1], c.cname.getValue());
964+
}
965+
else if (option[0].equals("blocksize")) {
966+
assertEquals(Integer.parseInt(option[1]), c.blocksize);
967+
}
968+
else if (option[0].equals("shuffle")) {
969+
assertEquals(Blosc.Shuffle.fromString(option[1]), c.shuffle);
970+
}
971+
}
972+
}
973+
}
974+
else {
975+
fail("Unexpected compression type " + options[1]);
976+
}
977+
}
978+
979+
/**
980+
* Test different invalid compression options.
981+
*
982+
* @param codec codec name
983+
*/
984+
@ParameterizedTest
985+
@MethodSource("getBadCompressionSettings")
986+
public void testBadCompressionOptions(String codec) throws IOException {
987+
input = fake();
988+
assertThrows(ExecutionException.class, () -> {
989+
assertTool("-c", codec);
990+
});
991+
}
992+
899993
/**
900994
* Test that there are no edge effects when tiles do not divide evenly
901995
* and downsampling.

src/test/java/com/glencoesoftware/bioformats2raw/test/ZarrV3Test.java

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,31 @@
1414
import java.util.Optional;
1515
import java.util.stream.Stream;
1616

17+
import com.scalableminds.bloscjava.Blosc;
1718
import dev.zarr.zarrjava.core.Attributes;
1819
import dev.zarr.zarrjava.v3.Array;
1920
import dev.zarr.zarrjava.v3.ArrayMetadata;
2021
import dev.zarr.zarrjava.v3.Group;
2122
import dev.zarr.zarrjava.v3.codec.Codec;
23+
import dev.zarr.zarrjava.v3.codec.core.BloscCodec;
24+
import dev.zarr.zarrjava.v3.codec.core.GzipCodec;
2225
import dev.zarr.zarrjava.v3.codec.core.ShardingIndexedCodec;
26+
import dev.zarr.zarrjava.v3.codec.core.ZstdCodec;
2327

2428
import ome.xml.model.OME;
2529

30+
import picocli.CommandLine.ExecutionException;
31+
2632
import org.junit.jupiter.api.Test;
2733
import org.junit.jupiter.params.ParameterizedTest;
2834
import org.junit.jupiter.params.provider.Arguments;
2935
import org.junit.jupiter.params.provider.MethodSource;
3036
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
3137
import static org.junit.jupiter.api.Assertions.assertEquals;
3238
import static org.junit.jupiter.api.Assertions.assertFalse;
39+
import static org.junit.jupiter.api.Assertions.assertThrows;
3340
import static org.junit.jupiter.api.Assertions.assertTrue;
41+
import static org.junit.jupiter.api.Assertions.fail;
3442

3543
public class ZarrV3Test extends AbstractZarrTest {
3644

@@ -315,4 +323,124 @@ public void testInvalidShardSizes() throws Exception {
315323
assertFalse(shardingCodec.isPresent());
316324
}
317325

326+
/**
327+
* @return compression settings
328+
*/
329+
static Stream<Arguments> getCompressionSettings() {
330+
return Stream.of(
331+
Arguments.of((Object) new String[] {"-c", "gzip"}),
332+
Arguments.of((Object) new String[] {"-c", "gzip",
333+
"--compression-properties", "level=1"}),
334+
Arguments.of((Object) new String[] {"-c", "blosc",
335+
"--compression-properties", "cname=zlib",
336+
"--compression-properties", "blocksize=8"}),
337+
Arguments.of((Object) new String[] {"-c", "blosc",
338+
"--compression-properties", "clevel=1",
339+
"--compression-properties", "shuffle=noshuffle"}),
340+
Arguments.of((Object) new String[] {"-c", "zstd",
341+
"--compression-properties", "level=9",
342+
"--compression-properties", "checksum=false"})
343+
);
344+
}
345+
346+
/**
347+
* @return compression settings expected to throw an exception
348+
*/
349+
static Stream<Arguments> getBadCompressionSettings() {
350+
return Stream.of(
351+
Arguments.of("zlib")
352+
);
353+
}
354+
355+
/**
356+
* Test different compression options.
357+
*
358+
* @param options compression type and properties passed directly to converter
359+
*/
360+
@ParameterizedTest
361+
@MethodSource("getCompressionSettings")
362+
public void testCompressionOptions(String[] options) throws Exception {
363+
input = fake();
364+
String[] allOptions = new String[options.length + 2];
365+
allOptions[0] = "--ngff-version";
366+
allOptions[1] = getNGFFVersion();
367+
System.arraycopy(options, 0, allOptions, 2, options.length);
368+
assertTool(allOptions);
369+
370+
Array array = Array.open(store.resolve("0", "0"));
371+
assertEquals(array.metadata().codecs.length, 1);
372+
assertTrue(array.metadata().codecs[0] instanceof ShardingIndexedCodec);
373+
ShardingIndexedCodec shards =
374+
(ShardingIndexedCodec) array.metadata().codecs[0];
375+
assertEquals(shards.configuration.codecs.length, 2);
376+
Codec codec = shards.configuration.codecs[1];
377+
if (options[1].equals("gzip")) {
378+
assertEquals(codec.getClass(), GzipCodec.class);
379+
if (options.length == 4) {
380+
GzipCodec c = (GzipCodec) codec;
381+
String[] option = options[3].split("=");
382+
assertEquals(option[0], "level");
383+
assertEquals(Integer.parseInt(option[1]), c.configuration.level);
384+
}
385+
}
386+
else if (options[1].equals("zstd")) {
387+
assertEquals(codec.getClass(), ZstdCodec.class);
388+
if (options.length > 3) {
389+
ZstdCodec c = (ZstdCodec) codec;
390+
for (int i=2; i<options.length; i+=2) {
391+
assertEquals(options[i], "--compression-properties");
392+
String[] option = options[i + 1].split("=");
393+
if (option[0].equals("level")) {
394+
assertEquals(Integer.parseInt(option[1]), c.configuration.level);
395+
}
396+
else if (option[0].equals("checksum")) {
397+
assertEquals(
398+
Boolean.parseBoolean(option[1]), c.configuration.checksum);
399+
}
400+
}
401+
}
402+
}
403+
else if (options[1].equals("blosc")) {
404+
assertEquals(codec.getClass(), BloscCodec.class);
405+
if (options.length > 3) {
406+
BloscCodec c = (BloscCodec) codec;
407+
for (int i=2; i<options.length; i+=2) {
408+
assertEquals(options[i], "--compression-properties");
409+
String[] option = options[i + 1].split("=");
410+
if (option[0].equals("clevel")) {
411+
assertEquals(Integer.parseInt(option[1]), c.configuration.clevel);
412+
}
413+
else if (option[0].equals("cname")) {
414+
assertEquals(option[1], c.configuration.cname.getValue());
415+
}
416+
else if (option[0].equals("blocksize")) {
417+
assertEquals(
418+
Integer.parseInt(option[1]), c.configuration.blocksize);
419+
}
420+
else if (option[0].equals("shuffle")) {
421+
assertEquals(
422+
Blosc.Shuffle.fromString(option[1]), c.configuration.shuffle);
423+
}
424+
}
425+
}
426+
}
427+
else {
428+
fail("Unexpected compression type " + options[1]);
429+
}
430+
}
431+
432+
/**
433+
* Test different invalid compression options.
434+
*
435+
* @param codec codec name
436+
*/
437+
@ParameterizedTest
438+
@MethodSource("getBadCompressionSettings")
439+
public void testBadCompressionOptions(String codec) throws Exception {
440+
input = fake();
441+
assertThrows(ExecutionException.class, () -> {
442+
assertTool("--ngff-version", getNGFFVersion(), "-c", codec);
443+
});
444+
}
445+
318446
}

0 commit comments

Comments
 (0)