Skip to content

Commit 5c07ebf

Browse files
Check at nc_open if file appears to be in NCZarr/Zarr format.
re: Issue Unidata#2656 Charlie Zender notes that *nc_open()* does not immediately detect that the given path refers to a file not in zarr format. Rather it fails later when trying to read the (meta-)data. The reason is that the Zarr format is highly decentralized. There is no easily testable magic number or superblock to look for. In effect the only way to see if a directory is Zarr is to successfully read it. It is possible to heuristically detect that a path refers to an NCZarr/Zarr file by doing a breadth-first search of the file tree starting at the given path. If the search encounters a file whose name starts with ".z", then assume it is a legitimate NCZarr/Zarr file. Of course, this test could be costly. One hopes that in practice that it is not. In addition to this fix, a corresponding test case was added. ## Other Changes re: PR Unidata#2529 There was an error under Cygwin for this PR that is fixed in this PR. The fix was to convert all *noinst_* references to *check_*.
1 parent bb632f2 commit 5c07ebf

10 files changed

Lines changed: 214 additions & 31 deletions

File tree

.github/workflows/run_tests_win_mingw.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ jobs:
5454
- name: Check for plugins
5555
run: |
5656
dir ./plugins
57-
dir ./plugins/.libs
57+
if test -e ./plugins/.libs ; then dir ./plugins/.libs ; fi
5858
5959
- name: (Autotools) Build and Run Tests
6060
run: make check -j 8 LDFLAGS="-Wl,--export-all-symbols"

RELEASE_NOTES.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ This file contains a high-level description of this package's evolution. Release
77

88
## 4.9.2 - TBD
99

10+
* [Bug Fix] Add a crude test to see if an NCZarr path looks like a valid NCZarr/Zarr file. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????).
1011
* [Bug Fix] Update DAP code to enable CURLOPT_ACCEPT_ENCODING by default. See [Github #2630](https://github.com/Unidata/netcdf-c/pull/2630).
11-
* [Bug Fix] Fix byterange failures for certain URLs. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????).
12+
* [Bug Fix] Fix byterange failures for certain URLs. See [Github #2649](https://github.com/Unidata/netcdf-c/pull/2649).
1213
* [Bug Fix] Fix 'make distcheck' error in run_interop.sh. See [Github #2631](https://github.com/Unidata/netcdf-c/pull/2631).
1314
* [Enhancement] Update `nc-config` to remove inclusion from automatically-detected `nf-config` and `ncxx-config` files, as the wrong files could be included in the output. This is in support of [GitHub #2274](https://github.com/Unidata/netcdf-c/issues/2274).
1415
* [Enhancement] Update H5FDhttp.[ch] to work with HDF5 version 1.14.0. See [Github #2615](https://github.com/Unidata/netcdf-c/pull/2615).

libnczarr/zarr.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls)
142142
if((stat = nczmap_open(zinfo->controls.mapimpl,nc->path,mode,zinfo->controls.flags,NULL,&zinfo->map)))
143143
goto done;
144144

145+
/* Ok, try to read superblock */
145146
if((stat = ncz_read_superblock(file,&nczarr_version,&zarr_format))) goto done;
146147

147148
if(nczarr_version == NULL) /* default */

libnczarr/zsync.c

Lines changed: 75 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr
4141
static int json_convention_read(NCjson* jdict, NCjson** jtextp);
4242
static int jtypes2atypes(NCjson* jtypes, NClist* atypes);
4343

44+
static int ncz_validate(NC_FILE_INFO_T* file);
45+
4446
/**************************************************/
4547
/**************************************************/
4648
/* Synchronize functions to make map and memory
@@ -1829,7 +1831,7 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp)
18291831
break;
18301832
default: goto done;
18311833
}
1832-
/* Also gett Zarr Root Group */
1834+
/* Get Zarr Root Group, if any */
18331835
switch(stat = NCZ_downloadjson(zinfo->map, ZMETAROOT, &jzgroup)) {
18341836
case NC_NOERR:
18351837
break;
@@ -1842,8 +1844,9 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp)
18421844
if(jzgroup != NULL) {
18431845
/* See if this NCZarr V2 */
18441846
if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK,&jsuper))) goto done;
1845-
if(!stat && jsuper == NULL)
1846-
{if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK_UC,&jsuper))) goto done;}
1847+
if(!stat && jsuper == NULL) { /* try uppercase name */
1848+
if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK_UC,&jsuper))) goto done;
1849+
}
18471850
if(jsuper != NULL) {
18481851
/* Extract the equivalent attribute */
18491852
if(jsuper->sort != NCJ_DICT)
@@ -1855,15 +1858,21 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp)
18551858
if((stat = NCJdictget(jzgroup,"zarr_format",&jtmp))) goto done;
18561859
zarr_format = nulldup(NCJstring(jtmp));
18571860
}
1858-
/* Set the controls */
1861+
/* Set the format flags */
18591862
if(jnczgroup == NULL && jsuper == NULL) {
1860-
zinfo->controls.flags |= FLAG_PUREZARR;
1863+
/* See if this is looks like a NCZarr/Zarr dataset at all
1864+
by looking for anything here of the form ".z*" */
1865+
if((stat = ncz_validate(file))) goto done;
1866+
/* ok, assume pure zarr with no groups */
1867+
zinfo->controls.flags |= FLAG_PUREZARR;
1868+
zinfo->controls.flags &= ~(FLAG_NCZARR_V1);
1869+
zarr_format = strdup("2");
18611870
} else if(jnczgroup != NULL) {
18621871
zinfo->controls.flags |= FLAG_NCZARR_V1;
18631872
/* Also means file is read only */
18641873
file->no_write = 1;
18651874
} else if(jsuper != NULL) {
1866-
/* ! FLAG_NCZARR_V1 && ! FLAG_PUREZARR */
1875+
/* ! FLAG_NCZARR_V1 && ! FLAG_PUREZARR */
18671876
}
18681877
if(nczarrvp) {*nczarrvp = nczarr_version; nczarr_version = NULL;}
18691878
if(zarrfp) {*zarrfp = zarr_format; zarr_format = NULL;}
@@ -2411,3 +2420,63 @@ jtypes2atypes(NCjson* jtypes, NClist* atypes)
24112420
done:
24122421
return stat;
24132422
}
2423+
2424+
/* See if there is reason to believe the specified path is a legitimate (NC)Zarr file
2425+
* Do a breadth first walk of the tree starting at file path.
2426+
* @param file to validate
2427+
* @return NC_NOERR if it looks ok
2428+
* @return NC_ENOTNC if it does not look ok
2429+
*/
2430+
static int
2431+
ncz_validate(NC_FILE_INFO_T* file)
2432+
{
2433+
int i,stat = NC_NOERR;
2434+
NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info;
2435+
int validate = 0;
2436+
NCbytes* prefix = ncbytesnew();
2437+
NClist* queue = nclistnew();
2438+
NClist* nextlevel = nclistnew();
2439+
NCZMAP* map = zinfo->map;
2440+
char* path = NULL;
2441+
char* segment = NULL;
2442+
size_t seglen;
2443+
2444+
ZTRACE(3,"file=%s",file->controller->path);
2445+
2446+
path = strdup("/");
2447+
nclistpush(queue,path);
2448+
path = NULL;
2449+
do {
2450+
/* This should be full path key */
2451+
nullfree(path); path = NULL;
2452+
path = nclistremove(queue,0); /* remove from front of queue */
2453+
/* get list of next level segments (partial keys) */
2454+
nclistclear(nextlevel);
2455+
if((stat=nczmap_search(map,path,nextlevel))) {validate = 0; goto done;}
2456+
/* For each s in next level, test, convert to full path, and push onto queue */
2457+
for(i=0;i<nclistlength(nextlevel);i++) {
2458+
nullfree(segment); segment = NULL;
2459+
segment = nclistremove(nextlevel,0);
2460+
seglen = nulllen(segment);
2461+
if((seglen >= 2 && memcmp(segment,".z",2)==0) || (seglen >= 4 && memcmp(segment,".ncz",4)==0)) {
2462+
validate = 1;
2463+
goto done;
2464+
}
2465+
/* Convert to full path */
2466+
ncbytesclear(prefix);
2467+
ncbytescat(prefix,path);
2468+
if(strlen(path) > 1) ncbytescat(prefix,"/");
2469+
ncbytescat(prefix,segment);
2470+
/* push onto queue */
2471+
nclistpush(queue,ncbytesextract(prefix));
2472+
}
2473+
} while(nclistlength(queue) > 0);
2474+
done:
2475+
if(!validate) stat = NC_ENOTNC;
2476+
nullfree(path);
2477+
nullfree(segment);
2478+
nclistfreeall(queue);
2479+
nclistfreeall(nextlevel);
2480+
ncbytesfree(prefix);
2481+
return ZUNTRACE(THROW(stat));
2482+
}

nczarr_test/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ IF(ENABLE_TESTS)
124124
BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC})
125125
add_sh_test(nczarr_test run_quantize)
126126

127+
BUILD_BIN_TEST(tst_notzarr ${TSTCOMMONSRC})
128+
add_sh_test(nczarr_test run_notzarr)
129+
127130
if(ENABLE_S3)
128131
add_sh_test(nczarr_test run_s3_cleanup)
129132
ENDIF()

nczarr_test/Makefile.am

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ ut_projections_SOURCES = ut_projections.c ${commonsrc}
3535
ut_chunking_SOURCES = ut_chunking.c ${commonsrc}
3636
tst_fillonlyz_SOURCES = tst_fillonlyz.c ${tstcommonsrc}
3737

38-
check_PROGRAMS += tst_zchunks tst_zchunks2 tst_zchunks3 tst_fillonlyz test_quantize
38+
check_PROGRAMS += tst_zchunks tst_zchunks2 tst_zchunks3 tst_fillonlyz test_quantize tst_notzarr
3939

4040
TESTS += run_ut_chunk.sh
4141

@@ -64,7 +64,9 @@ TESTS += run_jsonconvention.sh
6464
TESTS += run_strings.sh
6565
TESTS += run_scalar.sh
6666
TESTS += run_nulls.sh
67-
endif
67+
TESTS += run_notzarr.sh
68+
69+
endif #BUILD_UTILITIES
6870

6971
if BUILD_UTILITIES
7072

@@ -139,7 +141,7 @@ run_purezarr.sh run_interop.sh run_misc.sh \
139141
run_filter.sh \
140142
run_newformat.sh run_nczarr_fill.sh run_quantize.sh \
141143
run_jsonconvention.sh run_nczfilter.sh run_unknown.sh \
142-
run_scalar.sh run_strings.sh run_nulls.sh
144+
run_scalar.sh run_strings.sh run_nulls.sh run_notzarr.sh
143145

144146
EXTRA_DIST += \
145147
ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \

nczarr_test/run_notzarr.sh

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/bin/sh
2+
3+
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
4+
. ../test_common.sh
5+
6+
. "$srcdir/test_nczarr.sh"
7+
8+
# Test ability to detect NCZarr/Zarr files
9+
10+
URL="${NCZARR_S3_TEST_HOST}/${NCZARR_S3_TEST_BUCKET}"
11+
KEY="/netcdf-c"
12+
13+
THISDIR=`pwd`
14+
RESULTSDIR=tmp_notzarr
15+
sometestfailed=
16+
17+
testfailed() {
18+
if test "x$1" != "x-51" ; then
19+
echo "*** Failed"
20+
sometestfailed=1
21+
fi
22+
}
23+
24+
rm -fr ${RESULTSDIR}
25+
mkdir -p ${RESULTSDIR}
26+
cd ${RESULTSDIR}
27+
28+
# Make test sets
29+
mkdir empty.file # empty
30+
mkdir notzarr.file # non-empty, non-zarr
31+
echo "random data" >notzarr.file/notzarr.txt
32+
if test "x$FEATURE_NCZARR_ZIP" = xyes ; then
33+
mkdir empty
34+
zip -r empty.zip empty
35+
cp -r notzarr.file ./notzarr
36+
zip -r notzarr.zip notzarr
37+
rm -fr empty notzarr
38+
fi
39+
if test "x$FEATURE_S3TESTS" = xyes ; then
40+
cat /dev/null > empty.txt
41+
# not possible: ${execdir}/s3util -f notzarr.txt -u "https://${URL}" -k "/netcdf-c/empty.s3" upload
42+
${execdir}/s3util -f notzarr.file/notzarr.txt -u "https://${URL}" -k "/netcdf-c/notzarr.s3/notzarr.txt" upload
43+
fi
44+
45+
echo "Test empty file"
46+
RET=`${execdir}/tst_notzarr "file://empty.file#mode=zarr,file"`
47+
testfailed "$RET"
48+
echo "Test non-zarr file"
49+
RET=`${execdir}/tst_notzarr "file://notzarr.file#mode=zarr,file"`
50+
testfailed "$RET"
51+
52+
if test "x$FEATURE_NCZARR_ZIP" = xyes ; then
53+
echo "Test empty zip file"
54+
RET=`${execdir}/tst_notzarr "file://empty.zip#mode=zarr,zip"`
55+
testfailed "$RET"
56+
echo "Test non-zarr zip file"
57+
RET=`${execdir}/tst_notzarr "file://notzarr.zip#mode=zarr,zip"`
58+
testfailed "$RET"
59+
fi
60+
61+
if test "x$FEATURE_S3TESTS" = xyes ; then
62+
if test 1 = 0 ; then
63+
# This test is NA for S3
64+
echo "Test empty S3 file"
65+
KEY="/netcdf-c/empty.s3"
66+
RET=`${execdir}/tst_notzarr "https://$URL${KEY}#mode=zarr,s3"`
67+
testfailed "$RET"
68+
fi
69+
echo "Test non-zarr S3 file"
70+
RET=`${execdir}/tst_notzarr "https://$URL/netcdf-c/notzarr.s3#mode=zarr,s3"`
71+
testfailed "$RET"
72+
fi
73+
74+
cd ${THISDIR}
75+
76+
# Cleanup
77+
rm -fr ${RESULTSDIR}
78+
if test "x$FEATURE_S3TESTS" = xyes ; then
79+
awsdelete "/netcdf-c"
80+
fi
81+
82+
exit 0

nczarr_test/tst_notzarr.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/* This is part of the netCDF package.
2+
Copyright 2018 University Corporation for Atmospheric Research/Unidata
3+
See COPYRIGHT file for conditions of use.
4+
5+
Test nczarr filter loading
6+
Author: Dennis Heimbigner
7+
*/
8+
9+
#include <stdlib.h>
10+
#include <stdio.h>
11+
#include <string.h>
12+
13+
#include "netcdf.h"
14+
15+
#define ERR(r) {fprintf(stderr,"fail: line %d: (%d) %s\n",__LINE__,(r),nc_strerror((r)));}
16+
17+
int
18+
main(int argc, char **argv)
19+
{
20+
int ret = NC_NOERR;
21+
int ncid;
22+
23+
if(argc < 2) {
24+
fprintf(stderr,"Usage: tst_notzarr <url>\n");
25+
exit(1);
26+
}
27+
ret = nc_open(argv[1],NC_NETCDF4,&ncid);
28+
printf("%d",ret);
29+
if(ret == NC_NOERR) nc_close(ncid);
30+
exit(0);
31+
}

nczarr_test/ut_test.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
x * Copyright 2018, University Corporation for Atmospheric Research
2+
* Copyright 2018, University Corporation for Atmospheric Research
33
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
44
*/
55

plugins/Makefile.am

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,23 +24,21 @@ endif !ISMINGW
2424
# Create an alternate directory if not installing or for noinst installs.
2525
ALTPLUGINDIR = ${abs_top_builddir}/plugins/plugindir
2626

27+
RPATH = -rpath $(abs_builddir)/.libs
28+
2729
# This is where the plugins are to be installed
2830
if ENABLE_PLUGIN_DIR
29-
plugindir = @PLUGIN_INSTALL_DIR@
31+
plugindir = @PLUGIN_INSTALL_DIR@
3032
else
31-
plugindir = ${ALTPLUGINDIR}
32-
AM_LDFLAGS += -rpath $(abs_builddir)/.libs
33+
plugindir = ${ALTPLUGINDIR}
34+
AM_LDFLAGS += ${RPATH}
3335
endif
3436

3537
plugin_LTLIBRARIES =
3638
plugins_to_install =
3739

38-
# Apparently one cannot have plugin_LTLIBRARIES and also noinst_LTLIBRARIES.
39-
# So create a tmp location for "noinst" shared libraries.
40-
# tmpdir = ${ALTPLUGINDIR}
41-
40+
# These libraries are for testing only
4241
check_LTLIBRARIES =
43-
noinst_LTLIBRARIES =
4442

4543
if ISMINGW
4644
LDADD = ${top_builddir}/liblib/libnetcdf.la
@@ -107,22 +105,22 @@ endif #ENABLE_PLUGINS
107105
# Need two distinct instances
108106
lib__nch5noop_la_SOURCES = H5Znoop.c H5Zutil.c h5noop.h
109107
lib__nch5noop1_la_SOURCES = H5Znoop1.c H5Zutil.c h5noop.h
110-
lib__nch5noop_la_LDFLAGS = $(AM_LDFLAGS) -rpath $(abs_builddir)/.libs
111-
lib__nch5noop1_la_LDFLAGS = $(AM_LDFLAGS) -rpath $(abs_builddir)/.libs
108+
lib__nch5noop_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH}
109+
lib__nch5noop1_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH}
112110

113111
# The misc filter is to allow testing of filter arguments
114112
lib__nch5misc_la_SOURCES = H5Zmisc.c H5Zutil.c h5misc.h
115-
lib__nch5misc_la_LDFLAGS = $(AM_LDFLAGS) -rpath $(abs_builddir)/.libs
113+
lib__nch5misc_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH}
116114
lib__nczmisc_la_SOURCES = NCZmisc.c
117-
lib__nczmisc_la_LDFLAGS = $(AM_LDFLAGS) -rpath $(abs_builddir)/.libs
115+
lib__nczmisc_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH}
118116

119117
# Provide a filter to test missing filter
120118
lib__nch5unknown_la_SOURCES = H5Zunknown.c
121-
lib__nch5unknown_la_LDFLAGS = $(AM_LDFLAGS) -rpath $(abs_builddir)/.libs
119+
lib__nch5unknown_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH}
122120

123121
check_LTLIBRARIES += lib__nch5noop.la lib__nch5noop1.la lib__nch5unknown.la
124122
# findplugin.sh needs these plugins, and I want to see if these get built properly
125-
noinst_LTLIBRARIES += lib__nch5misc.la lib__nczmisc.la
123+
check_LTLIBRARIES += lib__nch5misc.la lib__nczmisc.la
126124

127125
# Bzip2 is used to test more complex filters
128126
lib__nch5bzip2_la_SOURCES = H5Zbzip2.c h5bzip2.h
@@ -138,7 +136,7 @@ endif #ENABLE_FILTER_TESTING
138136
if ENABLE_PLUGIN_DIR
139137
plugin_LTLIBRARIES += $(plugins_to_install)
140138
else
141-
noinst_LTLIBRARIES += $(plugins_to_install)
139+
check_LTLIBRARIES += $(plugins_to_install)
142140
endif
143141

144142
BUILT_SOURCES = H5Znoop1.c
@@ -157,7 +155,3 @@ bzip2::
157155
tar -zxf ${BZIP2DIR}.tar.gz
158156
cd ${BZIP2DIR}; cp ${BZIP2SRC} ..; cp LICENSE ../BZIP2_LICENSE ; cd ..
159157
rm -fr ./${BZIP2DIR}
160-
161-
# Custom clean
162-
clean-local:
163-
rm -fr ${ALTPLUGINDIR}

0 commit comments

Comments
 (0)