Skip to content

Commit f1506d5

Browse files
Change (again), and hopefully simplify, the file model inference algorithm.
* For URL paths, the new approach essentially centralizes all information in the URL into the "#mode=" fragment key and uses that value to determine the dispatcher for (most) URLs. * The new approach has the following steps: 1. canonicalize the path if it is a URL. 2. use the mode= fragment key to determine the dispatcher 3. if dispatcher still not determined, then use the mode flags argument to nc_open/nc_create to determine the dispatcher. 4. if the path points to something readable, attempt to read the magic number at the front, and use that to determine the dispatcher. this case may override all previous cases. * Misc changes. 1. Update documentation 2. Moved some unit tests from libdispatch to unit_test directory. 3. Fixed use of wrong #ifdef macro in test_filter_reg.c [I think this may fix a previously reported esupport query].
1 parent 40cf6fb commit f1506d5

File tree

30 files changed

+588
-580
lines changed

30 files changed

+588
-580
lines changed

debug/nc_test/Make0

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,52 @@
11
# Test c output
2-
T=tst_diskless5
2+
T=tst_vars3
33

4-
#H58=8
5-
H510=10
4+
#SRC=hdf5plugins/H5Zmisc.c
65

7-
#ARGS=diskless persist
6+
#CMD=valgrind --leak-check=full
7+
CMD=gdb --args
88

9-
#SRC=
10-
11-
#CMD=env HDF5_DEBUG=trace
12-
#CMD=export NETCDF_LOG_LEVEL=5 ;gdb --args
13-
CMD=valgrind --leak-check=full
14-
#CMD=gdb --args
9+
#FILTER=H5Zmisc
10+
#FILTEROBJ=hdf5plugins/${FILTER}.o
1511

1612
#PAR=1
13+
#SZIP=1
1714

18-
ifdef H58
19-
H5L=/usr/local
20-
endif
21-
ifdef H510
22-
H5L=/opt
23-
endif
15+
#CFLAGS = -Wall -Wno-unused-variable -Wno-unused-function -g -O0 -I.. -I../include
16+
CFLAGS = -Wall -g -O0 -I.. -I../include
2417

25-
CFLAGS=-Wall -Wno-unused-variable -Wno-unused-function -g -O0 -I.. -I../include
18+
LDFLAGS = ../liblib/.libs/libnetcdf.a -L/usr/local/lib -lhdf5_hl -lhdf5 -lz -ldl -lcurl -lm -lmfhdf -ldf
2619

2720
ifdef PAR
2821
CC=mpicc
29-
#CC=/usr/local/bin/mpicc
30-
LDFLAGS=../liblib/.libs/libnetcdf.a -L${H5L}/lib -lhdf5_hl -lhdf5 -lz -ldl -lcurl -lpnetcdf -lmpich -lm
22+
LDFLAGS += -lmpich
3123
else
3224
CC=gcc
33-
#LDFLAGS=../liblib/.libs/libnetcdf.a -L${H5L}/lib -lhdf5_hl -lhdf5 -lz -lm -lcurl
34-
LDFLAGS=../liblib/.libs/libnetcdf.a -L${H5L}/lib -lhdf5_hl -lhdf5 -lz -ldl -lm -lcurl
3525
endif
3626

37-
# cd .. ; ${MAKE} all
27+
ifdef SZIP
28+
LDFLAGS += -lsz -laec
29+
endif
3830

39-
LLP=${H5L}/lib:${LD_LIBRARY_PATH}
31+
LLP=/usr/local/lib:${LD_LIBRARY_PATH}
4032

4133
all:: cmp
4234
export LD_LIBRARY_PATH=${LLP}; export CFLAGS; export LDFLAGS; \
4335
${CMD} ./t ${ARGS}
4436

4537
cmp::
4638
export LD_LIBRARY_PATH=${LLP}; export CFLAGS; export LDFLAGS; \
47-
${CC} -o t ${CFLAGS} ${T}.c ${SRC} ${LDFLAGS}; \
39+
${CC} -o t ${CFLAGS} ${T}.c ${SRC} ${FILTEROBJ} ${LDFLAGS}
40+
41+
filter::
42+
${CC} ${CFLAGS} -c hdf5plugins/${FILTER}.c ${LDFLAGS}
4843

4944
cpp::
5045
${CC} -E ${CFLAGS} ${T}.c > ${T}.txt
5146

52-
#TS = tst_diskless tst_diskless2 tst_diskless3 tst_diskless4 tst_diskless5 tst_diskless6
53-
TS = tst_diskless5
54-
several::
47+
H5=h5testszip
48+
EXT=testszip.nc
49+
h5::
5550
export LD_LIBRARY_PATH=${LLP}; export CFLAGS; export LDFLAGS; \
56-
for f in ${TS} ; do ${CC} -o ${TS} ${CFLAGS} ${TS}.c ${SRC} ${LDFLAGS}; done
57-
51+
${CC} -o h5 ${CFLAGS} ${H5}.c ${SRC} ${LDFLAGS}; \
52+
${CMD} ./h5 ${EXT}

docs/byterange.dox

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@ the remote server supports byte-range access.
2020
Two examples:
2121

2222
1. An Amazon S3 object containing a netcdf classic file.
23-
- location: "http://149.165.169.123:8080/thredds/fileServer/testdata/2004050300_eta_211.nc#bytes"
23+
- location: "http://149.165.169.123:8080/thredds/fileServer/testdata/2004050300_eta_211.nc#mode=bytes"
2424
2. A Thredds Server dataset supporting the Thredds HTTPServer protocol.
2525
and containing a netcdf enhanced file.
26-
- location: "http://noaa-goes16.s3.amazonaws.com/ABI-L1b-RadC/2017/059/03/OR_ABI-L1b-RadC-M3C13_G16_s20170590337505_e20170590340289_c20170590340316.nc#bytes"
26+
- location: "http://noaa-goes16.s3.amazonaws.com/ABI-L1b-RadC/2017/059/03/OR_ABI-L1b-RadC-M3C13_G16_s20170590337505_e20170590340289_c20170590340316.nc#mode=bytes"
2727

2828
Other remote servers may also provide byte-range access in a similar form.
2929

3030
It is important to note that this is not intended as a true
31-
production capability because, as is known, this kind of access
31+
production capability because it is believed that this kind of access
3232
can be quite slow. In addition, the byte-range IO drivers do not
3333
currently do any sort of optimization or caching.
3434

@@ -42,15 +42,15 @@ This capability requires access to *libcurl*, and an error will occur
4242
if byterange is enabled, but *libcurl* could not be located.
4343
In this, it is similar to the DAP2 and DAP4 capabilities.
4444

45-
Note also that the term "http" is often used as a synonym for *byterange*.
45+
Note also that here, the term "http" is often used as a synonym for *byterange*.
4646

4747
# Run-time Usage {#byterange_url}
4848

4949
In order to use this capability at run-time, with *ncdump* for
5050
example, it is necessary to provide a URL pointing to the basic
5151
dataset to be accessed. The URL must be annotated to tell the
5252
netcdf-c library that byte-range access should be used. This is
53-
indicated by appending the phrase ````#bytes````
53+
indicated by appending the phrase ````#mode=bytes````
5454
to the end of the URL.
5555
The two examples above show how this will look.
5656

docs/internal.dox

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -494,39 +494,45 @@ _libdispatch/dinfermodel.c_ via the API in _include/ncmodel.h_.
494494
The term _model_ is used here to include (at least) the following
495495
information (see the structure type _NCmodel_ in _include/ncmodel.h_).
496496

497-
1. format -- this is an NC_FORMAT_XXX value defining the file format
498-
as seen by the user program.
499-
2. version -- the specific version of the format.
500-
3. iosp -- this is and NC_IOSP_XXX value describing internal protocols to use.
501-
4. impl -- this is an NC_FORMATX_XXX value defining, in effect, the
497+
1. impl -- this is an NC_FORMATX_XXX value defining, in effect, the
502498
dispatch table to use.
503499

504-
For example, if the format was NC_FORMAT_CLASSIC, then the client
505-
will see the netcdf-3 data model, as modified by the version. If the
506-
version was 5, for example, then that indicates the file format
507-
is actually NC_FORMAT_64BIT_DATA, which is a variant of the netcdf-3
508-
format.
509-
510-
The _iosp_ provides information about how the protocol the
511-
dispatch table will use to access the actual dataset. If the iosp
512-
is NC_IOSP_S3RAW, then it indicates that the dispatcher, NC_FORMATX_NC3,
513-
for example, will access the dataset using the Amazon S3 REST API.
514-
515500
The construction of the model is primarily carried out by the function
516-
_NC_infermodel()_. It is given the following parameters:
501+
_NC_infermodel()_ (in _libdispatch/dinfermodel.c_).
502+
It is given the following parameters:
517503
1. path -- (IN) absolute file path or URL
518-
2. omodep -- (IN/OUT) the set of mode flags given to _NC_open_ or _NC_create_.
504+
2. modep -- (IN/OUT) the set of mode flags given to _NC_open_ or _NC_create_.
519505
3. iscreate -- (IN) distinguish open from create.
520506
4. useparallel -- (IN) indicate if parallel IO can be used.
521507
5. params -- (IN/OUT) arbitrary data dependent on the mode and path.
522-
6. model -- (IN/OUT) place to store inferred model information (e.g. format
523-
or version).
524-
7. newpathp -- (OUT) sometimes, it is necessary to rewrite the path.
508+
6. model -- (IN/OUT) place to store inferred model.
509+
7. newpathp -- (OUT) the canonical rewrite of the path argument.
525510

526511
As a rule, these values are used in this order to infer the model.
527512
1. file contents -- highest precedence
528-
2. url (if it is one) -- using the protocol and fragment arguments
513+
2. url (if it is one) -- using the "mode=" key in the fragment (see below).
529514
3. mode flags
530515
4. default format -- lowest precedence
531516

517+
If the path appears to be a URL, then it is parsed.
518+
Information is extracted from the URL, and specifically,
519+
the fragment key "mode=" is the critical element.
520+
The URL will be rewritten to a canonical form with the following
521+
changes.
522+
1. The fragment part ("#..." at the end) is parsed and the "mode=" key
523+
is extracted and its value is converted to a list of tags.
524+
2. If the leading protocol is not http/https, then the protocol is added
525+
to the mode list. That protocol is then replaced with either http or https.
526+
3. Certain singleton values in the fragment are extracted and removed
527+
and added to the mode list. Consider, for example, "http://....#dap4".
528+
The "dap4" singleton is removed and added to the mode list.
529+
4. For backward compatibility, the values of "proto=" and "protocol="
530+
are removed from the fragment and their value is added to the mode list.
531+
5. The final mode list is converted to a comma separated string
532+
and re-inserted into the fragment.
533+
6. The final mode list is modified to remove duplicates.
534+
535+
The final result is the canonical form of the URL and is returned in the
536+
newpathp argument described above.
537+
532538
*/

include/nc.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@
88
#include "config.h"
99
#include "netcdf.h"
1010

11-
/* Forward */
12-
struct NCmodel;
13-
1411
/* There's an external ncid (ext_ncid) and an internal ncid
1512
* (int_ncid). The ext_ncid is the ncid returned to the user. If
1613
* the user has opened or created a netcdf-4 file, then the
@@ -30,7 +27,6 @@ typedef struct NC {
3027
void* dispatchdata; /*per-'file' data; points to e.g. NC3_INFO data*/
3128
char* path;
3229
int mode; /* as provided to nc_open/nc_create */
33-
struct NCmodel* model; /* as determined by libdispatch/dfile.c */
3430
} NC;
3531

3632
/*
@@ -79,6 +75,6 @@ extern int iterate_NCList(int i,NC**); /* Walk from 0 ...; ERANGE return => stop
7975

8076
/* Defined in nc.c */
8177
extern void free_NC(NC*);
82-
extern int new_NC(const struct NC_Dispatch*, const char*, int, struct NCmodel*, NC**);
78+
extern int new_NC(const struct NC_Dispatch*, const char*, int, NC**);
8379

8480
#endif /* _NC_H_ */

include/ncmodel.h

Lines changed: 3 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9,39 +9,20 @@
99
#ifndef NCINFERMODEL_H
1010
#define NCINFERMODEL_H
1111

12-
/* Define the io handler to be used to do lowest level
13-
access. This is above the libcurl level and below the
14-
dispatcher level. This is only used for remote
15-
datasets or for implementations where the implementation
16-
multiplexes more than one IOSP in a single dispatcher.
17-
*/
18-
#define NC_IOSP_FILE (1)
19-
#define NC_IOSP_MEMORY (2)
20-
#define NC_IOSP_DAP2 (3)
21-
#define NC_IOSP_DAP4 (4)
22-
#define NC_IOSP_UDF (5) /*Placeholder since we do not know IOSP for UDF*/
23-
#define NC_IOSP_HTTP (6)
24-
2512
/* Track the information that will help us
2613
infer how to access the data defined by
27-
path + omode.
14+
path + omode + (sometimes) file content.
2815
*/
2916
typedef struct NCmodel {
30-
int format; /* NC_FORMAT_XXX value */
3117
int impl; /* NC_FORMATX_XXX value */
32-
int iosp; /* NC_IOSP_XXX value (above) */
18+
int format; /* NC_FORMAT_XXX value; Used to remember extra info; */
3319
} NCmodel;
3420

3521
/* Keep compiler quiet */
3622
struct NCURI;
3723
struct NC_dispatch;
3824

39-
#if 0
40-
/* return first IOSP or NULL if none */
41-
EXTERNL int NC_urliosp(struct NCURI* u);
42-
#endif
43-
44-
/* Infer model format and implementation */
25+
/* Infer model implementation */
4526
EXTERNL int NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void* params, NCmodel* model, char** newpathp);
4627

4728
/**
@@ -53,15 +34,4 @@ EXTERNL int NC_infermodel(const char* path, int* omodep, int iscreate, int usepa
5334
*/
5435
EXTERNL int nc__testurl(const char* path, char** basenamep);
5536

56-
#if 0
57-
/* allow access url parse and params without exposing nc_url.h */
58-
EXTERNL int NCDAP_urlparse(const char* s, void** dapurl);
59-
EXTERNL void NCDAP_urlfree(void* dapurl);
60-
EXTERNL const char* NCDAP_urllookup(void* dapurl, const char* param);
61-
62-
/* Ping a specific server */
63-
EXTERNL int NCDAP2_ping(const char*);
64-
EXTERNL int NCDAP4_ping(const char*);
65-
#endif
66-
6737
#endif /*NCINFERMODEL_H*/

include/ncrc.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,6 @@ extern char* NC_entityescape(const char* s);
6161
extern int NC_readfile(const char* filename, NCbytes* content);
6262
extern int NC_writefile(const char* filename, size_t size, void* content);
6363
extern char* NC_mktmp(const char* base);
64-
64+
extern int NC_getmodelist(const char* url, NClist** modelistp);
65+
extern int NC_testmode(const char* path, const char* tag);
6566
#endif /*NCRC_H*/

include/ncuri.h

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,6 @@
66
#ifndef NCURI_H
77
#define NCURI_H
88

9-
/* Define error codes */
10-
#define NCU_OK (0)
11-
#define NCU_EINVAL (1) /* Generic, mostly means bad argument */
12-
#define NCU_EBADURL (2)
13-
#define NCU_ENOMEM (3)
14-
#define NCU_EPROTO (4)
15-
#define NCU_EPATH (5)
16-
#define NCU_EUSRPWD (6)
17-
#define NCU_EHOST (7)
18-
#define NCU_EPORT (8)
19-
#define NCU_EPARAMS (9)
20-
#define NCU_ENOPARAM (10)
21-
#define NCU_ECONSTRAINTS (11)
229

2310
/* Define flags to control what is included by ncuribuild*/
2411
#define NCURIPATH 1
@@ -79,6 +66,9 @@ extern int ncurisetprotocol(NCURI*,const char* newprotocol);
7966
/* Replace the constraints */
8067
EXTERNL int ncurisetquery(NCURI*,const char* query);
8168

69+
/* Replace the fragment list */
70+
extern int ncurisetfragments(NCURI*, const char* fragments);
71+
8272
/* Construct a complete NC URI; caller frees returned string */
8373
EXTERNL char* ncuribuild(NCURI*,const char* prefix, const char* suffix, int flags);
8474

libdap2/ncd2dispatch.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ NCD2_open(const char* path, int mode, int basepe, size_t *chunksizehintp,
322322
dapcomm->oc.rawurltext = strdup(path);
323323
#endif
324324

325-
if(ncuriparse(dapcomm->oc.rawurltext,&dapcomm->oc.url) != NCU_OK)
325+
if(ncuriparse(dapcomm->oc.rawurltext,&dapcomm->oc.url))
326326
{ncstat = NC_EURL; goto done;}
327327

328328
if(!constrainable(dapcomm->oc.url))

libdap4/d4file.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ NCD4_open(const char * path, int mode,
6565
d4info->controller = (NC*)nc;
6666

6767
/* Parse url and params */
68-
if(ncuriparse(nc->path,&d4info->uri) != NCU_OK)
68+
if(ncuriparse(nc->path,&d4info->uri))
6969
{ret = NC_EDAPURL; goto done;}
7070

7171
/* Load auth info from rc file */

libdap4/d4util.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ ncd4__testurl(const char* path, char** basenamep)
3737
{
3838
NCURI* uri;
3939
int ok = NC_NOERR;
40-
if(ncuriparse(path,&uri) != NCU_OK)
40+
if(ncuriparse(path,&uri))
4141
ok = NC_EURL;
4242
else {
4343
char* slash = (uri->path == NULL ? NULL : strrchr(uri->path, '/'));

0 commit comments

Comments
 (0)