Skip to content

Commit 6e8e1c3

Browse files
authored
Merge pull request #2278 from DennisHeimbigner/jsonconvention.dmh
Allow the read/write of JSON-valued Zarr attributes.
2 parents 1e19d4d + 771b959 commit 6e8e1c3

File tree

12 files changed

+267
-71
lines changed

12 files changed

+267
-71
lines changed

RELEASE_NOTES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ This file contains a high-level description of this package's evolution. Release
88
## 4.8.2 - TBD
99

1010

11+
* [Enhancement] Allow the read/write of JSON-valued Zarr attributes to allow
12+
for domain-specific info such as used by GDAL/Zarr. See [Github #2278](https://github.com/Unidata/netcdf-c/pull/2278).
1113
* [Enhancement] Turn on the XArray convention for NCZarr files by default. WARNING, this means that the mode should explicitly specify "nczarr" or "zarr" even if "xarray" or "noxarray" is specified. See [Github #2257](https://github.com/Unidata/netcdf-c/pull/2257).
1214
* [Enhancement] Update the documentation to match the current filter capabilities. See [Github #2249](https://github.com/Unidata/netcdf-c/pull/2249).
1315
* [Enhancement] Support installation of pre-built standard filters into user-specified location. See [Github #2318](https://github.com/Unidata/netcdf-c/pull/2318).

include/ncjson.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,12 @@ struct NCJconst {int bval; long long ival; double dval; char* sval;};
5656
extern "C" {
5757
#endif
5858

59-
/* Parse a JSON string */
59+
/* Parse a string to NCjson*/
6060
DLLEXPORT int NCJparse(const char* text, unsigned flags, NCjson** jsonp);
6161

62+
/* Parse a counted string to NCjson*/
63+
DLLEXPORT int NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp);
64+
6265
/* Reclaim a JSON tree */
6366
DLLEXPORT extern void NCJreclaim(NCjson* json);
6467

libdispatch/ncjson.c

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ static int NCJyytext(NCJparser*, char* start, size_t pdlen);
9494
static void NCJreclaimArray(struct NCjlist*);
9595
static void NCJreclaimDict(struct NCjlist*);
9696
static int NCJunescape(NCJparser* parser);
97+
static int unescape1(int c);
9798
static int listappend(struct NCjlist* list, NCjson* element);
9899

99100
#ifndef NETCDF_JSON_H
@@ -109,24 +110,28 @@ static int bytesappendc(NCJbuf* bufp, const char c);
109110

110111
int
111112
NCJparse(const char* text, unsigned flags, NCjson** jsonp)
113+
{
114+
return NCJparsen(strlen(text),text,flags,jsonp);
115+
}
116+
117+
int
118+
NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp)
112119
{
113120
int stat = NCJ_OK;
114-
size_t len;
115121
NCJparser* parser = NULL;
116122
NCjson* json = NULL;
117123

118124
/* Need at least 1 character of input */
119-
if(text == NULL || text[0] == '\0')
125+
if(len == 0 || text == NULL)
120126
{stat = NCJTHROW(NCJ_ERR); goto done;}
121127
if(jsonp == NULL) goto done;
122128
parser = calloc(1,sizeof(NCJparser));
123129
if(parser == NULL)
124130
{stat = NCJTHROW(NCJ_ERR); goto done;}
125-
len = strlen(text);
126131
parser->text = (char*)malloc(len+1+1);
127132
if(parser->text == NULL)
128133
{stat = NCJTHROW(NCJ_ERR); goto done;}
129-
strcpy(parser->text,text);
134+
memcpy(parser->text,text,len);
130135
parser->text[len] = '\0';
131136
parser->text[len+1] = '\0';
132137
parser->pos = &parser->text[0];
@@ -334,16 +339,21 @@ NCJlex(NCJparser* parser)
334339
c = *parser->pos;
335340
if(c == '\0') {
336341
token = NCJ_EOF;
337-
} else if(c <= ' ' || c == '\177') {
342+
} else if(c <= ' ' || c == '\177') {/* ignore whitespace */
343+
parser->pos++;
344+
continue;
345+
} else if(c == NCJ_ESCAPE) {
338346
parser->pos++;
339-
continue; /* ignore whitespace */
347+
c = *parser->pos;
348+
*parser->pos = unescape1(c);
349+
continue;
340350
} else if(strchr(JSON_WORD, c) != NULL) {
341351
start = parser->pos;
342352
for(;;) {
343353
c = *parser->pos++;
344354
if(c == '\0' || strchr(JSON_WORD,c) == NULL) break; /* end of word */
345355
}
346-
/* Pushback c if not whitespace */
356+
/* Pushback c */
347357
parser->pos--;
348358
count = ((parser->pos) - start);
349359
if(NCJyytext(parser,start,count)) goto done;
@@ -604,6 +614,21 @@ NCJunescape(NCJparser* parser)
604614
return NCJTHROW(NCJ_OK);
605615
}
606616

617+
/**
 * Unescape a single character.
 *
 * Given the character that followed the escape character, return the
 * character it stands for. Only b, f, n, r and t are translated; every
 * other character is returned unchanged (technically not JSON conformant).
 *
 * @param c the character following the escape character
 * @return the translated character, or c itself when no translation applies
 */
static int
unescape1(int c)
{
    switch (c) {
    case 'b': return '\b';
    case 'f': return '\f';
    case 'n': return '\n';
    case 'r': return '\r';
    case 't': return '\t';
    default: break; /* no translation: pass the character through */
    }
    return c;
}
631+
607632
#ifdef NCJDEBUG
608633
static char*
609634
tokenname(int token)
@@ -896,7 +921,7 @@ NCJunparseR(const NCjson* json, NCJbuf* buf, unsigned flags)
896921
if(json->list.len > 0 && json->list.contents != NULL) {
897922
int shortlist = 0;
898923
for(i=0;!shortlist && i < json->list.len;i+=2) {
899-
if(i > 0) bytesappendc(buf,NCJ_COMMA);
924+
if(i > 0) {bytesappendc(buf,NCJ_COMMA);bytesappendc(buf,' ');};
900925
NCJunparseR(json->list.contents[i],buf,flags); /* key */
901926
bytesappendc(buf,NCJ_COLON);
902927
bytesappendc(buf,' ');
@@ -945,7 +970,7 @@ escape(const char* text, NCJbuf* buf)
945970
case '\n': replace = 'n'; break;
946971
case '\r': replace = 'r'; break;
947972
case '\t': replace = 't'; break;
948-
case NCJ_QUOTE: replace = '\''; break;
973+
case NCJ_QUOTE: replace = '\"'; break;
949974
case NCJ_ESCAPE: replace = '\\'; break;
950975
default: break;
951976
}

libnczarr/zsync.c

Lines changed: 77 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,17 @@
1212

1313
#undef FILLONCLOSE
1414

15+
/*mnemonics*/
16+
#define DICTOPEN '{'
17+
#define DICTCLOSE '}'
18+
1519
/* Forward */
1620
static int ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp);
1721
static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose);
1822

1923
static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp);
2024
static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypes);
21-
static int zconvert(nc_type typeid, size_t typelen, void* dst, NCjson* src);
25+
static int zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst);
2226
static int computeattrinfo(const char* name, NClist* atypes, NCjson* values,
2327
nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap);
2428
static int parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* subgrps);
@@ -37,6 +41,8 @@ static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, s
3741
static int inferattrtype(NCjson* values, nc_type* typeidp);
3842
static int mininttype(unsigned long long u64, int negative);
3943
static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims);
44+
static int read_dict(NCjson* jdict, NCjson** jtextp);
45+
static int write_dict(size_t len, const void* data, NCjson** jsonp);
4046

4147
/**************************************************/
4248
/**************************************************/
@@ -791,6 +797,7 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc
791797
Note that this does not push to the file.
792798
Also note that attributes of length 1 are stored as singletons, not arrays.
793799
This is to be more consistent with pure zarr.
800+
Also implements the JSON dictionary convention.
794801
@param attlist - [in] the attributes to dictify
795802
@param jattrsp - [out] the json'ized att list
796803
@return NC_NOERR
@@ -800,7 +807,7 @@ static int
800807
ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp)
801808
{
802809
int stat = NC_NOERR;
803-
int i;
810+
int i, isdict;
804811
NCjson* jattrs = NULL;
805812
NCjson* akey = NULL;
806813
NCjson* jdata = NULL;
@@ -810,9 +817,18 @@ ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp)
810817
/* Iterate over the attribute list */
811818
for(i=0;i<ncindexsize(attlist);i++) {
812819
NC_ATT_INFO_T* att = (NC_ATT_INFO_T*)ncindexith(attlist,i);
820+
isdict = 0;
813821
/* Create the attribute dict value*/
814-
if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata)))
815-
goto done;
822+
if(att->nc_typeid == NC_CHAR
823+
&& ((char*)att->data)[0] == DICTOPEN
824+
&& ((char*)att->data)[att->len-1] == DICTCLOSE) {
825+
/* this is subject to the JSON dictionary convention? */
826+
if(write_dict(att->len,att->data,&jdata)==NC_NOERR) isdict=1;
827+
}
828+
if(!isdict) {
829+
if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata)))
830+
goto done;
831+
}
816832
if((stat = NCJinsert(jattrs,att->hdr.name,jdata))) goto done;
817833
jdata = NULL;
818834
}
@@ -933,7 +949,7 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis
933949

934950
/* Convert a json value to actual data values of an attribute. */
935951
static int
936-
zconvert(nc_type typeid, size_t typelen, void* dst0, NCjson* src)
952+
zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst0)
937953
{
938954
int stat = NC_NOERR;
939955
int i;
@@ -1019,19 +1035,28 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp
10191035
void* data = NULL;
10201036
size_t typelen;
10211037
nc_type typeid = NC_NAT;
1038+
NCjson* jtext = NULL;
10221039
int reclaimvalues = 0;
10231040

10241041
/* Get assumed type */
10251042
if(typeidp) typeid = *typeidp;
10261043
if(typeid == NC_NAT) if((stat = inferattrtype(values,&typeid))) goto done;
10271044
if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;}
10281045

1046+
if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen)))
1047+
goto done;
1048+
10291049
/* Collect the length of the attribute; might be a singleton */
10301050
switch (NCJsort(values)) {
1031-
case NCJ_DICT: stat = NC_ENCZARR; goto done;
10321051
case NCJ_ARRAY:
10331052
count = NCJlength(values);
10341053
break;
1054+
case NCJ_DICT:
1055+
/* Apply the JSON dictionary convention and convert to string */
1056+
if((stat = read_dict(values,&jtext))) goto done;
1057+
values = jtext; jtext = NULL;
1058+
reclaimvalues = 1;
1059+
/* fall thru */
10351060
case NCJ_STRING: /* requires special handling as an array of characters; also look out for empty string */
10361061
if(typeid == NC_CHAR) {
10371062
count = strlen(NCJstring(values));
@@ -1044,18 +1069,16 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp
10441069
break;
10451070
}
10461071

1047-
if(count > 0) {
1072+
if(count > 0 && data == NULL) {
10481073
/* Allocate data space */
1049-
if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen)))
1050-
goto done;
10511074
if(typeid == NC_CHAR)
10521075
data = malloc(typelen*(count+1));
10531076
else
10541077
data = malloc(typelen*count);
10551078
if(data == NULL)
10561079
{stat = NC_ENOMEM; goto done;}
10571080
/* convert to target type */
1058-
if((stat = zconvert(typeid, typelen, data, values)))
1081+
if((stat = zconvert(typeid, typelen, values, data)))
10591082
goto done;
10601083
}
10611084
if(lenp) *lenp = count;
@@ -1094,7 +1117,9 @@ inferattrtype(NCjson* value, nc_type* typeidp)
10941117
case NCJ_NULL:
10951118
typeid = NC_CHAR;
10961119
return NC_NOERR;
1097-
case NCJ_DICT: /* fall thru */
1120+
case NCJ_DICT:
1121+
typeid = NC_CHAR;
1122+
goto done;
10981123
case NCJ_UNDEF:
10991124
return NC_EINVAL;
11001125
default: /* atomic */
@@ -2302,42 +2327,48 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra
23022327
return THROW(stat);
23032328
}
23042329

2305-
#if 0
2306-
Not currently used
2307-
Special compatibility case:
2308-
if the value of the attribute is a dictionary,
2309-
or an array with non-atomic values, then
2310-
then stringify it and pretend it is of char type.
2311-
/* Return 1 if this json is not an
2312-
atomic value or an array of atomic values.
2313-
That is, it does not look like valid
2314-
attribute data.
2330+
/**
2331+
Implement the JSON convention for dictionaries.
2332+
2333+
Reading: If the value of the attribute is a dictionary, then stringify
2334+
it as the value and make the attribute be of type "char".
2335+
2336+
Writing: if the attribute is of type char and looks like a JSON dictionary,
2337+
then parse it as JSON and use that as its value in .zattrs.
23152338
*/
2339+
23162340
static int
2317-
iscomplexjson(NCjson* j)
2341+
read_dict(NCjson* jdict, NCjson** jtextp)
23182342
{
2319-
int i;
2320-
switch(NCJsort(j)) {
2321-
case NCJ_ARRAY:
2322-
/* verify that the elements of the array are not complex */
2323-
for(i=0;i<NCJlength(j);i++) {
2324-
switch (NCJith(j,NCJsort(i)))) {
2325-
case NCJ_DICT:
2326-
case NCJ_ARRAY:
2327-
case NCJ_UNDEF:
2328-
case NCJ_NULL:
2329-
return 1;
2330-
default: break;
2331-
}
2332-
}
2333-
return 0;
2334-
case NCJ_DICT:
2335-
case NCJ_UNDEF:
2336-
case NCJ_NULL:
2337-
break;
2338-
default:
2339-
return 0;
2340-
}
2341-
return 1;
2343+
int stat = NC_NOERR;
2344+
NCjson* jtext = NULL;
2345+
char* text = NULL;
2346+
2347+
if(jdict == NULL) {stat = NC_EINVAL; goto done;}
2348+
if(NCJsort(jdict) != NCJ_DICT) {stat = NC_EINVAL; goto done;}
2349+
if(NCJunparse(jdict,0,&text)) {stat = NC_EINVAL; goto done;}
2350+
if(NCJnewstring(NCJ_STRING,text,&jtext)) {stat = NC_EINVAL; goto done;}
2351+
*jtextp = jtext; jtext = NULL;
2352+
done:
2353+
NCJreclaim(jtext);
2354+
nullfree(text);
2355+
return stat;
23422356
}
2343-
#endif
2357+
2358+
static int
2359+
write_dict(size_t len, const void* data, NCjson** jsonp)
2360+
{
2361+
int stat = NC_NOERR;
2362+
NCjson* jdict = NULL;
2363+
2364+
assert(jsonp != NULL);
2365+
if(NCJparsen(len,(char*)data,0,&jdict))
2366+
{stat = NC_EINVAL; goto done;}
2367+
if(NCJsort(jdict) != NCJ_DICT)
2368+
{stat = NC_EINVAL; goto done;}
2369+
*jsonp = jdict; jdict = NULL;
2370+
done:
2371+
NCJreclaim(jdict);
2372+
return stat;
2373+
}
2374+

nc_test4/tst_broken_files.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ main() {
3131
if(fwrite(TRUNCATED_FILE_CONTENT, sizeof(char), sizeof(TRUNCATED_FILE_CONTENT), fp) != sizeof(TRUNCATED_FILE_CONTENT)) ERR;
3232
fclose(fp);
3333

34+
3435
int ncid, stat;
3536
stat = nc_open(FILE_NAME, 0, &ncid);
3637
if (stat != NC_EHDFERR && stat != NC_ENOFILTER && stat != NC_ENOTNC) ERR;
38+
3739
}
3840

3941
{

ncdump/tst_nccopy3.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
99
. ../test_common.sh
1010

11-
set -x
1211
set -e
1312

1413
echo ""

ncdump/tst_output.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi
44
. ../test_common.sh
55

66
# This shell script tests the output from several previous tests.
7-
set -x
87
set -e
98

109
echo ""

nczarr_test/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ IF(ENABLE_TESTS)
7373
BUILD_BIN_TEST(zmapio ${COMMONSRC})
7474
TARGET_INCLUDE_DIRECTORIES(zmapio PUBLIC ../libnczarr)
7575
BUILD_BIN_TEST(zhex)
76-
BUILD_BIN_TEST(zisjson)
76+
BUILD_BIN_TEST(zisjson ${COMMONSRC})
7777
TARGET_INCLUDE_DIRECTORIES(zisjson PUBLIC ../libnczarr)
7878
BUILD_BIN_TEST(zs3parse ${COMMONSRC})
7979
TARGET_INCLUDE_DIRECTORIES(zs3parse PUBLIC ../libnczarr)
@@ -108,6 +108,7 @@ IF(ENABLE_TESTS)
108108
add_sh_test(nczarr_test run_interop)
109109
add_sh_test(nczarr_test run_misc)
110110
add_sh_test(nczarr_test run_nczarr_fill)
111+
add_sh_test(nczarr_test run_jsonconvention)
111112

112113
BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC})
113114
add_sh_test(nczarr_test run_quantize)

0 commit comments

Comments
 (0)