Skip to content

Commit fcf0fa7

Browse files
authored
Merge pull request #2134 from DennisHeimbigner/zarrs3part2.dmh
2 parents 228e7f5 + 6f3bfde commit fcf0fa7

59 files changed

Lines changed: 2329 additions & 636 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CMakeLists.txt

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,7 +1049,6 @@ IF(!MSVC)
10491049
#FIND_LIBRARY(SZIP PATH NAMES szip sz)
10501050
SET(SZIP_LIBRARY ${SZIP})
10511051
ENDIF()
1052-
message("xxx: ${SZIP_FOUND} ; ${SZIP}")
10531052
# Define a test flag for have szip library
10541053
IF(SZIP_FOUND)
10551054
INCLUDE_DIRECTORIES(${SZIP_INCLUDE_DIRS})
@@ -1133,10 +1132,10 @@ ENDIF()
11331132
# See if aws-s3-sdk is available
11341133
# But only if enabled
11351134
IF(ENABLE_NCZARR_S3)
1136-
find_package(AWSSDK REQUIRED)
1137-
set(SERVICE s3)
1138-
AWSSDK_DETERMINE_LIBS_TO_LINK(SERVICE AWSSDK_LINK_LIBRARIES)
1135+
find_package(AWSSDK REQUIRED COMPONENTS s3;core)
11391136
IF(AWSSDK_FOUND)
1137+
SET(service s3;core)
1138+
AWSSDK_DETERMINE_LIBS_TO_LINK(service AWS_LINK_LIBRARIES)
11401139
SET(ENABLE_S3_SDK ON CACHE BOOL "S3 SDK" FORCE)
11411140
ELSE()
11421141
SET(ENABLE_S3_SDK OFF CACHE BOOL "S3 SDK" FORCE)
@@ -1712,7 +1711,17 @@ CHECK_FUNCTION_EXISTS(fileno HAVE_FILENO)
17121711

17131712
CHECK_FUNCTION_EXISTS(clock_gettime HAVE_CLOCK_GETTIME)
17141713
CHECK_SYMBOL_EXISTS("struct timespec" "time.h" HAVE_STRUCT_TIMESPEC)
1714+
CHECK_FUNCTION_EXISTS(atexit HAVE_ATEXIT)
17151715

1716+
# Control invoking nc_finalize at exit
1717+
OPTION(ENABLE_ATEXIT_FINALIZE "Invoke nc_finalize at exit." ON)
1718+
IF(NOT HAVE_ATEXIT)
1719+
IF(ENABLE_ATEXIT_FINALIZE AND NOT HAVE_ATEXIT)
1720+
SET(ENABLE_ATEXIT_FINALIZE OFF CACHE BOOL "Enable ATEXIT" FORCE)
1721+
MESSAGE(WARNING "ENABLE_ATEXIT_FINALIZE set but atexit() function not defined")
1722+
ENDIF()
1723+
ENDIF()
1724+
17161725
# Check to see if MAP_ANONYMOUS is defined.
17171726
IF(MSVC)
17181727
MESSAGE(WARNING "mmap not supported under visual studio: disabling MMAP support.")

RELEASE_NOTES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release
77

88
## 4.8.2 - TBD
99

10+
* [Enhancement] Support byte-range reading of netcdf-3 files stored in private buckets in S3. See [Github #2134](https://github.com/Unidata/netcdf-c/pull/2134)
1011
* [Enhancement] Support Amazon S3 access for NCZarr. Also support use of the existing Amazon SDK credentials system. See [Github #2114](https://github.com/Unidata/netcdf-c/pull/2114)
1112
* [Bug Fix] Fix string allocation error in H5FDhttp.c. See [Github #2127](https://github.com/Unidata/netcdf-c/pull/2127).
1213
* [Bug Fix] Apply patches for ezxml and for selected oss-fuzz detected errors. See [Github #2125](https://github.com/Unidata/netcdf-c/pull/2125).

config.h.cmake.in

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ are set when opening a binary file on Windows. */
126126
/* set this only when building a DLL under MinGW */
127127
#cmakedefine DLL_NETCDF 1
128128

129+
/* if true, use atexit */
130+
#cmakedefine ENABLE_ATEXIT_FINALIZE 1
131+
129132
/* if true, build byte-range Client */
130133
#cmakedefine ENABLE_BYTERANGE 1
131134

@@ -183,10 +186,12 @@ are set when opening a binary file on Windows. */
183186
/* Define to 1 if you have `alloca', as a function or macro. */
184187
#cmakedefine HAVE_ALLOCA 1
185188

186-
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
187-
*/
189+
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). */
188190
#cmakedefine HAVE_ALLOCA_H 1
189191

192+
/* Define to 1 if you have the `atexit' function. */
193+
#cmakedefine HAVE_ATEXIT 1
194+
190195
/* Define to 1 if you have hdf5_coll_metadata_ops */
191196
#cmakedefine HDF5_HAS_COLL_METADATA_OPS 1
192197

configure.ac

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1113,6 +1113,29 @@ if test "x$enable_byterange" = xyes; then
11131113
AC_DEFINE([ENABLE_BYTERANGE], [1], [if true, support byte-range read of remote datasets.])
11141114
fi
11151115

1116+
# Does the user want to disable atexit?
1117+
AC_MSG_CHECKING([whether nc_finalize should be invoked at exit])
1118+
AC_ARG_ENABLE([atexit-finalize],
1119+
[AS_HELP_STRING([--disable-atexit-finalize],
1120+
[disable invoking nc_finalize at exit])])
1121+
test "x$enable_atexit_finalize" = xno || enable_atexit_finalize=yes
1122+
AC_MSG_RESULT($enable_atexit_finalize)
1123+
1124+
# Check for atexit
1125+
AC_CHECK_FUNCS([atexit])
1126+
1127+
# If no atexit, then disable atexit finalize
1128+
if test "x$enable_atexit_finalize" = xyes ; then
1129+
if test "x$ac_cv_func_atexit" = xno ; then
1130+
enable_atexit_finalize=no
1131+
AC_MSG_ERROR([atexit() required for enable-atexit-finalize.])
1132+
fi
1133+
fi
1134+
1135+
if test "x$enable_atexit_finalize" = xyes ; then
1136+
AC_DEFINE([ENABLE_ATEXIT_FINALIZE], [1], [If true, enable nc_finalize via atexit()])
1137+
fi
1138+
11161139
# Need libdl(d) for plugins
11171140
AC_CHECK_LIB([dl],[dlopen],[have_libdld=yes],[have_libdld=no])
11181141
if test "x$have_libdld" = "xyes" ; then

docs/nczarr.md

Lines changed: 85 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -554,24 +554,6 @@ The relevant tests for S3 support are in the _nczarr_test_ directory.
554554
Currently, by default, testing of S3 with NCZarr is supported only for Unidata members of the NetCDF Development Group.
555555
This is because it uses a Unidata-specific bucket that is inaccessible to the general user.
556556
557-
However, an untested mechanism exists by which others may be able to run the S3 specific tests.
558-
If someone else wants to attempt these tests, then they need to define the following environment variables:
559-
* NCZARR_S3_TEST_HOST=\<host\>
560-
* NCZARR_S3_TEST_BUCKET=\<bucket-name\>
561-
562-
This assumes a Path Style address (see above) where
563-
* host -- the complete host part of the url
564-
* bucket -- a bucket in which testing can occur without fear of damaging anything.
565-
566-
_Example:_
567-
568-
````
569-
NCZARR_S3_TEST_HOST=s3.us-west-1.amazonaws.com
570-
NCZARR_S3_TEST_BUCKET=testbucket
571-
````
572-
If anyone tries to use this mechanism, it would be appreciated
573-
it any difficulties were reported to Unidata as a Github issue.
574-
575557
# Appendix B. Building aws-sdk-cpp {#nczarr_s3sdk}
576558
577559
In order to use the S3 storage driver, it is necessary to install the Amazon [aws-sdk-cpp library](https://github.com/aws/aws-sdk-cpp.git).
@@ -580,19 +562,22 @@ Building this package from scratch has proven to be a formidable task.
580562
This appears to be due to dependencies on very specific versions of,
581563
for example, openssl.
582564
583-
However, the following context does work. Of course your mileage may vary.
565+
## **nix** Build
566+
567+
For linux, the following context works. Of course your mileage may vary.
584568
* OS: ubuntu 21
585-
* aws-sdk-cpp version 1.9.96 or later?
569+
* aws-sdk-cpp version 1.9.96 (or later?)
586570
* Required installed libraries: openssl, libcurl, cmake, ninja (ninja-build in apt)
587571
588-
The recipe used:
572+
### AWS-SDK-CPP Build Recipe
573+
589574
````
590575
git clone --recurse-submodules https://www.github.com/aws/aws-sdk-cpp
591576
pushd aws-sdk-cpp
592577
mkdir build
593578
cd build
594579
PREFIX=/usr/local
595-
FLAGS="-DCMAKE_INSTALL_PREFIX=${PREFIX}
580+
FLAGS="-DCMAKE_INSTALL_PREFIX=${PREFIX} \
596581
-DCMAKE_INSTALL_LIBDIR=lib \
597582
-DCMAKE_MODULE_PATH=${PREFIX}/lib/cmake \
598583
-DCMAKE_POLICY_DEFAULT_CMP0075=NEW \
@@ -608,8 +593,84 @@ cd ..
608593
popd
609594
````
610595
611-
For Windows we do not yet have solution. If you successfully install
612-
on Windows, please let us know how you did it.
596+
### NetCDF Build
597+
598+
In order to build netcdf-c with S3 sdk support,
599+
the following options must be specified for ./configure.
600+
````
601+
--enable-nczarr-s3
602+
````
603+
If you have access to the Unidata bucket on Amazon, then you can
604+
also test S3 support with this option.
605+
````
606+
--enable-nczarr-s3-tests
607+
````
608+
609+
## Windows build
610+
It is possible to build and install aws-sdk-cpp. It is also possible
611+
to build netcdf-c using cmake. Unfortunately, testing currently fails.
612+
613+
For Windows, the following context works. Of course your mileage may vary.
614+
* OS: Windows 10 64-bit with Visual Studio community edition 2019.
615+
* aws-sdk-cpp version 1.9.96 (or later?)
616+
* Required installed libraries: openssl, libcurl, cmake
617+
618+
### AWS-SDK-CPP Build Recipe
619+
620+
This command-line build assumes one is using Cygwin or Mingw to provide
621+
tools such as bash.
622+
623+
````
624+
git clone --recurse-submodules https://www.github.com/aws/aws-sdk-cpp
625+
pushd aws-sdk-cpp
626+
mkdir build
627+
cd build
628+
CFG="Release"
629+
PREFIX="c:/tools/aws-sdk-cpp"
630+
631+
FLAGS="-DCMAKE_INSTALL_PREFIX=${PREFIX} \
632+
-DCMAKE_INSTALL_LIBDIR=lib \
633+
-DCMAKE_MODULE_PATH=${PREFIX}/cmake \
634+
-DCMAKE_POLICY_DEFAULT_CMP0075=NEW \
635+
-DBUILD_ONLY=s3 \
636+
-DENABLE_UNITY_BUILD=ON \
637+
-DCMAKE_BUILD_TYPE=$CFG \
638+
-DSIMPLE_INSTALL=ON"
639+
640+
rm -fr build
641+
mkdir -p build
642+
cd build
643+
cmake -DCMAKE_BUILD_TYPE=${CFG} $FLAGS ..
644+
cmake --build . --config ${CFG}
645+
cmake --install . --config ${CFG}
646+
cd ..
647+
popd
648+
````
649+
Notice that the sdk is being installed in the directory "c:\tools\aws-sdk-cpp"
650+
rather than the default location "c:\Program Files (x86)/aws-sdk-cpp-all".
651+
This is because when using a command line, an install path that contains
652+
blanks may not work.
653+
654+
### NetCDF CMake Build
655+
656+
Enabling S3 support is controlled by these two cmake options:
657+
````
658+
-DENABLE_NCZARR_S3=ON
659+
-DENABLE_NCZARR_S3_TESTS=OFF
660+
````
661+
662+
However, to find the aws sdk libraries,
663+
the following environment variables must be set:
664+
````
665+
AWSSDK_ROOT_DIR="c:/tools/aws-sdk-cpp"
666+
AWSSDKBIN="/cygdrive/c/tools/aws-sdk-cpp/bin"
667+
PATH="$PATH:${AWSSDKBIN}"
668+
````
669+
Then the following options must be specified for cmake.
670+
````
671+
-DAWSSDK_ROOT_DIR=${AWSSDK_ROOT_DIR}
672+
-DAWSSDK_DIR=${AWSSDK_ROOT_DIR}/lib/cmake/AWSSDK
673+
````
613674
614675
# Appendix C. Amazon S3 Imposed Limits {#nczarr_s3limits}
615676

include/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ IF(ENABLE_PNETCDF OR ENABLE_PARALLEL4)
4141
COMPONENT headers)
4242
ENDIF()
4343

44+
#INSTALL(FILES ${netCDF_BINARY_DIR}/include/netcdf_json.h
45+
# DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
46+
# COMPONENT headers)
47+
4448
FILE(GLOB CUR_EXTRA_DIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/*.h)
4549
SET(CUR_EXTRA_DIST ${CUR_EXTRA_DIST} Makefile.am CMakeLists.txt)
4650
ADD_EXTRA_DIST("${CUR_EXTRA_DIST}")

include/Makefile.am

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ nc4internal.h nctime.h nc3internal.h onstack.h ncrc.h ncauth.h \
2020
ncoffsets.h nctestserver.h nc4dispatch.h nc3dispatch.h ncexternl.h \
2121
ncpathmgr.h ncindex.h hdf4dispatch.h hdf5internal.h nc_provenance.h \
2222
hdf5dispatch.h ncmodel.h isnan.h nccrc.h ncexhash.h ncxcache.h \
23-
ncfilter.h ncjson.h ezxml.h
23+
ncfilter.h ncjson.h ezxml.h ncs3sdk.h
2424

2525
if USE_DAP
2626
noinst_HEADERS += ncdap.h
@@ -32,10 +32,10 @@ endif
3232

3333
EXTRA_DIST = CMakeLists.txt XGetopt.h netcdf_meta.h.in netcdf_dispatch.h.in
3434

35-
DISTCLEANFILES = netcdf_json.h
36-
37-
BUILT_SOURCES = netcdf_json.h
38-
netcdf_json.h: Makefile ${srcdir}/ncjson.h ${srcdir}/../libdispatch/ncjson.c
35+
# netcdf_json.h is part of the distribution.
36+
# If either of the files ncjson.h ../libdispatch/ncjson.c is changed
37+
# then netcdf_json.h should be reconstructed using this recipe.
38+
build_netcdf_json.h::
3939
sed -e 's/NCJSON_H/NETCDF_JSON_H/' -e '/ncjson.h/d' <${srcdir}/ncjson.h > $@
4040
sed -e '/ncjson.h/d' < ${srcdir}/../libdispatch/ncjson.c >> $@
4141

include/hdf5internal.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,12 @@ struct NCauth;
6060
/** Struct to hold HDF5-specific info for the file. */
6161
typedef struct NC_HDF5_FILE_INFO {
6262
hid_t hdfid;
63-
#if defined(ENABLE_BYTERANGE) || defined(ENABLE_HDF5_ROS3) || defined(ENABLE_S3_SDK)
64-
struct HTTP {
65-
NCURI* uri; /* Parse of the incoming path, if url */
66-
int iosp; /* We are using the S3 rawvirtual file driver */
67-
struct NCauth* auth;
68-
} http;
63+
#if defined(ENABLE_BYTERANGE)
64+
int byterange;
65+
NCURI* uri; /* Parse of the incoming path, if url */
66+
#if defined(ENABLE_HDF5_ROS3) || defined(ENABLE_S3_SDK)
67+
struct NCauth* auth;
68+
#endif
6969
#endif
7070
} NC_HDF5_FILE_INFO_T;
7171

include/nchttp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ typedef struct NC_HTTP_STATE {
3434

3535
extern int nc_http_init(NC_HTTP_STATE** state);
3636
extern int nc_http_init_verbose(NC_HTTP_STATE** state, int verbose);
37-
extern int nc_http_size(NC_HTTP_STATE* state, const char* url, long long* sizep);
37+
extern int nc_http_size(NC_HTTP_STATE* state, const char* url, long long unsigned* sizep);
3838
extern int nc_http_read(NC_HTTP_STATE* state, const char* url, size64_t start, size64_t count, NCbytes* buf);
3939
extern int nc_http_write(NC_HTTP_STATE* state, const char* url, NCbytes* payload);
4040
extern int nc_http_close(NC_HTTP_STATE* state);

include/ncrc.h

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,14 @@ struct AWSentry {
6565
char* value;
6666
};
6767

68+
typedef struct NCS3INFO {
69+
char* host; /* non-null if other*/
70+
char* region; /* region */
71+
char* bucket; /* bucket name */
72+
char* rootkey;
73+
char* profile;
74+
} NCS3INFO;
75+
6876
#if defined(__cplusplus)
6977
extern "C" {
7078
#endif
@@ -95,15 +103,22 @@ EXTERNL char* NC_entityescape(const char* s);
95103
EXTERNL int NC_readfile(const char* filename, NCbytes* content);
96104
EXTERNL int NC_writefile(const char* filename, size_t size, void* content);
97105
EXTERNL char* NC_mktmp(const char* base);
98-
EXTERNL int NC_getmodelist(const char* url, NClist** modelistp);
99-
EXTERNL int NC_testmode(const char* path, const char* tag);
106+
EXTERNL int NC_getmodelist(const char* modestr, NClist** modelistp);
107+
EXTERNL int NC_testmode(NCURI* uri, const char* tag);
108+
EXTERNL int NC_testpathmode(const char* path, const char* tag);
100109
EXTERNL int NC_split_delim(const char* path, char delim, NClist* segments);
110+
EXTERNL int NC_join(struct NClist* segments, char** pathp);
111+
112+
/* From ds3util.c */
113+
/* S3 profiles */
101114
EXTERNL int NC_s3urlrebuild(NCURI* url, NCURI** newurlp, char** bucketp, char** regionp);
102115
EXTERNL int NC_getactives3profile(NCURI* uri, const char** profilep);
103116
EXTERNL int NC_getdefaults3region(NCURI* uri, const char** regionp);
104-
/* S3 profiles */
105117
EXTERNL int NC_authgets3profile(const char* profile, struct AWSprofile** profilep);
106118
EXTERNL int NC_s3profilelookup(const char* profile, const char* key, const char** valuep);
119+
EXTERNL int NC_s3urlprocess(NCURI* url, NCS3INFO* s3);
120+
EXTERNL int NC_s3clear(NCS3INFO* s3);
121+
EXTERNL int NC_iss3(NCURI* uri);
107122

108123
#if defined(__cplusplus)
109124
}

0 commit comments

Comments
 (0)