I'm testing an update of Fedora's netcdf package to 4.9.1, and I'm seeing a new failure when running the tests for the octave-netcdf package. This might be tricky to track down, but I don't see the failure with the current netcdf 4.9.0 package. HDF5 stays the same at 1.12.1 in both cases.
(gdb) bt
#0 0x00007fffcee93eb0 in ?? ()
#1 0x00007ffff5522b63 in H5FD__free_cls (cls=0x555555f09810) at ../../src/H5FD.c:188
#2 0x00007ffff557e57a in H5I__mark_node (key=0x0, _udata=<synthetic pointer>, _info=0x5555555a6c60) at ../../src/H5Iint.c:393
#3 H5I_clear_type (type=<optimized out>, force=false, app_ref=<optimized out>) at ../../src/H5Iint.c:339
#4 0x00007ffff5522af8 in H5FD_term_package () at ../../src/H5FD.c:147
#5 0x00007ffff5466e54 in H5_term_library () at ../../src/H5.c:377
#6 0x00007ffff546771d in H5_term_library () at ../../src/H5.c:460
#7 0x00007ffff546772d in H5close () at ../../src/H5.c:989
#8 0x00007ffff79077dd in octave::load_save_system::~load_save_system (this=<optimized out>, this=<optimized out>) at libinterp/corefcn/load-save.cc:274
#9 0x00007ffff78f75b5 in octave::interpreter::~interpreter (this=<optimized out>, this=<optimized out>) at libinterp/corefcn/interpreter.cc:661
#10 0x00007ffff703ae47 in std::default_delete<octave::interpreter>::operator() (this=<optimized out>, __ptr=0x5555555c62f0) at /usr/include/c++/12/bits/unique_ptr.h:95
#11 std::default_delete<octave::interpreter>::operator() (__ptr=0x5555555c62f0, this=<optimized out>) at /usr/include/c++/12/bits/unique_ptr.h:89
#12 std::unique_ptr<octave::interpreter, std::default_delete<octave::interpreter> >::~unique_ptr (this=<optimized out>, this=<optimized out>) at /usr/include/c++/12/bits/unique_ptr.h:396
#13 octave::application::~application (this=<optimized out>, this=<optimized out>) at libinterp/octave.cc:296
#14 0x0000555555556590 in octave::cli_application::~cli_application (this=<optimized out>, this=<optimized out>) at libinterp/octave.h:377
#15 main (argc=<optimized out>, argv=<optimized out>) at src/main-cli.cc:122
(gdb) up
#1 0x00007ffff5522b63 in H5FD__free_cls (cls=0x555555f09810) at ../../src/H5FD.c:188
188 if (cls->terminate && cls->terminate() < 0)
(gdb) list
183
184 /* If the file driver has a terminate callback, call it to give the file
185 * driver a chance to free singletons or other resources which will become
186 * invalid once the class structure is freed.
187 */
188 if (cls->terminate && cls->terminate() < 0)
189 HGOTO_ERROR(H5E_VFL, H5E_CANTCLOSEOBJ, FAIL, "virtual file driver '%s' did not terminate cleanly",
190 cls->name)
191
192 H5MM_xfree(cls);
(gdb) print cls
$1 = (H5FD_class_t *) 0x555555f09810
(gdb) print *cls
$2 = {name = 0x7fffcef4c0d0 <error: Cannot access memory at address 0x7fffcef4c0d0>, maxaddr = 9223372036854775807, fc_degree = H5F_CLOSE_WEAK, terminate = 0x7fffcee93eb0, sb_size = 0x0, sb_encode = 0x0, sb_decode = 0x0,
fapl_size = 0, fapl_get = 0x0, fapl_copy = 0x0, fapl_free = 0x0, dxpl_size = 0, dxpl_copy = 0x0, dxpl_free = 0x0, open = 0x7fffcee97280, close = 0x7fffcee97230, cmp = 0x7fffcee94170, query = 0x7fffcee93ed0, get_type_map = 0x0,
alloc = 0x7fffcee93f40, free = 0x0, get_eoa = 0x7fffcee93f70, set_eoa = 0x7fffcee93fa0, get_eof = 0x7fffcee93fd0, get_handle = 0x7fffcee94060, read = 0x7fffcee97610, write = 0x7fffcee940f0, flush = 0x7fffcee94000, truncate = 0x0,
lock = 0x7fffcee94020, unlock = 0x7fffcee94040, fl_map = {H5FD_MEM_SUPER, H5FD_MEM_SUPER, H5FD_MEM_SUPER, H5FD_MEM_DRAW, H5FD_MEM_DRAW, H5FD_MEM_SUPER, H5FD_MEM_SUPER}}
(gdb) print *cls->terminate
Cannot access memory at address 0x7fffcee93eb0
(gdb) print cls->terminate
$3 = (herr_t (*)(void)) 0x7fffcee93eb0
(gdb) up
#3 H5I_clear_type (type=<optimized out>, force=false, app_ref=<optimized out>) at ../../src/H5Iint.c:339
339 if (H5I__mark_node((void *)item, NULL, (void *)&udata) < 0)
(gdb) print *item
$6 = {id = 576460752303423489, count = 1, app_count = 1, object = 0x555555e33f50, marked = false, hh = {tbl = 0x55555558ebf0, prev = 0x55555558eb90, next = 0x0, hh_prev = 0x0, hh_next = 0x0, key = 0x5555555a6c60, keylen = 8,
hashv = 3217010591}}
==166== Jump to the invalid address stated on the next line
==166== at 0x32D48EB0: ???
==166== by 0x6D5F579: UnknownInlinedFun (H5Iint.c:393)
==166== by 0x6D5F579: H5I_clear_type (H5Iint.c:339)
==166== by 0x6D03AF7: H5FD_term_package (H5FD.c:147)
==166== by 0x6C47E53: H5_term_library.part.0 (H5.c:377)
==166== by 0x6C4872C: H5close (H5.c:989)
==166== by 0x55597DC: octave::load_save_system::~load_save_system() (load-save.cc:274)
==166== by 0x55495B4: octave::interpreter::~interpreter() (interpreter.cc:661)
==166== by 0x4C8CE46: UnknownInlinedFun (unique_ptr.h:95)
==166== by 0x4C8CE46: UnknownInlinedFun (unique_ptr.h:89)
==166== by 0x4C8CE46: UnknownInlinedFun (unique_ptr.h:396)
==166== by 0x4C8CE46: octave::application::~application() (octave.cc:296)
==166== by 0x10A58F: UnknownInlinedFun (octave.h:377)
==166== by 0x10A58F: main (main-cli.cc:122)
==166== Address 0x32d48eb0 is not stack'd, malloc'd or (recently) free'd
==166==
fatal: caught signal Segmentation fault -- stopping myself...
==166==
==166== Process terminating with default action of signal 11 (SIGSEGV)
==166== at 0x6A8EB94: __pthread_kill_implementation (in /usr/lib64/libc.so.6)
==166== by 0x6A3DAED: raise (in /usr/lib64/libc.so.6)
==166== by 0x6A3DB9F: ??? (in /usr/lib64/libc.so.6)
==166== by 0x32D48EAF: ???
==166== by 0x6D5F579: UnknownInlinedFun (H5Iint.c:393)
==166== by 0x6D5F579: H5I_clear_type (H5Iint.c:339)
==166== by 0x6D03AF7: H5FD_term_package (H5FD.c:147)
==166== by 0x6C47E53: H5_term_library.part.0 (H5.c:377)
==166== by 0x6C4872C: H5close (H5.c:989)
==166== by 0x55597DC: octave::load_save_system::~load_save_system() (load-save.cc:274)
==166== by 0x55495B4: octave::interpreter::~interpreter() (interpreter.cc:661)
==166== by 0x4C8CE46: UnknownInlinedFun (unique_ptr.h:95)
==166== by 0x4C8CE46: UnknownInlinedFun (unique_ptr.h:89)
==166== by 0x4C8CE46: UnknownInlinedFun (unique_ptr.h:396)
==166== by 0x4C8CE46: octave::application::~application() (octave.cc:296)
==166== by 0x10A58F: UnknownInlinedFun (octave.h:377)
==166== by 0x10A58F: main (main-cli.cc:122)
The loop in H5I_clear_type runs through quite a lot of iterations before the segfault. I'm not sure what other information would be useful for tracking this down.
I'm testing an update of Fedora's netcdf package to 4.9.1, and I'm seeing a new failure when running the tests for the octave-netcdf package. This might be tricky to track down, but I don't see the failure with the current netcdf 4.9.0 package. HDF5 stays the same at 1.12.1 in both cases.
The segfault occurs when Octave exits:
Valgrind output:
The loop in H5I_clear_type runs through quite a lot of iterations before the segfault. I'm not sure what other information would be useful for tracking this down.