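When I try to create the "flan2021_submix" mixture, the process fails with a `NonMatchingChecksumError` while downloading the AESLC dataset through TensorFlow Datasets. The full console output and traceback follow: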
Downloading and preparing dataset 11.10 MiB (download: 11.10 MiB, generated: Unknown size, total: 11.10 MiB) to /home/one/tensorflow_datasets/aeslc/1.0.0...
Extraction completed...: 0 file [00:05, ? file/s]█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00, 5.05s/ url]
Dl Size...: 11 MiB [00:05, 2.17 MiB/s]
Dl Completed...: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:05<00:00, 5.06s/ url]
ERROR:absl:Failed to load task 'aeslc_template_0to10_no_opt_x_shot' as part of mixture 'flan2021_submix'
Traceback (most recent call last):
File "/home/one/anaconda3/envs/flan/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/one/anaconda3/envs/flan/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/mnt/datadrive/Datasets/LLM/FLAN/flan/v2/run_example.py", line 100, in <module>
dataset = selected_mixture.get_dataset(
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/seqio/dataset_providers.py", line 1805, in get_dataset
ds = task.get_dataset(
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/seqio/dataset_providers.py", line 1443, in get_dataset
ds = source.get_dataset(
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/seqio/experimental.py", line 370, in get_dataset
train_ds = _get_maybe_sharded_dataset(
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/seqio/experimental.py", line 333, in _get_maybe_sharded_dataset
ds = self._original_source.get_dataset(
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/seqio/dataset_providers.py", line 496, in get_dataset
return self.tfds_dataset.load(
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/seqio/utils.py", line 182, in load
return tfds.load(
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/logging/__init__.py", line 169, in __call__
return function(*args, **kwargs)
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/load.py", line 640, in load
_download_and_prepare_builder(dbuilder, download, download_and_prepare_kwargs)
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/load.py", line 499, in _download_and_prepare_builder
dbuilder.download_and_prepare(**download_and_prepare_kwargs)
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/logging/__init__.py", line 169, in __call__
return function(*args, **kwargs)
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/dataset_builder.py", line 646, in download_and_prepare
self._download_and_prepare(
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/dataset_builder.py", line 1498, in _download_and_prepare
split_generators = self._split_generators( # pylint: disable=unexpected-keyword-arg
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/datasets/aeslc/aeslc_dataset_builder.py", line 46, in _split_generators
dl_path = dl_manager.download_and_extract(_URL)
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py", line 687, in download_and_extract
return _map_promise(self._download_extract, url_or_urls)
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py", line 830, in _map_promise
res = tree_utils.map_structure(
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tree/__init__.py", line 435, in map_structure
[func(*args) for args in zip(*map(flatten, structures))])
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tree/__init__.py", line 435, in <listcomp>
[func(*args) for args in zip(*map(flatten, structures))])
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py", line 831, in <lambda>
lambda p: p.get(), all_promises
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/promise/promise.py", line 512, in get
return self._target_settled_value(_raise=True)
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/promise/promise.py", line 516, in _target_settled_value
return self._target()._settled_value(_raise)
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/promise/promise.py", line 226, in _settled_value
reraise(type(raise_val), raise_val, self._traceback)
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/six.py", line 719, in reraise
raise value
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/promise/promise.py", line 87, in try_catch
return (handler(*args, **kwargs), None)
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py", line 407, in <lambda>
lambda dl_result: self._register_or_validate_checksums( # pylint: disable=g-long-lambda
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py", line 464, in _register_or_validate_checksums
_validate_checksums(
File "/home/one/anaconda3/envs/flan/lib/python3.10/site-packages/tensorflow_datasets/core/download/download_manager.py", line 808, in _validate_checksums
raise NonMatchingChecksumError(msg)
tensorflow_datasets.core.download.download_manager.NonMatchingChecksumError: Artifact https://github.com/ryanzhumich/AESLC/archive/master.zip, downloaded to /home/one/tensorflow_datasets/downloads/ryanzhumich_AESLC_archive_masterACSpoxw627Ay4UrkswMeyz6RrOey8kKfkhEM4VySJWU.zip.tmp.30a54832017f490096e6110966e756b4/AESLC-master.zip, has wrong checksum:
* Expected: UrlInfo(size=11.10 MiB, checksum='b5ea2ffb837c5cfb9b033d62b3940a8a2330a9eb69bd2a39a9f55db6a23a40a4', filename='AESLC-master.zip')
* Got: UrlInfo(size=11.11 MiB, checksum='cd042fa034790609468250518d44060017df153c382ae888091a37d5fa320032', filename='AESLC-master.zip')
To debug, see: https://www.tensorflow.org/datasets/overview#fixing_nonmatchingchecksumerror
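In summary: creating "flan2021_submix" fails because the downloaded AESLC archive (`AESLC-master.zip`, 11.11 MiB) does not match the size and checksum that TensorFlow Datasets expects (11.10 MiB). This suggests the upstream `master.zip` may have changed since the checksum was registered, though I have not confirmed that.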