1313
1414from dvuploader .file import File
1515from dvuploader .packaging import distribute_files , zip_files
16- from dvuploader .utils import build_url , retrieve_dataset_files
16+ from dvuploader .utils import (
17+ build_url ,
18+ init_logging ,
19+ retrieve_dataset_files ,
20+ wait_for_dataset_unlock ,
21+ )
1722
1823##### CONFIGURATION #####
1924
2227#
2328# This will exponentially increase the wait time between retries.
2429# The max wait time is 240 seconds per retry though.
30+ LOCK_WAIT_TIME = int (os .environ .get ("DVUPLOADER_LOCK_WAIT_TIME" , 1.5 ))
31+ LOCK_TIMEOUT = int (os .environ .get ("DVUPLOADER_LOCK_TIMEOUT" , 300 ))
2532MAX_RETRIES = int (os .environ .get ("DVUPLOADER_MAX_RETRIES" , 15 ))
2633MAX_RETRY_TIME = int (os .environ .get ("DVUPLOADER_MAX_RETRY_TIME" , 60 ))
2734MIN_RETRY_TIME = int (os .environ .get ("DVUPLOADER_MIN_RETRY_TIME" , 1 ))
3239 max = MAX_RETRY_TIME ,
3340)
3441
42+
43+ assert isinstance (LOCK_WAIT_TIME , int ), "DVUPLOADER_LOCK_WAIT_TIME must be an integer"
44+ assert isinstance (LOCK_TIMEOUT , int ), "DVUPLOADER_LOCK_TIMEOUT must be an integer"
3545assert isinstance (MAX_RETRIES , int ), "DVUPLOADER_MAX_RETRIES must be an integer"
3646assert isinstance (MAX_RETRY_TIME , int ), "DVUPLOADER_MAX_RETRY_TIME must be an integer"
3747assert isinstance (MIN_RETRY_TIME , int ), "DVUPLOADER_MIN_RETRY_TIME must be an integer"
@@ -85,6 +95,9 @@ def __getattr__(self, name):
8595 return getattr (self ._file , name )
8696
8797
98+ init_logging ()
99+
100+
88101async def native_upload (
89102 files : List [File ],
90103 dataverse_url : str ,
@@ -116,7 +129,12 @@ async def native_upload(
116129 session_params = {
117130 "base_url" : dataverse_url ,
118131 "headers" : {"X-Dataverse-key" : api_token },
119- "timeout" : None ,
132+ "timeout" : httpx .Timeout (
133+ None ,
134+ read = None ,
135+ write = None ,
136+ connect = None ,
137+ ),
120138 "limits" : httpx .Limits (max_connections = n_parallel_uploads ),
121139 "proxy" : proxy ,
122140 }
@@ -295,6 +313,14 @@ async def _single_native_upload(
295313 - dict: JSON response from the upload request
296314 """
297315
316+ # Check if the dataset is locked
317+ await wait_for_dataset_unlock (
318+ session = session ,
319+ persistent_id = persistent_id ,
320+ sleep_time = LOCK_WAIT_TIME ,
321+ timeout = LOCK_TIMEOUT ,
322+ )
323+
298324 if not file .to_replace :
299325 endpoint = build_url (
300326 endpoint = NATIVE_UPLOAD_ENDPOINT ,
@@ -306,13 +332,14 @@ async def _single_native_upload(
306332 )
307333
308334 json_data = _get_json_data (file )
335+ handler = file .get_handler ()
309336
310- assert file . handler is not None , "File handler is required for native upload"
337+ assert handler is not None , "File handler is required for native upload"
311338
312339 files = {
313340 "file" : (
314341 file .file_name ,
315- _ProgressFileWrapper (file . handler , progress , pbar ), # type: ignore[arg-type]
342+ _ProgressFileWrapper (handler , progress , pbar ), # type: ignore[arg-type]
316343 file .mimeType ,
317344 ),
318345 "jsonData" : (
@@ -442,6 +469,7 @@ async def _update_metadata(
442469 session = session ,
443470 url = NATIVE_METADATA_ENDPOINT .format (FILE_ID = file_id ),
444471 file = file ,
472+ persistent_id = persistent_id ,
445473 )
446474
447475 tasks .append (task )
@@ -457,6 +485,7 @@ async def _update_single_metadata(
457485 session : httpx .AsyncClient ,
458486 url : str ,
459487 file : File ,
488+ persistent_id : str ,
460489) -> None :
461490 """
462491 Updates the metadata of a single file in a Dataverse repository.
@@ -470,6 +499,13 @@ async def _update_single_metadata(
470499 ValueError: If metadata update fails.
471500 """
472501
502+ await wait_for_dataset_unlock (
503+ session = session ,
504+ persistent_id = persistent_id ,
505+ sleep_time = LOCK_WAIT_TIME ,
506+ timeout = LOCK_TIMEOUT ,
507+ )
508+
473509 json_data = _get_json_data (file )
474510
475511 # Send metadata as a readable byte stream
@@ -490,7 +526,16 @@ async def _update_single_metadata(
490526 else :
491527 await asyncio .sleep (1.0 )
492528
493- raise ValueError (f"Failed to update metadata for file { file .file_name } ." )
529+ if "message" in response .json ():
530+ # If the response is a JSON object, we can get the error message from the "message" key.
531+ error_message = response .json ()["message" ]
532+ else :
533+ # If the response is not a JSON object, we can get the error message from the response text.
534+ error_message = response .text
535+
536+ raise ValueError (
537+ f"Failed to update metadata for file { file .file_name } : { error_message } "
538+ )
494539
495540
496541def _retrieve_file_ids (
0 commit comments