Skip to content

Commit e1b5d98

Browse files
authored
Merge pull request #203 from gdcc/fix-form-data
Fix `jsonData` not passed correctly
2 parents 44ec54e + 484c366 commit e1b5d98

3 files changed

Lines changed: 153 additions & 77 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,6 @@ poetry.lock
170170

171171
# Ruff
172172
.ruff_cache/
173+
174+
# JetBrains
175+
.idea/

pyDataverse/api.py

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -170,21 +170,20 @@ def post_request(self, url, data=None, auth=False, params=None, files=None):
170170
if isinstance(data, str):
171171
data = json.loads(data)
172172

173+
# Decide whether to use 'data' or 'json' args
174+
request_params = self._check_json_data_form(data)
175+
173176
if self.client is None:
174177
return self._sync_request(
175-
method=httpx.post,
176-
url=url,
177-
json=data,
178-
params=params,
179-
files=files,
178+
method=httpx.post, url=url, params=params, files=files, **request_params
180179
)
181180
else:
182181
return self._async_request(
183182
method=self.client.post,
184183
url=url,
185-
json=data,
186184
params=params,
187185
files=files,
186+
**request_params,
188187
)
189188

190189
def put_request(self, url, data=None, auth=False, params=None):
@@ -216,19 +215,22 @@ def put_request(self, url, data=None, auth=False, params=None):
216215
if isinstance(data, str):
217216
data = json.loads(data)
218217

218+
# Decide whether to use 'data' or 'json' args
219+
request_params = self._check_json_data_form(data)
220+
219221
if self.client is None:
220222
return self._sync_request(
221223
method=httpx.put,
222224
url=url,
223-
json=data,
224225
params=params,
226+
**request_params,
225227
)
226228
else:
227229
return self._async_request(
228230
method=self.client.put,
229231
url=url,
230-
json=data,
231232
params=params,
233+
**request_params,
232234
)
233235

234236
def delete_request(self, url, auth=False, params=None):
@@ -268,6 +270,33 @@ def delete_request(self, url, auth=False, params=None):
268270
params=params,
269271
)
270272

273+
@staticmethod
def _check_json_data_form(data: Optional[Dict]):
    """Choose the request keyword argument used to send the given payload.

    In the case of form-data keyed by "jsonData", Dataverse expects the
    payload as a string in the form of a dictionary. This is not possible
    using HTTPX's ``json`` parameter, so this case is handled separately
    and sent through the ``data`` parameter instead.

    Parameters
    ----------
    data : dict, optional
        Payload of the request.

    Returns
    -------
    dict
        Keyword arguments to splat into the request call: ``{}`` when
        ``data`` is empty or ``None``, ``{"json": data}`` when there is no
        "jsonData" key, otherwise ``{"data": {"jsonData": <str>}}``.

    Raises
    ------
    ValueError
        If ``data`` is not a dictionary, or if "jsonData" is present
        together with other keys.
    """
    if not data:
        return {}
    if not isinstance(data, dict):
        raise ValueError("Data must be a dictionary.")
    if "jsonData" not in data:
        return {"json": data}

    # Raise instead of assert: assertions are stripped under ``python -O``,
    # which would let additional keys slip through silently.
    if len(data) != 1:
        raise ValueError("jsonData must be the only key in the dictionary.")

    # Content of JSON data should ideally be a string
    content = data["jsonData"]
    if not isinstance(content, str):
        content = json.dumps(content)

    # Build a fresh dict so the caller's payload is never modified in place.
    return {"data": {"jsonData": content}}
299+
271300
def _sync_request(
272301
self,
273302
method,
@@ -1807,9 +1836,10 @@ def upload_datafile(self, identifier, filename, json_str=None, is_pid=True):
18071836
url += "/datasets/{0}/add".format(identifier)
18081837

18091838
files = {"file": open(filename, "rb")}
1810-
return self.post_request(
1811-
url, data={"jsonData": json_str}, files=files, auth=True
1812-
)
1839+
metadata = {}
1840+
if json_str is not None:
1841+
metadata["jsonData"] = json_str
1842+
return self.post_request(url, data=metadata, files=files, auth=True)
18131843

18141844
def update_datafile_metadata(self, identifier, json_str=None, is_filepid=False):
18151845
"""Update datafile metadata.

tests/api/test_upload.py

Lines changed: 109 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import httpx
66

7-
from pyDataverse.api import NativeApi
7+
from pyDataverse.api import DataAccessApi, NativeApi
88
from pyDataverse.models import Datafile
99

1010

@@ -45,6 +45,41 @@ def test_file_upload(self):
4545
# Assert
4646
assert response.status_code == 200, "File upload failed."
4747

48+
def test_file_upload_without_metadata(self):
    """
    Test case for uploading a file to a dataset without metadata.

    --> json_str will be set as None

    This test case performs the following steps:
    1. Creates a dataset using the provided metadata.
    2. Prepares a file for upload.
    3. Uploads the file to the dataset.
    4. Asserts that the file upload was successful.

    Raises:
        AssertionError: If the file upload fails.

    """
    # Arrange
    BASE_URL = os.getenv("BASE_URL").rstrip("/")
    API_TOKEN = os.getenv("API_TOKEN")

    # Create dataset (context manager so the fixture file handle is closed)
    with open("tests/data/file_upload_ds_minimum.json") as f:
        metadata = json.load(f)
    pid = self._create_dataset(BASE_URL, API_TOKEN, metadata)
    api = NativeApi(BASE_URL, API_TOKEN)

    # Act
    response = api.upload_datafile(
        identifier=pid,
        filename="tests/data/datafile.txt",
        json_str=None,
    )

    # Assert
    assert response.status_code == 200, "File upload failed."
82+
4883
def test_bulk_file_upload(self, create_mock_file):
4984
"""
5085
Test case for uploading bulk files to a dataset.
@@ -97,9 +132,66 @@ def test_bulk_file_upload(self, create_mock_file):
97132
# Assert
98133
assert response.status_code == 200, "File upload failed."
99134

100-
def test_file_replacement(self):
135+
def test_file_replacement_wo_metadata(self):
    """
    Test case for replacing a file in a dataset without metadata.

    Steps:
    1. Create a dataset using the provided metadata.
    2. Upload a datafile to the dataset.
    3. Replace the uploaded datafile with a mutated version.
    4. Verify that the file replacement was successful and the content matches the expected content.
    """

    # Arrange
    BASE_URL = os.getenv("BASE_URL").rstrip("/")
    API_TOKEN = os.getenv("API_TOKEN")

    # Create dataset (context manager so the fixture file handle is closed)
    with open("tests/data/file_upload_ds_minimum.json") as f:
        metadata = json.load(f)
    pid = self._create_dataset(BASE_URL, API_TOKEN, metadata)
    api = NativeApi(BASE_URL, API_TOKEN)
    data_api = DataAccessApi(BASE_URL, API_TOKEN)

    # Perform file upload
    df = Datafile({"pid": pid, "filename": "datafile.txt"})
    response = api.upload_datafile(
        identifier=pid,
        filename="tests/data/replace.xyz",
        json_str=df.json(),
    )

    # Fail with a readable message before digging into the response body
    assert response.status_code == 200, "File upload failed."

    # Retrieve file ID
    file_id = response.json()["data"]["files"][0]["dataFile"]["id"]

    # Act
    with tempfile.TemporaryDirectory() as tempdir:
        with open("tests/data/replace.xyz") as f:
            original = f.read()
        mutated = "Z" + original[1:]
        mutated_path = os.path.join(tempdir, "replace.xyz")

        with open(mutated_path, "w") as f:
            f.write(mutated)

        # Empty metadata: the replacement is sent without any file attributes
        json_data = {}

        response = api.replace_datafile(
            identifier=file_id,
            filename=mutated_path,
            json_str=json.dumps(json_data),
            is_filepid=False,
        )

    # Assert
    # Check the status first so a failed request reports the intended
    # message instead of a KeyError while indexing the response body.
    assert response.status_code == 200, "File replacement failed."

    file_id = response.json()["data"]["files"][0]["dataFile"]["id"]
    content = data_api.get_datafile(file_id, is_pid=False).text

    assert content == mutated, "File content does not match the expected content."
191+
192+
def test_file_replacement_w_metadata(self):
101193
"""
102-
Test case for replacing a file in a dataset.
194+
Test case for replacing a file in a dataset with metadata.
103195
104196
Steps:
105197
1. Create a dataset using the provided metadata.
@@ -116,6 +208,7 @@ def test_file_replacement(self):
116208
metadata = json.load(open("tests/data/file_upload_ds_minimum.json"))
117209
pid = self._create_dataset(BASE_URL, API_TOKEN, metadata)
118210
api = NativeApi(BASE_URL, API_TOKEN)
211+
data_api = DataAccessApi(BASE_URL, API_TOKEN)
119212

120213
# Perform file upload
121214
df = Datafile({"pid": pid, "filename": "datafile.txt"})
@@ -126,7 +219,7 @@ def test_file_replacement(self):
126219
)
127220

128221
# Retrieve file ID
129-
file_id = self._get_file_id(BASE_URL, API_TOKEN, pid)
222+
file_id = response.json()["data"]["files"][0]["dataFile"]["id"]
130223

131224
# Act
132225
with tempfile.TemporaryDirectory() as tempdir:
@@ -141,6 +234,7 @@ def test_file_replacement(self):
141234
"description": "My description.",
142235
"categories": ["Data"],
143236
"forceReplace": False,
237+
"directoryLabel": "some/other",
144238
}
145239

146240
response = api.replace_datafile(
@@ -151,17 +245,19 @@ def test_file_replacement(self):
151245
)
152246

153247
# Assert
154-
replaced_id = self._get_file_id(BASE_URL, API_TOKEN, pid)
155-
replaced_content = self._fetch_datafile_content(
156-
BASE_URL,
157-
API_TOKEN,
158-
replaced_id,
159-
)
248+
file_id = response.json()["data"]["files"][0]["dataFile"]["id"]
249+
data_file = api.get_dataset(pid).json()["data"]["latestVersion"]["files"][0]
250+
content = data_api.get_datafile(file_id, is_pid=False).text
160251

161-
assert response.status_code == 200, "File replacement failed."
162252
assert (
163-
replaced_content == mutated
164-
), "File content does not match the expected content."
253+
data_file["description"] == "My description."
254+
), "Description does not match."
255+
assert data_file["categories"] == ["Data"], "Categories do not match."
256+
assert (
257+
data_file["directoryLabel"] == "some/other"
258+
), "Directory label does not match."
259+
assert response.status_code == 200, "File replacement failed."
260+
assert content == mutated, "File content does not match the expected content."
165261

166262
@staticmethod
167263
def _create_dataset(
@@ -193,56 +289,3 @@ def _create_dataset(
193289
response.raise_for_status()
194290

195291
return response.json()["data"]["persistentId"]
196-
197-
@staticmethod
def _get_file_id(
    BASE_URL: str,
    API_TOKEN: str,
    pid: str,
):
    """
    Look up the ID of the first file listed in a dataset's latest version.

    Args:
        BASE_URL (str): The base URL of the Dataverse instance.
        API_TOKEN (str): The API token for authentication.
        pid (str): The persistent identifier (PID) of the dataset.

    Returns:
        The ``id`` of the first ``dataFile`` entry in the dataset's latest version.

    Raises:
        HTTPError: If the HTTP request to retrieve the dataset fails.
    """
    dataset_response = httpx.get(
        url=f"{BASE_URL}/api/datasets/:persistentId/?persistentId={pid}",
        headers={"X-Dataverse-key": API_TOKEN},
    )
    dataset_response.raise_for_status()

    # Walk the payload step by step: latest version -> first file -> its ID
    latest_version = dataset_response.json()["data"]["latestVersion"]
    first_file = latest_version["files"][0]
    return first_file["dataFile"]["id"]
225-
226-
@staticmethod
def _fetch_datafile_content(
    BASE_URL: str,
    API_TOKEN: str,
    id: str,
):
    """
    Download a datafile through the access endpoint and return its text.

    Args:
        BASE_URL (str): The base URL of the Dataverse instance.
        API_TOKEN (str): The API token for authentication.
        id (str): The ID of the datafile.

    Returns:
        str: The response body decoded as UTF-8.

    Raises:
        HTTPError: If the HTTP request for the datafile fails.
    """
    download = httpx.get(
        f"{BASE_URL}/api/access/datafile/{id}",
        headers={"X-Dataverse-key": API_TOKEN},
    )
    download.raise_for_status()

    raw_bytes = download.content
    return raw_bytes.decode("utf-8")

0 commit comments

Comments
 (0)