Skip to content

Commit 90209a6

Browse files
authored
Merge pull request #1121 from ethho/dev-tests-plat-158-filepath
PLAT-158: Migrate test_filepath.py
2 parents ead5896 + de128a0 commit 90209a6

1 file changed

Lines changed: 265 additions & 0 deletions

File tree

tests/test_filepath.py

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
import pytest
2+
import datajoint as dj
3+
import os
4+
from pathlib import Path
5+
import random
6+
from .schema_external import Filepath, FilepathS3
7+
import logging
8+
import io
9+
10+
11+
def test_path_match(schema_ext, enable_filepath_feature, minio_client, store="repo"):
    """Round-trip an empty file through the external store and verify the
    recorded relative filepath, the restored location, and the checksum."""
    ext = schema_ext.external[store]
    stage_path = dj.config["stores"][store]["stage"]

    # create an empty mock file in the staging area
    relpath = "path/to/films"
    managed_file = Path(stage_path, relpath, "vid.mov")
    managed_file.parent.mkdir(parents=True, exist_ok=True)
    managed_file.touch()  # idiomatic pathlib replacement for open(...).close()

    # put the file
    uuid = ext.upload_filepath(str(managed_file))

    # remove the local copy so the download below must restore it
    managed_file.unlink()
    assert not managed_file.exists()

    # the external table records the path relative to the stage, POSIX-style
    # (as_posix() already returns str; the redundant str() wrapper is dropped)
    assert (ext & {"hash": uuid}).fetch1("filepath") == managed_file.relative_to(
        stage_path
    ).as_posix()

    # download the file and check its contents
    restored_path, checksum = ext.download_filepath(uuid)
    assert restored_path == str(managed_file)
    assert checksum == dj.hash.uuid_from_file(str(managed_file))

    # cleanup
    ext.delete(delete_external_files=True)
41+
42+
43+
@pytest.mark.parametrize("store", ("repo", "repo-s3"))
def test_filepath(enable_filepath_feature, schema_ext, store):
    """Exercise full file management: dedup on upload, download, and verify."""
    ext = schema_ext.external[store]
    stage_path = dj.config["stores"][store]["stage"]
    filename = "picture.dat"

    # stage a mock file with random contents
    relpath = "one/two/three"
    managed_file = Path(stage_path, relpath, filename)
    managed_file.parent.mkdir(parents=True, exist_ok=True)
    payload = os.urandom(3000)
    managed_file.write_bytes(payload)

    # uploading the identical file twice must store it only once
    first_uuid = ext.upload_filepath(str(managed_file))
    second_uuid = ext.upload_filepath(str(managed_file))
    assert first_uuid == second_uuid

    # delete the local copy so it has to be fetched back
    managed_file.unlink()
    assert not managed_file.exists()

    # download twice: the second pass finds the restored local copy in place
    for _ in range(2):
        restored_path, checksum = ext.download_filepath(first_uuid)
        assert restored_path == str(managed_file)
        assert checksum == dj.hash.uuid_from_file(str(managed_file))

    # the restored file carries the original bytes
    assert managed_file.read_bytes() == payload

    # cleanup
    ext.delete(delete_external_files=True)
    assert not ext.exists(ext._make_external_filepath(str(Path(relpath, filename))))
82+
83+
84+
@pytest.mark.parametrize("store", ("repo", "repo-s3"))
def test_duplicate_upload(schema_ext, store):
    """Re-uploading an unchanged file is accepted without error."""
    ext = schema_ext.external[store]
    stage_path = dj.config["stores"][store]["stage"]
    managed_file = Path(stage_path, "one/two/three", "plot.dat")
    managed_file.parent.mkdir(parents=True, exist_ok=True)
    managed_file.write_bytes(os.urandom(300))
    ext.upload_filepath(str(managed_file))
    # same bytes under the same path: the second upload is a no-op, not an error
    ext.upload_filepath(str(managed_file))
95+
96+
97+
@pytest.mark.parametrize("store", ("repo", "repo-s3"))
def test_duplicate_error(schema_ext, store):
    """Uploading a modified file under an already-tracked path must fail."""
    ext = schema_ext.external[store]
    stage_path = dj.config["stores"][store]["stage"]
    managed_file = Path(stage_path, "one/two/three", "thesis.dat")
    managed_file.parent.mkdir(parents=True, exist_ok=True)
    managed_file.write_bytes(os.urandom(300))
    ext.upload_filepath(str(managed_file))
    # overwrite with different contents under the same path
    managed_file.write_bytes(os.urandom(300))
    # the changed file no longer matches the tracked checksum
    with pytest.raises(dj.DataJointError):
        ext.upload_filepath(str(managed_file))
113+
114+
115+
class TestFilepath:
    """Round-trips of filepath-typed attributes via Filepath/FilepathS3 tables."""

    def _test_filepath_class(
        self, table=Filepath(), store="repo", verify_checksum=True
    ):
        """Insert a staged file into *table*, fetch it back, verify contents,
        then delete the row and its external entry.

        When verify_checksum is False, the checksum size limit is set to 0 so
        the download skips checksum verification (and logs a warning).
        """
        if not verify_checksum:
            dj.config["filepath_checksum_size_limit"] = 0
        try:
            stage_path = dj.config["stores"][store]["stage"]
            # create a mock file
            relative_path = "one/two/three"
            managed_file = Path(stage_path, relative_path, "attachment.dat")
            managed_file.parent.mkdir(parents=True, exist_ok=True)
            data = os.urandom(3000)
            with managed_file.open("wb") as f:
                f.write(data)
            with managed_file.open("rb") as f:
                contents = f.read()
            assert data == contents

            # upload file into shared repo
            table.insert1((1, str(managed_file)))

            # remove file locally
            managed_file.unlink()
            assert not managed_file.is_file()

            # fetch file from remote
            filepath = (table & {"fnum": 1}).fetch1("img")
            assert filepath == str(managed_file)

            # verify original contents
            with managed_file.open("rb") as f:
                contents = f.read()
            assert data == contents

            # delete from table
            table.delete()
            assert table.external[store]

            # delete from external table
            table.external[store].delete(delete_external_files=True)
        finally:
            # BUGFIX: restore even when an assertion fails above; previously the
            # 0 limit set for verify_checksum=False leaked into later tests.
            dj.config["filepath_checksum_size_limit"] = None

    @pytest.mark.parametrize(
        "table, store, n_repeats",
        (
            (Filepath(), "repo", 2),
            (FilepathS3(), "repo-s3", 2),
        ),
    )
    def test_filepath_class(
        self,
        schema_ext,
        table,
        store,
        n_repeats,
        minio_client,
        enable_filepath_feature,
        verify_checksum=True,
    ):
        """Repeat the round-trip to confirm it is repeatable per store."""
        for _ in range(n_repeats):
            self._test_filepath_class(table, store, verify_checksum)

    def test_filepath_class_no_checksum(self, schema_ext, enable_filepath_feature):
        """Skipping checksum verification must emit the warning log message."""
        logger = logging.getLogger("datajoint")
        log_capture = io.StringIO()
        stream_handler = logging.StreamHandler(log_capture)
        log_format = logging.Formatter(
            "[%(asctime)s][%(funcName)s][%(levelname)s]: %(message)s"
        )
        stream_handler.setFormatter(log_format)
        stream_handler.set_name("test_limit_warning")
        logger.addHandler(stream_handler)
        try:
            self._test_filepath_class(
                table=Filepath(), store="repo", verify_checksum=False
            )
            log_contents = log_capture.getvalue()
        finally:
            log_capture.close()
            # BUGFIX: iterate a copy — removing from logger.handlers while
            # iterating it skips elements; also detach even if the test fails.
            for handler in logger.handlers[:]:
                if handler.name == "test_limit_warning":
                    logger.removeHandler(handler)
        assert "Skipped checksum for file with hash:" in log_contents
194+
195+
196+
@pytest.mark.parametrize(
    "table, store",
    (
        (Filepath(), "repo"),
        (FilepathS3(), "repo-s3"),
    ),
)
def test_filepath_cleanup(table, store, schema_ext, enable_filepath_feature):
    """Deleting rows and then cleaning the external table drops unused entries."""
    stage_path = dj.config["stores"][store]["stage"]
    total = 20
    payload = os.urandom(345)
    for row_id in range(total):
        subdirs = Path(*random.sample(("one", "two", "three", "four"), k=3))
        managed_file = Path(stage_path, subdirs, "file.dat")
        managed_file.parent.mkdir(parents=True, exist_ok=True)
        managed_file.write_bytes(payload)  # identical contents in every file
        table.insert1((row_id, str(managed_file)))
    assert len(table) == total

    ext = schema_ext.external[store]

    assert len(table) == total
    # identical contents dedupe into fewer external entries than table rows
    assert 0 < len(ext) < total

    (table & "fnum in (1, 2, 3, 4, 5, 6)").delete()
    deleted = total - len(table)  # number of rows removed
    assert deleted == 6

    ext.delete(delete_external_files=True)  # delete unused entries
    assert 0 < len(ext) <= total - deleted
228+
229+
230+
def test_delete_without_files(
    schema_ext,
    enable_filepath_feature,
    store="repo",
):
    """External-table delete with delete_external_files=False keeps the files."""
    ext = schema_ext.external[store]
    # purge unused tracking entries only; stored files remain in place
    ext.delete(delete_external_files=False)
238+
239+
240+
def test_return_string(
    schema_ext, enable_filepath_feature, table=Filepath(), store="repo"
):
    """A fetched filepath attribute comes back as a plain string."""
    stage_path = dj.config["stores"][store]["stage"]

    # stage a mock file with random contents
    managed_file = Path(stage_path, "this/is/a/test", "string.dat")
    managed_file.parent.mkdir(parents=True, exist_ok=True)
    payload = os.urandom(3000)
    managed_file.write_bytes(payload)
    assert managed_file.read_bytes() == payload

    # upload file into shared repo
    table.insert1((138, str(managed_file)))

    # remove the local copy
    managed_file.unlink()
    assert not managed_file.is_file()

    # fetching restores the file and returns its path as str
    fetched_path = (table & {"fnum": 138}).fetch1("img")
    assert isinstance(fetched_path, str)

0 commit comments

Comments
 (0)