Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ target/

# build artifacts
version.txt
.claude/settings.local.json
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ IMAGE="$(REPO)/$(REPO_PATH):$(VERSION)"
docker: version
docker build --build-arg VERSION=$(VERSION) -t $(IMAGE) .

.PHONY: devshell # Open a developer shell in the docker env
devshell: docker
.PHONY: dev # Open a developer shell in the docker env
dev: docker
docker run --rm -it -v $$PWD:/opt --entrypoint /bin/bash $(IMAGE)

test-client: docker
Expand Down
49 changes: 48 additions & 1 deletion api/datalake_api/v0.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from flask import current_app as app
import os
import simplejson as json
from datetime import datetime, timezone
import decimal
from .querier import ArchiveQuerier, Cursor, InvalidCursor, \
DEFAULT_LOOKBACK_DAYS
from .fetcher import ArchiveFileFetcher
Expand All @@ -29,6 +31,38 @@

_archive_querier = None


def unix_ms_to_utc_iso(unix_ms):
    """Format an epoch-milliseconds value as an ISO-8601 UTC string.

    None is passed through unchanged. A Decimal input is converted to float
    first so that the division by 1000.0 below works. The result carries
    millisecond precision and ends in 'Z' rather than '+00:00'.
    """
    if unix_ms is None:
        return None

    millis = unix_ms
    if isinstance(millis, decimal.Decimal):
        millis = float(millis)

    # fromtimestamp expects seconds; the input is in milliseconds.
    moment = datetime.fromtimestamp(millis / 1000.0, tz=timezone.utc)
    stamp = moment.isoformat(timespec='milliseconds')
    return stamp.replace('+00:00', 'Z')


def add_utc_metadata(metadata):
    """Attach ISO-8601 UTC renderings of the start/end timestamps.

    Reads metadata['start'] and metadata['end'], converts each with
    unix_ms_to_utc_iso, and stores the results under 'start_iso' and
    'end_iso' on the same dict, which is then returned. A falsy metadata
    value (None or an empty dict) is returned untouched. Both keys are read
    with item access, so a missing key raises KeyError before anything is
    written.

    This is the place to add further API-level metadata fields.
    """
    if not metadata:
        return metadata

    # Compute every derived value before writing, so a failed lookup
    # leaves the dict unmodified.
    iso_fields = {
        key + '_iso': unix_ms_to_utc_iso(metadata[key])
        for key in ('start', 'end')
    }
    for name, value in iso_fields.items():
        metadata[name] = value
    return metadata


def _get_aws_kwargs():
kwargs = dict(
region_name=app.config.get('AWS_REGION'),
Expand Down Expand Up @@ -305,6 +339,14 @@ def files_get():
type: string
description: 16-byte blake2 hash of the file
content
start_iso:
type: string
description: the start time of the file as an ISO-8601
timestamp in UTC
end_iso:
type: string
description: the end time of the file as an ISO-8601
timestamp in UTC

next:
type: string
Expand Down Expand Up @@ -349,7 +391,10 @@ def files_get():
where=params.get('where'),
cursor=params.get('cursor'))

[r.update(http_url=_get_canonical_http_url(r)) for r in results]
for r in results:
r.update(http_url=_get_canonical_http_url(r))
r['metadata'] = add_utc_metadata(r['metadata'])

response = {
'records': results,
'next': _get_next_url(flask.request, results),
Expand Down Expand Up @@ -476,6 +521,7 @@ def file_get_metadata(file_id):
id: DatalakeAPIError
'''
f = _get_file(file_id)
f.metadata = add_utc_metadata(f.metadata)
return Response(json.dumps(f.metadata), content_type='application/json')


Expand Down Expand Up @@ -542,6 +588,7 @@ def latest_get(what, where):
params = _validate_latest_params(params)
f = _get_latest(what, where, params.get('lookback', DEFAULT_LOOKBACK_DAYS))
f.update(http_url=_get_canonical_http_url(f))
f['metadata'] = add_utc_metadata(f['metadata'])
return Response(json.dumps(f), content_type='application/json')


Expand Down
2 changes: 1 addition & 1 deletion api/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_version_from_pyver():
if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:
raise ImportError('You must install pyver to create a package')
else:
return 'noversion'
return '0.0.0'
version, version_info = pyver.get_version(pkg="datalake_api",
public=True)
return version
Expand Down
17 changes: 16 additions & 1 deletion api/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
from datetime import datetime, timezone
from decimal import Decimal
import pytest
import simplejson as json

Expand All @@ -32,7 +34,20 @@ def test_get_metadata(metadata_getter, s3_file_maker, random_metadata):
res = metadata_getter('12345')
assert res.status_code == 200
assert res.content_type == 'application/json'
assert json.loads(res.data) == random_metadata
res_data = json.loads(res.data)
for k, v in res_data.items():
if k == 'start_iso' or k == 'end_iso':
k_epoch = k.replace('_iso','')
v_epoch = res_data[k_epoch]
if v is None:
assert v == v_epoch

expected_v_iso = datetime.fromtimestamp(
v_epoch / 1000.0, tz=timezone.utc
).isoformat(timespec='milliseconds').replace('+00:00', 'Z')
assert v == expected_v_iso
else:
assert v == random_metadata[k]


def test_no_such_metadata(s3_bucket_maker, metadata_getter):
Expand Down
2 changes: 1 addition & 1 deletion ingester/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_version_from_pyver():
if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:
raise ImportError('You must install pyver to create a package')
else:
return 'noversion'
return '0.0.0'
version, version_info = pyver.get_version(pkg="datalake_ingester",
public=True)
return version
Expand Down
Loading