Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions .github/workflows/actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ on: [push, pull_request]

jobs:
test-client:
runs-on: ubuntu-20.04
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
matrix:
# Quote every version: a bare 3.10 is a YAML float and would be read as 3.1.
python: ["3.8", "3.9", "3.10", "3.12"]
extras: ["test", "test,queuable,sentry"]
steps:
- name: Setup Python
uses: actions/setup-python@v2.2.2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
- name: Check out repository code
Expand All @@ -20,13 +21,14 @@ jobs:
- name: Test
working-directory: ./client
run: |
pip install -e .[${{ matrix.extras }}]
py.test
pip --version
pip install --verbose .[${{ matrix.extras }}]
python -m pytest --import-mode=importlib ./datalake/tests/
test-docker:
runs-on: ubuntu-latest
steps:
- name: Check out repository code
uses: actions/checkout@v2
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Test
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,6 @@ target/

# build artifacts
version.txt

# Claude Code
.claude/
40 changes: 22 additions & 18 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,33 @@ ENV LC_ALL C.UTF-8

# TODO: keep requirements in one place
RUN pip install \
blinker>=1.4 \
boto3>=1.1.3 \
click>=5.1 \
Flask>=0.10.1 \
flask-swagger>=0.2.14 \
memoized_property>=1.0.1 \
python-dateutil>=2.4.2 \
python-dotenv>=0.1.3 \
pytz>=2015.4 \
sentry-sdk[flask]>=0.19.5 \
requests>=2.5 \
simplejson>=3.3.1 \
six>=1.10.0 \
'blinker>=1.4' \
'boto3==1.35.41' \
'botocore==1.35.64' \
'click>=5.1' \
'datalake<2' \
'Flask>=0.10.1' \
'flask-swagger>=0.2.14' \
'memoized_property>=1.0.1' \
'pyinotify>=0.9.4' \
'python-dateutil>=2.4.2' \
'python-dotenv>=0.1.3' \
'pytz>=2015.4' \
'raven>=5.0.0' \
'requests>=2.5' \
'sentry-sdk[flask]>=0.19.5' \
'simplejson>=3.3.1' \
'six>=1.10.0' \
# test requirements
'flake8>=2.5.0,<4.1' \
'freezegun<1' \
'moto<3' \
'moto>=5,<6' \
'pytest<8' \
'pytest-cov>=2.5.1,<4' \
'pytest-xdist' \
'responses<0.22.0' \
pyinotify>=0.9.4, \
raven>=5.0.0 \
'tox>4,<5' \
'datalake<2'
'tox>4,<5'


RUN mkdir -p /opt/
COPY . /opt/
Expand Down
31 changes: 26 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,27 @@ IMAGE="$(REPO)/$(REPO_PATH):$(VERSION)"
docker: version
docker build --build-arg VERSION=$(VERSION) -t $(IMAGE) .

.PHONY: devshell # Open a developer shell in the docker env
devshell: docker
.PHONY: dev # Open a developer shell in the docker env
dev: docker
docker run --rm -it -v $$PWD:/opt --entrypoint /bin/bash $(IMAGE)

test-client: docker
docker run --rm --entrypoint tox $(IMAGE) -c /opt/client/tox.ini
docker run --rm -t --entrypoint tox $(IMAGE) -c /opt/client/tox.ini

test-ingester: docker
docker run --rm --entrypoint py.test $(IMAGE) ingester
docker run --rm -t --entrypoint pytest $(IMAGE) /opt/ingester -svvx

test-api: docker
docker run --rm --entrypoint py.test $(IMAGE) api
docker run --rm -t --entrypoint pytest $(IMAGE) /opt/api -svvx

testp-client: docker
docker run --rm -t --entrypoint /bin/bash $(IMAGE) -c "cd /opt/client && pytest -svvx -n auto"

testp-ingester: docker
docker run --rm -t --entrypoint pytest $(IMAGE) /opt/ingester -svvx -n auto

testp-api: docker
docker run --rm -t --entrypoint pytest $(IMAGE) /opt/api -svvx -n auto

.PHONY: test # Run the tests
test:
Expand All @@ -27,6 +36,18 @@ test:
$(MAKE) test-ingester
$(MAKE) test-api

.PHONY: testp # Run all tests in parallel with pytest-xdist
testp: docker
docker run --rm -t --entrypoint /bin/bash $(IMAGE) -c "\
set -e && \
echo '==> Running client tests...' && \
cd /opt/client && pytest -svvx -n auto && \
echo '==> Running ingester tests...' && \
cd /opt/ingester && pytest -svvx -n auto && \
echo '==> Running API tests...' && \
cd /opt/api && pytest -svvx -n auto && \
echo '==> All tests passed!'"

.PHONY: push
push:
docker push $(IMAGE)
Expand Down
49 changes: 48 additions & 1 deletion api/datalake_api/v0.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from flask import current_app as app
import os
import simplejson as json
from datetime import datetime, timezone
import decimal
from .querier import ArchiveQuerier, Cursor, InvalidCursor, \
DEFAULT_LOOKBACK_DAYS
from .fetcher import ArchiveFileFetcher
Expand All @@ -29,6 +31,38 @@

_archive_querier = None


def unix_ms_to_utc_iso(unix_ms):
    """Format a Unix epoch timestamp given in milliseconds as ISO-8601 UTC.

    The result has millisecond precision and a trailing ``Z`` instead of
    ``+00:00``, e.g. ``1970-01-01T00:00:01.500Z``. ``None`` is passed through
    unchanged. ``decimal.Decimal`` input is converted to ``float`` first,
    because a ``Decimal`` cannot be divided by a ``float``.
    """
    if unix_ms is None:
        return None

    seconds = (
        float(unix_ms) if isinstance(unix_ms, decimal.Decimal) else unix_ms
    ) / 1000.0
    moment = datetime.fromtimestamp(seconds, tz=timezone.utc)
    stamp = moment.isoformat(timespec='milliseconds')
    return stamp.replace('+00:00', 'Z')


def add_utc_metadata(metadata):
    """Add ISO-8601 UTC timestamp fields to a metadata dict.

    Sets ``start_iso`` and ``end_iso`` from the ``start`` and ``end`` epoch
    timestamps, converted by ``unix_ms_to_utc_iso`` (millisecond precision).
    The dict is modified in place and also returned.

    A ``start`` or ``end`` that is ``None`` or missing yields ``None`` for the
    matching ``*_iso`` field instead of raising ``KeyError``. Falsy input
    (``None`` or an empty dict) is returned untouched.

    Can be expanded to add any api-level metadata.
    """
    if not metadata:
        return metadata

    # .get() rather than [] so a record stored without one of the timestamps
    # still serializes instead of failing the whole request.
    start_iso = unix_ms_to_utc_iso(metadata.get('start'))
    end_iso = unix_ms_to_utc_iso(metadata.get('end'))

    metadata['start_iso'] = start_iso
    metadata['end_iso'] = end_iso
    return metadata


def _get_aws_kwargs():
kwargs = dict(
region_name=app.config.get('AWS_REGION'),
Expand Down Expand Up @@ -305,6 +339,14 @@ def files_get():
type: string
description: 16-byte blake2 hash of the file
content
start_iso:
type: string
description: the start time of the file in ISO
format UTC iso timezone
end_iso:
type: string
description: the end time of the file in ISO
format UTC iso timezone

next:
type: string
Expand Down Expand Up @@ -349,7 +391,10 @@ def files_get():
where=params.get('where'),
cursor=params.get('cursor'))

[r.update(http_url=_get_canonical_http_url(r)) for r in results]
for r in results:
r.update(http_url=_get_canonical_http_url(r))
r['metadata'] = add_utc_metadata(r['metadata'])

response = {
'records': results,
'next': _get_next_url(flask.request, results),
Expand Down Expand Up @@ -476,6 +521,7 @@ def file_get_metadata(file_id):
id: DatalakeAPIError
'''
f = _get_file(file_id)
f.metadata = add_utc_metadata(f.metadata)
return Response(json.dumps(f.metadata), content_type='application/json')


Expand Down Expand Up @@ -542,6 +588,7 @@ def latest_get(what, where):
params = _validate_latest_params(params)
f = _get_latest(what, where, params.get('lookback', DEFAULT_LOOKBACK_DAYS))
f.update(http_url=_get_canonical_http_url(f))
f['metadata'] = add_utc_metadata(f['metadata'])
return Response(json.dumps(f), content_type='application/json')


Expand Down
2 changes: 1 addition & 1 deletion api/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_version_from_pyver():
if 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:
raise ImportError('You must install pyver to create a package')
else:
return 'noversion'
return '0.0.0'
version, version_info = pyver.get_version(pkg="datalake_api",
public=True)
return version
Expand Down
31 changes: 14 additions & 17 deletions api/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
ClientError as BotoClientError,
NoCredentialsError
)
from moto import mock_dynamodb2
from moto import mock_aws

from datalake_api import app as datalake_api
from datalake.tests import * # noqa
Expand All @@ -33,30 +33,27 @@
# This will cause moto to fail
# But more critically, may impact production systems
# So we test for real credentials and fail hard if they exist
sts = boto3.client('sts')
try:
sts.get_caller_identity()
pytest.exit("Real AWS credentials detected, aborting", 3)
except NoCredentialsError:
pass # no credentials are good
# Session fixture runs in pytest setup rather than at import time
@pytest.fixture(scope='session', autouse=True)
def verify_no_aws_credentials():
    """Abort the test session if real AWS credentials are usable.

    Runs once per session, automatically. It asks STS for the caller identity:
    ``NoCredentialsError`` means no credentials are configured and the session
    continues. If the call succeeds, pytest exits with return code 3. Any
    other exception propagates.
    """
    identity_client = boto3.client('sts')
    try:
        identity_client.get_caller_identity()
    except NoCredentialsError:
        # No credentials available: safe to proceed with mocked AWS.
        return
    pytest.exit("Real AWS credentials detected, aborting", 3)


def get_client():
from datalake_api import settings
datalake_api.app.config.from_object(settings)

datalake_api.app.config['TESTING'] = True
datalake_api.app.config['AWS_REGION'] = 'us-east-1'
datalake_api.app.config['AWS_ACCESS_KEY_ID'] = 'abc'
datalake_api.app.config['AWS_SECRET_ACCESS_KEY'] = '123'

# TODO: Sigh. The api caches the archive_fetcher and s3_bucket, which is
# the right thing. However, because moto<3 still uses httpretty, and
# because httpretty wreaks havoc on the python socket code, these cached
# parts end up in a bad state after their first use. The right thing to do
# here is to upgrade moto. But for that we will also have to move
# everything from boto to boto3. This is a near-term goal. But first lets
# get everything off of python2.
for a in ('archive_fetcher', 's3_bucket'):
for a in ('archive_fetcher', 's3_bucket', 'dynamodb'):
try:
delattr(datalake_api.app, a)
except AttributeError:
Expand All @@ -71,15 +68,15 @@ def client():

@pytest.fixture
def dynamodb(request):
mock = mock_dynamodb2()
mock = mock_aws()
mock.start()

def tear_down():
mock.stop()
request.addfinalizer(tear_down)

return boto3.resource('dynamodb',
region_name='us-west-2',
region_name='us-east-1',
aws_secret_access_key='123',
aws_access_key_id='abc')

Expand Down
17 changes: 16 additions & 1 deletion api/tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
from datetime import datetime, timezone
from decimal import Decimal
import pytest
import simplejson as json

Expand All @@ -32,7 +34,20 @@ def test_get_metadata(metadata_getter, s3_file_maker, random_metadata):
res = metadata_getter('12345')
assert res.status_code == 200
assert res.content_type == 'application/json'
assert json.loads(res.data) == random_metadata
res_data = json.loads(res.data)
for k, v in res_data.items():
if k == 'start_iso' or k == 'end_iso':
k_epoch = k.replace('_iso','')
v_epoch = res_data[k_epoch]
if v is None:
assert v == v_epoch

expected_v_iso = datetime.fromtimestamp(
v_epoch / 1000.0, tz=timezone.utc
).isoformat(timespec='milliseconds').replace('+00:00', 'Z')
assert v == expected_v_iso
else:
assert v == random_metadata[k]


def test_no_such_metadata(s3_bucket_maker, metadata_getter):
Expand Down
22 changes: 3 additions & 19 deletions client/datalake/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


try:
from moto import mock_s3
from moto import mock_aws
import boto3
from six.moves.urllib.parse import urlparse
import json
Expand Down Expand Up @@ -125,24 +125,8 @@ def get_tmpfile(content):


@pytest.fixture
def aws_connector(request):

def create_connection(mocker, connector):
mock = mocker()
mock.start()

def tear_down():
mock.stop()
request.addfinalizer(tear_down)

return connector()

return create_connection


@pytest.fixture
def s3_connection(aws_connector):
with mock_s3():
def s3_connection():
with mock_aws():
yield boto3.resource('s3')


Expand Down
Loading
Loading