From 7189f7190c7753e29b2179f2ba74c553fdf3db4b Mon Sep 17 00:00:00 2001 From: Einar Omang Date: Fri, 20 Mar 2026 08:56:13 +0100 Subject: [PATCH] Schemas for the unstable module, and tests for schemas --- .github/workflows/build-and-test.yml | 21 ++ ExtractorUtils.Test/test_schemas.sh | 17 ++ .../Unstable/Configuration/Connection.cs | 6 +- .../Unstable/DestinationUtilsUnstable.cs | 8 +- schema/cognite_config.schema.json | 31 ++- .../unstable/base_cognite_config.schema.json | 192 ++++++++++++++++++ schema/unstable/connection_config.schema.json | 179 ++++++++++++++++ schema/unstable/retries_config.schema.json | 27 +++ 8 files changed, 465 insertions(+), 16 deletions(-) create mode 100755 ExtractorUtils.Test/test_schemas.sh create mode 100644 schema/unstable/base_cognite_config.schema.json create mode 100644 schema/unstable/connection_config.schema.json create mode 100644 schema/unstable/retries_config.schema.json diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 70f48b33..ecd8e2b6 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -54,3 +54,24 @@ jobs: uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} + + test_schemas: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v6 + - name: Install python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Install extractor uploader + env: + PYPI_ARTIFACTORY_USERNAME: ${{ secrets.ARTIFACTORY_READONLY_TOKEN_USER_PUBLIC_REPOS }} + PYPI_ARTIFACTORY_PASSWORD: ${{ secrets.ARTIFACTORY_READONLY_TOKEN_PUBLIC_REPOS }} + run: | + python -m pip install --upgrade pip + pip install cognite-extractor-publisher --extra-index-url "https://${PYPI_ARTIFACTORY_USERNAME}:${PYPI_ARTIFACTORY_PASSWORD}@cognite.jfrog.io/cognite/api/pypi/snakepit/simple" + + - name: Test schemas + run: ./ExtractorUtils.Test/test_schemas.sh diff --git a/ExtractorUtils.Test/test_schemas.sh 
b/ExtractorUtils.Test/test_schemas.sh new file mode 100755 index 00000000..a99c6d2f --- /dev/null +++ b/ExtractorUtils.Test/test_schemas.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -e -x + +target_schemas=( + "schema/base_config.schema.json" + "schema/unstable/connection_config.schema.json" + "schema/unstable/base_cognite_config.schema.json" +) + +for schema in "${target_schemas[@]}"; do + echo "Processing $schema" + publish-extractor schema --schema "$schema" --output bundled.schema.json + echo "Generating docs for $schema" + publish-extractor docs --schema bundled.schema.json + rm bundled.schema.json +done diff --git a/ExtractorUtils/Unstable/Configuration/Connection.cs b/ExtractorUtils/Unstable/Configuration/Connection.cs index fb518d7c..53066e7e 100644 --- a/ExtractorUtils/Unstable/Configuration/Connection.cs +++ b/ExtractorUtils/Unstable/Configuration/Connection.cs @@ -37,9 +37,9 @@ public class ConnectionConfig : VersionedConfig /// /// Configuration for the connection to CDF. /// - public CdfConnectionConfig CdfConnection { get => _cdfConnection; set { _cdfConnection = value ?? _cdfConnection; } } + public CdfConnectionConfig Connection { get => _connection; set { _connection = value ?? _connection; } } - private CdfConnectionConfig _cdfConnection = new CdfConnectionConfig(); + private CdfConnectionConfig _connection = new CdfConnectionConfig(); /// /// Register any necessary yaml converters. @@ -157,4 +157,4 @@ public class SdkLoggingConfig } -} \ No newline at end of file +} diff --git a/ExtractorUtils/Unstable/DestinationUtilsUnstable.cs b/ExtractorUtils/Unstable/DestinationUtilsUnstable.cs index de41a86f..ec3d5d52 100644 --- a/ExtractorUtils/Unstable/DestinationUtilsUnstable.cs +++ b/ExtractorUtils/Unstable/DestinationUtilsUnstable.cs @@ -110,7 +110,7 @@ public static IHttpClientBuilder ConfigureCogniteHttpClientHandlers(this IHttpCl { try { - var retryConfig = provider.GetService()?.CdfConnection.Retries ?? 
new Configuration.RetryConfig(); + var retryConfig = provider.GetService()?.Connection.Retries ?? new Configuration.RetryConfig(); return CogniteExtensions.GetRetryPolicy(provider.GetService>(), retryConfig.MaxRetries, retryConfig.MaxBackoffValue.Value); } @@ -123,7 +123,7 @@ public static IHttpClientBuilder ConfigureCogniteHttpClientHandlers(this IHttpCl { try { - var retryConfig = provider.GetService()?.CdfConnection.Retries ?? new Configuration.RetryConfig(); + var retryConfig = provider.GetService()?.Connection.Retries ?? new Configuration.RetryConfig(); return CogniteExtensions.GetTimeoutPolicy(retryConfig.TimeoutValue.Value); } catch (ObjectDisposedException) @@ -136,7 +136,7 @@ public static IHttpClientBuilder ConfigureCogniteHttpClientHandlers(this IHttpCl { try { - var certConfig = provider.GetService()?.CdfConnection?.SslCertificates; + var certConfig = provider.GetService()?.Connection?.SslCertificates; return GetClientHandler(certConfig); } catch (ObjectDisposedException) @@ -295,4 +295,4 @@ public static Client.Builder Configure( return builder; } } -} \ No newline at end of file +} diff --git a/schema/cognite_config.schema.json b/schema/cognite_config.schema.json index df297d9f..e26e9929 100644 --- a/schema/cognite_config.schema.json +++ b/schema/cognite_config.schema.json @@ -250,6 +250,13 @@ "description": "Maximum number of data modeling instances per get/create instance request", "max": 1000, "min": 1 + }, + "stream-records": { + "type": "integer", + "default": 1000, + "description": "Maximum number of stream records per get/create stream records request", + "max": 1000, + "min": 1 } }, "unevaluatedProperties": false @@ -260,51 +267,57 @@ "properties": { "time-series": { "type": "integer", - "default": 20, + "default": 5, "description": "Maximum number of parallel requests per timeseries operation", "min": 1 }, "assets": { "type": "integer", - "default": 20, + "default": 5, "description": "Maximum number of parallel requests per assets operation", 
"min": 1 }, "data-points": { "type": "integer", - "default": 10, + "default": 5, "description": "Maximum number of parallel requests per datapoints operation", "min": 1 }, "raw": { "type": "integer", - "default": 10, + "default": 5, "description": "Maximum number of parallel requests per raw operation", "min": 1 }, "ranges": { "type": "integer", - "default": 20, + "default": 5, "description": "Maximum number of parallel requests per get first/last datapoint operation", "min": 1 }, "events": { "type": "integer", - "default": 20, + "default": 5, "description": "Maximum number of parallel requests per events operation", "min": 1 }, "sequences": { "type": "integer", - "default": 10, + "default": 5, "description": "Maximum number of parallel requests per sequences operation", "min": 1 }, "instances": { "type": "integer", - "default": 4, + "default": 2, "description": "Maximum number of parallel requests per data modeling instances operation", "min": 1 + }, + "stream-records": { + "type": "integer", + "default": 4, + "description": "Maximum number of parallel requests per stream records operation", + "min": 1 } }, "unevaluatedProperties": false @@ -382,4 +395,4 @@ "unevaluatedProperties": false } } -} \ No newline at end of file +} diff --git a/schema/unstable/base_cognite_config.schema.json b/schema/unstable/base_cognite_config.schema.json new file mode 100644 index 00000000..4ec55153 --- /dev/null +++ b/schema/unstable/base_cognite_config.schema.json @@ -0,0 +1,192 @@ +{ + "$id": "base_cognite_config.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "unevaluatedProperties": false, + "properties": { + "cdf-chunking": { + "type": "object", + "description": "Configure chunking of data on requests to CDF. 
Note that increasing these may cause requests to fail due to limits in the API itself", + "properties": { + "time-series": { + "type": "integer", + "default": 1000, + "description": "Maximum number of timeseries per get/create timeseries request", + "max": 1000, + "min": 1 + }, + "assets": { + "type": "integer", + "default": 1000, + "description": "Maximum number of assets per get/create assets request", + "max": 1000, + "min": 1 + }, + "data-point-time-series": { + "type": "integer", + "default": 10000, + "description": "Maximum number of timeseries per datapoint create request", + "max": 10000, + "min": 1 + }, + "data-point-delete": { + "type": "integer", + "default": 10000, + "description": "Maximum number of ranges per delete datapoints request", + "max": 10000, + "min": 1 + }, + "data-point-list": { + "type": "integer", + "default": 100, + "description": "Maximum number of timeseries per datapoint read request. Used when getting the first point in a timeseries.", + "max": 100, + "min": 1 + }, + "data-points": { + "type": "integer", + "default": 100000, + "description": "Maximum number of datapoints per datapoints create request", + "max": 100000, + "min": 1 + }, + "data-points-gzip-limit": { + "type": "integer", + "default": 5000, + "description": "Minimum number of datapoints in request to switch to using gzip. Set to -1 to disable, and 0 to always enable (not recommended). The minimum HTTP packet size is generally 1500 bytes, so this should never be set below 100 for numeric datapoints. Even for larger packages gzip is efficient enough that packages are compressed below 1500 bytes. At 5000 it is always a performance gain. 
It can be set lower if bandwidth is a major issue", + "max": 100000, + "min": 1 + }, + "raw-rows": { + "type": "integer", + "default": 10000, + "description": "Maximum number of rows per request to cdf raw", + "max": 10000, + "min": 1 + }, + "raw-rows-delete": { + "type": "integer", + "default": 1000, + "description": "Maximum number of row keys per delete request to raw", + "max": 1000, + "min": 1 + }, + "data-point-latest": { + "type": "integer", + "default": 100, + "description": "Maximum number of timeseries per datapoint read latest request", + "max": 100, + "min": 1 + }, + "events": { + "type": "integer", + "default": 1000, + "description": "Maximum number of events per get/create events request", + "max": 1000, + "min": 1 + }, + "sequences": { + "type": "integer", + "default": 1000, + "description": "Maximum number of sequences per get/create sequences request", + "max": 1000, + "min": 1 + }, + "sequence-row-sequences": { + "type": "integer", + "default": 1000, + "description": "Maximum number of sequences per create sequence rows request", + "max": 1000, + "min": 1 + }, + "sequence-rows": { + "type": "integer", + "default": 10000, + "description": "Maximum number of sequence rows per sequence when creating rows", + "max": 10000, + "min": 1 + }, + "instances": { + "type": "integer", + "default": 1000, + "description": "Maximum number of data modeling instances per get/create instance request", + "max": 1000, + "min": 1 + }, + "stream-records": { + "type": "integer", + "default": 1000, + "description": "Maximum number of stream records per get/create stream records request", + "max": 1000, + "min": 1 + } + }, + "unevaluatedProperties": false + }, + "cdf-throttling": { + "type": "object", + "description": "Configure the maximum number of parallel requests for different CDF resources.", + "properties": { + "time-series": { + "type": "integer", + "default": 5, + "description": "Maximum number of parallel requests per timeseries operation", + "min": 1 + }, + 
"assets": { + "type": "integer", + "default": 5, + "description": "Maximum number of parallel requests per assets operation", + "min": 1 + }, + "data-points": { + "type": "integer", + "default": 5, + "description": "Maximum number of parallel requests per datapoints operation", + "min": 1 + }, + "raw": { + "type": "integer", + "default": 5, + "description": "Maximum number of parallel requests per raw operation", + "min": 1 + }, + "ranges": { + "type": "integer", + "default": 5, + "description": "Maximum number of parallel requests per get first/last datapoint operation", + "min": 1 + }, + "events": { + "type": "integer", + "default": 5, + "description": "Maximum number of parallel requests per events operation", + "min": 1 + }, + "sequences": { + "type": "integer", + "default": 5, + "description": "Maximum number of parallel requests per sequences operation", + "min": 1 + }, + "instances": { + "type": "integer", + "default": 2, + "description": "Maximum number of parallel requests per data modeling instances operation", + "min": 1 + }, + "stream-records": { + "type": "integer", + "default": 4, + "description": "Maximum number of parallel requests per stream records operation", + "min": 1 + } + }, + "unevaluatedProperties": false + }, + "nan-replacement": { + "type": "number", + "description": "Replacement for NaN values when writing to CDF. If left out, NaN values are skipped." 
+ } + } +} diff --git a/schema/unstable/connection_config.schema.json b/schema/unstable/connection_config.schema.json new file mode 100644 index 00000000..8dc1e4cf --- /dev/null +++ b/schema/unstable/connection_config.schema.json @@ -0,0 +1,179 @@ +{ + "$id": "connection_config.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "unevaluatedProperties": false, + "description": "Configuration for a connection to a data source or destination", + "properties": { + "project": { + "type": "string", + "description": "CDF project used for the connection." + }, + "base-url": { + "type": "string", + "description": "CDF base URL used for the connection, for example https://api.cognitedata.com or https://az-eastus-1.cognitedata.com" + }, + "integration": { + "type": "object", + "description": "Name of the integration to use for remote config and status reporting.", + "unevaluatedProperties": false, + "properties": { + "external-id": { + "type": "string", + "description": "External ID of the integration to use for remote config and status reporting." + } + }, + "required": [ + "external-id" + ] + }, + "authentication": { + "type": "object", + "unevaluatedProperties": false, + "description": "Authentication configuration for the CDF connection.", + "discriminatorProp": "type", + "oneOf": [ + { + "type": "object", + "description": "Configuration for connecting to CDF using client credentials.", + "unevaluatedProperties": false, + "title": "Client Credentials", + "properties": { + "type": { + "type": "string", + "const": "client-credentials", + "description": "Type of authentication, must be 'client-credentials' for connecting to CDF using client credentials." + }, + "client-id": { + "type": "string", + "description": "Client ID" + }, + "scopes": { + "oneOf": [ + { + "type": "string", + "description": "Space separated list of scopes to request when obtaining access tokens, for example https://api.cognitedata.com/.default." 
+ }, + { + "type": "array", + "description": "List of scopes to request when obtaining the access token.", + "items": { + "type": "string" + } + } + ] + }, + "client-secret": { + "type": "string", + "description": "Client secret" + }, + "token-url": { + "type": "string", + "description": "Token URL for obtaining access tokens, for example https://login.microsoftonline.com/{tenant-id}/oauth2/v2.0/token" + }, + "resource": { + "type": "string", + "description": "Resource identifier passed along with token requests." + }, + "audience": { + "type": "string", + "description": "Audience identifier passed along with token requests." + }, + "min-ttl": { + "type": "string", + "description": "Minimum time to live for the token. On the form 15s, 5m, 1h, etc.", + "pattern": "^[0-9]+(ms?|s|m|h|w|d)$" + } + }, + "required": [ + "type", + "client-id", + "scopes", + "client-secret", + "token-url" + ] + }, + { + "type": "object", + "description": "Configuration for connecting to CDF using a certificate.", + "unevaluatedProperties": false, + "title": "Client Certificate", + "properties": { + "type": { + "type": "string", + "const": "client-certificate", + "description": "Type of authentication, must be 'client-certificate' for connecting to CDF using a client certificate." + }, + "client-id": { + "type": "string", + "description": "Client ID" + }, + "scopes": { + "oneOf": [ + { + "type": "string", + "description": "Space separated list of scopes to request when obtaining access tokens, for example https://api.cognitedata.com/.default." + }, + { + "type": "array", + "description": "List of scopes to request when obtaining the access token.", + "items": { + "type": "string" + } + } + ] + }, + "path": { + "type": "string", + "description": "Path to the client certificate file." + }, + "authority-url": { + "type": "string", + "description": "Authority URL for obtaining access tokens." 
+ }, + "password": { + "type": "string", + "description": "Password for the client certificate, if it is encrypted." + } + }, + "required": [ + "type", + "client-id", + "scopes", + "path", + "authority-url" + ] + } + ] + }, + "connection": { + "type": "object", + "description": "Additional configuration for the connection.", + "unevaluatedProperties": false, + "properties": { + "retries": { + "$ref": "retries_config.schema.json" + }, + "ssl-certificates": { + "type": "object", + "description": "Configuration for SSL certificates to use for the connection.", + "unevaluatedProperties": false, + "properties": { + "verify": { + "type": "boolean", + "description": "Whether to verify SSL certificates for the connection.", + "default": true + }, + "allow-list": { + "type": "array", + "description": "List of SSL certificate thumbprints to allow for the connection, even if they are not valid according to the system's certificate store.", + "items": { + "type": "string" + } + } + } + } + } + } + } +} diff --git a/schema/unstable/retries_config.schema.json b/schema/unstable/retries_config.schema.json new file mode 100644 index 00000000..1282e6b1 --- /dev/null +++ b/schema/unstable/retries_config.schema.json @@ -0,0 +1,27 @@ +{ + "$id": "retries_config.schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "unevaluatedProperties": false, + "description": "Configuration for retries", + "properties": { + "max-retries": { + "type": "integer", + "description": "Maximum number of retries before giving up. 
Setting to -1 will retry indefinitely.", + "default": 10, + "minimum": -1 + }, + "max-backoff": { + "type": "string", + "description": "Maximum delay between retries, on the form 12s, 15m, 1h, etc.", + "default": "30s", + "pattern": "^[0-9]+(ms?|s|m|h|w|d)$" + }, + "timeout": { + "type": "string", + "description": "Timeout for each retry attempt, on the form 12s, 15m, 1h, etc.", + "default": "30s", + "pattern": "^[0-9]+(ms?|s|m|h|w|d)$" + } + } +}