Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,15 @@ tasks.distTar {
duplicatesStrategy DuplicatesStrategy.EXCLUDE
}

// Unpacks the distTar archive into build/docker so demo compose files can
// bind-mount the plugin binaries from there (see the volumes section of
// demo/compose-kraft-s3-minio.yml).
tasks.register('copyDistFiles', Copy) {
from tarTree(tasks.distTar.outputs.files.singleFile)
into layout.buildDirectory.dir('docker')
// Wipe the previous extraction first so stale files from an older build
// cannot linger next to the freshly unpacked distribution.
doFirst {
delete layout.buildDirectory.dir('docker')
}
// tarTree reads distTar's output archive, so distTar must run first.
dependsOn tasks.distTar
}

tasks.register('validateDependencies') {
doLast {
def dependencyVersions = [:]
Expand Down
46 changes: 28 additions & 18 deletions demo/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
##

kafka_container=kafka-ts
bootstrap_server=kafka:29092
bootstrap_server_host=localhost:9092

# Topic defaults
Expand All @@ -28,11 +27,13 @@ local_retention_bytes=1
retention_ms=360000000 # 100 hours
local_retention_ms=1000 # 1 second

kafka_image=apache/kafka:3.9.0

.PHONY: create_topic_by_size_ts
create_topic_by_size_ts:
docker exec -e KAFKA_OPTS= $(kafka_container) \
kafka-topics \
--bootstrap-server $(bootstrap_server) \
docker run --network host $(kafka_image) \
/opt/kafka/bin/kafka-topics.sh \
--bootstrap-server $(bootstrap_server_host) \
--create \
--config remote.storage.enable=true \
--config retention.ms=-1 \
Expand All @@ -45,9 +46,9 @@ create_topic_by_size_ts:

.PHONY: create_topic_by_time_ts
create_topic_by_time_ts:
docker exec -e KAFKA_OPTS= $(kafka_container) \
kafka-topics \
--bootstrap-server $(bootstrap_server) \
docker run --network host $(kafka_image) \
/opt/kafka/bin/kafka-topics.sh \
--bootstrap-server $(bootstrap_server_host) \
--create \
--config remote.storage.enable=true \
--config segment.bytes=$(segment) \
Expand All @@ -59,9 +60,9 @@ create_topic_by_time_ts:

.PHONY: create_topic_by_size_no_ts
create_topic_by_size_no_ts:
docker exec -e KAFKA_OPTS= $(kafka_container) \
kafka-topics \
--bootstrap-server $(bootstrap_server) \
docker run --network host $(kafka_image) \
/opt/kafka/bin/kafka-topics.sh \
--bootstrap-server $(bootstrap_server_host) \
--create \
--config retention.ms=-1 \
--config segment.bytes=$(segment) \
Expand All @@ -72,9 +73,9 @@ create_topic_by_size_no_ts:

.PHONY: create_topic_by_time_no_ts
create_topic_by_time_no_ts:
docker exec -e KAFKA_OPTS= $(kafka_container) \
kafka-topics \
--bootstrap-server $(bootstrap_server) \
docker run --network host $(kafka_image) \
/opt/kafka/bin/kafka-topics.sh \
--bootstrap-server $(bootstrap_server_host) \
--create \
--config segment.bytes=$(segment) \
--config retention.ms=$(retention_ms) \
Expand All @@ -88,8 +89,9 @@ throughput = 1000

.PHONY: fill_topic
fill_topic:
docker exec -e KAFKA_OPTS= $(kafka_container) \
kafka-producer-perf-test --producer-props bootstrap.servers=$(bootstrap_server) \
docker run --network host $(kafka_image) \
/opt/kafka/bin/kafka-producer-perf-test.sh \
--producer-props bootstrap.servers=$(bootstrap_server_host) \
--topic $(topic) \
--num-records $(num_records) \
--record-size $(record_size) \
Expand All @@ -115,13 +117,21 @@ run_gcs_fake_gcs_server:
# Start the Azurite (Azure Blob emulator) demo stack in the foreground.
run_azure_blob_azurite:
docker compose -f compose-azure-blob-azurite.yml up

# Build and extract the plugin distribution into build/docker so the KRaft
# compose file can bind-mount the binaries (see compose-kraft-s3-minio.yml).
# This is a command, not a file target: declare it .PHONY so it always runs
# and cannot be shadowed by a file named `docker_volume`.
.PHONY: docker_volume
docker_volume:
	cd .. && ./gradlew copyDistFiles

# Start the KRaft-mode Kafka + MinIO demo stack. Depends on docker_volume so
# the plugin binaries are unpacked into build/docker before compose mounts them.
.PHONY: run_kraft_s3_minio
run_kraft_s3_minio: docker_volume
docker compose -f compose-kraft-s3-minio.yml up

# Tear down all of the demo compose stacks, one `docker compose down` per
# scenario file, regardless of which scenario was started.
.PHONY: clean
clean:
docker compose -f compose-local-fs.yml down
docker compose -f compose-s3-aws.yml down
docker compose -f compose-s3-minio.yml down
docker compose -f compose-gcs-fake-gcs-server.yml down
docker compose -f compose-azure-blob-azurite.yml down
docker compose -f compose-kraft-s3-minio.yml down

.PHONY: show_local_data
show_local_data:
Expand All @@ -139,7 +149,7 @@ show_remote_data_s3_aws:

# List the tiered objects for $(topic) in the MinIO `test-bucket` using the
# mc client. Runs on the host network so localhost:9000 reaches the MinIO
# container. (The scraped diff showed both the old `--network=host` and new
# `--network host` command lines; only the merged, single command belongs here.)
.PHONY: show_remote_data_s3_minio
show_remote_data_s3_minio:
	docker run --rm --network host --entrypoint /usr/bin/bash quay.io/minio/mc \
		-c "mc alias set mycloud http://localhost:9000 minioadmin minioadmin && mc ls --recursive mycloud/test-bucket | grep $(topic)"

.PHONY: show_remote_data_gcs_fake_gcs_server
Expand All @@ -148,7 +158,7 @@ show_remote_data_gcs_fake_gcs_server:

.PHONY: show_remote_data_azurite
show_remote_data_azurite:
docker run --rm --network=host mcr.microsoft.com/azure-cli \
docker run --rm --network host mcr.microsoft.com/azure-cli \
az storage blob list --container-name test-container \
--account-name devstoreaccount1 \
--account-key Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== \
Expand All @@ -160,7 +170,7 @@ kcat_opts =
# kcat_opts = "-X fetch.wait.max.ms=100"
# Consume $(consume) records from $(topic) starting at offset $(offset) with
# kcat, printing `topic-partition-offset` for each record and exiting at the
# end of the partition (-e). Extra kcat flags can be supplied via $(kcat_opts).
# (The scraped diff showed both the old `--network=host` and new
# `--network host` command lines; only the merged, single command belongs here.)
.PHONY: consume
consume:
	docker run --rm --network host edenhill/kcat:1.7.1 \
		-b $(bootstrap_server_host) -C -t $(topic) -c $(consume) -o $(offset) -e -f '%t-%p-%o\n' $(kcat_opts)

.env:
Expand Down
6 changes: 6 additions & 0 deletions demo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,17 @@ You can also see the remote data in https://s3.console.aws.amazon.com/s3/buckets

### MinIO S3 as remote storage: `compose-s3-minio.yml`

> [!NOTE]
> There is a KRaft version of this scenario in `compose-kraft-s3-minio.yml`.
> The steps are the same; it just requires the binaries to be placed in a different location.
> The `make` commands will take care of this.

This scenario uses `S3Storage` with MinIO S3 as the remote storage.

```bash
# Start the compose
make run_s3_minio
# or make run_kraft_s3_minio

# Create the topic with any variation
make create_topic_by_size_ts
Expand Down
94 changes: 94 additions & 0 deletions demo/compose-kraft-s3-minio.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
##
# Copyright 2023 Aiven Oy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
services:
kafka:
build: ./kraft-s3-minio/
container_name: "kafka-ts"
depends_on:
- minio
ports:
- "9092:9092"
environment:
CLUSTER_ID: "4L6g3nShT-eMCtK--X86sw"
KAFKA_PROCESS_ROLES: "broker,controller"
KAFKA_NODE_ID: 1
KAFKA_CONTROLLER_QUORUM_VOTERS: "1@kafka:29093"
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT"
KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT_HOST://localhost:9092,PLAINTEXT://kafka:29092"
KAFKA_LISTENERS: "CONTROLLER://:29093,PLAINTEXT_HOST://:9092,PLAINTEXT://:29092"
KAFKA_INTER_BROKER_LISTENER_NAME: "PLAINTEXT"
KAFKA_CONTROLLER_LISTENER_NAMES: "CONTROLLER"
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_OFFSETS_TOPIC_NUM_PARTITIONS: 1
KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
KAFKA_AUTO_CREATE_TOPICS_ENABLE: false
# Increase Tiered Storage log level
KAFKA_LOG4J_LOGGERS: "io.aiven.kafka.tieredstorage=DEBUG"
# Tweak retention checking
KAFKA_LOG_RETENTION_CHECK_INTERVAL_MS: 10000
# Enable Tiered Storage
KAFKA_REMOTE_LOG_STORAGE_SYSTEM_ENABLE: true
KAFKA_REMOTE_LOG_MANAGER_TASK_INTERVAL_MS: 5000
# Remote metadata manager
KAFKA_REMOTE_LOG_METADATA_MANAGER_CLASS_NAME: "org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManager"
KAFKA_REMOTE_LOG_METADATA_MANAGER_LISTENER_NAME: "PLAINTEXT"
KAFKA_RLMM_CONFIG_REMOTE_LOG_METADATA_TOPIC_REPLICATION_FACTOR: 1
# Remote storage manager
KAFKA_REMOTE_LOG_STORAGE_MANAGER_CLASS_PATH: "/tiered-storage-for-apache-kafka/core/*:/tiered-storage-for-apache-kafka/s3/*"
KAFKA_REMOTE_LOG_STORAGE_MANAGER_CLASS_NAME: "io.aiven.kafka.tieredstorage.RemoteStorageManager"
KAFKA_RSM_CONFIG_CHUNK_SIZE: 4194304 # 4 MiB
KAFKA_RSM_CONFIG_FETCH_CHUNK_CACHE_CLASS: "io.aiven.kafka.tieredstorage.fetch.cache.DiskChunkCache"
KAFKA_RSM_CONFIG_FETCH_CHUNK_CACHE_PATH: /var/lib/kafka/tiered-storage-cache
KAFKA_RSM_CONFIG_FETCH_CHUNK_CACHE_SIZE: 1073741824 # 1 GiB
KAFKA_RSM_CONFIG_FETCH_CHUNK_CACHE_PREFETCH_MAX_SIZE: 16777216 # 16 MiB
KAFKA_RSM_CONFIG_FETCH_CHUNK_CACHE_RETENTION_MS: 600000 # 600000 ms
KAFKA_RSM_CONFIG_CUSTOM_METADATA_FIELDS_INCLUDE: "REMOTE_SIZE"
# Storage backend
KAFKA_RSM_CONFIG_KEY_PREFIX: "tiered-storage-demo/"
KAFKA_RSM_CONFIG_STORAGE_BACKEND_CLASS: "io.aiven.kafka.tieredstorage.storage.s3.S3Storage"
KAFKA_RSM_CONFIG_STORAGE_S3_ENDPOINT_URL: "http://minio:9000"
KAFKA_RSM_CONFIG_STORAGE_S3_BUCKET_NAME: "test-bucket"
KAFKA_RSM_CONFIG_STORAGE_S3_REGION: "us-east-1"
KAFKA_RSM_CONFIG_STORAGE_S3_PATH_STYLE_ACCESS_ENABLED: true
KAFKA_RSM_CONFIG_STORAGE_AWS_ACCESS_KEY_ID: "minioadmin"
KAFKA_RSM_CONFIG_STORAGE_AWS_SECRET_ACCESS_KEY: "minioadmin"
volumes:
# These paths depend on gradle copyDistFiles task run on root directory
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to document somewhere that you need to first run this task before starting the compose?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is defined on the Makefile, anyway I added a note about it to the demo/README, PTAL

- ./../build/docker/tiered-storage-for-apache-kafka-0.0.1-SNAPSHOT:/tiered-storage-for-apache-kafka/core
- ./../storage/s3/build/docker/s3-0.0.1-SNAPSHOT:/tiered-storage-for-apache-kafka/s3
# NOTE(review): the gcs and azure artifacts were previously mounted from
# storage/s3/build/docker, which looks like a copy-paste slip — each module
# builds its own dist. Fixed to the per-module build/docker directories;
# confirm each module's copyDistFiles task produces these paths.
- ./../storage/gcs/build/docker/gcs-0.0.1-SNAPSHOT:/tiered-storage-for-apache-kafka/gcs
- ./../storage/azure/build/docker/azure-0.0.1-SNAPSHOT:/tiered-storage-for-apache-kafka/azure

minio:
image: quay.io/minio/minio
ports:
- "9000:9000"
- "9090:9090"
command: server /data --console-address ":9090"

minio-createbucket:
image: quay.io/minio/mc
restart: "no"
depends_on:
- minio
entrypoint: >
/bin/sh -c "
/usr/bin/mc config host add local http://minio:9000 minioadmin minioadmin;
/usr/bin/mc mb local/test-bucket;
exit 0;
"
8 changes: 8 additions & 0 deletions demo/kraft-s3-minio/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Demo broker image for the KRaft + MinIO scenario: Apache Kafka with a
# pre-created directory for the Tiered Storage disk chunk cache.
FROM apache/kafka:3.9.0

# Build-time version argument — currently unused in this Dockerfile.
# NOTE(review): consider wiring it into the base image tag or dropping it.
ARG _VERSION

# Switch to the non-root user so the cache directory is owned by the user
# the broker runs as (assumes `appuser` is defined by the apache/kafka base
# image — TODO confirm).
USER appuser

# Directory referenced by KAFKA_RSM_CONFIG_FETCH_CHUNK_CACHE_PATH in
# compose-kraft-s3-minio.yml; `-p` makes the step idempotent if the
# directory (or a parent) already exists.
RUN mkdir -p /var/lib/kafka/tiered-storage-cache