-
Notifications
You must be signed in to change notification settings - Fork 21
Add sampling logic #805
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Add sampling logic #805
Changes from 18 commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
a480739
add new sampling configuration keys and converters
quinnmil 9429452
add types to all config methods
quinnmil 23f18d2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] b099b2a
use 0-100 ints instead of floats
quinnmil 9d86e33
add sampler module
quinnmil 4addefd
add sampler tests
quinnmil 3eca717
improve sampler tests
quinnmil 7915e95
fix always sample endpoint test
quinnmil d396107
remove unnecessary logging
quinnmil 93c6f73
remove debug logging
quinnmil f9d92f2
short circuit sampling logic if no sampling configured
quinnmil 2ec02ec
simplify pattern match
quinnmil 0ee5b33
rename config option to X_sample_rate
quinnmil e643c94
check endpoint/job sample rate in any_sampling
quinnmil bea9fa8
add test assertion
quinnmil fd31e4e
add test assertion
quinnmil a99de8b
refactor sampler to better handle legacy ignore and prioritization
quinnmil eeefff4
de-couple from TrackedRequest
quinnmil ed7d5d0
simplify sampler logic to rely on configuration
quinnmil 574a981
flatten control flow
quinnmil 41e62a0
use prefix to check ignores
quinnmil File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,176 @@ | ||
| # coding=utf-8 | ||
|
|
||
| import logging | ||
| import random | ||
| from typing import Dict, Optional, Tuple | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
class Sampler:
    """
    Handles sampling decision logic for Scout APM.

    Sample rates are percentages compared against ``random.randint(1, 100)``,
    so a rate of 0 never samples and a rate of 100 always samples.

    For an operation named ``Controller/<name>`` or ``Job/<name>`` the
    effective rate is the first of the following that applies:

    1. the rate configured for exactly ``<name>``
    2. 0 when ``<name>`` is listed in the endpoint/job ignore set
    3. the rate configured for the longest matching prefix of ``<name>``
    4. 0 when ``<name>`` starts with a legacy ``ignore`` entry
    5. 0 when the caller passes ``is_ignored=True``
    6. the endpoint/job default rate, when one is configured
    7. the global sample rate

    Any other operation uses the global sample rate.
    """

    # Constants for operation type detection
    CONTROLLER_PREFIX = "Controller/"
    JOB_PREFIX = "Job/"

    def __init__(self, config):
        """
        Initialize sampler with Scout configuration.

        Collection-valued options that come back as ``None`` are treated as
        empty so that an unset option cannot break construction.

        Args:
            config: ScoutConfig instance containing sampling configuration
        """
        self.config = config
        self.sample_rate = config.value("sample_rate")
        self.sample_endpoints = config.value("sample_endpoints") or {}
        self.sample_jobs = config.value("sample_jobs") or {}
        self.ignore_endpoints = set(config.value("ignore_endpoints") or ())
        self.ignore_jobs = set(config.value("ignore_jobs") or ())
        self.endpoint_sample_rate = config.value("endpoint_sample_rate")
        self.job_sample_rate = config.value("job_sample_rate")
        self.legacy_ignore = config.value("ignore") or []

    def _any_sampling(self) -> bool:
        """
        Check if any sampling is enabled.

        The legacy ``ignore`` list is not part of this check.

        Returns:
            True if the global rate is below 100 or any endpoint/job
            sampling or ignore option is configured, otherwise False
        """
        # bool() so callers get a real boolean rather than whichever
        # dict/set operand happened to be truthy.
        return bool(
            self.sample_rate < 100
            or self.sample_endpoints
            or self.sample_jobs
            or self.ignore_endpoints
            or self.ignore_jobs
            or self.endpoint_sample_rate is not None
            or self.job_sample_rate is not None
        )

    def _find_exact_match(
        self, name: str, patterns: Dict[str, int]
    ) -> Optional[int]:
        """
        Finds the exact sample rate for a given operation name.

        Args:
            name: The operation name to match
            patterns: Dictionary of pattern to sample rate mappings

        Returns:
            The sample rate for the matching pattern or None if no match found
        """
        return patterns.get(name)

    def _find_prefix_match(
        self, name: str, patterns: Dict[str, int]
    ) -> Optional[int]:
        """
        Find the rate of the longest configured prefix of ``name``.

        Args:
            name: The operation name to match
            patterns: Dictionary of prefix to sample rate mappings

        Returns:
            The sample rate of the longest matching prefix, or None if no
            prefix matches
        """
        longest = max(
            (prefix for prefix in patterns if name.startswith(prefix)),
            key=len,
            default=None,
        )
        return None if longest is None else patterns[longest]

    def _is_legacy_ignored(self, name: str) -> bool:
        """Check if name starts with any legacy ignore pattern."""
        # str.startswith accepts a tuple of candidates; an empty tuple
        # never matches.
        return name.startswith(tuple(self.legacy_ignore))

    def _get_operation_type_and_name(
        self, operation: str
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Determines if an operation is an endpoint or job and extracts its name.

        Args:
            operation: The full operation string (e.g. "Controller/users/show")

        Returns:
            Tuple of (type, name) where type is either 'endpoint' or 'job',
            and name is the operation name without the prefix. Both are None
            when the operation has neither prefix.
        """
        if operation.startswith(self.CONTROLLER_PREFIX):
            return "endpoint", operation[len(self.CONTROLLER_PREFIX) :]
        if operation.startswith(self.JOB_PREFIX):
            return "job", operation[len(self.JOB_PREFIX) :]
        return None, None

    def get_effective_sample_rate(
        self, operation: str, is_ignored: bool = False
    ) -> int:
        """
        Determines the effective sample rate for a given operation.

        Args:
            operation: The operation string (e.g. "Controller/users/show")
            is_ignored: Whether the caller has flagged this request as ignored

        Returns:
            The sample rate that applies to the operation, following the
            precedence listed in the class docstring
        """
        op_type, name = self._get_operation_type_and_name(operation)

        # Unknown operation types (and empty names) only have the global rate.
        if not op_type or not name:
            return self.sample_rate

        if op_type == "endpoint":
            patterns = self.sample_endpoints
            ignored_set = self.ignore_endpoints
            default_rate = self.endpoint_sample_rate
        else:
            patterns = self.sample_jobs
            ignored_set = self.ignore_jobs
            default_rate = self.job_sample_rate

        # Check for exact match in sampling patterns
        exact_rate = self._find_exact_match(name, patterns)
        if exact_rate is not None:
            return exact_rate

        # Check for exact endpoint/job ignores
        if name in ignored_set:
            return 0

        # Check for prefix match in sampling patterns
        prefix_rate = self._find_prefix_match(name, patterns)
        if prefix_rate is not None:
            return prefix_rate

        # Check legacy ignore patterns
        if self._is_legacy_ignored(name):
            return 0

        # Check if request is explicitly ignored via the
        # is_ignored() tracked_request method.
        if is_ignored:
            return 0

        # Use operation-specific default rate if available
        if default_rate is not None:
            return default_rate

        # Fall back to global sample rate
        return self.sample_rate

    def should_sample(self, operation: str, is_ignored: bool) -> bool:
        """
        Determines if an operation should be sampled.

        If no sampling is enabled, always return True; in that case
        ``is_ignored`` is not consulted.

        Args:
            operation: The operation string (e.g. "Controller/users/show"
                or "Job/mailer")
            is_ignored: Whether the caller has flagged this request as ignored

        Returns:
            Boolean indicating whether to sample this operation
        """
        if not self._any_sampling():
            return True
        return random.randint(1, 100) <= self.get_effective_sample_rate(
            operation, is_ignored
        )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,169 @@ | ||
| # coding=utf-8 | ||
|
|
||
| from unittest import mock | ||
|
|
||
| import pytest | ||
|
|
||
| from scout_apm.core.config import ScoutConfig | ||
| from scout_apm.core.sampler import Sampler | ||
|
|
||
|
|
||
@pytest.fixture
def config():
    """Yield a ScoutConfig with sampling options set, then reset all config."""
    endpoint_rates = {
        "users": 100,  # Always sample
        "test": 20,  # 20% sampling for test endpoints
        "health": 0,  # Never sample health checks
    }
    job_rates = {
        "critical-job": 100,  # Always sample
        "batch": 30,  # 30% sampling for batch jobs
    }
    scout_config = ScoutConfig()
    ScoutConfig.set(
        sample_rate=50,  # 50% global sampling
        sample_endpoints=endpoint_rates,
        sample_jobs=job_rates,
        ignore_endpoints=["metrics", "ping", "users/test"],
        ignore_jobs=["test-job"],
        endpoint_sample_rate=70,  # 70% sampling for unspecified endpoints
        job_sample_rate=40,  # 40% sampling for unspecified jobs
    )
    yield scout_config
    ScoutConfig.reset_all()
|
|
||
|
|
||
@pytest.fixture
def sampler(config):
    """Build a Sampler from the ``config`` fixture."""
    instance = Sampler(config)
    return instance
|
|
||
|
|
||
def test_should_sample_endpoint_always(sampler):
    """An endpoint configured at 100 is sampled."""
    sampled = sampler.should_sample("Controller/users", False)
    assert sampled is True
|
|
||
|
|
||
def test_should_sample_endpoint_never(sampler):
    """Endpoints with a rate of 0 or an exact ignore entry are not sampled."""
    for operation in ("Controller/health/check", "Controller/users/test"):
        assert sampler.should_sample(operation, False) is False
|
|
||
|
|
||
def test_should_sample_endpoint_ignored(sampler):
    """An endpoint listed in ``ignore_endpoints`` is not sampled."""
    sampled = sampler.should_sample("Controller/metrics", False)
    assert sampled is False
|
|
||
|
|
||
def test_should_sample_endpoint_partial(sampler):
    """The 20 rate of the ``test`` prefix accepts a draw of 10, not 30."""
    for draw, expected in ((10, True), (30, False)):
        with mock.patch("random.randint", return_value=draw):
            sampled = sampler.should_sample("Controller/test/endpoint", False)
            assert sampled is expected
|
|
||
|
|
||
def test_should_sample_job_always(sampler):
    """A job configured at 100 is sampled."""
    sampled = sampler.should_sample("Job/critical-job", False)
    assert sampled is True
|
|
||
|
|
||
def test_should_sample_job_never(sampler):
    """A job listed in ``ignore_jobs`` is not sampled."""
    sampled = sampler.should_sample("Job/test-job", False)
    assert sampled is False
|
|
||
|
|
||
def test_should_sample_job_partial(sampler):
    """The 30 rate of the ``batch`` prefix accepts a draw of 10, not 40."""
    for draw, expected in ((10, True), (40, False)):
        with mock.patch("random.randint", return_value=draw):
            sampled = sampler.should_sample("Job/batch-process", False)
            assert sampled is expected
|
|
||
|
|
||
def test_should_sample_unknown_operation(sampler):
    """An operation without a known prefix uses the global rate of 50."""
    for draw, expected in ((10, True), (60, False)):
        with mock.patch("random.randint", return_value=draw):
            sampled = sampler.should_sample("Unknown/operation", False)
            assert sampled is expected
|
|
||
|
|
||
def test_should_sample_no_sampling_enabled(config):
    """With every sampling option cleared, all operations are sampled."""
    # Return config to defaults
    cleared = {
        "sample_rate": 100,
        "sample_endpoints": {},
        "sample_jobs": {},
        "ignore_endpoints": [],
        "ignore_jobs": [],
        "endpoint_sample_rate": None,
        "job_sample_rate": None,
    }
    config.set(**cleared)
    sampler = Sampler(config)
    for operation in ("Controller/any_endpoint", "Job/any_job"):
        assert sampler.should_sample(operation, False) is True
|
|
||
|
|
||
def test_should_sample_endpoint_default_rate(sampler):
    """An unlisted endpoint uses ``endpoint_sample_rate`` (70)."""
    for draw, expected in ((60, True), (80, False)):
        with mock.patch("random.randint", return_value=draw):
            sampled = sampler.should_sample("Controller/unspecified", False)
            assert sampled is expected
|
|
||
|
|
||
def test_should_sample_job_default_rate(sampler):
    """An unlisted job uses ``job_sample_rate`` (40)."""
    for draw, expected in ((30, True), (50, False)):
        with mock.patch("random.randint", return_value=draw):
            sampled = sampler.should_sample("Job/unspecified-job", False)
            assert sampled is expected
|
|
||
|
|
||
def test_should_sample_endpoint_fallback_to_global_rate(config):
    """Without an endpoint default, unlisted endpoints use the global rate."""
    config.set(endpoint_sample_rate=None)
    fallback_sampler = Sampler(config)
    for draw, expected in ((40, True), (60, False)):
        with mock.patch("random.randint", return_value=draw):
            sampled = fallback_sampler.should_sample(
                "Controller/unspecified", False
            )
            assert sampled is expected
|
|
||
|
|
||
def test_should_sample_job_fallback_to_global_rate(config):
    """Without a job default, unlisted jobs use the global rate."""
    config.set(job_sample_rate=None)
    fallback_sampler = Sampler(config)
    for draw, expected in ((40, True), (60, False)):
        with mock.patch("random.randint", return_value=draw):
            sampled = fallback_sampler.should_sample("Job/unspecified-job", False)
            assert sampled is expected
|
|
||
|
|
||
def test_should_handle_legacy_ignore_with_specific_sampling(config):
    """Test that specific sampling rates override legacy ignore patterns."""
    # The explicit rate for foo/bar should win over the legacy "foo" ignore.
    endpoint_rates = {"foo/bar": 50}
    config.set(ignore=["foo"], sample_endpoints=endpoint_rates)
    legacy_sampler = Sampler(config)

    # foo/bar should be sampled at 50%
    for draw, expected in ((40, True), (60, False)):
        with mock.patch("random.randint", return_value=draw):
            sampled = legacy_sampler.should_sample("Controller/foo/bar", False)
            assert sampled is expected

    # foo/other should be ignored (0% sampling)
    ignored = legacy_sampler.should_sample("Controller/foo/other", False)
    assert ignored is False
|
|
||
|
|
||
def test_prefix_matching_precedence(config):
    """Test that longer prefix matches take precedence."""
    nested_rates = {
        "api": 0,  # Ignore all API endpoints by default
        "api/users": 50,  # Sample 50% of user endpoints
        "api/users/vip": 100,  # Sample all VIP user endpoints
    }
    config.set(sample_endpoints=nested_rates)
    prefix_sampler = Sampler(config)

    # Regular API endpoint should be ignored
    status_sampled = prefix_sampler.should_sample("Controller/api/status", False)
    assert status_sampled is False

    # Users API should be sampled at 50%
    for draw, expected in ((40, True), (60, False)):
        with mock.patch("random.randint", return_value=draw):
            sampled = prefix_sampler.should_sample(
                "Controller/api/users/list", False
            )
            assert sampled is expected

    # VIP users API should always be sampled
    vip_sampled = prefix_sampler.should_sample(
        "Controller/api/users/vip/list", False
    )
    assert vip_sampled is True
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.