Merged
16 commits
1 change: 1 addition & 0 deletions AGENTS.md
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
@@ -45,7 +45,7 @@ The codebase is organized into logical modules:

- Entry points:
- `src/index.ts` - Main library export (`ActorsMcpServer` class)
- `src/index-internals.ts` - Internal exports for testing and advanced usage
- `src/index_internals.ts` - Internal exports for testing and advanced usage
- `src/stdio.ts` - Standard input/output entry point (CLI, used for Docker)
- `src/main.ts` - Actor entry point (for Apify platform)
- `src/input.ts` - Input processing and validation
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
@@ -199,10 +199,10 @@ Use comments to guide reviewers:

* **Common patterns:**
* **Tool implementation**: Tools are defined in `src/tools/` using Zod schemas for validation.
* **Actor interaction**: Use `src/utils/apify-client.ts` for Apify API calls — never call the Apify API directly.
* **Actor interaction**: Use `src/utils/apify_client.ts` for Apify API calls — never call the Apify API directly.
* **Error responses**: Return user-friendly error messages with suggestions.
* **Input validation**: Always validate tool inputs with Zod before processing.
* **Caching**: Use TTL-based caching for Actor schemas and details (see `src/utils/ttl-lru.ts`).
* **Caching**: Use TTL-based caching for Actor schemas and details (see `src/utils/ttl_lru.ts`).
* **Constants and tool names**: Always use constants and never hardcoded values. When referring to tools, ALWAYS use the `HelperTools` enum.
* Exception: Integration tests (`tests/integration/`) must use hardcoded strings for tool names. This ensures tests fail if a tool is renamed, preventing accidental breaking changes.

2 changes: 1 addition & 1 deletion DEVELOPMENT.md
@@ -29,7 +29,7 @@ tests/
Key entry points:

- `src/index.ts` - Main library export (`ActorsMcpServer` class)
- `src/index-internals.ts` - Internal exports for testing / advanced usage
- `src/index_internals.ts` - Internal exports for testing / advanced usage
- `src/stdio.ts` - Standard input/output (CLI) entry point
- `src/main.ts` - Actor entry point (standby server / debugging)
- `src/input.ts` - Input processing and validation
5 changes: 2 additions & 3 deletions evals/README.md
@@ -44,13 +44,13 @@ export OPENROUTER_API_KEY="your_key"
export OPENROUTER_BASE_URL="https://openrouter.ai/api/v1"

npm ci
npm run evals:create-dataset # one-time: creates dataset from test-cases.json
npm run evals:create-dataset # one-time: creates dataset from test_cases.json
npm run evals:run # runs evaluation on default dataset (v1.4)
```

### Using a specific dataset version

By default, the evaluation uses the dataset version from `test-cases.json` (`v1.4`). To use a different dataset:
By default, the evaluation uses the dataset version from `test_cases.json` (`v1.4`). To use a different dataset:

```bash
# Create a new dataset with custom name
@@ -285,4 +285,3 @@ NOTES:
// System prompt - instructions mainly cursor (very similar instructions in copilot)
// https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools/blob/main/Cursor%20Prompts/Agent%20Prompt%20v1.2.txt
// https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools/blob/main/VSCode%20Agent/Prompt.txt

6 changes: 3 additions & 3 deletions evals/config.ts
@@ -11,10 +11,10 @@ import log from '@apify/log';
// Re-export shared config
export { OPENROUTER_CONFIG, sanitizeHeaderValue, validateEnvVars, getRequiredEnvVars } from './shared/config.js';

// Read version from test-cases.json
// Read the version from test-cases.json
function getTestCasesVersion(): string {
const dir = dirname(fileURLToPath(import.meta.url));
const raw = readFileSync(join(dir, 'test-cases.json'), 'utf-8');
const raw = readFileSync(join(dir, 'test_cases.json'), 'utf-8');
return JSON.parse(raw).version;
}

@@ -28,7 +28,7 @@ export type EvaluatorName = typeof EVALUATOR_NAMES[keyof typeof EVALUATOR_NAMES]

// Models to evaluate
// 'openai/gpt-4.1-mini', // DO NOT USE - it has much worse performance than gpt-4o-mini and other models
// 'openai/gpt-4o-mini', // Neither used in cursor nor copilot
// 'openai/gpt-4o-mini', // Neither used in cursor nor copilot
// 'openai/gpt-4.1',
export const MODELS_TO_EVALUATE = [
'anthropic/claude-haiku-4.5',
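The `getTestCasesVersion` helper above feeds the dataset name used for evaluations. A minimal sketch of how a version-pinned dataset name could be derived; the `buildDatasetName` helper and the name format are assumptions, not the actual `evals/config.ts` logic:

```typescript
// Hypothetical helper: combine a base dataset name with the version read
// from test_cases.json (e.g. "v1.4"). Not the actual evals/config.ts code.
function buildDatasetName(base: string, version: string): string {
    // Guard against malformed versions so a bad test_cases.json fails fast
    if (!/^v\d+\.\d+$/.test(version)) {
        throw new Error(`Unexpected version format: "${version}" (expected e.g. "v1.4")`);
    }
    return `${base}-${version}`;
}

console.log(buildDatasetName('tool-selection', 'v1.4')); // tool-selection-v1.4
```

Pinning the version into the dataset name keeps evaluation runs reproducible: a new `test_cases.json` version produces a new dataset rather than silently mutating the old one.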
2 changes: 1 addition & 1 deletion evals/create-dataset.ts → evals/create_dataset.ts
@@ -15,7 +15,7 @@ import { hideBin } from 'yargs/helpers';
import log from '@apify/log';

import { sanitizeHeaderValue, validatePhoenixEnvVars } from './config.js';
import { loadTestCases, filterByCategory, filterById, type TestCase } from './evaluation-utils.js';
import { loadTestCases, filterByCategory, filterById, type TestCase } from './evaluation_utils.js';

// Set log level to debug
log.setLevel(log.LEVELS.INFO);
2 changes: 1 addition & 1 deletion evals/eval-single.ts → evals/eval_single.ts
@@ -8,7 +8,7 @@ import {
createToolSelectionLLMEvaluator,
loadTestCases, filterById,
type TestCase
} from './evaluation-utils.js';
} from './evaluation_utils.js';
import { PASS_THRESHOLD, sanitizeHeaderValue } from './config.js';

dotenv.config({ path: '.env' });
10 changes: 5 additions & 5 deletions evals/evaluation-utils.ts → evals/evaluation_utils.ts
@@ -9,8 +9,8 @@ import { createClassifierFn } from '@arizeai/phoenix-evals';

import log from '@apify/log';

import { ApifyClient } from '../src/apify-client.js';
import { getToolPublicFieldOnly, processParamsGetTools } from '../src/index-internals.js';
import { ApifyClient } from '../src/apify_client.js';
import { getToolPublicFieldOnly, processParamsGetTools } from '../src/index_internals.js';
import type { ToolBase, ToolEntry } from '../src/types.js';
import {
SYSTEM_PROMPT,
@@ -20,16 +20,16 @@ import {
TEMPERATURE,
sanitizeHeaderValue
} from './config.js';
import { loadTestCases as loadTestCasesShared, filterByCategory, filterById } from './shared/test-case-loader.js';
import { transformToolsToOpenAIFormat } from './shared/openai-tools.js';
import { loadTestCases as loadTestCasesShared, filterByCategory, filterById } from './shared/test_case_loader.js';
import { transformToolsToOpenAIFormat } from './shared/openai_tools.js';
import type { ToolSelectionTestCase, TestData } from './shared/types.js';

// Re-export types for backwards compatibility
export type TestCase = ToolSelectionTestCase;
export type { TestData } from './shared/types.js';

// Re-export shared functions for backwards compatibility
export { filterByCategory, filterById } from './shared/test-case-loader.js';
export { filterByCategory, filterById } from './shared/test_case_loader.js';

type ExampleInputOnly = { input: Record<string, unknown>, metadata?: Record<string, unknown>, output?: never };

2 changes: 1 addition & 1 deletion evals/run-evaluation.ts → evals/run_evaluation.ts
@@ -20,7 +20,7 @@ import {
loadTools,
createOpenRouterTask,
createToolSelectionLLMEvaluator
} from './evaluation-utils.js';
} from './evaluation_utils.js';
import {
DATASET_NAME,
MODELS_TO_EVALUATE,
@@ -2,7 +2,7 @@
* Filter test cases by line ranges
*/

import type { LineRange } from './line-range-parser.js';
import type { LineRange } from './line_range_parser.js';

/**
* Type for test cases with line number metadata
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions evals/workflows/README.md
@@ -32,7 +32,7 @@ npm run evals:workflow -- --category search
# Run specific test
npm run evals:workflow -- --id search-google-maps

# Filter by line range in test-cases.json
# Filter by line range in test_cases.json
npm run evals:workflow -- --lines 277-283

# Show detailed conversation logs
@@ -242,7 +242,7 @@ export OPENROUTER_API_KEY="your_openrouter_key" # Get from https://openrouter.ai
| `--id <id>` | | Run specific test by ID | All tests |
| `--lines <range>` | `-l` | Filter by line range in test-cases.json | All tests |
| `--verbose` | | Show detailed conversation logs | `false` |
| `--test-cases-path <path>` | | Custom test cases file path | `test-cases.json` |
| `--test-cases-path <path>` | | Custom test cases file path | `test_cases.json` |
| `--agent-model <model>` | | Override agent model | `anthropic/claude-haiku-4.5` |
| `--judge-model <model>` | | Override judge model | `x-ai/grok-4.1-fast` |
| `--tool-timeout <seconds>` | | Tool call timeout | `60` |
@@ -252,7 +252,7 @@

### Line Range Filtering

The `--lines` (or `-l`) option filters test cases by their line numbers in the `test-cases.json` file.
The `--lines` (or `-l`) option filters test cases by their line numbers in the `test_cases.json` file.

**Format options:**
- **Single line:** `--lines 100` (includes tests that contain line 100)
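The line-range filtering described above (`--lines 100`, `--lines 277-283`) can be sketched as follows. This is a hypothetical sketch; the real implementation lives in `evals/shared/line_range_parser.ts` and may differ:

```typescript
// Hypothetical sketch of parsing "--lines" values like "100" or "277-283".
type LineRange = { start: number; end: number };

function parseLineRanges(spec: string): LineRange[] {
    return spec.split(',').map((part) => {
        const [startStr, endStr] = part.trim().split('-');
        const start = Number(startStr);
        const end = endStr === undefined ? start : Number(endStr);
        if (!Number.isInteger(start) || !Number.isInteger(end) || start < 1 || end < start) {
            throw new Error(`Invalid line range: "${part.trim()}" (expected e.g. "100" or "277-283")`);
        }
        return { start, end };
    });
}

// A test case "contains" a requested line when its span in test_cases.json
// overlaps the requested range.
function containsLine(testSpan: LineRange, range: LineRange): boolean {
    return testSpan.start <= range.end && testSpan.end >= range.start;
}
```

Overlap (rather than strict containment) matches the documented behavior that a single `--lines 100` includes any test whose JSON entry spans line 100.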
@@ -6,10 +6,10 @@
// eslint-disable-next-line import/extensions
import type { ChatCompletionMessageParam, ChatCompletionTool } from 'openai/resources/chat/completions';

import { mcpToolsToOpenAiTools } from '../shared/openai-tools.js';
import { mcpToolsToOpenAiTools } from '../shared/openai_tools.js';
import { AGENT_SYSTEM_PROMPT, MAX_CONVERSATION_TURNS, MODELS } from './config.js';
import type { LlmClient } from './llm-client.js';
import type { McpClient } from './mcp-client.js';
import type { LlmClient } from './llm_client.js';
import type { McpClient } from './mcp_client.js';
import type { ConversationHistory, ConversationTurn } from './types.js';

export type ConversationExecutorOptions = {
File renamed without changes.
File renamed without changes.
@@ -2,9 +2,9 @@
* Output formatter for evaluation results
*/

import type { WorkflowTestCase } from './test-cases-loader.js';
import type { WorkflowTestCase } from './test_cases_loader.js';
import type { ConversationHistory } from './types.js';
import type { JudgeResult } from './workflow-judge.js';
import type { JudgeResult } from './workflow_judge.js';

/**
* Single evaluation result
@@ -6,7 +6,7 @@
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { dirname } from 'node:path';

import type { EvaluationResult, ResultsDatabase, TestResultRecord } from './output-formatter.js';
import type { EvaluationResult, ResultsDatabase, TestResultRecord } from './output_formatter.js';

/**
* Build composite key for storing results
@@ -18,23 +18,23 @@ import pLimit from 'p-limit';
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';

import { filterByLineRanges } from '../shared/line-range-filter.js';
import type { LineRange } from '../shared/line-range-parser.js';
import { checkRangesOutOfBounds, parseLineRanges, validateLineRanges } from '../shared/line-range-parser.js';
import { filterByLineRanges } from '../shared/line_range_filter.js';
import type { LineRange } from '../shared/line_range_parser.js';
import { checkRangesOutOfBounds, parseLineRanges, validateLineRanges } from '../shared/line_range_parser.js';
import { DEFAULT_TOOL_TIMEOUT_SECONDS, MODELS } from './config.js';
import { executeConversation } from './conversation-executor.js';
import { LlmClient } from './llm-client.js';
import { McpClient } from './mcp-client.js';
import type { EvaluationResult } from './output-formatter.js';
import { formatDetailedResult, formatResultsTable } from './output-formatter.js';
import { executeConversation } from './conversation_executor.js';
import { LlmClient } from './llm_client.js';
import { McpClient } from './mcp_client.js';
import type { EvaluationResult } from './output_formatter.js';
import { formatDetailedResult, formatResultsTable } from './output_formatter.js';
import {
loadResultsDatabase,
saveResultsDatabase,
updateResultsWithEvaluations,
} from './results-writer.js';
import type { WorkflowTestCase, WorkflowTestCaseWithLineNumbers } from './test-cases-loader.js';
import { filterTestCases, loadTestCases, loadTestCasesWithLineNumbers } from './test-cases-loader.js';
import { evaluateConversation } from './workflow-judge.js';
} from './results_writer.js';
import type { WorkflowTestCase, WorkflowTestCaseWithLineNumbers } from './test_cases_loader.js';
import { filterTestCases, loadTestCases, loadTestCasesWithLineNumbers } from './test_cases_loader.js';
import { evaluateConversation } from './workflow_judge.js';

type CliArgs = {
category?: string;
File renamed without changes.
@@ -6,8 +6,8 @@
import fs from 'node:fs';
import path from 'node:path';

import type { TestCaseWithLineNumbers } from '../shared/line-range-filter.js';
import { filterTestCases as filterTestCasesShared, loadTestCases as loadTestCasesShared } from '../shared/test-case-loader.js';
import type { TestCaseWithLineNumbers } from '../shared/line_range_filter.js';
import { filterTestCases as filterTestCasesShared, loadTestCases as loadTestCasesShared } from '../shared/test_case_loader.js';
import type { WorkflowTestCase } from '../shared/types.js';

// Re-export WorkflowTestCase type for backwards compatibility
@@ -22,7 +22,7 @@ export type WorkflowTestCaseWithLineNumbers = WorkflowTestCase & TestCaseWithLin
* Load workflow test cases from JSON file with validation
*/
export function loadTestCases(filePath?: string): WorkflowTestCase[] {
const testCasesPath = filePath || path.join(process.cwd(), 'evals/workflows/test-cases.json');
const testCasesPath = filePath || path.join(process.cwd(), 'evals/workflows/test_cases.json');

if (!fs.existsSync(testCasesPath)) {
throw new Error(`Test cases file not found: ${testCasesPath}`);
@@ -89,7 +89,7 @@ export function loadTestCasesWithLineNumbers(filePath?: string): {
testCases: WorkflowTestCaseWithLineNumbers[];
totalLines: number;
} {
const testCasesPath = filePath || path.join(process.cwd(), 'evals/workflows/test-cases.json');
const testCasesPath = filePath || path.join(process.cwd(), 'evals/workflows/test_cases.json');

if (!fs.existsSync(testCasesPath)) {
throw new Error(`Test cases file not found: ${testCasesPath}`);
@@ -8,7 +8,7 @@ import type { ResponseFormatJSONSchema } from 'openai/resources/shared';

import type { WorkflowTestCase } from '../shared/types.js';
import { JUDGE_PROMPT_TEMPLATE, MODELS } from './config.js';
import type { LlmClient } from './llm-client.js';
import type { LlmClient } from './llm_client.js';
import type { ConversationHistory } from './types.js';

/**
14 changes: 7 additions & 7 deletions package.json
@@ -10,8 +10,8 @@
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",
"./internals": "./dist/index-internals.js",
"./internals.js": "./dist/index-internals.js",
"./internals": "./dist/index_internals.js",
"./internals.js": "./dist/index_internals.js",
"./manifest.json": "./manifest.json"
},
"bin": {
@@ -83,7 +83,7 @@
},
"scripts": {
"start": "npm run start:standby",
"dev": "node scripts/dev-standby.js",
"dev": "node scripts/dev_standby.js",
"start:standby": "APIFY_META_ORIGIN=\"STANDBY\" tsx src/main.ts",
"build": "npm run build:core && npm run build:web",
"build:core": "tsc -b src",
@@ -95,14 +95,14 @@
"lint:fix": "eslint . --fix",
"type-check": "tsc -p tsconfig.json --noEmit",
"check": "npm run type-check && npm run lint",
"check:widgets": "tsx scripts/check-widgets.ts",
"check:widgets": "tsx scripts/check_widgets.ts",
"test": "npm run test:unit",
"test:unit": "vitest run tests/unit",
"test:integration": "npm run build && vitest run tests/integration",
"inspector:stdio": "npx @modelcontextprotocol/inspector -e APIFY_TOKEN=$APIFY_TOKEN -- node dist/stdio.js",
"evals:create-dataset": "tsx evals/create-dataset.ts",
"evals:run": "tsx evals/run-evaluation.ts",
"evals:workflow": "npm run build && tsx evals/workflows/run-workflow-evals.ts"
"evals:create-dataset": "tsx evals/create_dataset.ts",
"evals:run": "tsx evals/run_evaluation.ts",
"evals:workflow": "npm run build && tsx evals/workflows/run_workflow_evals.ts"
},
"author": "Apify",
"license": "MIT"
24 changes: 20 additions & 4 deletions res/index.md
@@ -12,7 +12,7 @@ Technical analysis of Algolia search API responses for each documentation source
- Recommendations for response processing logic
- **Use case**: Understand what data is actually returned by Algolia to inform simplification decisions

### [mcp-server-refactor-analysis.md](./mcp-server-refactor-analysis.md)
### [mcp_server_refactor_analysis.md](./mcp_server_refactor_analysis.md)
Implementation plan for migrating from low-level `Server` to high-level `McpServer` API.

**Structure:**
@@ -30,19 +30,35 @@ Implementation plan for migrating from low-level `Server` to high-level `McpServ
- Testing strategy
- **Use case**: Reference for implementing the MCP SDK migration

### [mcp-resources-analysis.md](./mcp-resources-analysis.md)
### [mcp_resources_analysis.md](./mcp_resources_analysis.md)
Current MCP resources behavior and constraints (Skyfire readme and OpenAI widgets).
- Handler locations and low-level MCP usage
- Resource list/read behavior and error handling
- **Use case**: Baseline reference before refactoring resources

### [mcp-resources-refactor-analysis.md](./mcp-resources-refactor-analysis.md)
### [mcp_resources_refactor_analysis.md](./mcp_resources_refactor_analysis.md)
Refactor plan for modularizing existing resource handling (no new resources).
- Minimal resource service API (list/read/templates)
- Behavior-preserving steps and non-goals
- **Use case**: Step-by-step guide for refactoring without behavior change

### [patterns-for-simplification.md](./patterns-for-simplification.md)
### [tool_mode_separation_plan.md](./tool_mode_separation_plan.md)
Implementation plan for separating UI-mode (OpenAI) and normal-mode tool behavior into independent modules.

**Key approach:** Actor Executor pattern + separate tool definitions per mode + shared core logic layer.

**Estimated effort:** 6-10 developer days

- Design decisions table (actor-mcp passthrough, Skyfire freeze, task lifecycle, etc.)
- Three-layer architecture (core → registry → mode-specific tools)
- Actor Executor pattern for direct actor tools (`type: 'actor'`) mode awareness
- Tool definition immutability via `Object.freeze` (Skyfire safety)
- Mode-aware category registry eliminating deep-clone hack
- 5-phase migration plan with chained PR strategy (7 PRs)
- Directory structure and complete file manifest with PR assignments
- **Use case**: Reference for implementing the UI/normal mode tool separation

### [patterns_for_simplification.md](./patterns_for_simplification.md)
Analysis of patterns from the **official TypeScript MCP SDK** and **FastMCP** framework that could simplify the codebase.

**Key patterns identified:**
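The tool-definition immutability via `Object.freeze` mentioned in the plan above can be sketched as follows. The `ToolDefinition` shape and `defineTool` helper are hypothetical illustrations, not the actual implementation:

```typescript
// Minimal sketch of freezing tool definitions so per-request handlers
// (e.g. Skyfire) cannot mutate shared state. Shapes are hypothetical.
type ToolDefinition = {
    name: string;
    description: string;
    inputSchema: Record<string, unknown>;
};

function defineTool(def: ToolDefinition): Readonly<ToolDefinition> {
    // Freeze the nested schema too; Object.freeze is shallow on its own
    Object.freeze(def.inputSchema);
    return Object.freeze(def);
}

const searchTool = defineTool({
    name: 'search-actors',
    description: 'Search Apify Store for Actors',
    inputSchema: { type: 'object', properties: {} },
});
```

In ES modules (which run in strict mode), any later write to `searchTool.description` throws a `TypeError`, surfacing accidental mutation immediately instead of relying on the deep-clone hack the plan aims to eliminate.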
File renamed without changes.
File renamed without changes.