fix: Review large PR (#507)

jirispilka · web-flow · commit 8040d7a9175e · 2026-02-27T09:26:48.000+01:00
* fix: rename `test-cases.json` to `test_cases.json` across repo for consistency

* refactor: set default `ServerMode` to 'default' across functions, enforce explicit handling of empty tool selectors, and freeze tool result objects. Add new tests for mode-specific behavior.
diff --git a/evals/README.md b/evals/README.md
@@ -44,13 +44,13 @@ export OPENROUTER_API_KEY="your_key"
 export OPENROUTER_BASE_URL="https://openrouter.ai/api/v1"
 
 npm ci
-npm run evals:create-dataset  # one-time: creates dataset from test-cases.json
+npm run evals:create-dataset  # one-time: creates dataset from test_cases.json
 npm run evals:run              # runs evaluation on default dataset (v1.4)
 ```
 
 ### Using a specific dataset version
 
-By default, the evaluation uses the dataset version from `test-cases.json` (`v1.4`). To use a different dataset:
+By default, the evaluation uses the dataset version from `test_cases.json` (`v1.4`). To use a different dataset:
 
 ```bash
 # Create a new dataset with custom name
@@ -285,4 +285,3 @@ NOTES:
 // System prompt - instructions mainly cursor (very similar instructions in copilot)
 // https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools/blob/main/Cursor%20Prompts/Agent%20Prompt%20v1.2.txt
 // https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools/blob/main/VSCode%20Agent/Prompt.txt
-
diff --git a/evals/config.ts b/evals/config.ts
@@ -11,10 +11,10 @@ import log from '@apify/log';
 // Re-export shared config
 export { OPENROUTER_CONFIG, sanitizeHeaderValue, validateEnvVars, getRequiredEnvVars } from './shared/config.js';
 
-// Read version from test-cases.json
+// Read the version from test_cases.json
 function getTestCasesVersion(): string {
     const dir = dirname(fileURLToPath(import.meta.url));
-    const raw = readFileSync(join(dir, 'test-cases.json'), 'utf-8');
+    const raw = readFileSync(join(dir, 'test_cases.json'), 'utf-8');
     return JSON.parse(raw).version;
 }
 
@@ -28,7 +28,7 @@ export type EvaluatorName = typeof EVALUATOR_NAMES[keyof typeof EVALUATOR_NAMES]
 
 // Models to evaluate
 // 'openai/gpt-4.1-mini', // DO NOT USE - it has much worse performance than gpt-4o-mini and other models
-// 'openai/gpt-4o-mini',  // Neither used in cursor nor copilot
+// 'openai/gpt-4o-mini', // Neither used in cursor nor copilot
 // 'openai/gpt-4.1',
 export const MODELS_TO_EVALUATE = [
     'anthropic/claude-haiku-4.5',
diff --git a/evals/workflows/README.md b/evals/workflows/README.md
@@ -32,7 +32,7 @@ npm run evals:workflow -- --category search
 # Run specific test
 npm run evals:workflow -- --id search-google-maps
 
-# Filter by line range in test-cases.json
+# Filter by line range in test_cases.json
 npm run evals:workflow -- --lines 277-283
 
 # Show detailed conversation logs
@@ -242,7 +242,7 @@ export OPENROUTER_API_KEY="your_openrouter_key" # Get from https://openrouter.ai
 | `--id <id>` | | Run specific test by ID | All tests |
-| `--lines <range>` | `-l` | Filter by line range in test-cases.json | All tests |
+| `--lines <range>` | `-l` | Filter by line range in test_cases.json | All tests |
 | `--verbose` | | Show detailed conversation logs | `false` |
-| `--test-cases-path <path>` | | Custom test cases file path | `test-cases.json` |
+| `--test-cases-path <path>` | | Custom test cases file path | `test_cases.json` |
 | `--agent-model <model>` | | Override agent model | `anthropic/claude-haiku-4.5` |
 | `--judge-model <model>` | | Override judge model | `x-ai/grok-4.1-fast` |
 | `--tool-timeout <seconds>` | | Tool call timeout | `60` |
@@ -252,7 +252,7 @@ export OPENROUTER_API_KEY="your_openrouter_key" # Get from https://openrouter.ai
 
 ### Line Range Filtering
 
-The `--lines` (or `-l`) option filters test cases by their line numbers in the `test-cases.json` file.
+The `--lines` (or `-l`) option filters test cases by their line numbers in the `test_cases.json` file.
 
 **Format options:**
 - **Single line:** `--lines 100` (includes tests that contain line 100)
diff --git a/evals/workflows/test_cases_loader.ts b/evals/workflows/test_cases_loader.ts
@@ -22,7 +22,7 @@ export type WorkflowTestCaseWithLineNumbers = WorkflowTestCase & TestCaseWithLin
  * Load workflow test cases from JSON file with validation
  */
 export function loadTestCases(filePath?: string): WorkflowTestCase[] {
-    const testCasesPath = filePath || path.join(process.cwd(), 'evals/workflows/test-cases.json');
+    const testCasesPath = filePath || path.join(process.cwd(), 'evals/workflows/test_cases.json');
 
     if (!fs.existsSync(testCasesPath)) {
         throw new Error(`Test cases file not found: ${testCasesPath}`);
@@ -89,7 +89,7 @@ export function loadTestCasesWithLineNumbers(filePath?: string): {
     testCases: WorkflowTestCaseWithLineNumbers[];
     totalLines: number;
 } {
-    const testCasesPath = filePath || path.join(process.cwd(), 'evals/workflows/test-cases.json');
+    const testCasesPath = filePath || path.join(process.cwd(), 'evals/workflows/test_cases.json');
 
     if (!fs.existsSync(testCasesPath)) {
         throw new Error(`Test cases file not found: ${testCasesPath}`);
diff --git a/src/mcp/utils.ts b/src/mcp/utils.ts
@@ -42,8 +42,14 @@ export function getProxyMCPServerToolName(url: string, toolName: string): string
  * @param url The URL to process
  * @param apifyClient The Apify client instance
  * @param mode Server mode for tool variant resolution
+ * @param actorStore Optional Actor store, forwarded unchanged to loadToolsFromInput
  */
-export async function processParamsGetTools(url: string, apifyClient: ApifyClient, mode: ServerMode, actorStore?: ActorStore) {
+export async function processParamsGetTools(
+    url: string,
+    apifyClient: ApifyClient,
+    mode: ServerMode = 'default',
+    actorStore?: ActorStore,
+) {
     const input = parseInputParamsFromUrl(url);
     return await loadToolsFromInput(input, apifyClient, mode, actorStore);
 }
diff --git a/src/resources/resource_service.ts b/src/resources/resource_service.ts
@@ -23,12 +23,12 @@ type ResourceService = {
 
 type ResourceServiceOptions = {
     skyfireMode?: boolean;
-    mode: ServerMode;
+    mode?: ServerMode;
     getAvailableWidgets: () => Map<string, AvailableWidget>;
 };
 
 export function createResourceService(options: ResourceServiceOptions): ResourceService {
-    const { skyfireMode, mode, getAvailableWidgets } = options;
+    const { skyfireMode, mode = 'default', getAvailableWidgets } = options;
 
     const listResources = async (): Promise<ListResourcesResult> => {
         const resources: Resource[] = [];
diff --git a/src/tools/categories.ts b/src/tools/categories.ts
@@ -146,7 +146,7 @@ function resolveCategoryEntries(entries: readonly CategoryToolEntry[], mode: Ser
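- * @param mode - Required. Use `'default'` or `'openai'`.
- *   Made explicit (no default value) to prevent accidentally serving wrong-mode tools.
+ * @param mode - Server mode: `'default'` or `'openai'`. Defaults to `'default'` when omitted;
+ *   pass it explicitly wherever openai-mode tools are expected, to avoid serving wrong-mode tools.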
  */
-export function getCategoryTools(mode: ServerMode): ToolCategoryMap {
+export function getCategoryTools(mode: ServerMode = 'default'): ToolCategoryMap {
     return Object.fromEntries(
         CATEGORY_NAMES.map((name) => [name, resolveCategoryEntries(toolCategories[name], mode)]),
     ) as ToolCategoryMap;
diff --git a/src/tools/common/add_actor.ts b/src/tools/common/add_actor.ts
@@ -50,7 +50,7 @@ USAGE EXAMPLES:
         const tools = await apifyMcpServer.loadActorsAsTools([parsed.actor], apifyClient);
         /**
          * If no tools were found, return a message that the Actor was not found
-         * instead of returning that non existent tool was added since the
+         * instead of returning that non-existent tool was added since the
          * loadActorsAsTools method returns an empty array and does not throw an error.
          */
         if (tools.length === 0) {
diff --git a/src/tools/core/call_actor_common.ts b/src/tools/core/call_actor_common.ts
@@ -52,7 +52,7 @@ For MCP server Actors, use format "actorName:toolName" to call a specific tool (
         .describe('The input JSON to pass to the Actor. Required.'),
     async: z.boolean()
         .optional()
-        .describe(`When true: starts the run and returns immediately with runId. When false or not provided: waits for completion and returns results immediately. Default: true when UI mode is enabled (enforced), false otherwise. IMPORTANT: Only set async to true if the user explicitly asks to run the Actor in the background or does not need immediate results. When the user asks for data or results, always use async: false (default) so the results are returned immediately.`),
+        .describe(`When true, starts the run and returns immediately with runId. When false or omitted, behavior depends on the active server mode/tool variant. IMPORTANT: use async=true only when the user explicitly asks to run in the background or does not need immediate results.`),
     previewOutput: z.boolean()
         .optional()
         .describe('When true (default): includes preview items. When false: metadata only (reduces context). Use when fetching fields via get-actor-output.'),
diff --git a/src/tools/index.ts b/src/tools/index.ts
@@ -20,7 +20,7 @@ export { CATEGORY_NAME_SET, CATEGORY_NAMES, getCategoryTools, toolCategories, to
  * Returns the tool entries for the default-enabled categories resolved for the given mode.
  * Computed here (not in helper file) to avoid module initialization issues.
  */
-export function getDefaultTools(mode: ServerMode): ToolEntry[] {
+export function getDefaultTools(mode: ServerMode = 'default'): ToolEntry[] {
     return getExpectedToolsByCategories(toolCategoriesEnabledByDefault, mode);
 }
 
diff --git a/src/tools/openai/call_actor.ts b/src/tools/openai/call_actor.ts
@@ -130,7 +130,7 @@ Do NOT proactively poll using ${HelperTools.ACTOR_RUNS_GET}. Wait for the widget
             return buildMCPResponse({
                 texts: [`Failed to call Actor '${baseActorName}': ${error instanceof Error ? error.message : String(error)}.
 Please verify the Actor name, input parameters, and ensure the Actor exists.
-You can search for available Actors using the tool: ${HelperTools.STORE_SEARCH}, or get Actor details using: ${HelperTools.ACTOR_GET_DETAILS}.`],
+You can search for available Actors using the tool: ${HelperTools.STORE_SEARCH_INTERNAL}, or get Actor details using: ${HelperTools.ACTOR_GET_DETAILS_INTERNAL}.`],
                 isError: true,
             });
         }
diff --git a/src/utils/server-instructions/index.ts b/src/utils/server-instructions/index.ts
@@ -16,6 +16,6 @@ const instructionsByMode: Record<ServerMode, () => string> = {
 /**
  * Build server instructions for the given server mode.
  */
-export function getServerInstructions(mode: ServerMode): string {
+export function getServerInstructions(mode: ServerMode = 'default'): string {
     return instructionsByMode[mode]();
 }
diff --git a/src/utils/tools.ts b/src/utils/tools.ts
@@ -110,5 +110,5 @@ export function applySkyfireAugmentation(tool: ToolEntry): ToolEntry {
         }
     }
 
-    return cloned;
+    return Object.freeze(cloned);
 }
diff --git a/src/utils/tools_loader.ts b/src/utils/tools_loader.ts
@@ -45,12 +45,13 @@ function getAllInternalToolNames(): Set<string> {
  * @param input The processed Input object
  * @param apifyClient The Apify client instance
  * @param mode Server mode for tool variant resolution
+ * @param actorStore Optional Actor store; may be omitted
  * @returns An array of tool entries
  */
 export async function loadToolsFromInput(
     input: Input,
     apifyClient: ApifyClient,
-    mode: ServerMode,
+    mode: ServerMode = 'default',
     actorStore?: ActorStore,
 ): Promise<ToolEntry[]> {
     // Build mode-resolved categories — tools are already the correct variant for this mode
@@ -70,6 +71,7 @@ export async function loadToolsFromInput(
     const selectorsExplicitEmpty = selectorsProvided && (selectors as string[]).length === 0;
     const addActorEnabled = input.enableAddingActors === true;
     const actorsExplicitlyEmpty = (Array.isArray(input.actors) && input.actors.length === 0) || input.actors === '';
+    const explicitlyNoToolsRequested = selectorsExplicitEmpty || actorsExplicitlyEmpty;
 
     // Build mode-specific tool-by-name map for individual tool selection
     const modeToolByName = new Map<string, ToolEntry>();
@@ -157,7 +159,7 @@ export async function loadToolsFromInput(
     }
 
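-    // In openai mode, unconditionally add UI-specific tools (regardless of selectors)
+    // In openai mode, add UI-specific tools unless the caller explicitly requested no tools (empty tool selectors or empty actors)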
-    if (mode === 'openai') {
+    if (mode === 'openai' && !explicitlyNoToolsRequested) {
         result.push(...categories.ui);
     }
 
@@ -181,7 +183,7 @@ export async function loadToolsFromInput(
     const hasGetActorOutput = result.some((entry) => entry.name === HelperTools.ACTOR_OUTPUT_GET);
 
     const toolsToInject: ToolEntry[] = [];
-    if (!hasGetActorRun && (hasCallActor || mode === 'openai')) {
+    if (!hasGetActorRun && (hasCallActor || (mode === 'openai' && !explicitlyNoToolsRequested))) {
         // Use mode-resolved get-actor-run variant
         const modeGetActorRun = modeToolByName.get(HelperTools.ACTOR_RUNS_GET);
         if (modeGetActorRun) toolsToInject.push(modeGetActorRun);
diff --git a/tests/unit/tools.mode_contract.test.ts b/tests/unit/tools.mode_contract.test.ts
@@ -125,6 +125,16 @@ describe('getCategoryTools mode contract (tool-mode separation)', () => {
         }
     });
 
+    describe('mode-specific call-actor behavior guidance', () => {
+        it('should document that openai call-actor always runs asynchronously', () => {
+            const openaiCallActor = openaiCategories.actors.find((t) => t.name === HelperTools.ACTOR_CALL);
+
+            expect(openaiCallActor).toBeDefined();
+            expect(openaiCallActor!.description).toContain('always runs asynchronously');
+            expect(openaiCallActor!.description).toContain('do NOT poll or call any other tool');
+        });
+    });
+
     describe('tool definitions are frozen', () => {
         for (const mode of SERVER_MODES) {
             const categories = getCategoryTools(mode);
diff --git a/tests/unit/tools.skyfire.test.ts b/tests/unit/tools.skyfire.test.ts
@@ -157,6 +157,7 @@ describe('applySkyfireAugmentation', () => {
                 type: 'string',
                 description: SKYFIRE_PAY_ID_PROPERTY_DESCRIPTION,
             });
+            expect(Object.isFrozen(result)).toBe(true);
         });
 
         // Test each SKYFIRE_ENABLED_TOOLS member
@@ -181,6 +182,7 @@ describe('applySkyfireAugmentation', () => {
 
             const props = result.inputSchema.properties as Record<string, unknown>;
             expect(props['skyfire-pay-id']).toBeDefined();
+            expect(Object.isFrozen(result)).toBe(true);
         });
     });
 
diff --git a/tests/unit/utils.tools_loader.test.ts b/tests/unit/utils.tools_loader.test.ts
@@ -0,0 +1,38 @@
+import { ApifyClient } from 'apify';
+import { describe, expect, it } from 'vitest';
+
+import { HelperTools } from '../../src/const.js';
+import { loadToolsFromInput } from '../../src/utils/tools_loader.js';
+
+describe('loadToolsFromInput explicit-empty semantics', () => {
+    const apifyClient = new ApifyClient({ token: 'test-token' });
+
+    it('should not auto-add openai ui tools when tools are explicitly empty', async () => {
+        const tools = await loadToolsFromInput({
+            tools: [],
+        }, apifyClient, 'openai');
+
+        expect(tools).toHaveLength(0);
+    });
+
+    it('should not auto-add openai ui tools when actors are explicitly empty', async () => {
+        const tools = await loadToolsFromInput({
+            actors: [],
+        }, apifyClient, 'openai');
+
+        expect(tools).toHaveLength(0);
+    });
+
+    it('should keep openai ui tools and get-actor-run for non-empty selectors', async () => {
+        const tools = await loadToolsFromInput({
+            tools: ['docs'],
+        }, apifyClient, 'openai');
+
+        const toolNames = tools.map((tool) => tool.name);
+        expect(toolNames).toContain(HelperTools.DOCS_SEARCH);
+        expect(toolNames).toContain(HelperTools.DOCS_FETCH);
+        expect(toolNames).toContain(HelperTools.STORE_SEARCH_INTERNAL);
+        expect(toolNames).toContain(HelperTools.ACTOR_GET_DETAILS_INTERNAL);
+        expect(toolNames).toContain(HelperTools.ACTOR_RUNS_GET);
+    });
+});

Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@ export { CATEGORY_NAME_SET, CATEGORY_NAMES, getCategoryTools, toolCategories, to`
`20`	`20`	`* Returns the tool entries for the default-enabled categories resolved for the given mode.`
`21`	`21`	`* Computed here (not in helper file) to avoid module initialization issues.`
`22`	`22`	`*/`
`23`		`-export function getDefaultTools(mode: ServerMode): ToolEntry[] {`
	`23`	`+export function getDefaultTools(mode: ServerMode = 'default'): ToolEntry[] {`
`24`	`24`	`return getExpectedToolsByCategories(toolCategoriesEnabledByDefault, mode);`
`25`	`25`	`}`
`26`	`26`