Skip to content

Commit fd73365

Browse files
committed
fix(mcp): resolve bugs, add tool annotations, and new UIA tools
Bug fixes: - Use top-level fs import instead of inline require() in appium-manager - Fix spawn race: replace setImmediate with child.once('spawn') (Node ≥15.1) - Fix CSS selector bug: use driver.findElement/findElements/findElementFromElement protocol calls directly; 'id' aliased to 'accessibility id' as documented - Fix appium:waitForAppLaunch capability (drop incorrect ms: infix) - Fix get_clipboard to pass { contentType } object, not bare string - Fix get_window_element to throw on unexpected response instead of String(result) - Fix get_attribute returning the string "null" for absent attributes (now "") - Remove dead ?? 1500 fallback (Zod default always fills the value) Quality: - Extract shared ELEMENT_KEY to lib/mcp/constants.ts - Return image content type from take_screenshot (MCP image rendering) - Move maximize/minimize/restore/close_window to window.ts (out of patterns.ts) - Add readOnlyHint/destructiveHint/idempotentHint annotations to all tools New tools: wait_for_element, get_toggle_state, focus_element, select_item
1 parent cf3d464 commit fd73365

File tree

18 files changed

+439
-149
lines changed

18 files changed

+439
-149
lines changed

lib/mcp/appium-manager.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import * as fs from 'node:fs';
12
import * as http from 'node:http';
23
import * as path from 'node:path';
34
import { spawn, type ChildProcess } from 'node:child_process';
@@ -52,7 +53,7 @@ function resolveAppiumBinary(configBinary?: string): string {
5253

5354
// Prefer a local node_modules/.bin/appium (3 levels up from build/lib/mcp/)
5455
const localBin = path.resolve(__dirname, '..', '..', '..', 'node_modules', '.bin', 'appium');
55-
if (require('node:fs').existsSync(localBin)) {return localBin;}
56+
if (fs.existsSync(localBin)) {return localBin;}
5657

5758
// Fall back to appium on the system PATH (global install: npm install -g appium)
5859
return 'appium';
@@ -86,7 +87,10 @@ export class AppiumManager {
8687
shell: process.platform === 'win32',
8788
});
8889

89-
// Attach error handler immediately to prevent unhandled error crash
90+
// Wait for either a successful spawn or an early error.
91+
// The 'spawn' event (Node ≥ 15.1) fires only after the OS has confirmed the
92+
// process started; 'error' fires if it never starts (binary not found, etc.).
93+
// Using setImmediate here would be a race — resolve could win before 'error' fires.
9094
await new Promise<void>((resolve, reject) => {
9195
child.once('error', (err) => {
9296
reject(new Error(
@@ -95,8 +99,7 @@ export class AppiumManager {
9599
`Or set APPIUM_BINARY to the full path of the appium executable.`
96100
));
97101
});
98-
// If no error fires synchronously, we're past the spawn phase
99-
setImmediate(resolve);
102+
child.once('spawn', resolve);
100103
});
101104

102105
this.process = child;

lib/mcp/constants.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/** W3C WebDriver element reference key */
2+
export const ELEMENT_KEY = 'element-6066-11e4-a52e-4f735466cecf';

lib/mcp/session.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ export class AppiumSession {
3535

3636
if (params.appArguments !== undefined) {caps['appium:appArguments'] = params.appArguments;}
3737
if (params.appWorkingDir !== undefined) {caps['appium:appWorkingDir'] = params.appWorkingDir;}
38-
if (params.waitForAppLaunch !== undefined) {caps['appium:ms:waitForAppLaunch'] = params.waitForAppLaunch;}
38+
if (params.waitForAppLaunch !== undefined) {caps['appium:waitForAppLaunch'] = params.waitForAppLaunch;}
3939
if (params.shouldCloseApp !== undefined) {caps['appium:shouldCloseApp'] = params.shouldCloseApp;}
4040
if (params.delayAfterClick !== undefined) {caps['appium:delayAfterClick'] = params.delayAfterClick;}
4141
if (params.delayBeforeClick !== undefined) {caps['appium:delayBeforeClick'] = params.delayBeforeClick;}
@@ -48,7 +48,7 @@ export class AppiumSession {
4848
capabilities: caps as WebdriverIO.Capabilities,
4949
});
5050

51-
await this.driver.setTimeout({ implicit: params.implicitTimeout ?? 1500 });
51+
await this.driver.setTimeout({ implicit: params.implicitTimeout });
5252
process.stderr.write('[MCP] Session created successfully\n');
5353
}
5454

lib/mcp/tools/advanced.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export function registerAdvancedTools(server: McpServer, session: AppiumSession)
1010
'advanced_click',
1111
{
1212
description: 'Perform an advanced click with modifier keys, multiple clicks, or custom duration. Use this for right-click, Ctrl+click, double-click, etc.',
13+
annotations: { destructiveHint: false },
1314
inputSchema: {
1415
elementId: z.string().optional().describe('Element to click (its center). Provide either elementId or x+y.'),
1516
x: z.number().int().optional().describe('Absolute screen x coordinate'),
@@ -36,6 +37,7 @@ export function registerAdvancedTools(server: McpServer, session: AppiumSession)
3637
'send_keys',
3738
{
3839
description: 'Send keyboard input. Each action can be a pause (ms delay), text to type, or a virtual key code press/release.',
40+
annotations: { destructiveHint: false },
3941
inputSchema: {
4042
actions: z.array(z.object({
4143
pause: z.number().int().optional().describe('Pause in milliseconds'),

lib/mcp/tools/app.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,17 @@ export function registerAppTools(server: McpServer, session: AppiumSession): voi
77
'get_window_element',
88
{
99
description: 'Get the root UI element of the current app window. Returns an element ID that represents the top-level window.',
10+
annotations: { readOnlyHint: true },
1011
},
1112
async () => {
1213
try {
1314
const driver = session.getDriver();
1415
const result = await driver.executeScript('windows: getWindowElement', [{}]);
15-
const elementId = (result as Record<string, string>)['element-6066-11e4-a52e-4f735466cecf']
16-
?? (result as Record<string, string>).ELEMENT
17-
?? String(result);
16+
const ref = result as Record<string, string>;
17+
const elementId = ref['element-6066-11e4-a52e-4f735466cecf'] ?? ref.ELEMENT;
18+
if (!elementId) {
19+
throw new Error(`windows: getWindowElement returned unexpected value: ${JSON.stringify(result)}`);
20+
}
1821
return { content: [{ type: 'text' as const, text: elementId }] };
1922
} catch (err) {
2023
return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] };
@@ -26,6 +29,7 @@ export function registerAppTools(server: McpServer, session: AppiumSession): voi
2629
'launch_app',
2730
{
2831
description: 'Launch the application configured for this session (re-launch if it was closed).',
32+
annotations: { destructiveHint: false },
2933
},
3034
async () => {
3135
try {
@@ -42,6 +46,7 @@ export function registerAppTools(server: McpServer, session: AppiumSession): voi
4246
'close_app',
4347
{
4448
description: 'Close the application under test without ending the session.',
49+
annotations: { destructiveHint: true },
4550
},
4651
async () => {
4752
try {
@@ -58,6 +63,7 @@ export function registerAppTools(server: McpServer, session: AppiumSession): voi
5863
'get_device_time',
5964
{
6065
description: 'Get the current date/time on the Windows device.',
66+
annotations: { readOnlyHint: true },
6167
},
6268
async () => {
6369
try {

lib/mcp/tools/clipboard.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ export function registerClipboardTools(server: McpServer, session: AppiumSession
1313
inputSchema: {
1414
contentType: contentTypeSchema.describe('"plaintext" for text, "image" for image content'),
1515
},
16+
annotations: { readOnlyHint: true },
1617
},
1718
async ({ contentType }) => {
1819
try {
1920
const driver = session.getDriver();
20-
const result = await driver.executeScript('windows: getClipboard', [contentType]);
21+
const result = await driver.executeScript('windows: getClipboard', [{ contentType }]);
2122
return { content: [{ type: 'text' as const, text: String(result) }] };
2223
} catch (err) {
2324
return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] };

lib/mcp/tools/find.ts

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,15 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
22
import { z } from 'zod';
33
import type { AppiumSession } from '../session.js';
44
import { formatError } from '../errors.js';
5+
import { ELEMENT_KEY } from '../constants.js';
56

6-
const ELEMENT_KEY = 'element-6066-11e4-a52e-4f735466cecf';
77
const STRATEGIES = ['accessibility id', 'name', 'id', 'xpath', 'class name', 'tag name', '-windows uiautomation'] as const;
88
type Strategy = typeof STRATEGIES[number];
99

10-
function buildSelector(strategy: Strategy, selector: string): string {
11-
switch (strategy) {
12-
case 'accessibility id': return `~${selector}`;
13-
case 'xpath': return selector;
14-
case 'tag name': return `//${selector}`;
15-
case 'id': return `#${selector}`;
16-
case 'class name': return `.${selector}`;
17-
case 'name': return `*[name="${selector}"]`;
18-
case '-windows uiautomation': return selector;
19-
}
10+
// 'id' is documented as an alias for 'accessibility id' (UIA AutomationId).
11+
// All other strategies are forwarded verbatim to the Appium/WinAppDriver protocol.
12+
function resolveStrategy(strategy: Strategy): string {
13+
return strategy === 'id' ? 'accessibility id' : strategy;
2014
}
2115

2216
const STRATEGY_DESCRIPTIONS: Record<Strategy, string> = {
@@ -52,15 +46,13 @@ export function registerFindTools(server: McpServer, session: AppiumSession): vo
5246
),
5347
selector: z.string().min(1).describe('The selector value for the chosen strategy'),
5448
},
49+
annotations: { readOnlyHint: true },
5550
},
5651
async ({ strategy, selector }) => {
5752
try {
5853
const driver = session.getDriver();
59-
const el = await driver.$(buildSelector(strategy as Strategy, selector));
60-
if (!await el.isExisting()) {
61-
return { isError: true, content: [{ type: 'text' as const, text: `Element not found: ${strategy}="${selector}"` }] };
62-
}
63-
return { content: [{ type: 'text' as const, text: await el.elementId }] };
54+
const rawEl = await driver.findElement(resolveStrategy(strategy as Strategy), selector);
55+
return { content: [{ type: 'text' as const, text: rawEl[ELEMENT_KEY] }] };
6456
} catch (err) {
6557
return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] };
6658
}
@@ -78,15 +70,13 @@ export function registerFindTools(server: McpServer, session: AppiumSession): vo
7870
),
7971
selector: z.string().min(1).describe('The selector value for the chosen strategy'),
8072
},
73+
annotations: { readOnlyHint: true },
8174
},
8275
async ({ strategy, selector }) => {
8376
try {
8477
const driver = session.getDriver();
85-
const els = await driver.$$(buildSelector(strategy as Strategy, selector));
86-
const ids: string[] = [];
87-
for (const el of els) {
88-
ids.push(await el.elementId);
89-
}
78+
const rawEls = await driver.findElements(resolveStrategy(strategy as Strategy), selector);
79+
const ids = rawEls.map((el) => el[ELEMENT_KEY]);
9080
return { content: [{ type: 'text' as const, text: JSON.stringify(ids) }] };
9181
} catch (err) {
9282
return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] };
@@ -106,19 +96,57 @@ export function registerFindTools(server: McpServer, session: AppiumSession): vo
10696
),
10797
selector: z.string().min(1).describe('The selector value for the chosen strategy'),
10898
},
99+
annotations: { readOnlyHint: true },
109100
},
110101
async ({ parentElementId, strategy, selector }) => {
111102
try {
112103
const driver = session.getDriver();
113-
const parent = await driver.$({ [ELEMENT_KEY]: parentElementId });
114-
const el = await parent.$(buildSelector(strategy as Strategy, selector));
115-
if (!await el.isExisting()) {
116-
return { isError: true, content: [{ type: 'text' as const, text: `Child element not found: ${strategy}="${selector}"` }] };
117-
}
118-
return { content: [{ type: 'text' as const, text: await el.elementId }] };
104+
const rawEl = await driver.findElementFromElement(
105+
parentElementId,
106+
resolveStrategy(strategy as Strategy),
107+
selector
108+
);
109+
return { content: [{ type: 'text' as const, text: rawEl[ELEMENT_KEY] }] };
119110
} catch (err) {
120111
return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] };
121112
}
122113
}
123114
);
115+
116+
server.registerTool(
117+
'wait_for_element',
118+
{
119+
description: `Wait for a UI element to appear within a configurable timeout, then return its element ID. Useful after dialog opens, page transitions, or loading spinners disappear. ${FIND_STRATEGY_PRIORITY}`,
120+
inputSchema: {
121+
strategy: StrategyEnum.describe(
122+
'Locator strategy. ' +
123+
Object.entries(STRATEGY_DESCRIPTIONS).map(([k, v]) => `"${k}": ${v}`).join(' | ')
124+
),
125+
selector: z.string().min(1).describe('The selector value for the chosen strategy'),
126+
timeoutMs: z.number().int().min(0).default(5000).describe('Maximum time in milliseconds to wait for the element'),
127+
pollIntervalMs: z.number().int().min(50).default(200).describe('How often to retry in milliseconds'),
128+
},
129+
annotations: { readOnlyHint: true },
130+
},
131+
async ({ strategy, selector, timeoutMs, pollIntervalMs }) => {
132+
const driver = session.getDriver();
133+
const effectiveStrategy = resolveStrategy(strategy as Strategy);
134+
const deadline = Date.now() + timeoutMs;
135+
// eslint-disable-next-line no-constant-condition
136+
while (true) {
137+
try {
138+
const rawEl = await driver.findElement(effectiveStrategy, selector);
139+
return { content: [{ type: 'text' as const, text: rawEl[ELEMENT_KEY] }] };
140+
} catch {
141+
if (Date.now() >= deadline) {
142+
return {
143+
isError: true,
144+
content: [{ type: 'text' as const, text: `Element not found within ${timeoutMs}ms: ${strategy}="${selector}"` }],
145+
};
146+
}
147+
await new Promise((r) => setTimeout(r, pollIntervalMs));
148+
}
149+
}
150+
}
151+
);
124152
}

lib/mcp/tools/inspect.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
22
import { z } from 'zod';
33
import type { AppiumSession } from '../session.js';
44
import { formatError } from '../errors.js';
5-
6-
const ELEMENT_KEY = 'element-6066-11e4-a52e-4f735466cecf';
5+
import { ELEMENT_KEY } from '../constants.js';
76

87
interface SuggestedSelector {
98
strategy: string;
@@ -101,6 +100,7 @@ export function registerInspectTools(server: McpServer, session: AppiumSession):
101100
inputSchema: {
102101
elementId: z.string().min(1).describe('Element ID returned by find_element'),
103102
},
103+
annotations: { readOnlyHint: true },
104104
},
105105
async ({ elementId }) => {
106106
try {

lib/mcp/tools/interact.ts

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,17 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
22
import { z } from 'zod';
33
import type { AppiumSession } from '../session.js';
44
import { formatError } from '../errors.js';
5+
import { ELEMENT_KEY } from '../constants.js';
56

67
const elementIdSchema = z.string().min(1).describe('Element ID returned by find_element');
7-
const ELEMENT_KEY = 'element-6066-11e4-a52e-4f735466cecf';
88

99
export function registerInteractTools(server: McpServer, session: AppiumSession): void {
1010
server.registerTool(
1111
'click_element',
1212
{
1313
description: 'Click a UI element by its element ID.',
1414
inputSchema: { elementId: elementIdSchema },
15+
annotations: { destructiveHint: false },
1516
},
1617
async ({ elementId }) => {
1718
try {
@@ -33,6 +34,7 @@ export function registerInteractTools(server: McpServer, session: AppiumSession)
3334
elementId: elementIdSchema,
3435
value: z.string().describe('The text value to set'),
3536
},
37+
annotations: { destructiveHint: false },
3638
},
3739
async ({ elementId, value }) => {
3840
try {
@@ -51,6 +53,7 @@ export function registerInteractTools(server: McpServer, session: AppiumSession)
5153
{
5254
description: 'Clear the text content of an input element.',
5355
inputSchema: { elementId: elementIdSchema },
56+
annotations: { destructiveHint: false },
5457
},
5558
async ({ elementId }) => {
5659
try {
@@ -69,6 +72,7 @@ export function registerInteractTools(server: McpServer, session: AppiumSession)
6972
{
7073
description: 'Get the visible text content of a UI element.',
7174
inputSchema: { elementId: elementIdSchema },
75+
annotations: { readOnlyHint: true },
7276
},
7377
async ({ elementId }) => {
7478
try {
@@ -85,18 +89,19 @@ export function registerInteractTools(server: McpServer, session: AppiumSession)
8589
server.registerTool(
8690
'get_attribute',
8791
{
88-
description: 'Get an attribute or property of a UI element. Common attributes: Name, AutomationId, ClassName, IsEnabled, IsOffscreen, ControlType, Value.Value.',
92+
description: 'Get an attribute or property of a UI element. Common attributes: Name, AutomationId, ClassName, IsEnabled, IsOffscreen, ControlType, Value.Value. Returns an empty string when the attribute is absent.',
8993
inputSchema: {
9094
elementId: elementIdSchema,
9195
attribute: z.string().min(1).describe('Attribute name, e.g. "Name", "IsEnabled", "ControlType"'),
9296
},
97+
annotations: { readOnlyHint: true },
9398
},
9499
async ({ elementId, attribute }) => {
95100
try {
96101
const driver = session.getDriver();
97102
const el = await driver.$({ [ELEMENT_KEY]: elementId });
98103
const value = await el.getAttribute(attribute);
99-
return { content: [{ type: 'text' as const, text: value ?? 'null' }] };
104+
return { content: [{ type: 'text' as const, text: value ?? '' }] };
100105
} catch (err) {
101106
return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] };
102107
}
@@ -108,6 +113,7 @@ export function registerInteractTools(server: McpServer, session: AppiumSession)
108113
{
109114
description: 'Check whether a UI element is visible on screen (not off-screen).',
110115
inputSchema: { elementId: elementIdSchema },
116+
annotations: { readOnlyHint: true },
111117
},
112118
async ({ elementId }) => {
113119
try {
@@ -126,6 +132,7 @@ export function registerInteractTools(server: McpServer, session: AppiumSession)
126132
{
127133
description: 'Check whether a UI element is enabled and interactable.',
128134
inputSchema: { elementId: elementIdSchema },
135+
annotations: { readOnlyHint: true },
129136
},
130137
async ({ elementId }) => {
131138
try {

0 commit comments

Comments
 (0)