diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 747dee3..1f9e2a4 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,3 +1,5 @@ +# + ## Proposed changes Describe the big picture of your changes here to communicate to the maintainers why we should accept this pull request. If it fixes a bug or resolves a feature request, be sure to link to that issue. diff --git a/.github/claude-review-guide.md b/.github/claude-review-guide.md new file mode 100644 index 0000000..f727eaa --- /dev/null +++ b/.github/claude-review-guide.md @@ -0,0 +1,78 @@ +# Claude Review Guide — appium-desktop-driver + +## Project Context + +This is a **Windows desktop UI automation Appium driver** (`Appium Desktop`). It bridges WebDriver protocol to Windows UI Automation (UIA3) via a persistent PowerShell process. An MCP server layered on top exposes tools for AI agent use. + +Key stack: TypeScript, Node.js, Appium BaseDriver, PowerShell, koffi FFI (user32.dll), WebdriverIO (MCP client). + +--- + +## Severity Format + +Use this format for findings: + +``` +[BLOCKER] — Must be fixed before merge. Security or correctness issue. +[HIGH] — Significant bug or reliability issue; should be fixed. +[MEDIUM] — Non-critical issue worth addressing. +[LOW] — Minor style, naming, or improvement suggestion. +[INFO] — Observation or question, no action required. 
+``` + +--- + +## Security Checklist + +### PowerShell Injection +- [ ] User-supplied strings (capability values, element attributes, script arguments) are **never** interpolated raw into PowerShell strings +- [ ] `executeScript` payloads that build PS commands use proper escaping or parameterized construction +- [ ] Capability values used in pre/postrun scripts are validated and sanitized + +### FFI / native bindings +- [ ] `user32.dll` calls in `lib/winapi/user32.ts` validate coordinate ranges and handle types before passing to native +- [ ] koffi struct definitions match actual Windows API signatures + +### Secrets & credentials +- [ ] No API keys, tokens, or passwords in source code or test fixtures +- [ ] Capability values for app launch do not log sensitive data + +--- + +## Testing Standards + +- Unit tests live in `test/` and use **Vitest** +- New utility functions in `lib/` should have corresponding unit tests +- PowerShell condition builders (`lib/powershell/conditions.ts`, `converter.ts`) and XPath evaluator (`lib/xpath/`) are well-covered — changes here need tests +- E2E tests require a real Windows environment; don't flag missing E2E coverage for pure logic changes + +--- + +## Architecture Rules + +### Session lifecycle +- `createSession()` must start the PowerShell process cleanly +- `deleteSession()` must kill the PS process and clear all session state (element cache, capabilities) +- Any async work initiated during session must be awaited or cancelled on teardown + +### Element handles +- Element IDs are ephemeral — they map to live UIA3 elements that can become stale +- Code that caches element references must handle `ElementNotFound` / stale element gracefully + +### Command routing +- All new driver commands must be exported from `lib/commands/index.ts` and follow the existing mixin pattern +- MCP tools in `lib/mcp/tools/` must map cleanly to existing driver commands — avoid duplicating logic + +### Error handling +- Driver errors must be 
wrapped in Appium error classes (e.g., `NoSuchElementError`, `InvalidArgumentError`) +- Raw PowerShell stderr should not be surfaced verbatim to the WebDriver client +- MCP tool errors should return structured error responses, not throw + +--- + +## Code Style + +- TypeScript strict mode is on — no `any` unless unavoidable and justified +- Prefer `async/await` over raw Promise chains +- `@/` path alias resolves to `lib/` — use it for imports within the library +- Avoid adding unnecessary abstraction layers for single-use logic diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 0000000..d300267 --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,50 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + # Optional: Give a custom prompt to Claude. 
If this is not specified, Claude will perform the instructions specified in the comment that tagged it. + # prompt: 'Update the pull request description to include a summary of changes.' + + # Optional: Add claude_args to customize behavior and configuration + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://code.claude.com/docs/en/cli-reference for available options + # claude_args: '--allowed-tools Bash(gh pr:*)' + diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4ae7014..756daa7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,7 +5,6 @@ on: push: branches: - main - - develop permissions: contents: write diff --git a/.gitignore b/.gitignore index d6d8ebf..f90e701 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Claude Code local settings +.claude/ + # Logs logs *.log diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 0000000..9ed0677 --- /dev/null +++ b/.mcp.json @@ -0,0 +1,9 @@ +{ + "mcpServers": { + "desktop-driver-mcp": { + "command": "node", + "args": ["./build/lib/mcp/index.js"], + "env": {} + } + } +} diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..34b2626 --- /dev/null +++ b/.npmignore @@ -0,0 +1,32 @@ +# Source files +lib/ +test/ +examples/ + +# Config files +tsconfig.json +eslint.config.mjs +vitest.config.ts +vitest.e2e.config.ts +.releaserc +.npmrc + +# CI/CD +.github/ + +# IDE +.vscode/ +.claude/ + +# Dev tool config +.mcp.json + +# Build artifacts +build/tsconfig.tsbuildinfo +build/eslint.config.* + +# Misc +.gitignore +CHANGELOG.md +CLAUDE.md +MCP_README.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b1e745..e81e7b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,49 @@ +## [1.4.1](https://github.com/verisoft-ai/appium-desktop-driver/compare/v1.4.0...v1.4.1) (2026-03-23) + +### Bug Fixes + +* **mcp:** change mcp naming to match new Desktop Driver convention 
([e77a7b0](https://github.com/verisoft-ai/appium-desktop-driver/commit/e77a7b02d4ed8769d61697c96c239bb03e3aef61)) + +## [1.4.0](https://github.com/verisoft-ai/appium-desktop-driver/compare/v1.3.1...v1.4.0) (2026-03-23) + +### Features + +* **capability:** Add capabilities windowSwitchRetries and windowSwitchInterval ([a831b3a](https://github.com/verisoft-ai/appium-desktop-driver/commit/a831b3af5d8d531a41483f36391d631cd787c371)) +* custom env variables capabilities ([6a98b5c](https://github.com/verisoft-ai/appium-desktop-driver/commit/6a98b5c76bdfecf1ef6a893ea9c37abdd5e47c33)) +* **display:** add support for multi monitor testing ([1029aec](https://github.com/verisoft-ai/appium-desktop-driver/commit/1029aec97adb420e9525e1f325c64870dc51585c)) +* Implemented missing commands ([4434b99](https://github.com/verisoft-ai/appium-desktop-driver/commit/4434b996fa33cd0214c7cd44073f34806cd1c99f)) +* **mcp:** add MCP server with 39 tools and unit test suite ([cf3d464](https://github.com/verisoft-ai/appium-desktop-driver/commit/cf3d464d1b74efbdddb04aa16ff3a19e19564934)) + +### Bug Fixes + +* Add allowed tools to claude code reviewer ([c4c18e9](https://github.com/verisoft-ai/appium-desktop-driver/commit/c4c18e9f35915eb609e41572cb3c5ea15e3314a7)) +* add tabbing ([c0cb0e8](https://github.com/verisoft-ai/appium-desktop-driver/commit/c0cb0e8bb4fb37c9f70b8e891c659c56142c1943)) +* fix attaching to wrong application window ([8960843](https://github.com/verisoft-ai/appium-desktop-driver/commit/8960843d548c98728880c901b154215b6265b69e)) +* fix code review comments ([d7bebd9](https://github.com/verisoft-ai/appium-desktop-driver/commit/d7bebd9ff1660fd065a92e7008344c3b1323bd27)) +* **lint:** resolve lint errors ([5b72f12](https://github.com/verisoft-ai/appium-desktop-driver/commit/5b72f122472cdd4e563aa044b1baa2a52af7d10a)) +* **mcp:** resolve bugs, add tool annotations, and new UIA tools 
([fd73365](https://github.com/verisoft-ai/appium-desktop-driver/commit/fd7336552264a52ad2dda8c28bee2afcf44050a6)) +* Remove claude code review workflow ([88f921d](https://github.com/verisoft-ai/appium-desktop-driver/commit/88f921d81ba9b81ff1578b2ac34c81d337670f30)) +* Remove outerloops ([92eedfa](https://github.com/verisoft-ai/appium-desktop-driver/commit/92eedfa5decf6125c0f688da2d4c3bcf896491d5)) +* replace NovaWindows automation name with DesktopDriver ([dd585b9](https://github.com/verisoft-ai/appium-desktop-driver/commit/dd585b9013fd5b128e10c42af86ca4e5f86f6934)) +* **window:** narrow appProcessIds to the attached window's PID ([d281444](https://github.com/verisoft-ai/appium-desktop-driver/commit/d2814445ab5248483d451f1817fbff67d30d7654)) +* **window:** track child processes spawned from launched apps. ([f1e6bff](https://github.com/verisoft-ai/appium-desktop-driver/commit/f1e6bfffe5bfcdebe884de207eabce8381c83a67)) +* **window:** window handles access capability added ([eefb804](https://github.com/verisoft-ai/appium-desktop-driver/commit/eefb8040b2ec43795b4c985e42090dad2fa2e6ae)) + +### Miscellaneous Chores + +* bump version to 1.1.0 ([393bdae](https://github.com/verisoft-ai/appium-desktop-driver/commit/393bdaeaa0070385a15a61affe88c059c8967c6a)) +* bump version to 1.2.0 ([94a4f04](https://github.com/verisoft-ai/appium-desktop-driver/commit/94a4f046be53c3b804497ad4e1d2782860e25070)) +* **claude:** Add constraints and context for claude code review ([e95c6b2](https://github.com/verisoft-ai/appium-desktop-driver/commit/e95c6b219f436dd030f4e7dd840713e651af9cb4)) +* **claude:** change constraints for claude code review ([3acee1c](https://github.com/verisoft-ai/appium-desktop-driver/commit/3acee1c1565b575e84f3b087aff6085b6b524229)) +* **npm:** Ignore build artifacts and local mcp/claude config ([c9d3529](https://github.com/verisoft-ai/appium-desktop-driver/commit/c9d3529cf1c259d36eaf82e64702fdd080463195)) +* prepare package for verisoft npm distribution 
([5b35ff7](https://github.com/verisoft-ai/appium-desktop-driver/commit/5b35ff722d8254f24fea2bfe5112f4e0bddfc1e7)) +* **release:** bump version and re-added the auto release workflow ([8555903](https://github.com/verisoft-ai/appium-desktop-driver/commit/8555903c4b3038d11fe24c038ef83964f71a1710)) +* remove auto publish on push to main ([e940fd1](https://github.com/verisoft-ai/appium-desktop-driver/commit/e940fd1e4f20a505ea81dd668b4240abd2053d7f)) + +### Code Refactoring + +* **mcp:** remove auto-start, require Appium to be running externally ([8b76810](https://github.com/verisoft-ai/appium-desktop-driver/commit/8b76810041db68c960b3448173a8adca52679390)) + ## [1.3.1](https://github.com/AutomateThePlanet/appium-novawindows-driver/compare/v1.3.0...v1.3.1) (2026-03-09) ### Bug Fixes diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..1f8ecdd --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,63 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Commands + +```bash +npm run build # Compile TypeScript to build/ +npm run watch # Watch mode compilation +npm run lint # ESLint validation +npm run test # Unit tests (Vitest) +npm run test:e2e # E2E tests (requires Windows + Appium setup) +npm run mcp:start # Launch MCP server +``` + +Run a single test file: +```bash +npx vitest run test/path/to/file.test.ts +``` + +## Architecture + +This is an **Appium driver** for Windows desktop UI automation. It exposes two interfaces: + +1. **Appium WebDriver API** — used by test frameworks (Selenium-style) +2. **MCP Server** (`lib/mcp/`) — exposes 30+ tools over Model Context Protocol for AI agent use + +### Core driver flow + +`lib/driver.ts` — `NovaWindowsDriver` extends `BaseDriver`. On `createSession()`, it starts a persistent PowerShell process that remains open for the session lifetime. All UI Automation operations are executed by sending PowerShell commands through this process and reading stdout. 
+ +### Element finding + +Element searches go through `lib/powershell/` which builds PowerShell scripts using Windows UI Automation APIs. The driver converts Appium locator strategies (XPath, accessibility id, class name, etc.) into `UIA3` conditions via `lib/powershell/conditions.ts` and `converter.ts`. XPath is evaluated in `lib/xpath/` against the live UI Automation tree. + +### Input simulation + +Low-level mouse and keyboard events use native Windows API bindings in `lib/winapi/user32.ts` via the `koffi` FFI library. Higher-level action sequences (W3C Actions) are handled in `lib/commands/actions.ts` which translates WebDriver action chains into `user32` calls with optional easing/delay curves. + +### Commands + +All driver commands live in `lib/commands/` and are mixed into the driver class via `lib/commands/index.ts`. Key files: +- `actions.ts` — mouse, keyboard, wheel via W3C ActionSequence +- `element.ts` — element finding and attribute retrieval +- `app.ts` — app launch/close/window management +- `extension.ts` — `executeScript()` platform-specific commands +- `powershell.ts` — raw PowerShell execution +- `screen-recorder.ts` — FFmpeg-based recording + +### MCP server + +`lib/mcp/` is an independent MCP server binary (`novawindows-mcp`). It auto-starts and manages an Appium server process, creates WebdriverIO sessions, and exposes tools grouped by domain in `lib/mcp/tools/`. The server communicates via stdio using the `@modelcontextprotocol/sdk`. + +### TypeScript paths + +`@/` resolves to `lib/` (configured in both `tsconfig.json` and Vitest configs). 
+ +## Key capabilities + +- `platformName`: `"Windows"`, `automationName`: `"DesktopDriver"` +- Supported locator strategies: `xpath`, `accessibility id`, `id`, `name`, `class name`, `tag name`, `-windows uiautomation` +- Custom `executeScript()` commands listed in README.md +- Prerun/postrun PowerShell scripts via session capabilities diff --git a/MCP_README.md b/MCP_README.md new file mode 100644 index 0000000..95c0058 --- /dev/null +++ b/MCP_README.md @@ -0,0 +1,776 @@ +# Appium Desktop MCP Server + +The `appium-desktop-driver` package ships a built-in **Model Context Protocol (MCP)** server that lets AI agents (Claude, Cursor, Copilot, etc.) automate Windows desktop applications via natural language — no test-framework code required. + +--- + +## Table of Contents + +- [Overview](#overview) +- [Architecture](#architecture) +- [Prerequisites](#prerequisites) +- [Installation](#installation) +- [Configuration](#configuration) +- [Running the MCP Server](#running-the-mcp-server) +- [Connecting an AI Client](#connecting-an-ai-client) +- [Startup Sequence](#startup-sequence) +- [Tool Reference](#tool-reference) + - [Session Management](#session-management) + - [Element Discovery](#element-discovery) + - [Basic Interaction](#basic-interaction) + - [Advanced Input](#advanced-input) + - [UIA Patterns](#uia-patterns) + - [Window Management](#window-management) + - [Application Control](#application-control) + - [Clipboard](#clipboard) +- [Locator Strategies](#locator-strategies) +- [Capabilities Reference](#capabilities-reference) +- [Example Workflows](#example-workflows) +- [File Structure](#file-structure) + +--- + +## Overview + +The MCP server wraps the **Appium Desktop driver** in a stateful, tool-based interface that AI agents can call over the [Model Context Protocol](https://modelcontextprotocol.io). The agent: + +1. Calls `create_session` to launch a Windows app (Win32, UWP, or classic). +2. Uses `find_element` / `find_elements` to locate UI elements. +3. 
Uses interaction tools (`click_element`, `set_value`, `send_keys`, etc.) to drive the app. +4. Calls `delete_session` when done. + +All communication between the MCP host (AI client) and this server goes over **stdio**. Appium itself is managed automatically (auto-start / auto-shutdown) unless configured otherwise. + +--- + +## Architecture + +``` +AI Client (Claude / Cursor / etc.) + │ stdio (MCP protocol) + ▼ +┌──────────────────────────────────────────┐ +│ desktop-driver-mcp server │ +│ lib/mcp/index.ts │ +│ ┌─────────────┐ ┌──────────────────┐ │ +│ │AppiumManager│ │ AppiumSession │ │ +│ │ (auto-start │ │ (WebdriverIO │ │ +│ │ / monitor) │ │ remote driver) │ │ +│ └──────┬──────┘ └────────┬─────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ Appium Server MCP Tools │ +│ :4723 (30+ tools) │ +└──────────────────────────────────────────┘ + │ + ▼ +Windows UI Automation (UIA3) +``` + +### Key source files + +| File | Responsibility | +|------|---------------| +| [lib/mcp/index.ts](lib/mcp/index.ts) | Entry point — wires config, Appium, session, tools, and transport | +| [lib/mcp/config.ts](lib/mcp/config.ts) | Reads environment variables into `McpConfig` | +| [lib/mcp/appium-manager.ts](lib/mcp/appium-manager.ts) | Detects / spawns / shuts down the Appium process | +| [lib/mcp/session.ts](lib/mcp/session.ts) | Creates and deletes the WebdriverIO session | +| [lib/mcp/errors.ts](lib/mcp/errors.ts) | Formats errors for MCP tool responses | +| [lib/mcp/tools/index.ts](lib/mcp/tools/index.ts) | Registers all tool groups | +| [lib/mcp/tools/session.ts](lib/mcp/tools/session.ts) | `create_session`, `delete_session`, `get_session_status` | +| [lib/mcp/tools/find.ts](lib/mcp/tools/find.ts) | `find_element`, `find_elements`, `find_child_element` | +| [lib/mcp/tools/interact.ts](lib/mcp/tools/interact.ts) | `click_element`, `set_value`, `clear_element`, `get_text`, `get_attribute`, `is_element_displayed`, `is_element_enabled` | +| [lib/mcp/tools/advanced.ts](lib/mcp/tools/advanced.ts) | 
`advanced_click`, `send_keys`, `hover`, `scroll`, `click_and_drag` | +| [lib/mcp/tools/patterns.ts](lib/mcp/tools/patterns.ts) | UIA pattern tools — `invoke_element`, `expand_element`, `collapse_element`, `toggle_element`, `set_element_value`, `get_element_value`, window state tools | +| [lib/mcp/tools/window.ts](lib/mcp/tools/window.ts) | `take_screenshot`, `get_page_source`, `get_window_rect`, `get_window_handles`, `switch_to_window` | +| [lib/mcp/tools/app.ts](lib/mcp/tools/app.ts) | `get_window_element`, `launch_app`, `close_app`, `get_device_time` | +| [lib/mcp/tools/clipboard.ts](lib/mcp/tools/clipboard.ts) | `get_clipboard`, `set_clipboard` | + +--- + +## Prerequisites + +- **Windows 10 / 11** (64-bit) +- **Node.js 18+** +- **Appium 3.x** with the Appium Desktop driver installed: + ```bash + npm install -g appium + appium driver install --source=npm appium-desktop-driver + ``` +- An MCP-capable AI client (Claude Desktop, Cursor, VS Code with MCP extension, etc.) + +--- + +## Installation + +### From npm (recommended) + +```bash +npm install appium-desktop-driver +``` + +The MCP entry point is automatically registered as a `bin` command: + +``` +desktop-driver-mcp → build/lib/mcp/index.js +``` + +### From source + +```bash +git clone https://github.com/AutomateThePlanet/appium-desktop-driver.git +cd appium-desktop-driver +npm install +npm run build +``` + +--- + +## Configuration + +All configuration is via **environment variables** read at startup. No configuration file is required. + +| Variable | Default | Description | +|----------|---------|-------------| +| `APPIUM_HOST` | `127.0.0.1` | Hostname where Appium is running (or should be started) | +| `APPIUM_PORT` | `4723` | Port for the Appium server | +| `APPIUM_AUTO_START` | `true` | `true` = start Appium automatically if not running; `false` = require it to already be running | +| `APPIUM_BINARY` | *(auto-detected)* | Full path to the `appium` executable. 
If omitted, looks in `node_modules/.bin/appium` then the system `PATH` | + +### Binary resolution order + +When `APPIUM_AUTO_START=true` and Appium is not already running, the server resolves the binary as: + +1. `APPIUM_BINARY` env var (if set) +2. `/node_modules/.bin/appium` (local install) +3. `appium` on the system `PATH` (global install) + +--- + +## Running the MCP Server + +### Standalone (for testing) + +```bash +# Using the npm script +npm run mcp:start + +# Using npx (after npm install) +npx desktop-driver-mcp + +# After global install +desktop-driver-mcp +``` + +### With custom configuration + +```bash +APPIUM_PORT=4724 APPIUM_AUTO_START=false desktop-driver-mcp +``` + +--- + +## Connecting an AI Client + +### Claude Desktop + +Add to `claude_desktop_config.json` (usually at `%APPDATA%\Claude\claude_desktop_config.json`): + +```json +{ + "mcpServers": { + "appium-desktop-driver": { + "command": "npx", + "args": ["desktop-driver-mcp"], + "env": { + "APPIUM_AUTO_START": "true" + } + } + } +} +``` + +Or, if using a local build: + +```json +{ + "mcpServers": { + "appium-desktop-driver": { + "command": "node", + "args": ["C:/path/to/appium-desktop-driver/build/lib/mcp/index.js"], + "env": { + "APPIUM_AUTO_START": "true" + } + } + } +} +``` + +### Cursor / VS Code MCP Extension + +```json +{ + "mcp": { + "servers": { + "appium-desktop-driver": { + "type": "stdio", + "command": "npx", + "args": ["desktop-driver-mcp"] + } + } + } +} +``` + +--- + +## Startup Sequence + +When the server starts it performs these steps in order: + +1. **Load config** — reads environment variables into `McpConfig`. Fails immediately on invalid values (e.g. bad port number). +2. **Ensure Appium is running** — polls `GET /status` on the configured host:port. + - If already running: skips spawn. + - If not running and `APPIUM_AUTO_START=true`: spawns the Appium process and polls until ready (30 s timeout). + - If not running and `APPIUM_AUTO_START=false`: exits with an error. +3. 
**Create session holder** — `AppiumSession` object is initialized but no app is launched yet. +4. **Create MCP server** — `McpServer` from `@modelcontextprotocol/sdk` with name `desktop-driver-mcp` and version `1.3.0`. +5. **Register tools** — all 30+ tools are registered (see [Tool Reference](#tool-reference)). +6. **Register shutdown handlers** — `SIGINT`, `SIGTERM`, and `stdin close` all trigger graceful shutdown (session delete + Appium stop, with a 10 s session-delete timeout). +7. **Connect stdio transport** — the server is now ready for the AI client. + +> **Note:** `stdout` is owned entirely by the MCP protocol. All log messages (prefixed `[MCP]` or `[Appium]`) go to `stderr`. + +--- + +## Tool Reference + +### Session Management + +#### `create_session` +Start an Appium session by launching a Windows application. **Must be called before any other tool.** + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `app` | string | yes | Executable path (`C:\Windows\notepad.exe`), UWP App ID (`Microsoft.WindowsCalculator_8wekyb3d8bbwe!App`), or `Root` to attach to the desktop | +| `appArguments` | string | no | Command-line arguments for the app | +| `appWorkingDir` | string | no | Working directory for the app process | +| `waitForAppLaunch` | number (ms) | no | Extra wait after launch before interactions begin | +| `shouldCloseApp` | boolean | no | Default `true` — close app on `delete_session` | +| `implicitTimeout` | number (ms) | no | Default `1500` — implicit element wait timeout | +| `delayAfterClick` | number (ms) | no | Wait after every click | +| `delayBeforeClick` | number (ms) | no | Wait before every click | +| `smoothPointerMove` | string | no | Easing function name for pointer movement | + +Returns: confirmation string with the app name. + +--- + +#### `delete_session` +End the current Appium session. Closes the app (unless `shouldCloseApp=false` was set). Call when done. + +No parameters. 
+ +--- + +#### `get_session_status` +Check whether a session is currently active. + +No parameters. Returns `"Session is active."` or `"No active session. Call create_session to start one."` + +--- + +### Element Discovery + +#### `find_element` +Find a single UI element. Returns an **element ID string** used by interaction tools. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `strategy` | enum | yes | Locator strategy (see [Locator Strategies](#locator-strategies)) | +| `selector` | string | yes | The selector value | + +Returns: element ID string, or error if not found. + +--- + +#### `find_elements` +Find all UI elements matching the selector. + +Same parameters as `find_element`. Returns: JSON array of element ID strings. + +--- + +#### `find_child_element` +Find a child element within a known parent element. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `parentElementId` | string | yes | Element ID of the parent | +| `strategy` | enum | yes | Locator strategy | +| `selector` | string | yes | The selector value | + +Returns: child element ID string. + +--- + +### Basic Interaction + +All interaction tools accept an `elementId` returned by a find tool. + +#### `click_element` +Click a UI element at its center. + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +#### `set_value` +Clear an input element and type a new value. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `elementId` | string | yes | | +| `value` | string | yes | Text to type | + +--- + +#### `clear_element` +Clear the text content of an input element. + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +#### `get_text` +Get the visible text of a UI element. 
+ +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +Returns: text string. + +--- + +#### `get_attribute` +Get any UIA attribute of an element. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `elementId` | string | yes | | +| `attribute` | string | yes | Attribute name, e.g. `Name`, `AutomationId`, `ClassName`, `IsEnabled`, `IsOffscreen`, `ControlType`, `Value.Value` | + +Returns: attribute value string. + +--- + +#### `is_element_displayed` +Check whether an element is visible on screen (not off-screen). + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +Returns: `"true"` or `"false"`. + +--- + +#### `is_element_enabled` +Check whether an element is enabled and interactable. + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +Returns: `"true"` or `"false"`. + +--- + +### Advanced Input + +#### `advanced_click` +Perform a click with modifier keys, multiple clicks, a specific mouse button, or a hold duration. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `elementId` | string | — | Element to click (use either `elementId` or `x`+`y`) | +| `x` | number | — | Absolute screen x coordinate | +| `y` | number | — | Absolute screen y coordinate | +| `button` | enum | `left` | `left`, `right`, `middle`, `back`, `forward` | +| `modifierKeys` | array | `[]` | Any of `shift`, `ctrl`, `alt`, `win` | +| `durationMs` | number | `0` | Hold duration in ms (long-press) | +| `times` | number | `1` | Click count (`2` = double-click) | +| `interClickDelayMs` | number | `100` | Delay between clicks | + +--- + +#### `send_keys` +Send a sequence of keyboard actions — text, virtual key codes, or pauses. 
+ +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `actions` | array | yes | Sequence of action objects (see below) | +| `forceUnicode` | boolean | no | Use Unicode input for special characters | + +Each action object can contain: + +| Field | Type | Description | +|-------|------|-------------| +| `pause` | number | Pause in milliseconds | +| `text` | string | Text to type (Unicode supported) | +| `virtualKeyCode` | number | Windows VK code (e.g. `13` = Enter, `27` = Escape, `9` = Tab) | +| `down` | boolean | `true` = key-down only, `false` = key-up only, omit = full press | + +**Example** — press Ctrl+A then Delete: +```json +{ + "actions": [ + { "virtualKeyCode": 17, "down": true }, + { "virtualKeyCode": 65 }, + { "virtualKeyCode": 17, "down": false }, + { "virtualKeyCode": 46 } + ] +} +``` + +--- + +#### `hover` +Move the mouse pointer from one position to another (for hover effects, tooltips, or drag-without-click). + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `startElementId` | string | — | Element to start from | +| `startX` / `startY` | number | — | Absolute start coordinates | +| `endElementId` | string | — | Element to end at | +| `endX` / `endY` | number | — | Absolute end coordinates | +| `modifierKeys` | array | `[]` | `shift`, `ctrl`, `alt`, `win` | +| `durationMs` | number | `500` | Duration of movement | + +--- + +#### `scroll` +Scroll the mouse wheel at an element or screen coordinate. 
+ +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `elementId` | string | — | Element to scroll over | +| `x` / `y` | number | — | Absolute screen coordinates | +| `deltaX` | number | `0` | Horizontal scroll (positive = right) | +| `deltaY` | number | `0` | Vertical scroll (positive = down) | +| `modifierKeys` | array | `[]` | | + +--- + +#### `click_and_drag` +Click and drag from one position to another (resize, reorder, move). + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `startElementId` | string | — | Drag source element | +| `startX` / `startY` | number | — | Absolute start coordinates | +| `endElementId` | string | — | Drag target element | +| `endX` / `endY` | number | — | Absolute end coordinates | +| `modifierKeys` | array | `[]` | | +| `durationMs` | number | `500` | Drag duration | +| `button` | enum | `left` | `left`, `right`, `middle` | + +--- + +### UIA Patterns + +These tools use Windows UI Automation patterns directly, bypassing mouse simulation. More reliable for programmatic interactions. + +#### `invoke_element` +Invoke the default action of an element via the **Invoke** pattern (button click, menu item selection, etc.). + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +#### `expand_element` +Expand a collapsible element (tree node, combo box, menu) via the **ExpandCollapse** pattern. + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +#### `collapse_element` +Collapse an expanded element via the **ExpandCollapse** pattern. + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +#### `toggle_element` +Toggle a checkbox or toggle button via the **Toggle** pattern. 
+ +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +#### `set_element_value` +Set the value of an element via the **Value** or **RangeValue** pattern (sliders, spin boxes, editable cells). + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `elementId` | string | yes | | +| `value` | string | yes | Value to set | + +--- + +#### `get_element_value` +Get the value of an element via the **Value** pattern. + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +#### `maximize_window` +Maximize a window element via the **Window** pattern. + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +#### `minimize_window` +Minimize a window element via the **Window** pattern. + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +#### `restore_window` +Restore a minimized or maximized window to its normal state. + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +#### `close_window` +Close a window element via the **Window** pattern. + +| Parameter | Type | Required | +|-----------|------|----------| +| `elementId` | string | yes | + +--- + +### Window Management + +#### `take_screenshot` +Capture a screenshot of the current app window. + +No parameters. Returns: base64-encoded PNG string. + +--- + +#### `get_page_source` +Get the full XML UI element tree of the current window. Use this to understand the app structure before deciding what to interact with. + +No parameters. Returns: XML string. + +--- + +#### `get_window_rect` +Get the position and size of the current app window. + +No parameters. Returns: JSON object `{ x, y, width, height }`. + +--- + +#### `get_window_handles` +Get all window handles for the current session (for multi-window apps). 
+ +No parameters. Returns: JSON array of handle strings. + +--- + +#### `switch_to_window` +Switch automation focus to a different window. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `handle` | string | yes | Handle from `get_window_handles` | + +--- + +### Application Control + +#### `get_window_element` +Get the root UI element of the current app window. Returns an element ID for the top-level window (useful for UIA pattern operations on the window itself). + +No parameters. Returns: element ID string. + +--- + +#### `launch_app` +Re-launch the application configured in the session (if it was closed with `close_app`). + +No parameters. + +--- + +#### `close_app` +Close the application under test without ending the session. Use `launch_app` to restart it. + +No parameters. + +--- + +#### `get_device_time` +Get the current date/time on the Windows device. + +No parameters. Returns: datetime string. + +--- + +### Clipboard + +#### `get_clipboard` +Read the current clipboard contents. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `contentType` | enum | `plaintext` | `plaintext` or `image` | + +Returns: base64-encoded string of clipboard contents. + +--- + +#### `set_clipboard` +Set the clipboard contents. 
+ +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `b64Content` | string | yes | Base64-encoded content | +| `contentType` | enum | no | `plaintext` (default) or `image` | + +--- + +## Locator Strategies + +| Strategy | Description | Example selector | +|----------|-------------|-----------------| +| `accessibility id` | UIA AutomationId (most reliable) | `CalculatorResults` | +| `name` | Element Name property | `Close` | +| `id` | Same as AutomationId (CSS `#id` syntax internally) | `TextBox1` | +| `xpath` | XPath expression | `//Button[@Name='OK']` | +| `class name` | UIA ControlType class | `TextBlock` | +| `tag name` | UIA element type | `Button` | +| `-windows uiautomation` | Raw UIA condition expression | *(advanced)* | + +**Best practice:** prefer `accessibility id` (AutomationId) when available. Use `get_page_source` to discover AutomationIds and element hierarchy. + +--- + +## Capabilities Reference + +These are the WebdriverIO capabilities set by `create_session`. They map directly to Appium / Appium Desktop driver capabilities: + +| Capability | Session parameter | Description | +|-----------|------------------|-------------| +| `appium:app` | `app` | Application to launch | +| `appium:appArguments` | `appArguments` | CLI arguments | +| `appium:appWorkingDir` | `appWorkingDir` | Working directory | +| `appium:ms:waitForAppLaunch` | `waitForAppLaunch` | Post-launch wait (ms) | +| `appium:shouldCloseApp` | `shouldCloseApp` | Close on session delete | +| `appium:delayAfterClick` | `delayAfterClick` | Click delay (ms) | +| `appium:delayBeforeClick` | `delayBeforeClick` | Pre-click delay (ms) | +| `appium:smoothPointerMove` | `smoothPointerMove` | Pointer easing function | + +--- + +## Example Workflows + +### Open Notepad and type text + +``` +1. create_session(app="C:\Windows\notepad.exe") +2. find_element(strategy="class name", selector="RichEditD2DPT") → +3. 
set_value(elementId=<editor>, value="Hello from the AI agent!") +4. delete_session() +``` + +### Open Windows Calculator and perform a calculation + +``` +1. create_session(app="Microsoft.WindowsCalculator_8wekyb3d8bbwe!App") +2. find_element(strategy="accessibility id", selector="num5Button") → <5> +3. click_element(elementId=<5>) +4. find_element(strategy="accessibility id", selector="multiplyButton") → <mul> +5. click_element(elementId=<mul>) +6. find_element(strategy="accessibility id", selector="num3Button") → <3> +7. click_element(elementId=<3>) +8. find_element(strategy="accessibility id", selector="equalButton") → <eq> +9. click_element(elementId=<eq>) +10. find_element(strategy="accessibility id", selector="CalculatorResults") → <result> +11. get_text(elementId=<result>) → "Display is 15" +12. delete_session() +``` + +### Inspect an unknown app + +``` +1. create_session(app="C:\MyApp\MyApp.exe") +2. get_page_source() # inspect element tree XML +3. take_screenshot() # visual confirmation +4. get_window_rect() # window bounds +5. ...interact based on findings... +6. delete_session() +``` + +### Right-click context menu + +``` +1. create_session(app="C:\Windows\explorer.exe", appArguments="C:\Users") +2. find_element(strategy="name", selector="Documents") → <docs> +3. advanced_click(elementId=<docs>, button="right") +4. find_element(strategy="name", selector="Properties") → <props> +5. click_element(elementId=<props>) +6. 
delete_session() +``` + +--- + +## File Structure + +``` +lib/mcp/ +├── index.ts # Server entry point & lifecycle +├── config.ts # Environment variable config +├── appium-manager.ts # Appium process management +├── session.ts # WebdriverIO session wrapper +├── errors.ts # Error formatting utility +└── tools/ + ├── index.ts # Registers all tool groups + ├── session.ts # create_session, delete_session, get_session_status + ├── find.ts # find_element, find_elements, find_child_element + ├── interact.ts # click_element, set_value, clear_element, get_text, get_attribute, is_element_* + ├── advanced.ts # advanced_click, send_keys, hover, scroll, click_and_drag + ├── patterns.ts # UIA pattern tools (invoke, expand, toggle, value, window state) + ├── window.ts # take_screenshot, get_page_source, get_window_rect, handles + ├── app.ts # get_window_element, launch_app, close_app, get_device_time + └── clipboard.ts # get_clipboard, set_clipboard +``` diff --git a/README.md b/README.md index 7cdc3f9..074a612 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -NovaWindows Driver +Appium Desktop Driver =================== -NovaWindows Driver is a custom Appium driver designed to tackle the limitations of existing Windows automation solutions like WinAppDriver. NovaWindows Driver supports testing Universal Windows Platform (UWP), Windows Forms (WinForms), Windows Presentation Foundation (WPF), and Classic Windows (Win32) apps on Windows 10 PCs. Built to improve performance and reliability for traditional desktop applications, it offers: +Appium Desktop Driver is a custom Appium driver designed to tackle the limitations of existing Windows automation solutions like WinAppDriver. Appium Desktop Driver supports testing Universal Windows Platform (UWP), Windows Forms (WinForms), Windows Presentation Foundation (WPF), and Classic Windows (Win32) apps on Windows 10 PCs. 
Built to improve performance and reliability for traditional desktop applications, it offers: Faster XPath locator performance — Reduces element lookup times, even in complex UIs. RawView element support — Access elements typically hidden from the default ControlView/ContentView. @@ -13,12 +13,12 @@ It’s designed to handle real-world scenarios where traditional drivers fall sh > > This driver is built for Appium 2/3 and is not compatible with Appium 1. To install > the driver, simply run: -> `appium driver install --source=npm appium-novawindows-driver` +> `appium driver install --source=npm appium-desktop-driver` ## Usage -Beside of standard Appium requirements NovaWindows Driver adds the following prerequisites: +Beside of standard Appium requirements Appium Desktop Driver adds the following prerequisites: - Appium Windows Driver only supports Windows 10 and later as the host. @@ -30,14 +30,14 @@ Beside of standard Appium requirements NovaWindows Driver adds the following pre > realiability and better code and error management, as well as supporting > more features, that are currently not possible using PowerShell alone. > It is unlikely for the prerequisites to change, as this is one of the -> main goals of NovaWindows driver – seamless setup on any PC. +> main goals of Appium Desktop driver – seamless setup on any PC. -NovaWindows Driver supports the following capabilities: +Appium Desktop Driver supports the following capabilities: Capability Name | Description --- | --- platformName | Must be set to `Windows` (case-insensitive). -automationName | Must be set to `NovaWindows` (case-insensitive). +automationName | Must be set to `DesktopDriver` (case-insensitive). smoothPointerMove | CSS-like easing function (including valid Bezier curve). This controls the smooth movement of the mouse for `delayBeforeClick` ms. Example: `ease-in`, `cubic-bezier(0.42, 0, 0.58, 1)`. delayBeforeClick | Time in milliseconds before a click is performed. 
delayAfterClick | Time in milliseconds after a click is performed. @@ -48,6 +48,11 @@ appWorkingDir | Optional working directory path for the application. prerun | An object containing either `script` or `command` key. The value of each key must be a valid PowerShell script or command to be executed prior to the WinAppDriver session startup. See [Power Shell commands execution](#power-shell-commands-execution) for more details. Example: `{script: 'Get-Process outlook -ErrorAction SilentlyContinue'}` postrun | An object containing either `script` or `command` key. The value of each key must be a valid PowerShell script or command to be executed after WinAppDriver session is stopped. See [Power Shell commands execution](#power-shell-commands-execution) for more details. isolatedScriptExecution | Whether PowerShell scripts are executed in an isolated session. Default is `false`. +appEnvironment | Optional object of custom environment variables to inject into the PowerShell session. The variables are only available for the lifetime of the session and do not affect the system environment. Example: `{"MY_VAR": "hello", "API_URL": "http://localhost:3000"}`. +returnAllWindowHandles | When `true`, `getWindowHandles()` returns all top-level windows on the desktop (UIA root children) instead of only the windows belonging to the launched app. Useful for switching to arbitrary system windows. Default is `false`. +ms:waitForAppLaunch | Time in seconds to wait for the app window to appear after launch. Default is `0` (falls back to 10 000 ms internal timeout). +ms:windowSwitchRetries | Maximum number of retry attempts in `setWindow()` when the target window is not yet visible. Must be a non-negative integer. Default is `20`. +ms:windowSwitchInterval | Sleep duration in milliseconds between each retry in `setWindow()`. Must be a non-negative integer. Default is `500`. Please note that more capabilities will be added as the development of this driver progresses. 
Since it is still in its early stages, some features may be missing or subject to change. If you need a specific capability or encounter any issues, please feel free to open an issue. @@ -65,11 +70,11 @@ def generate_options(): # How to get the app ID for Universal Windows Apps (UWP): # https://www.securitylearningacademy.com/mod/book/view.php?id=13829&chapterid=678 uwp_options.app = 'Microsoft.WindowsCalculator_8wekyb3d8bbwe!App' - uwp_options.automation_name = 'NovaWindows' + uwp_options.automation_name = 'DesktopDriver' classic_options = WindowsOptions() classic_options.app = 'C:\\Windows\\System32\\notepad.exe' - classic_options.automation_name = 'NovaWindows' + classic_options.automation_name = 'DesktopDriver' use_existing_app_options = WindowsOptions() # Active window handles could be retrieved from any compatible UI inspector app: @@ -81,7 +86,7 @@ def generate_options(): # This capability could be used to create a workaround for UWP apps startup: # https://github.com/microsoft/WinAppDriver/blob/master/Samples/C%23/StickyNotesTest/StickyNotesSession.cs use_existing_app_options.app_top_level_window = hex(12345) - use_existing_app_options.automation_name = 'NovaWindows' + use_existing_app_options.automation_name = 'DesktopDriver' return [uwp_options, classic_options, use_existing_app_options] @@ -137,7 +142,7 @@ else: > **Note** > -> NovaWindows Driver runs on a single PowerShell session, +> Appium Desktop Driver runs on a single PowerShell session, > therefore you may share variables between executed PowerShell > scripts. Unless the PowerShell session exits or crashes for some > reason, you should be able to reuse the variables that you create. 
@@ -163,7 +168,7 @@ Beside of standard W3C APIs the driver provides the below custom command extensi > **Note** > -> In most cases, commands implemented in NovaWindows driver can be used +> In most cases, commands implemented in Appium Desktop driver can be used > more intuitively by just the element as a second argument and the value > (if such is needed) as the thrid argument and so on. For example: > `driver.executeScript("windows: setValue", element, "valueToSet")` or @@ -311,7 +316,7 @@ Base-64 encoded content of the Windows clipboard. ### windows: pushCacheRequest -This is an asynchronous function that sends cache requests based on specific conditions. This is useful for revealing RawView elements in the element tree. Note that cached elements aren't supported by NovaWindows driver yet. +This is an asynchronous function that sends cache requests based on specific conditions. This is useful for revealing RawView elements in the element tree. Note that cached elements aren't supported by Appium Desktop driver yet. #### Arguments @@ -589,6 +594,45 @@ button | string | no | Mouse button: `left` (default), `middle`, `right`, `back` \* Provide either startElementId or both startX and startY; and either endElementId or both endX and endY. +### windows: getMonitors + +Returns information about all connected display monitors, including their screen coordinates in the [virtual screen](https://learn.microsoft.com/en-us/windows/win32/gdi/the-virtual-screen) coordinate space, working area, device name, and which monitor is the primary display. + +This command takes no arguments. + +#### Returns + +An array of monitor objects, one per connected display: + +Name | Type | Description +--- | --- | --- +index | number | Zero-based index of the monitor in the `AllScreens` array. +deviceName | string | System device name, e.g. `\\.\DISPLAY1`. +primary | boolean | `true` if this is the primary display. 
+bounds | object | Full monitor rectangle: `{ x, y, width, height }` in virtual screen coordinates. +workingArea | object | Usable area excluding taskbars and docked toolbars: `{ x, y, width, height }`. + +#### Example + +```javascript +// WebdriverIO — move app window to the secondary monitor +const monitors = await driver.executeScript('windows: getMonitors', []); +const secondary = monitors.find(m => !m.primary); +if (secondary) { + await driver.setWindowRect(secondary.bounds.x, secondary.bounds.y, null, null); +} +``` + +```python +# Python — click at the center of the secondary monitor +monitors = driver.execute_script('windows: getMonitors', {}) +secondary = next((m for m in monitors if not m['primary']), None) +if secondary: + cx = secondary['bounds']['x'] + secondary['bounds']['width'] // 2 + cy = secondary['bounds']['y'] + secondary['bounds']['height'] // 2 + driver.execute_script('windows: click', {'x': cx, 'y': cy}) +``` + ## Development it is recommended to use Matt Bierner's [Comment tagged templates](https://marketplace.visualstudio.com/items?itemName=bierner.comment-tagged-templates) diff --git a/docs/driver-demo.mp4 b/docs/driver-demo.mp4 new file mode 100644 index 0000000..cfad27e Binary files /dev/null and b/docs/driver-demo.mp4 differ diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..0005615 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,582 @@ + + + + + + Appium Desktop Driver — Windows UI Automation That Actually Works + + + + + + + + + + + + + +
+
+
Appium 2 & 3 compatible
+

Windows UI Automation
That Actually Works

+

+ A drop-in Appium 2/3 driver for Windows that tackles the limitations of WinAppDriver — faster XPath, layout-safe input, and no extra setup required. +

+ +
+
+
npm install -g appium
+appium driver install --source=npm appium-desktop-driver
+
+ + + View on GitHub + +
+
+
+ + +
+
+ +

Driver Demo

+

Watch the driver automate a real Windows application end-to-end — element finding, input simulation, and test execution.

+ +
+
+ +
+
+
+
+ + +
+
+ +

Why Not WinAppDriver?

+

WinAppDriver is abandoned and ships known pain points. Here's what this driver does instead.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Problem with WinAppDriverWhat this driver does instead
Slow XPath on complex UIsFaster XPath via direct UIA3 tree traversal
Can't access hidden/raw elementsRawView element support
Keyboard layout–dependent inputUnicode-safe text input with layout bypass
Requires Developer Mode or extra setupZero extra prerequisites — PowerShell only
No MCP / AI agent supportBuilt-in MCP server with 30+ tools
+
+
+
+ + +
+
+ +

Supported App Types

+

Works with every Windows application framework — including legacy apps with sparse accessibility trees.

+ +
+
🪟UWP (Universal Windows Platform)
+
🖼️WinForms
+
💠WPF (Windows Presentation Foundation)
+
⚙️Win32 / Classic Windows
+
🔧Legacy / accessibility-sparse apps
+
+
+
+ + +
+
+ +

Core Features

+

Everything you need for production-grade Windows UI automation.

+ +
+
+
🔌
+

Appium 2/3 Native

+

Drop-in driver for the modern Appium stack. Familiar W3C WebDriver protocol — works with any Appium client.

+
+
+
+

Fast Element Finding

+

Element lookups via Windows UI Automation 3 (UIA3). XPath evaluated against the live UIA tree with RawView access for hidden elements.

+
+
+
🖱️
+

Rich Input Simulation

+

Mouse (click, scroll, hover, drag, Bezier easing), keyboard (Unicode, virtual key codes, modifier combos), and clipboard. All via native user32.dll calls.

+
+
+
🛠️
+

Platform Extensions

+

30+ executeScript commands: invoke/expand/collapse/toggle UI patterns, set/get values, manage windows, query monitors, run PowerShell, record screen.

+
+
+
🤖
+

MCP Server

+

An out-of-the-box Model Context Protocol server (desktop-driver-mcp) exposes 30+ tools so AI agents can automate Windows UIs without writing traditional test scripts.

+
+
+
💻
+

PowerShell Escape Hatch

+

Run arbitrary PowerShell before/after sessions (prerun/postrun), inject environment variables, and execute raw scripts mid-test.

+
+
+
+
+ + +
+
+ +

Locator Strategies

+

All supported locators with their UIA mapping and an example selector.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
StrategyMaps toExample
accessibility idUIA AutomationIdAppNameTitle
nameUIA NameCalculator
class nameUIA ClassNameTextBlock
tag nameUIA ControlTypeButton
xpathLive UIA tree XPath 1.0//Button[@Name="OK"]
idAlias for accessibility id42.333896.3.1
-windows uiautomationRaw UIA condition (C#/PS syntax)new PropertyCondition(...)
+
+
+
+ + +
+
+ +

MCP Server for AI Agents

+

+ Connect Claude, Copilot, or any MCP-compatible AI agent to any Windows app in minutes. +

+ +
+
+
1
+

Run desktop-driver-mcp

+

Start your Appium server, then run desktop-driver-mcp to connect and create a session.

+
+
+
2
+

Connect your AI client

+

Point Claude Desktop, Cursor, or any MCP-compatible client to the server via stdio transport.

+
+
+
3
+

Let the agent work

+

The agent finds elements, clicks, types, takes screenshots, and reads the UI tree — all without you writing test code.

+
+
+ + +
+
+ +
+
+ + +
+

MCP Config (Claude Desktop / Cursor)

+
+
{
+  "mcpServers": {
+    "windows": {
+      "command": "desktop-driver-mcp",
+      "args": [],
+      "env": {
+        "APP": "C:\\Windows\\System32\\notepad.exe"
+      }
+    }
+  }
+}
+
+
+ + +
+

Tool Groups (30+ tools)

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GroupToolsDescription
Sessionstart_session, get_session_info, end_sessionLifecycle management
Findfind_element, find_elements, find_child_element, wait_for_elementElement discovery with selector guidance
Inspectget_element_infoReads UIA properties and returns ranked selector suggestions
Interactclick_element, set_value, clear_element, get_text, get_attribute, is_element_displayed, is_element_enabledBasic interactions
Advancedadvanced_click, send_keys, hover, scroll, click_and_dragComplex input (right-click, double-click, drag, modifier keys)
Windowtake_screenshot, get_page_source, get_window_rect, get_window_handles, switch_to_window, maximize_window, minimize_window, restore_window, close_window, get_monitorsWindow & screen control
UI Patternsinvoke, expand_element, collapse_element, toggle_element, scroll_into_view, select_element, get_selected_item, get_all_selected_items, add_to_selection, remove_from_selection, get_element_value, set_element_value, is_multiple_selectionUIA interaction patterns
Appget_window_element, launch_app, close_app, get_device_timeApp lifecycle
Clipboardset_clipboard, get_clipboardClipboard read/write (text and images)
+
+
+
+
+ + +
+
+ +

Quick Start

+

Connect to any Windows app in under 10 lines.

+ +
+
+ + + + +
+
+
from appium import webdriver
+from appium.options.windows import WindowsOptions
+
+options = WindowsOptions()
+options.app = 'C:\\Windows\\System32\\notepad.exe'
+options.automation_name = 'DesktopDriver'
+
+driver = webdriver.Remote('http://127.0.0.1:4723', options=options)
+
+# Find and interact
+title_bar = driver.find_element('name', 'Notepad')
+driver.execute_script('windows: maximize', title_bar)
+ +
import io.appium.java_client.windows.WindowsDriver;
+import org.openqa.selenium.remote.DesiredCapabilities;
+import java.net.URL;
+
+DesiredCapabilities caps = new DesiredCapabilities();
+caps.setCapability("platformName", "Windows");
+caps.setCapability("appium:automationName", "DesktopDriver");
+caps.setCapability("appium:app", "C:\\Windows\\System32\\notepad.exe");
+
+WindowsDriver driver = new WindowsDriver(
+    new URL("http://127.0.0.1:4723"),
+    caps
+);
+
+driver.findElement(io.appium.java_client.AppiumBy.name("Notepad")).click();
+ +
import { remote } from 'webdriverio';
+
+const driver = await remote({
+  hostname: '127.0.0.1',
+  port: 4723,
+  capabilities: {
+    platformName: 'Windows',
+    'appium:automationName': 'DesktopDriver',
+    'appium:app': 'C:\\Windows\\System32\\notepad.exe',
+  },
+});
+
+const el = await driver.$('~AppNameTitle');
+await el.click();
+ +
using OpenQA.Selenium.Appium;
+using OpenQA.Selenium.Appium.Windows;
+
+var options = new AppiumOptions();
+options.PlatformName = "Windows";
+options.AddAdditionalAppiumOption("automationName", "DesktopDriver");
+options.AddAdditionalAppiumOption("app", @"C:\Windows\System32\notepad.exe");
+
+var driver = new WindowsDriver(
+    new Uri("http://127.0.0.1:4723"),
+    options
+);
+
+driver.FindElement(MobileBy.AccessibilityId("AppNameTitle")).Click();
+
+
+ + +
+ + Advanced session capabilities (Python) + +
+
options.set_capability('appium:smoothPointerMove', 'ease-in-out')
+options.set_capability('appium:delayBeforeClick', 100)  # ms
+options.app_top_level_window = 0x12345  # attach to existing window
+options.prerun = {'script': 'Start-Process notepad'}
+options.postrun = {'command': 'Stop-Process -Name notepad'}
+options.set_capability('appium:appEnvironment', {'MY_VAR': 'hello'})
+options.set_capability('appium:returnAllWindowHandles', True)
+
+
+
+
+ + +
+
+ +

Session Capabilities

+

All supported capabilities and their defaults.

+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CapabilityTypeDefaultDescription
platformNamestringMust be Windows
automationNamestringMust be DesktopDriver
appstringApp path or UWP App User Model ID
appTopLevelWindowstring/numberAttach to existing window by handle
appArgumentsstringCLI arguments for app launch
appWorkingDirstringWorking directory for app
smoothPointerMovestringCSS easing for mouse movement (e.g. ease-in)
delayBeforeClicknumberms delay before each click
delayAfterClicknumberms delay after each click
shouldCloseAppbooleantrueClose app window when session ends
prerunobject{script: '...'} or {command: '...'} to run before session
postrunobjectSame as prerun, runs after session
isolatedScriptExecutionbooleanfalseRun PowerShell scripts in isolated scope
appEnvironmentobjectCustom env vars injected into PS session
returnAllWindowHandlesbooleanfalseReturn all top-level windows (not just app's)
ms:waitForAppLaunchnumber0Seconds to wait for app window after launch
ms:windowSwitchRetriesnumber20Retry count for setWindow()
ms:windowSwitchIntervalnumber500ms between setWindow() retries
+
+
+
+ + +
+
+ +

executeScript Extensions

+

All windows: commands grouped by category.

+ +
+
+

Mouse & Keyboard

+
    +
  • windows: click — advanced click with modifier keys, multi-click, duration
  • +
  • windows: scroll — mouse wheel (horizontal or vertical)
  • +
  • windows: hover — mouse move from A to B
  • +
  • windows: keys — keyboard input (text, virtual key codes, modifiers)
  • +
  • windows: clickAndDrag — click-press-move-release drag
  • +
+
+ +
+

UI Patterns

+
    +
  • windows: invoke, windows: expand, windows: collapse
  • +
  • windows: toggle, windows: setValue, windows: getValue
  • +
  • windows: select, windows: addToSelection, windows: removeFromSelection
  • +
  • windows: selectedItem, windows: allSelectedItems, windows: isMultiple
  • +
  • windows: scrollIntoView, windows: setFocus
  • +
+
+ +
+

Window Management

+
    +
  • windows: maximize, windows: minimize, windows: restore, windows: close
  • +
  • windows: getMonitors — multi-monitor info
  • +
  • windows: getWindowElement — root element of current window
  • +
  • windows: launchApp, windows: closeApp
  • +
+
+ +
+

System

+
    +
  • windows: setClipboard, windows: getClipboard
  • +
  • windows: startRecordingScreen, windows: stopRecordingScreen
  • +
  • windows: deleteFile, windows: deleteFolder
  • +
  • windows: pushCacheRequest — UIA RawView element caching
  • +
+
+
+
+
+ + + + + + + + + + + diff --git a/docs/logo.png b/docs/logo.png new file mode 100644 index 0000000..a47aa7a Binary files /dev/null and b/docs/logo.png differ diff --git a/docs/mcp-demo.mp4 b/docs/mcp-demo.mp4 new file mode 100644 index 0000000..8d3ac0e Binary files /dev/null and b/docs/mcp-demo.mp4 differ diff --git a/docs/script.js b/docs/script.js new file mode 100644 index 0000000..909981f --- /dev/null +++ b/docs/script.js @@ -0,0 +1,65 @@ +/* ============================================================ + Language tabs + ============================================================ */ +document.querySelectorAll('.lang-tabs').forEach(tabBar => { + const tabs = tabBar.querySelectorAll('.lang-tab'); + const codeBlock = tabBar.nextElementSibling; // .tabbed-code + + tabs.forEach(tab => { + tab.addEventListener('click', () => { + const lang = tab.dataset.lang; + + // Update active tab + tabs.forEach(t => delete t.dataset.active); + tab.dataset.active = ''; + + // Show matching pre + codeBlock.querySelectorAll('pre').forEach(pre => { + delete pre.dataset.active; + if (pre.dataset.lang === lang) pre.dataset.active = ''; + }); + }); + }); +}); + +/* ============================================================ + Copy buttons + ============================================================ */ +const ICON_COPY = ``; +const ICON_CHECK = ``; + +function makeCopyBtn() { + const btn = document.createElement('button'); + btn.className = 'copy-btn'; + btn.setAttribute('aria-label', 'Copy'); + btn.innerHTML = ICON_COPY; + return btn; +} + +function flashCopied(btn) { + btn.innerHTML = ICON_CHECK; + btn.classList.add('copied'); + setTimeout(() => { btn.innerHTML = ICON_COPY; btn.classList.remove('copied'); }, 1800); +} + +function addCopyButton(wrap) { + const btn = makeCopyBtn(); + btn.addEventListener('click', () => { + const code = wrap.querySelector('pre[data-active], pre') || wrap; + navigator.clipboard.writeText(code.innerText.trim()).then(() => flashCopied(btn)); + }); + 
wrap.appendChild(btn); +} + +document.querySelectorAll('.code-block-wrap').forEach(addCopyButton); + +// Hero install block +const heroInstall = document.querySelector('.hero-install'); +if (heroInstall) { + const btn = makeCopyBtn(); + btn.classList.add('hero-install-copy'); + btn.addEventListener('click', () => { + navigator.clipboard.writeText(heroInstall.querySelector('code').innerText.trim()).then(() => flashCopied(btn)); + }); + heroInstall.appendChild(btn); +} diff --git a/docs/style.css b/docs/style.css new file mode 100644 index 0000000..14d3a45 --- /dev/null +++ b/docs/style.css @@ -0,0 +1,706 @@ +/* ============================================================ + Reset & Custom Properties + ============================================================ */ +*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } + +:root { + --bg: #ffffff; + --bg-card: #f8fafb; + --bg-code: #f1f5f7; + --border: #e2e8ee; + --accent: #3ecfb2; + --accent-dim: #2baf96; + --text: #111827; + --text-muted: #6b7280; + --radius: 8px; + --font-body: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; + --font-code: 'JetBrains Mono', 'Cascadia Code', 'Consolas', monospace; + --max-width: 1100px; +} + +html { scroll-behavior: smooth; } + +body { + background: var(--bg); + color: var(--text); + font-family: var(--font-body); + font-size: 16px; + line-height: 1.7; +} + +a { color: var(--accent); text-decoration: none; } +a:hover { text-decoration: underline; } + +/* ============================================================ + Layout Helpers + ============================================================ */ +.container { + max-width: var(--max-width); + margin: 0 auto; + padding: 0 24px; +} + +section { padding: 80px 0; } +section + section { border-top: 1px solid var(--border); } +section:nth-child(even) { background: var(--bg-card); } + +h1, h2, h3 { line-height: 1.25; } +h2 { font-size: 2rem; margin-bottom: 12px; } +h3 { font-size: 1.25rem; margin-bottom: 8px; } 
+ +.section-label { + font-size: 0.75rem; + font-weight: 600; + letter-spacing: 0.12em; + text-transform: uppercase; + color: var(--accent); + margin-bottom: 12px; +} + +.section-intro { + color: var(--text-muted); + max-width: 640px; + margin-bottom: 40px; +} + +/* ============================================================ + Nav + ============================================================ */ +nav { + position: sticky; + top: 0; + z-index: 100; + background: rgba(255,255,255,0.92); + backdrop-filter: blur(8px); + border-bottom: 1px solid var(--border); + box-shadow: 0 1px 3px rgba(0,0,0,0.06); +} + +.nav-inner { + display: flex; + align-items: center; + justify-content: space-between; + height: 56px; + max-width: var(--max-width); + margin: 0 auto; + padding: 0 24px; +} + +.nav-brand { + font-weight: 700; + font-size: 0.95rem; + color: var(--text); + display: flex; + align-items: center; + gap: 8px; +} + +.nav-brand span { color: var(--accent); } + +.nav-logo { + height: 26px; + width: auto; + border-radius: 5px; + display: block; +} + +.nav-links { + display: flex; + gap: 24px; + list-style: none; +} + +.nav-links a { + color: var(--text-muted); + font-size: 0.875rem; + transition: color 0.15s; +} +.nav-links a:hover { color: var(--text); text-decoration: none; } + +/* ============================================================ + Hero + ============================================================ */ +#hero { + padding: 100px 0 80px; + border-top: none; +} + +.hero-eyebrow { + display: inline-flex; + align-items: center; + gap: 8px; + font-size: 0.8rem; + font-weight: 600; + letter-spacing: 0.1em; + text-transform: uppercase; + color: var(--accent-dim); + background: rgba(62,207,178,0.1); + border: 1px solid rgba(62,207,178,0.3); + border-radius: 100px; + padding: 4px 12px; + margin-bottom: 24px; +} + +.hero-eyebrow::before { + content: ''; + width: 6px; height: 6px; + border-radius: 50%; + background: var(--accent-dim); +} + +#hero h1 { + font-size: 
clamp(2.4rem, 5vw, 3.8rem); + font-weight: 800; + letter-spacing: -0.02em; + margin-bottom: 16px; + max-width: 800px; +} + +#hero h1 .accent { color: var(--accent); } + +.hero-pitch { + font-size: 1.15rem; + color: var(--text-muted); + max-width: 620px; + margin-bottom: 24px; +} + +.hero-install-row { + display: flex; + align-items: center; + gap: 16px; + flex-wrap: wrap; + margin-bottom: 32px; +} + +.hero-install { + position: relative; + background: var(--bg-code); + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 14px 48px 14px 20px; + display: inline-block; +} + +.hero-install-copy { + position: absolute; + top: 50%; right: 12px; + transform: translateY(-50%); + display: flex; + align-items: center; + justify-content: center; + width: 26px; height: 26px; + background: none; + border: 1px solid var(--border); + color: var(--text-muted); + border-radius: 4px; + cursor: pointer; + transition: background 0.15s, color 0.15s; +} +.hero-install-copy:hover { background: rgba(0,0,0,0.05); color: var(--text); } +.hero-install-copy.copied { color: #0f7a5c; } + +.hero-install pre, +.hero-install pre code { + margin: 0; + padding: 0; + background: none; + border: none; + font-family: var(--font-code); + font-size: 0.875rem; + color: #0f7a5c; +} + +.hero-actions { + display: flex; + align-items: center; + flex-wrap: wrap; + gap: 16px; + margin-bottom: 48px; +} + +.install-block { + display: flex; + align-items: center; + background: var(--bg-code); + border: 1px solid var(--border); + border-radius: var(--radius); + overflow: hidden; + font-family: var(--font-code); + font-size: 0.85rem; +} + +.install-block code { + padding: 10px 16px; + color: #0f7a5c; +} + +.install-block .copy-btn { + padding: 10px 14px; + background: transparent; + border: none; + border-left: 1px solid var(--border); + color: var(--text-muted); + cursor: pointer; + font-size: 0.75rem; + font-family: var(--font-body); + transition: color 0.15s, background 0.15s; + 
white-space: nowrap; +} +.install-block .copy-btn:hover { background: #e8f4f1; color: var(--text); } + +.btn-primary { + display: inline-flex; + align-items: center; + gap: 8px; + background: var(--accent); + color: #fff; + padding: 10px 20px; + border-radius: var(--radius); + font-weight: 600; + font-size: 0.9rem; + transition: background 0.15s; +} +.btn-primary:hover { background: var(--accent-dim); text-decoration: none; } + +.badges { + display: flex; + flex-wrap: wrap; + gap: 8px; +} + +.badge { + font-size: 0.75rem; + font-weight: 600; + padding: 3px 10px; + border-radius: 100px; + border: 1px solid var(--border); + color: var(--text-muted); + background: var(--bg-card); +} + +/* ============================================================ + Video sections + ============================================================ */ +.video-outer { + max-width: 860px; +} + +.video-wrapper { + position: relative; + padding-top: 56.25%; + border-radius: var(--radius); + overflow: hidden; + border: 1px solid var(--border); + background: var(--bg-card); +} + +.video-wrapper iframe, +.video-wrapper video { + position: absolute; + inset: 0; + width: 100%; height: 100%; + border: none; + object-fit: contain; +} + +.video-placeholder-box { + position: absolute; + inset: 0; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + gap: 12px; + color: var(--text-muted); + font-size: 0.9rem; +} + +.video-placeholder-box .play-icon { + width: 56px; height: 56px; + border: 2px solid var(--border); + border-radius: 50%; + display: flex; align-items: center; justify-content: center; + color: var(--text-muted); +} + +/* ============================================================ + Comparison table (Why Not WinAppDriver) + ============================================================ */ +.comparison-table { + width: 100%; + border-collapse: collapse; + font-size: 0.9rem; +} + +.comparison-table th { + text-align: left; + padding: 12px 16px; + 
background: var(--bg-card); + border-bottom: 2px solid var(--border); + color: var(--text-muted); + font-weight: 600; + font-size: 0.8rem; + text-transform: uppercase; + letter-spacing: 0.08em; +} + +.comparison-table td { + padding: 14px 16px; + border-bottom: 1px solid var(--border); + vertical-align: top; +} + +.comparison-table tr:last-child td { border-bottom: none; } + +.comparison-table td:first-child { color: var(--text-muted); } +.comparison-table td:last-child { color: var(--text); } + +.check { color: #2baf96; font-weight: 700; margin-right: 6px; } +.cross { color: #e05252; font-weight: 700; margin-right: 6px; } + +/* ============================================================ + App type chips + ============================================================ */ +.app-chips { + display: flex; + flex-wrap: wrap; + gap: 12px; +} + +.app-chip { + display: flex; + align-items: center; + gap: 10px; + background: #fff; + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 12px 20px; + font-weight: 600; + font-size: 0.9rem; + box-shadow: 0 1px 3px rgba(0,0,0,0.05); +} + +.app-chip .chip-icon { + font-size: 1.25rem; +} + +/* ============================================================ + Feature cards + ============================================================ */ +.feature-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); + gap: 20px; +} + +.feature-card { + background: #fff; + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 24px; + transition: border-color 0.2s, box-shadow 0.2s; + box-shadow: 0 1px 4px rgba(0,0,0,0.05); +} + +.feature-card:hover { border-color: var(--accent); box-shadow: 0 4px 12px rgba(62,207,178,0.12); } + +.feature-icon { + font-size: 1.75rem; + margin-bottom: 14px; +} + +.feature-card h3 { margin-bottom: 8px; } +.feature-card p { color: var(--text-muted); font-size: 0.9rem; } + +/* ============================================================ + Tables 
(generic) + ============================================================ */ +.data-table-wrap { overflow-x: auto; } + +.data-table { + width: 100%; + border-collapse: collapse; + font-size: 0.875rem; +} + +.data-table th { + text-align: left; + padding: 10px 14px; + background: var(--bg-card); + border-bottom: 2px solid var(--border); + color: var(--text-muted); + font-size: 0.78rem; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.08em; + white-space: nowrap; +} + +.data-table td { + padding: 10px 14px; + border-bottom: 1px solid var(--border); + vertical-align: top; +} + +.data-table tr:last-child td { border-bottom: none; } + +.data-table code { + font-family: var(--font-code); + font-size: 0.82rem; + background: var(--bg-code); + border: 1px solid var(--border); + padding: 1px 6px; + border-radius: 4px; + color: #0f7a5c; + white-space: nowrap; +} + +/* ============================================================ + MCP Steps + ============================================================ */ +.mcp-steps { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(260px, 1fr)); + gap: 20px; + margin-bottom: 48px; +} + +.mcp-step { + background: #fff; + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 24px; + position: relative; + box-shadow: 0 1px 4px rgba(0,0,0,0.05); +} + +.step-num { + display: inline-flex; + align-items: center; + justify-content: center; + width: 32px; height: 32px; + background: rgba(62,207,178,0.12); + border: 1px solid rgba(62,207,178,0.35); + color: var(--accent-dim); + border-radius: 50%; + font-weight: 700; + font-size: 0.85rem; + margin-bottom: 14px; +} + +.mcp-step p { color: var(--text-muted); font-size: 0.9rem; } + +/* MCP tool group table rows */ +.mcp-group-table td:first-child { white-space: nowrap; font-weight: 600; color: var(--accent); } + +/* ============================================================ + Code blocks + 
============================================================ */ +.code-block-wrap { + position: relative; + background: var(--bg-code); + border: 1px solid var(--border); + border-radius: var(--radius); + overflow: hidden; + box-shadow: 0 1px 3px rgba(0,0,0,0.05); +} + +.code-block-wrap pre { + margin: 0; + padding: 20px; + overflow-x: auto; + font-size: 0.85rem; + line-height: 1.6; +} + +.code-block-wrap .copy-btn { + position: absolute; + top: 44px; right: 10px; + display: flex; + align-items: center; + justify-content: center; + width: 28px; height: 28px; + background: rgba(255,255,255,0.8); + border: 1px solid var(--border); + color: var(--text-muted); + border-radius: 4px; + cursor: pointer; + transition: background 0.15s, color 0.15s; +} +.code-block-wrap .copy-btn:hover { background: #fff; color: var(--text); } +.code-block-wrap .copy-btn.copied { color: #0f7a5c; } + +/* Language tabs */ +.lang-tabs { + display: flex; + gap: 0; + border-bottom: 1px solid var(--border); + background: rgba(0,0,0,0.02); + overflow-x: auto; +} + +.lang-tab { + padding: 8px 18px; + font-size: 0.82rem; + font-weight: 600; + color: var(--text-muted); + cursor: pointer; + border-bottom: 2px solid transparent; + white-space: nowrap; + transition: color 0.15s, border-color 0.15s; + background: none; + border-top: none; + border-left: none; + border-right: none; + font-family: var(--font-body); +} + +.lang-tab[data-active] { + color: var(--accent); + border-bottom-color: var(--accent); +} + +.tabbed-code { position: relative; } + +.tabbed-code pre { display: none; } +.tabbed-code pre[data-active] { display: block; } + +/* ============================================================ + executeScript groups + ============================================================ */ +.ext-groups { + display: grid; + grid-template-columns: repeat(2, 1fr); + gap: 20px; +} + +.ext-group { + background: #fff; + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 20px; + 
box-shadow: 0 1px 4px rgba(0,0,0,0.05); +} + +.ext-group h3 { + font-size: 0.85rem; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.08em; + color: var(--accent); + margin-bottom: 12px; +} + +.ext-group ul { + list-style: none; + display: flex; + flex-direction: column; + gap: 6px; +} + +.ext-group li { + font-size: 0.85rem; + color: var(--text-muted); +} + +.ext-group li code { + font-family: var(--font-code); + font-size: 0.8rem; + color: #0f7a5c; + background: rgba(62,207,178,0.1); + padding: 1px 5px; + border-radius: 3px; +} + +/* ============================================================ + Install section + ============================================================ */ +.install-steps { + display: flex; + flex-direction: column; + gap: 20px; + max-width: 720px; +} + +.req-list { + list-style: none; + display: flex; + flex-direction: column; + gap: 8px; +} + +.req-list li { + display: flex; + align-items: center; + gap: 10px; + font-size: 0.9rem; + color: var(--text-muted); +} + +.req-list li::before { + content: '✓'; + color: #2baf96; + font-weight: 700; + flex-shrink: 0; +} + +/* ============================================================ + Footer + ============================================================ */ +footer { + border-top: 1px solid var(--border); + padding: 40px 0; + color: var(--text-muted); + font-size: 0.875rem; +} + +.footer-inner { + max-width: var(--max-width); + margin: 0 auto; + padding: 0 24px; + display: flex; + flex-wrap: wrap; + justify-content: space-between; + align-items: center; + gap: 16px; +} + +.footer-links { + display: flex; + flex-wrap: wrap; + gap: 20px; + list-style: none; +} + +.footer-links a { color: var(--text-muted); transition: color 0.15s; } +.footer-links a:hover { color: var(--text); text-decoration: none; } + +.footer-logo { + height: 20px; + width: auto; + border-radius: 3px; + vertical-align: middle; + margin: 0 2px; + display: inline-block; +} + +/* 
============================================================ + Utilities + ============================================================ */ +.mt-4 { margin-top: 16px; } +.mt-8 { margin-top: 32px; } +.mt-12 { margin-top: 48px; } + +/* ============================================================ + Responsive + ============================================================ */ +@media (max-width: 640px) { + nav .nav-links { display: none; } + #hero h1 { font-size: 2rem; } + .install-block code { font-size: 0.75rem; } + .feature-grid { grid-template-columns: 1fr; } + .mcp-steps { grid-template-columns: 1fr; } + .ext-groups { grid-template-columns: 1fr; } + +} diff --git a/eslint.config.mjs b/eslint.config.mjs index d82d831..c890803 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -11,4 +11,9 @@ export default defineConfig( { files: ['test/e2e/**/*.ts'], }, + { + rules: { + '@typescript-eslint/no-unused-vars': ['warn', { argsIgnorePattern: '^_' }], + }, + }, ); diff --git a/examples/C#/CalculatorTest/CalculatorTest/CalculatorSession.cs b/examples/C#/CalculatorTest/CalculatorTest/CalculatorSession.cs index e87d1a3..d99d0b7 100644 --- a/examples/C#/CalculatorTest/CalculatorTest/CalculatorSession.cs +++ b/examples/C#/CalculatorTest/CalculatorTest/CalculatorSession.cs @@ -22,7 +22,7 @@ protected static void Setup() var appiumOptions = new AppiumOptions { App = CalculatorAppId, - AutomationName = "NovaWindows", + AutomationName = "DesktopDriver", PlatformName = "Windows", }; @@ -41,4 +41,4 @@ protected static void TearDown() Session.Quit(); Session = null; } -} \ No newline at end of file +} diff --git a/lib/commands/app.ts b/lib/commands/app.ts index e82d5ed..3c10ffa 100644 --- a/lib/commands/app.ts +++ b/lib/commands/app.ts @@ -13,7 +13,7 @@ import { pwsh$, pwsh, } from '../powershell'; -import { sleep } from '../util'; +import { isUwpAppId, sleep } from '../util'; import { errors, W3C_ELEMENT_KEY } from '@appium/base-driver'; import { 
getWindowAllHandlesForProcessIds, @@ -91,6 +91,11 @@ export async function getWindowHandle(this: NovaWindowsDriver): Promise } export async function getWindowHandles(this: NovaWindowsDriver): Promise { + if (this.appProcessIds.length > 0 && !this.caps.returnAllWindowHandles) { + const handles = getWindowAllHandlesForProcessIds(this.appProcessIds); + return handles.map((h) => `0x${h.toString(16).padStart(8, '0')}`); + } + const result = await this.sendPowerShellCommand(AutomationElement.rootElement.findAll(TreeScope.CHILDREN, new TrueCondition()).buildCommand()); const elIds = result.split('\n').map((x) => x.trim()).filter(Boolean); const nativeWindowHandles: string[] = []; @@ -105,7 +110,9 @@ export async function getWindowHandles(this: NovaWindowsDriver): Promise { const handle = Number(nameOrHandle); - for (let i = 1; i <= 20; i++) { // TODO: make a setting for the number of retries or timeout + const maxRetries = this.caps['ms:windowSwitchRetries'] ?? 20; + const sleepInterval = this.caps['ms:windowSwitchInterval'] ?? SLEEP_INTERVAL_MS; + for (let i = 1; i <= maxRetries; i++) { if (!isNaN(handle)) { const condition = new PropertyCondition(Property.NATIVE_WINDOW_HANDLE, new PSInt32(handle)); const elementId = await this.sendPowerShellCommand(AutomationElement.rootElement.findFirst(TreeScope.CHILDREN_OR_SELF, condition).buildCommand()); @@ -128,8 +135,8 @@ export async function setWindow(this: NovaWindowsDriver, nameOrHandle: string): return; } - this.log.info(`Failed to locate window with name '${name}'. Sleeping for ${SLEEP_INTERVAL_MS} milliseconds and retrying... (${i}/20)`); // TODO: make a setting for the number of retries or timeout - await sleep(SLEEP_INTERVAL_MS); // TODO: make a setting for the sleep timeout + this.log.info(`Failed to locate window with name '${name}'. Sleeping for ${sleepInterval} milliseconds and retrying... 
(${i}/${maxRetries})`); + await sleep(sleepInterval); } throw new errors.NoSuchWindowError(`No window was found with name or handle '${nameOrHandle}'.`); @@ -163,6 +170,11 @@ export async function changeRootElement(this: NovaWindowsDriver, pathOrNativeWin if (elementId.trim() !== '') { await this.sendPowerShellCommand(/* ps1 */ `$rootElement = ${new FoundAutomationElement(elementId).buildCommand()}`); trySetForegroundWindow(nativeWindowHandle); + const pidResult = await this.sendPowerShellCommand(`$rootElement.Current.ProcessId`); + const pid = Number(pidResult.trim()); + if (!isNaN(pid) && pid > 0) { + this.appProcessIds = [pid]; + } return; } @@ -171,53 +183,35 @@ export async function changeRootElement(this: NovaWindowsDriver, pathOrNativeWin const path = pathOrNativeWindowHandle; - if (path.includes('!') && path.includes('_') && !(path.includes('/') || path.includes('\\'))) { + if (isUwpAppId(path)) { this.log.debug('Detected app path to be in the UWP format.'); await this.sendPowerShellCommand(/* ps1 */ `Start-Process 'explorer.exe' 'shell:AppsFolder\\${path}'${this.caps.appArguments ? ` -ArgumentList '${this.caps.appArguments}'` : ''}`); - await sleep((this.caps['ms:waitForAppLaunch'] ?? 0) * 1000 || SLEEP_INTERVAL_MS); - for (let i = 1; i <= 20; i++) { - const result = await this.sendPowerShellCommand(/* ps1 */ `(Get-Process -Name 'ApplicationFrameHost').Id`); - const processIds = result.split('\n').map((pid) => pid.trim()).filter(Boolean).map(Number); - - this.log.debug('Process IDs of ApplicationFrameHost processes: ' + processIds.join(', ')); - try { - await this.attachToApplicationWindow(processIds); - return; - } catch { - // noop - } - - this.log.info(`Failed to locate window of the app. Sleeping for ${SLEEP_INTERVAL_MS} milliseconds and retrying... 
(${i}/20)`); // TODO: make a setting for the number of retries or timeout - await sleep(SLEEP_INTERVAL_MS); // TODO: make a setting for the sleep timeout + const result = await this.sendPowerShellCommand(/* ps1 */ `(Get-Process -Name 'ApplicationFrameHost').Id`); + const processIds = result.split('\n').map((pid) => pid.trim()).filter(Boolean).map(Number); + this.log.debug(`Process IDs of ApplicationFrameHost processes (${processIds.length}): ` + processIds.join(', ')); + this.appProcessIds = processIds; + await this.attachToApplicationWindow(processIds); + const attachedPid = Number((await this.sendPowerShellCommand(`$rootElement.Current.ProcessId`)).trim()); + if (!isNaN(attachedPid) && attachedPid > 0) { + this.appProcessIds = [attachedPid]; } } else { this.log.debug('Detected app path to be in the classic format.'); const normalizedPath = normalize(path); await this.sendPowerShellCommand(/* ps1 */ `Start-Process '${normalizedPath}'${this.caps.appArguments ? ` -ArgumentList '${this.caps.appArguments}'` : ''}`); - await sleep((this.caps['ms:waitForAppLaunch'] ?? 0) * 1000 || 500); - for (let i = 1; i <= 20; i++) { - try { - const breadcrumbs = normalizedPath.toLowerCase().split('\\').flatMap((x) => x.split('/')); - const executable = breadcrumbs[breadcrumbs.length - 1]; - const processName = executable.endsWith('.exe') ? executable.slice(0, executable.length - 4) : executable; - const result = await this.sendPowerShellCommand(/* ps1 */ `(Get-Process -Name '${processName}' | Sort-Object StartTime -Descending).Id`); - const processIds = result.split('\n').map((pid) => pid.trim()).filter(Boolean).map(Number); - this.log.debug(`Process IDs of '${processName}' processes: ` + processIds.join(', ')); - - await this.attachToApplicationWindow(processIds); - return; - } catch (err) { - if (err instanceof Error) { - this.log.debug(`Received error:\n${err.message}`); - } - } - - this.log.info(`Failed to locate window of the app. 
Sleeping for ${SLEEP_INTERVAL_MS} milliseconds and retrying... (${i}/20)`); // TODO: make a setting for the number of retries or timeout - await sleep(SLEEP_INTERVAL_MS); // TODO: make a setting for the sleep timeout + const breadcrumbs = normalizedPath.toLowerCase().split('\\').flatMap((x) => x.split('/')); + const executable = breadcrumbs[breadcrumbs.length - 1]; + const processName = executable.endsWith('.exe') ? executable.slice(0, executable.length - 4) : executable; + const result = await this.sendPowerShellCommand(/* ps1 */ `(Get-Process -Name '${processName}' | Sort-Object StartTime -Descending).Id`); + const processIds = result.split('\n').map((pid) => pid.trim()).filter(Boolean).map(Number); + this.log.debug(`Process IDs of '${processName}' processes: ` + processIds.join(', ')); + this.appProcessIds = processIds; + await this.attachToApplicationWindow(processIds); + const attachedPid = Number((await this.sendPowerShellCommand(`$rootElement.Current.ProcessId`)).trim()); + if (!isNaN(attachedPid) && attachedPid > 0) { + this.appProcessIds = [attachedPid]; } } - - throw new errors.UnknownError('Failed to locate window of the app.'); } export async function back(this: NovaWindowsDriver): Promise { @@ -282,45 +276,57 @@ export async function setWindowRect( return await this.getWindowRect(); } -export async function waitForNewWindow(this: NovaWindowsDriver, pid: number, timeout: number): Promise { + +export async function attachToApplicationWindow(this: NovaWindowsDriver, processIds: number[]): Promise { + const trackedPids = new Set(processIds); + this.log.debug(`Attaching to application window. Process IDs: [${[...trackedPids].join(', ')}]`); + const timeout = (this.caps['ms:waitForAppLaunch'] ?? 
0) * 1000 || SLEEP_INTERVAL_MS * 20; const start = Date.now(); let attempts = 0; while (Date.now() - start < timeout) { - const handles = getWindowAllHandlesForProcessIds([pid]); + // Discover child processes of all currently-tracked PIDs + const pidList = [...trackedPids].join(', '); + const childPidResult = await this.sendPowerShellCommand( + /* ps1 */ `@(Get-CimInstance Win32_Process | Where-Object { $_.ParentProcessId -in @(${pidList}) }).ProcessId` + ); + for (const token of childPidResult.split('\n').map((s) => s.trim()).filter(Boolean)) { + const childPid = Number(token); + if (!isNaN(childPid) && childPid > 0 && !trackedPids.has(childPid)) { + this.log.debug(`Discovered child process PID ${childPid} spawned by tracked PIDs`); + trackedPids.add(childPid); + } + } + + const currentPids = [...trackedPids]; + const handles = getWindowAllHandlesForProcessIds(currentPids); if (handles.length > 0) { - return handles[handles.length - 1]; + this.log.debug(`Found ${handles.length} window handle(s) for PIDs [${currentPids.join(', ')}]: ${handles.map((h) => `0x${h.toString(16).padStart(8, '0')}`).join(', ')}`); + + for (const handle of handles) { + const elementId = await this.sendPowerShellCommand(AutomationElement.rootElement.findFirst(TreeScope.CHILDREN, new PropertyCondition(Property.NATIVE_WINDOW_HANDLE, new PSInt32(handle))).buildCommand()); + + if (elementId.trim()) { + await this.sendPowerShellCommand(/* ps1 */ `$rootElement = ${new FoundAutomationElement(elementId).buildCommand()}`); + if ((await this.sendPowerShellCommand(/* ps1 */ `$null -ne $rootElement`)).toLowerCase() === 'true') { + const confirmedHandle = Number(await this.sendPowerShellCommand(AutomationElement.automationRoot.buildGetPropertyCommand(Property.NATIVE_WINDOW_HANDLE))); + this.log.info(`Successfully attached to window. 
Native window handle: 0x${confirmedHandle.toString(16).padStart(8, '0')}`); + this.appProcessIds = currentPids; + if (!trySetForegroundWindow(confirmedHandle)) { + await this.focusElement({ + [W3C_ELEMENT_KEY]: elementId, + } satisfies Element); + } + return; + } + } + } } - this.log.debug(`Waiting for the process window to appear... (${++attempts}/${Math.floor(timeout / SLEEP_INTERVAL_MS)})`); + this.log.debug(`No attachable window found yet. Sleeping for ${SLEEP_INTERVAL_MS} milliseconds and retrying... (${++attempts}/${Math.floor(timeout / SLEEP_INTERVAL_MS)})`); await sleep(SLEEP_INTERVAL_MS); } - throw new Error('Timed out waiting for window.'); -} - -export async function attachToApplicationWindow(this: NovaWindowsDriver, processIds: number[]): Promise { - const nativeWindowHandle = await waitForNewWindow.call(this, processIds[0], this.caps['ms:waitForAppLaunch'] ?? SLEEP_INTERVAL_MS * 20); - - let elementId = ''; - for (let i = 1; i <= 20; i++) { - elementId = await this.sendPowerShellCommand(AutomationElement.rootElement.findFirst(TreeScope.CHILDREN, new PropertyCondition(Property.NATIVE_WINDOW_HANDLE, new PSInt32(nativeWindowHandle))).buildCommand()); - if (elementId) { - break; - } - this.log.info(`The window with handle 0x${nativeWindowHandle.toString(16).padStart(8, '0')} is not yet available in the UI Automation tree. Sleeping for ${SLEEP_INTERVAL_MS} milliseconds and retrying... 
(${i}/20)`); // TODO: make a setting for the number of retries or timeout - await sleep(SLEEP_INTERVAL_MS); // TODO: make a setting for the sleep timeout - } - - await this.sendPowerShellCommand(/* ps1 */ `$rootElement = ${new FoundAutomationElement(elementId).buildCommand()}`); - if ((await this.sendPowerShellCommand(/* ps1 */ `$null -ne $rootElement`)).toLowerCase() === 'true') { - const nativeWindowHandle = Number(await this.sendPowerShellCommand(AutomationElement.automationRoot.buildGetPropertyCommand(Property.NATIVE_WINDOW_HANDLE))); - if (!trySetForegroundWindow(nativeWindowHandle)) { - await this.focusElement({ - [W3C_ELEMENT_KEY]: elementId, - } satisfies Element); - }; - return; - } + throw new Error('Timed out waiting to attach to application window.'); } diff --git a/lib/commands/device.ts b/lib/commands/device.ts index e595044..489f12c 100644 --- a/lib/commands/device.ts +++ b/lib/commands/device.ts @@ -1,5 +1,12 @@ +import { normalize } from 'node:path'; +import { errors } from '@appium/base-driver'; import { NovaWindowsDriver } from '../driver'; -import { PSString, pwsh$ } from '../powershell'; +import { PSString, pwsh, pwsh$ } from '../powershell'; +import { MODIFY_FS_FEATURE } from '../constants'; +import { isUwpAppId, sleep } from '../util'; + +const TERMINATE_POLL_INTERVAL_MS = 200; +const TERMINATE_TIMEOUT_MS = 10_000; const GET_SYSTEM_TIME_COMMAND = pwsh$ /* ps1 */ `(Get-Date).ToString(${0})`; const ISO_8061_FORMAT = 'yyyy-MM-ddTHH:mm:sszzz'; @@ -9,33 +16,257 @@ export async function getDeviceTime(this: NovaWindowsDriver, _sessionId?: string return await this.sendPowerShellCommand(GET_SYSTEM_TIME_COMMAND.format(fmt)); } -// command: 'hideKeyboard' -// payloadParams: { optional: ['strategy', 'key', 'keyCode', 'keyName'] } +// ─── File operations ───────────────────────────────────────────────────────── -// command: 'isKeyboardShown' +const PUSH_FILE_COMMAND = pwsh$ /* ps1 */ ` + $path = ${0} + $parentDir = [IO.Path]::GetDirectoryName($path) + if 
($parentDir) { [IO.Directory]::CreateDirectory($parentDir) | Out-Null } + [IO.File]::WriteAllBytes($path, [Convert]::FromBase64String(${1})) +`; -// command: 'pushFile' -// payloadParams: { required: ['path', 'data'] } +export async function pushFile(this: NovaWindowsDriver, path: string, data: string): Promise { + this.assertFeatureEnabled(MODIFY_FS_FEATURE); + if (!path) {throw new errors.InvalidArgumentError("'path' must be provided.");} + if (!data) {throw new errors.InvalidArgumentError("'data' must be provided.");} + await this.sendPowerShellCommand( + PUSH_FILE_COMMAND.format(new PSString(path).toString(), new PSString(data).toString()) + ); +} -// command: 'pullFile' -// payloadParams: { required: ['path'] } +const PULL_FILE_COMMAND = pwsh$ /* ps1 */ `[Convert]::ToBase64String([IO.File]::ReadAllBytes(${0}))`; -// command: 'pullFolder' -// payloadParams: { required: ['path'] } +export async function pullFile(this: NovaWindowsDriver, path: string): Promise { + this.assertFeatureEnabled(MODIFY_FS_FEATURE); + if (!path) {throw new errors.InvalidArgumentError("'path' must be provided.");} + return await this.sendPowerShellCommand(PULL_FILE_COMMAND.format(new PSString(path).toString())); +} -// # APP MANAGEMENT +const PULL_FOLDER_COMMAND = pwsh$ /* ps1 */ ` + $srcPath = ${0} + $tempZip = [IO.Path]::GetTempFileName() + '.zip' + try { + Compress-Archive -LiteralPath $srcPath -DestinationPath $tempZip -ErrorAction Stop + [Convert]::ToBase64String([IO.File]::ReadAllBytes($tempZip)) + } finally { + if (Test-Path $tempZip) { Remove-Item $tempZip -Force } + } +`; -// command: 'activateApp' -// payloadParams: { required: [['appId'], ['bundleId']], optional: ['options'] } +export async function pullFolder(this: NovaWindowsDriver, path: string): Promise { + this.assertFeatureEnabled(MODIFY_FS_FEATURE); + if (!path) {throw new errors.InvalidArgumentError("'path' must be provided.");} + return await this.sendPowerShellCommand(PULL_FOLDER_COMMAND.format(new 
PSString(path).toString())); +} -// command: 'removeApp' -// payloadParams: { required: [['appId'], ['bundleId']], optional: ['options'] } +// ─── Keyboard ──────────────────────────────────────────────────────────────── -//command: 'terminateApp' -// payloadParams: { required: [['appId'], ['bundleId']], optional: ['options'] } +const HIDE_KEYBOARD_COMMAND = pwsh /* ps1 */ ` + $kb = Get-Process -Name 'TabTip','TextInputHost' -ErrorAction SilentlyContinue | Select-Object -First 1 + if ($null -eq $kb) { return } + $kbEl = [System.Windows.Automation.AutomationElement]::RootElement.FindFirst( + [System.Windows.Automation.TreeScope]::Children, + [System.Windows.Automation.PropertyCondition]::new( + [System.Windows.Automation.AutomationElement]::ProcessIdProperty, + $kb.Id + ) + ) + if ($null -ne $kbEl) { + try { + $kbEl.GetCurrentPattern([System.Windows.Automation.WindowPattern]::Pattern).Close() + } catch { + Stop-Process -Id $kb.Id -Force -ErrorAction SilentlyContinue + } + } else { + Stop-Process -Id $kb.Id -Force -ErrorAction SilentlyContinue + } +`; + +export async function hideKeyboard( + this: NovaWindowsDriver, + _strategy?: string, + _key?: string, + _keyCode?: string, + _keyName?: string +): Promise { + await this.sendPowerShellCommand(HIDE_KEYBOARD_COMMAND); +} + +const IS_KEYBOARD_SHOWN_COMMAND = pwsh /* ps1 */ ` + $kb = Get-Process -Name 'TabTip','TextInputHost' -ErrorAction SilentlyContinue | Select-Object -First 1 + if ($null -eq $kb) { Write-Output 'false'; return } + $kbEl = [System.Windows.Automation.AutomationElement]::RootElement.FindFirst( + [System.Windows.Automation.TreeScope]::Children, + [System.Windows.Automation.PropertyCondition]::new( + [System.Windows.Automation.AutomationElement]::ProcessIdProperty, + $kb.Id + ) + ) + if ($null -eq $kbEl) { Write-Output 'false'; return } + if ($kbEl.GetCurrentPropertyValue([System.Windows.Automation.AutomationElement]::IsOffscreenProperty)) { + Write-Output 'false' + } else { + Write-Output 'true' + } +`; 
+ +export async function isKeyboardShown(this: NovaWindowsDriver): Promise { + const result = await this.sendPowerShellCommand(IS_KEYBOARD_SHOWN_COMMAND); + return result.trim().toLowerCase() === 'true'; +} + +// ─── App management ────────────────────────────────────────────────────────── + +export async function activateApp( + this: NovaWindowsDriver, + appId: string, + _options?: Record +): Promise { + if (!appId) {throw new errors.InvalidArgumentError("'appId' or 'bundleId' must be provided.");} + + const isUwp = isUwpAppId(appId); + if (isUwp) { + await this.changeRootElement(appId); + return; + } + + const normalizedPath = normalize(appId); + const parts = normalizedPath.toLowerCase().split('\\').flatMap((x) => x.split('/')); + const executable = parts[parts.length - 1]; + const processName = (executable.endsWith('.exe') ? executable.slice(0, -4) : executable).replace(/'/g, "''"); + + const pidResult = await this.sendPowerShellCommand( + /* ps1 */ `(Get-Process -Name '${processName}' -ErrorAction SilentlyContinue | Sort-Object StartTime -Descending | Select-Object -First 1).Id` + ); + const existingPid = Number(pidResult.trim()); -// command: 'isAppInstalled' -// payloadParams: { required: [['appId'], ['bundleId']] } + if (!isNaN(existingPid) && existingPid > 0) { + const handleResult = await this.sendPowerShellCommand( + /* ps1 */ `(Get-Process -Id ${existingPid} -ErrorAction SilentlyContinue).MainWindowHandle` + ); + const handle = Number(handleResult.trim()); + if (!isNaN(handle) && handle > 0) { + await this.changeRootElement(handle); + return; + } + await this.attachToApplicationWindow([existingPid]); + return; + } + + await this.changeRootElement(appId); +} + +export async function terminateApp( + this: NovaWindowsDriver, + appId: string, + _options?: Record +): Promise { + if (!appId) {throw new errors.InvalidArgumentError("'appId' or 'bundleId' must be provided.");} + + const isUwp = isUwpAppId(appId); + + let killed: boolean; + if (isUwp) { + const 
safeFamily = new PSString(appId.split('!')[0]).toString(); + const checkResult = await this.sendPowerShellCommand( + /* ps1 */ ` + $pkg = Get-AppxPackage | Where-Object { $_.PackageFamilyName -eq ${safeFamily} } + if ($null -eq $pkg) { Write-Output 'none'; return } + $procs = Get-Process | Where-Object { $_.Path -like ($pkg.InstallLocation + '\\*') } + if (@($procs).Count -eq 0) { Write-Output 'none' } else { ($procs | Select-Object -ExpandProperty Id) -join ',' } + ` + ); + const pids = checkResult.trim(); + if (pids === 'none' || pids === '') { + await this.sendPowerShellCommand(/* ps1 */ `$rootElement = $null`).catch(() => {}); + return false; + } + await this.sendPowerShellCommand( + /* ps1 */ `Stop-Process -Id ${pids} -Force -ErrorAction SilentlyContinue` + ); + + const deadline = Date.now() + TERMINATE_TIMEOUT_MS; + killed = false; + while (Date.now() < deadline) { + await sleep(TERMINATE_POLL_INTERVAL_MS); + const stillRunning = await this.sendPowerShellCommand( + /* ps1 */ ` + $pkg = Get-AppxPackage | Where-Object { $_.PackageFamilyName -eq ${safeFamily} } + if ($null -eq $pkg) { 'false' } else { ($null -ne (Get-Process | Where-Object { $_.Path -like ($pkg.InstallLocation + '\\*') } | Select-Object -First 1)).ToString().ToLower() } + ` + ); + if (stillRunning.trim().toLowerCase() !== 'true') { + killed = true; + break; + } + } + } else { + const normalizedPath = normalize(appId); + const parts = normalizedPath.toLowerCase().split('\\').flatMap((x) => x.split('/')); + const executable = parts[parts.length - 1]; + const processName = (executable.endsWith('.exe') ? 
executable.slice(0, -4) : executable).replace(/'/g, "''"); + + const checkResult = await this.sendPowerShellCommand( + /* ps1 */ `$procs = Get-Process -Name '${processName}' -ErrorAction SilentlyContinue; if (@($procs).Count -eq 0) { Write-Output 'none' } else { ($procs | Select-Object -ExpandProperty Id) -join ',' }` + ); + const pids = checkResult.trim(); + if (pids === 'none' || pids === '') { + await this.sendPowerShellCommand(/* ps1 */ `$rootElement = $null`).catch(() => {}); + return false; + } + await this.sendPowerShellCommand( + /* ps1 */ `Stop-Process -Id ${pids} -Force -ErrorAction SilentlyContinue` + ); + + const deadline = Date.now() + TERMINATE_TIMEOUT_MS; + killed = false; + while (Date.now() < deadline) { + await sleep(TERMINATE_POLL_INTERVAL_MS); + const stillRunning = await this.sendPowerShellCommand( + /* ps1 */ `(Get-Process -Name '${processName}' -ErrorAction SilentlyContinue).Count -gt 0` + ); + if (stillRunning.trim().toLowerCase() !== 'true') { + killed = true; + break; + } + } + } + + await this.sendPowerShellCommand(/* ps1 */ `$rootElement = $null`).catch(() => {}); + return killed; +} + +export async function isAppInstalled(this: NovaWindowsDriver, appId: string): Promise { + if (!appId) {throw new errors.InvalidArgumentError("'appId' or 'bundleId' must be provided.");} + + const isUwp = isUwpAppId(appId); + if (isUwp) { + const safeFamily = new PSString(appId.split('!')[0]).toString(); + const result = await this.sendPowerShellCommand( + /* ps1 */ `if (@(Get-AppxPackage | Where-Object { $_.PackageFamilyName -eq ${safeFamily} }).Count -gt 0) { 'true' } else { 'false' }` + ); + return result.trim().toLowerCase() === 'true'; + } + + const hasPathSeparator = appId.includes('/') || appId.includes('\\'); + if (hasPathSeparator) { + const safePath = new PSString(appId).toString(); + const result = await this.sendPowerShellCommand( + /* ps1 */ `if (Test-Path -LiteralPath ${safePath}) { 'true' } else { 'false' }` + ); + return 
result.trim().toLowerCase() === 'true'; + } + + // Bare process name — search PATH + const safeName = new PSString(appId).toString(); + const result = await this.sendPowerShellCommand( + /* ps1 */ `if (Get-Command -Name ${safeName} -ErrorAction SilentlyContinue) { 'true' } else { 'false' }` + ); + return result.trim().toLowerCase() === 'true'; +} // command: 'installApp' // payloadParams: { required: ['appPath'], optional: ['options'] } + +// command: 'removeApp' +// payloadParams: { required: [['appId'], ['bundleId']], optional: ['options'] } diff --git a/lib/commands/extension.ts b/lib/commands/extension.ts index f67d559..4314e78 100644 --- a/lib/commands/extension.ts +++ b/lib/commands/extension.ts @@ -67,6 +67,7 @@ const EXTENSION_COMMANDS = Object.freeze({ clickAndDrag: 'executeClickAndDrag', getDeviceTime: 'windowsGetDeviceTime', getWindowElement: 'getWindowElement', + getMonitors: 'windowsGetMonitors', } as const); const ContentType = Object.freeze({ @@ -892,3 +893,23 @@ export async function getWindowElement(this: NovaWindowsDriver): Promise { + const result = await this.sendPowerShellCommand(GET_MONITORS_COMMAND); + return JSON.parse(result.trim()); +} diff --git a/lib/commands/powershell.ts b/lib/commands/powershell.ts index ded7f2f..5579d21 100644 --- a/lib/commands/powershell.ts +++ b/lib/commands/powershell.ts @@ -12,7 +12,16 @@ const NULL_ROOT_ELEMENT = /* ps1 */ `$rootElement = $null`; const INIT_ELEMENT_TABLE = /* ps1 */ `$elementTable = New-Object System.Collections.Generic.Dictionary[[string]\`,[AutomationElement]]`; export async function startPowerShellSession(this: NovaWindowsDriver): Promise { - const powerShell = spawn('powershell.exe', ['-NoExit', '-Command', '-']); + const spawnEnv = this.caps.appEnvironment + ? 
{ ...process.env, ...(this.caps.appEnvironment as Record) } + : process.env; + + if (this.caps.appEnvironment) { + const keys = Object.keys(this.caps.appEnvironment as Record); + this.log.info(`Applying appEnvironment variables to PowerShell session: ${keys.join(', ')}`); + } + + const powerShell = spawn('powershell.exe', ['-NoExit', '-Command', '-'], { env: spawnEnv }); powerShell.stdout.setEncoding('utf8'); powerShell.stderr.setEncoding('utf8'); @@ -35,7 +44,7 @@ export async function startPowerShellSession(this: NovaWindowsDriver): Promise `%${envVar}%`).join(', ')}`); for (const envVar of envVars) { - this.caps.app = this.caps.app.replaceAll(`%${envVar}%`, process.env[envVar.toUpperCase()] ?? ''); + this.caps.app = this.caps.app.replaceAll(`%${envVar}%`, spawnEnv[envVar.toUpperCase()] ?? ''); } await this.changeRootElement(this.caps.app); @@ -93,10 +102,12 @@ export async function startPowerShellSession(this: NovaWindowsDriver): Promise { const magicNumber = 0xF2EE; - const powerShell = spawn('powershell.exe', ['-NoExit', '-Command', '-']); + const spawnEnv = this.caps.appEnvironment + ? { ...process.env, ...(this.caps.appEnvironment as Record) } + : process.env; + const powerShell = spawn('powershell.exe', ['-NoExit', '-Command', '-'], { env: spawnEnv }); try { powerShell.stdout.setEncoding('utf8'); - powerShell.stdout.setEncoding('utf8'); let localStdOut = ''; let localStdErr = ''; @@ -123,7 +134,7 @@ export async function sendIsolatedPowerShellCommand(this: NovaWindowsDriver, com } const envVars = Array.from(envVarsSet); for (const envVar of envVars) { - this.caps.appWorkingDir = this.caps.appWorkingDir.replaceAll(`%${envVar}%`, process.env[envVar.toUpperCase()] ?? ''); + this.caps.appWorkingDir = this.caps.appWorkingDir.replaceAll(`%${envVar}%`, spawnEnv[envVar.toUpperCase()] ?? 
''); } powerShell.stdin.write(`Set-Location -Path '${this.caps.appWorkingDir}'\n`); } diff --git a/lib/constraints.ts b/lib/constraints.ts index e2f82d6..29dd3fd 100644 --- a/lib/constraints.ts +++ b/lib/constraints.ts @@ -36,12 +36,24 @@ export const UI_AUTOMATION_DRIVER_CONSTRAINTS = { isolatedScriptExecution: { isBoolean: true, }, + appEnvironment: { + isObject: true, + }, 'ms:waitForAppLaunch': { isNumber: true, }, 'ms:forcequit': { isBoolean: true, }, + returnAllWindowHandles: { + isBoolean: true, + }, + 'ms:windowSwitchRetries': { + isNumber: true, + }, + 'ms:windowSwitchInterval': { + isNumber: true, + }, } as const satisfies Constraints; export default UI_AUTOMATION_DRIVER_CONSTRAINTS; diff --git a/lib/driver.ts b/lib/driver.ts index 9a0b5fd..a52628c 100644 --- a/lib/driver.ts +++ b/lib/driver.ts @@ -20,7 +20,8 @@ import { convertStringToCondition, } from './powershell'; import { - assertSupportedEasingFunction + assertIntegerCap, + assertSupportedEasingFunction, } from './util'; import { setDpiAwareness } from './winapi/user32'; import { xpathToElIdOrIds } from './xpath'; @@ -60,6 +61,7 @@ export class NovaWindowsDriver extends BaseDriver) + .filter(([, v]) => typeof v !== 'string') + .map(([k]) => k); + if (invalidKeys.length > 0) { + throw new errors.InvalidArgumentError( + `Invalid capabilities. 'appEnvironment' values must be strings. 
Invalid keys: ${invalidKeys.join(', ')}` + ); + } + } + if (caps['ms:windowSwitchRetries'] !== undefined) { + assertIntegerCap('ms:windowSwitchRetries', caps['ms:windowSwitchRetries'], 0); + } + if (caps['ms:windowSwitchInterval'] !== undefined) { + assertIntegerCap('ms:windowSwitchInterval', caps['ms:windowSwitchInterval'], 0); + } if (this.caps.shouldCloseApp === undefined) { this.caps.shouldCloseApp = true; // set default value } diff --git a/lib/mcp/config.ts b/lib/mcp/config.ts new file mode 100644 index 0000000..8e95297 --- /dev/null +++ b/lib/mcp/config.ts @@ -0,0 +1,17 @@ +/** Infrastructure config — read from env vars at startup. */ +export interface McpConfig { + appiumHost: string; + appiumPort: number; +} + +export function loadConfig(): McpConfig { + const appiumPort = parseInt(process.env.APPIUM_PORT ?? '4723', 10); + if (isNaN(appiumPort) || appiumPort < 1 || appiumPort > 65535) { + throw new Error(`APPIUM_PORT must be a valid port number (1-65535), got: '${process.env.APPIUM_PORT}'`); + } + + return { + appiumHost: process.env.APPIUM_HOST ?? 
'127.0.0.1', + appiumPort, + }; +} diff --git a/lib/mcp/constants.ts b/lib/mcp/constants.ts new file mode 100644 index 0000000..6e10e30 --- /dev/null +++ b/lib/mcp/constants.ts @@ -0,0 +1,2 @@ +/** W3C WebDriver element reference key */ +export const ELEMENT_KEY = 'element-6066-11e4-a52e-4f735466cecf'; diff --git a/lib/mcp/errors.ts b/lib/mcp/errors.ts new file mode 100644 index 0000000..cb3e6d7 --- /dev/null +++ b/lib/mcp/errors.ts @@ -0,0 +1,4 @@ +export function formatError(err: unknown): string { + if (err instanceof Error) {return `${err.constructor.name}: ${err.message}`;} + return String(err); +} diff --git a/lib/mcp/index.ts b/lib/mcp/index.ts new file mode 100644 index 0000000..f51421d --- /dev/null +++ b/lib/mcp/index.ts @@ -0,0 +1,87 @@ +#!/usr/bin/env node +import * as http from 'node:http'; +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; +import { loadConfig } from './config.js'; +import { AppiumSession } from './session.js'; +import { registerAllTools } from './tools/index.js'; + +function checkAppiumReachable(host: string, port: number): Promise { + return new Promise((resolve) => { + const req = http.get( + { hostname: host, port, path: '/status', timeout: 3000 }, + (res) => { + let body = ''; + res.on('data', (chunk) => { body += chunk; }); + res.on('end', () => { + try { resolve(JSON.parse(body)?.value?.ready === true); } + catch { resolve(false); } + }); + } + ); + req.on('error', () => resolve(false)); + req.on('timeout', () => { req.destroy(); resolve(false); }); + }); +} + +async function main() { + // Step 1: Load infrastructure config (host, port — no app required) + let config; + try { + config = loadConfig(); + } catch (err) { + process.stderr.write(`[MCP] Configuration error: ${err instanceof Error ? 
err.message : String(err)}\n`); + process.exit(1); + } + + // Step 2: Verify Appium is reachable + const { appiumHost: host, appiumPort: port } = config; + if (!await checkAppiumReachable(host, port)) { + process.stderr.write( + `[MCP] Appium is not running on ${host}:${port}.\n` + + `Start it first with: appium --port ${port}\n` + ); + process.exit(1); + } + process.stderr.write(`[MCP] Appium detected on ${host}:${port}\n`); + + // Step 3: Create session holder (no app launched yet — agent calls create_session) + const session = new AppiumSession(config); + + // Step 4: Create and configure MCP server + const server = new McpServer({ + name: 'desktop-driver-mcp', + version: '1.3.0', + }); + + // Step 5: Register all tools (including create_session / delete_session) + registerAllTools(server, session); + + // Step 6: Shutdown handler + let shuttingDown = false; + async function shutdown(reason: string) { + if (shuttingDown) {return;} + shuttingDown = true; + process.stderr.write(`[MCP] Shutting down (${reason})...\n`); + + if (session.isActive()) { + await Promise.race([ + session.delete(), + new Promise((resolve) => setTimeout(resolve, 10_000)), + ]); + } + + process.exit(0); + } + + process.on('SIGINT', () => { shutdown('SIGINT'); }); + process.on('SIGTERM', () => { shutdown('SIGTERM'); }); + process.stdin.on('end', () => { shutdown('stdin closed'); }); + + // Step 7: Connect transport (stdout is owned by MCP protocol — all logs go to stderr) + const transport = new StdioServerTransport(); + await server.connect(transport); + process.stderr.write('[MCP] desktop-driver-mcp server ready. 
Call create_session to launch an app.\n'); +} + +main(); diff --git a/lib/mcp/session.ts b/lib/mcp/session.ts new file mode 100644 index 0000000..493f7d4 --- /dev/null +++ b/lib/mcp/session.ts @@ -0,0 +1,75 @@ +import { remote } from 'webdriverio'; +import type { Browser } from 'webdriverio'; +import type { McpConfig } from './config.js'; + +/** Session parameters provided by the agent via the create_session tool. */ +export interface SessionParams { + app: string; + appArguments?: string; + appWorkingDir?: string; + waitForAppLaunch?: number; + shouldCloseApp?: boolean; + implicitTimeout?: number; + delayAfterClick?: number; + delayBeforeClick?: number; + smoothPointerMove?: string; +} + +export class AppiumSession { + private driver: Browser | null = null; + + constructor(private readonly appiumConfig: McpConfig) {} + + async create(params: SessionParams): Promise { + if (this.driver) { + throw new Error('A session is already active. Call delete_session first.'); + } + + process.stderr.write(`[MCP] Creating Appium session for app: ${params.app}\n`); + + const caps: Record = { + platformName: 'Windows', + 'appium:automationName': 'DesktopDriver', + 'appium:app': params.app, + }; + + if (params.appArguments !== undefined) {caps['appium:appArguments'] = params.appArguments;} + if (params.appWorkingDir !== undefined) {caps['appium:appWorkingDir'] = params.appWorkingDir;} + if (params.waitForAppLaunch !== undefined) {caps['appium:waitForAppLaunch'] = params.waitForAppLaunch;} + if (params.shouldCloseApp !== undefined) {caps['appium:shouldCloseApp'] = params.shouldCloseApp;} + if (params.delayAfterClick !== undefined) {caps['appium:delayAfterClick'] = params.delayAfterClick;} + if (params.delayBeforeClick !== undefined) {caps['appium:delayBeforeClick'] = params.delayBeforeClick;} + if (params.smoothPointerMove !== undefined) {caps['appium:smoothPointerMove'] = params.smoothPointerMove;} + + this.driver = await remote({ + hostname: this.appiumConfig.appiumHost, + port: 
this.appiumConfig.appiumPort, + path: '/', + capabilities: caps as WebdriverIO.Capabilities, + }); + + await this.driver.setTimeout({ implicit: params.implicitTimeout }); + process.stderr.write('[MCP] Session created successfully\n'); + } + + async delete(): Promise { + if (!this.driver) {return;} + try { + await this.driver.deleteSession(); + process.stderr.write('[MCP] Session deleted\n'); + } catch (err) { + process.stderr.write(`[MCP] Warning: session delete failed: ${err}\n`); + } finally { + this.driver = null; + } + } + + isActive(): boolean { + return this.driver !== null; + } + + getDriver(): Browser { + if (!this.driver) {throw new Error('No active session. Call create_session first.');} + return this.driver; + } +} diff --git a/lib/mcp/tools/advanced.ts b/lib/mcp/tools/advanced.ts new file mode 100644 index 0000000..2b33363 --- /dev/null +++ b/lib/mcp/tools/advanced.ts @@ -0,0 +1,138 @@ +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { z } from 'zod'; +import type { AppiumSession } from '../session.js'; +import { formatError } from '../errors.js'; + +const modifierKeys = z.array(z.enum(['shift', 'ctrl', 'alt', 'win'])).default([]); + +export function registerAdvancedTools(server: McpServer, session: AppiumSession): void { + server.registerTool( + 'advanced_click', + { + description: 'Perform an advanced click with modifier keys, multiple clicks, or custom duration. Use this for right-click, Ctrl+click, double-click, etc.', + annotations: { destructiveHint: false }, + inputSchema: { + elementId: z.string().optional().describe('Element to click (its center). 
Provide either elementId or x+y.'), + x: z.number().int().optional().describe('Absolute screen x coordinate'), + y: z.number().int().optional().describe('Absolute screen y coordinate'), + button: z.enum(['left', 'right', 'middle', 'back', 'forward']).default('left'), + modifierKeys, + durationMs: z.number().int().min(0).default(0).describe('Hold duration in ms (for long-press)'), + times: z.number().int().min(1).default(1).describe('Number of clicks (2 = double-click)'), + interClickDelayMs: z.number().int().min(0).default(100), + }, + }, + async (args) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: click', [args]); + return { content: [{ type: 'text' as const, text: 'clicked' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'send_keys', + { + description: 'Send keyboard input. Each action can be a pause (ms delay), text to type, or a virtual key code press/release.', + annotations: { destructiveHint: false }, + inputSchema: { + actions: z.array(z.object({ + pause: z.number().int().optional().describe('Pause in milliseconds'), + text: z.string().optional().describe('Text to type (unicode supported)'), + virtualKeyCode: z.number().int().optional().describe('Windows Virtual Key code (e.g. 
13 = Enter, 27 = Escape)'), + down: z.boolean().optional().describe('true = key down only, false = key up only, omit = press and release'), + })).describe('Sequence of keyboard actions to perform'), + forceUnicode: z.boolean().default(false).describe('Use Unicode input method for special characters'), + }, + }, + async (args) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: keys', [args]); + return { content: [{ type: 'text' as const, text: 'keys sent' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'hover', + { + description: 'Move the mouse pointer from one position to another, optionally with modifier keys held. Useful for hover effects and drag-without-click.', + inputSchema: { + startElementId: z.string().optional().describe('Element to start hover from (uses element center)'), + startX: z.number().int().optional(), + startY: z.number().int().optional(), + endElementId: z.string().optional().describe('Element to hover to'), + endX: z.number().int().optional(), + endY: z.number().int().optional(), + modifierKeys, + durationMs: z.number().int().min(0).default(500).describe('Duration of the hover movement in ms'), + }, + }, + async (args) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: hover', [args]); + return { content: [{ type: 'text' as const, text: 'hovered' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'scroll', + { + description: 'Scroll the mouse wheel at an element or screen coordinate.', + inputSchema: { + elementId: z.string().optional().describe('Element to scroll over (uses element center)'), + x: z.number().int().optional().describe('Absolute screen x coordinate'), + y: z.number().int().optional().describe('Absolute screen y coordinate'), + deltaX: 
z.number().int().default(0).describe('Horizontal scroll amount (positive = right)'), + deltaY: z.number().int().default(0).describe('Vertical scroll amount (positive = down)'), + modifierKeys, + }, + }, + async (args) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: scroll', [args]); + return { content: [{ type: 'text' as const, text: 'scrolled' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'click_and_drag', + { + description: 'Click and drag from one position to another. Useful for resizing, reordering, or moving elements.', + inputSchema: { + startElementId: z.string().optional().describe('Element to start drag from'), + startX: z.number().int().optional(), + startY: z.number().int().optional(), + endElementId: z.string().optional().describe('Element to drag to'), + endX: z.number().int().optional(), + endY: z.number().int().optional(), + modifierKeys, + durationMs: z.number().int().min(0).default(500), + button: z.enum(['left', 'right', 'middle']).default('left'), + }, + }, + async (args) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: clickAndDrag', [args]); + return { content: [{ type: 'text' as const, text: 'drag completed' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); +} diff --git a/lib/mcp/tools/app.ts b/lib/mcp/tools/app.ts new file mode 100644 index 0000000..4f889ae --- /dev/null +++ b/lib/mcp/tools/app.ts @@ -0,0 +1,78 @@ +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import type { AppiumSession } from '../session.js'; +import { formatError } from '../errors.js'; + +export function registerAppTools(server: McpServer, session: AppiumSession): void { + server.registerTool( + 'get_window_element', + { + description: 'Get the root UI element of the current app 
window. Returns an element ID that represents the top-level window.', + annotations: { readOnlyHint: true }, + }, + async () => { + try { + const driver = session.getDriver(); + const result = await driver.executeScript('windows: getWindowElement', [{}]); + const ref = result as Record; + const elementId = ref['element-6066-11e4-a52e-4f735466cecf'] ?? ref.ELEMENT; + if (!elementId) { + throw new Error(`windows: getWindowElement returned unexpected value: ${JSON.stringify(result)}`); + } + return { content: [{ type: 'text' as const, text: elementId }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'launch_app', + { + description: 'Launch the application configured for this session (re-launch if it was closed).', + annotations: { destructiveHint: false }, + }, + async () => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: launchApp', [{}]); + return { content: [{ type: 'text' as const, text: 'app launched' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'close_app', + { + description: 'Close the application under test without ending the session.', + annotations: { destructiveHint: true }, + }, + async () => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: closeApp', [{}]); + return { content: [{ type: 'text' as const, text: 'app closed' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'get_device_time', + { + description: 'Get the current date/time on the Windows device.', + annotations: { readOnlyHint: true }, + }, + async () => { + try { + const driver = session.getDriver(); + const result = await driver.executeScript('windows: getDeviceTime', [{}]); + return { content: [{ type: 
'text' as const, text: String(result) }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); +} diff --git a/lib/mcp/tools/clipboard.ts b/lib/mcp/tools/clipboard.ts new file mode 100644 index 0000000..8ff3c70 --- /dev/null +++ b/lib/mcp/tools/clipboard.ts @@ -0,0 +1,48 @@ +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { z } from 'zod'; +import type { AppiumSession } from '../session.js'; +import { formatError } from '../errors.js'; + +const contentTypeSchema = z.enum(['plaintext', 'image']).default('plaintext'); + +export function registerClipboardTools(server: McpServer, session: AppiumSession): void { + server.registerTool( + 'get_clipboard', + { + description: 'Read the current clipboard contents as a base64-encoded string.', + inputSchema: { + contentType: contentTypeSchema.describe('"plaintext" for text, "image" for image content'), + }, + annotations: { readOnlyHint: true }, + }, + async ({ contentType }) => { + try { + const driver = session.getDriver(); + const result = await driver.executeScript('windows: getClipboard', [{ contentType }]); + return { content: [{ type: 'text' as const, text: String(result) }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'set_clipboard', + { + description: 'Set the clipboard contents from a base64-encoded string.', + inputSchema: { + b64Content: z.string().min(1).describe('Base64-encoded content to set'), + contentType: contentTypeSchema.describe('"plaintext" for text, "image" for image content'), + }, + }, + async ({ b64Content, contentType }) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: setClipboard', [{ b64Content, contentType }]); + return { content: [{ type: 'text' as const, text: 'clipboard set' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' 
as const, text: formatError(err) }] }; + } + } + ); +} diff --git a/lib/mcp/tools/find.ts b/lib/mcp/tools/find.ts new file mode 100644 index 0000000..afa7a14 --- /dev/null +++ b/lib/mcp/tools/find.ts @@ -0,0 +1,151 @@ +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { z } from 'zod'; +import type { AppiumSession } from '../session.js'; +import { formatError } from '../errors.js'; +import { ELEMENT_KEY } from '../constants.js'; + +const STRATEGIES = ['accessibility id', 'name', 'id', 'xpath', 'class name', 'tag name', '-windows uiautomation'] as const; +type Strategy = typeof STRATEGIES[number]; + +// 'id' is documented as an alias for 'accessibility id' (UIA AutomationId). +// All other strategies are forwarded verbatim to the Appium/WinAppDriver protocol. +function resolveStrategy(strategy: Strategy): string { + return strategy === 'id' ? 'accessibility id' : strategy; +} + +const STRATEGY_DESCRIPTIONS: Record = { + 'accessibility id': 'Maps to UIA AutomationId — most stable selector, preferred for test automation. Use when the element has a non-empty AutomationId.', + 'name': 'Maps to UIA Name property (visible label/title). Reliable when text is static and not locale-dependent.', + 'id': 'Alias for accessibility id — maps to UIA AutomationId.', + 'xpath': 'Evaluates XPath against the live UIA tree. Use as fallback when no stable AutomationId or Name exists. Example: //Button[@Name="OK"]', + 'class name': 'Maps to UIA ClassName — rarely unique on its own; use to narrow results when combined with other strategies.', + 'tag name': 'Maps to UIA ControlType (e.g. "Button", "Edit", "TextBlock"). 
Rarely unique; useful for finding all elements of a type.', + '-windows uiautomation': 'Raw UIA condition expression for advanced compound queries.', +}; + +const StrategyEnum = z.enum(STRATEGIES); + +const FIND_STRATEGY_PRIORITY = [ + 'Preferred strategy order for reliable automation:', + '1) "accessibility id" (AutomationId) — most stable, use whenever available', + '2) "name" — good for static labels not subject to localization', + '3) "xpath" — flexible fallback, e.g. //Button[@Name="OK"]', + '4) other strategies — use only when the above are unavailable.', + 'After interacting with an element you plan to use in generated test code, call get_element_info to capture the best locator.', +].join(' '); + +export function registerFindTools(server: McpServer, session: AppiumSession): void { + server.registerTool( + 'find_element', + { + description: `Find a single UI element in the current app window. Returns an element ID string to pass to other tools. Returns an error if not found. ${FIND_STRATEGY_PRIORITY}`, + inputSchema: { + strategy: StrategyEnum.describe( + 'Locator strategy. ' + + Object.entries(STRATEGY_DESCRIPTIONS).map(([k, v]) => `"${k}": ${v}`).join(' | ') + ), + selector: z.string().min(1).describe('The selector value for the chosen strategy'), + }, + annotations: { readOnlyHint: true }, + }, + async ({ strategy, selector }) => { + try { + const driver = session.getDriver(); + const rawEl = await driver.findElement(resolveStrategy(strategy as Strategy), selector); + return { content: [{ type: 'text' as const, text: rawEl[ELEMENT_KEY] }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'find_elements', + { + description: `Find all UI elements matching the selector. Returns a JSON array of element ID strings. ${FIND_STRATEGY_PRIORITY}`, + inputSchema: { + strategy: StrategyEnum.describe( + 'Locator strategy. 
' + + Object.entries(STRATEGY_DESCRIPTIONS).map(([k, v]) => `"${k}": ${v}`).join(' | ') + ), + selector: z.string().min(1).describe('The selector value for the chosen strategy'), + }, + annotations: { readOnlyHint: true }, + }, + async ({ strategy, selector }) => { + try { + const driver = session.getDriver(); + const rawEls = await driver.findElements(resolveStrategy(strategy as Strategy), selector); + const ids = rawEls.map((el) => el[ELEMENT_KEY]); + return { content: [{ type: 'text' as const, text: JSON.stringify(ids) }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'find_child_element', + { + description: `Find a child element within a parent element. Returns an element ID string. ${FIND_STRATEGY_PRIORITY}`, + inputSchema: { + parentElementId: z.string().min(1).describe('Element ID of the parent to search within'), + strategy: StrategyEnum.describe( + 'Locator strategy. ' + + Object.entries(STRATEGY_DESCRIPTIONS).map(([k, v]) => `"${k}": ${v}`).join(' | ') + ), + selector: z.string().min(1).describe('The selector value for the chosen strategy'), + }, + annotations: { readOnlyHint: true }, + }, + async ({ parentElementId, strategy, selector }) => { + try { + const driver = session.getDriver(); + const rawEl = await driver.findElementFromElement( + parentElementId, + resolveStrategy(strategy as Strategy), + selector + ); + return { content: [{ type: 'text' as const, text: rawEl[ELEMENT_KEY] }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'wait_for_element', + { + description: `Wait for a UI element to appear within a configurable timeout, then return its element ID. Useful after dialog opens, page transitions, or loading spinners disappear. ${FIND_STRATEGY_PRIORITY}`, + inputSchema: { + strategy: StrategyEnum.describe( + 'Locator strategy. 
' + + Object.entries(STRATEGY_DESCRIPTIONS).map(([k, v]) => `"${k}": ${v}`).join(' | ') + ), + selector: z.string().min(1).describe('The selector value for the chosen strategy'), + timeoutMs: z.number().int().min(0).default(5000).describe('Maximum time in milliseconds to wait for the element'), + pollIntervalMs: z.number().int().min(50).default(200).describe('How often to retry in milliseconds'), + }, + annotations: { readOnlyHint: true }, + }, + async ({ strategy, selector, timeoutMs, pollIntervalMs }) => { + const driver = session.getDriver(); + const effectiveStrategy = resolveStrategy(strategy as Strategy); + const deadline = Date.now() + timeoutMs; + while (true) { + try { + const rawEl = await driver.findElement(effectiveStrategy, selector); + return { content: [{ type: 'text' as const, text: rawEl[ELEMENT_KEY] }] }; + } catch { + if (Date.now() >= deadline) { + return { + isError: true, + content: [{ type: 'text' as const, text: `Element not found within ${timeoutMs}ms: ${strategy}="${selector}"` }], + }; + } + await new Promise((resolve) => setTimeout(resolve, pollIntervalMs)); + } + } + } + ); +} diff --git a/lib/mcp/tools/index.ts b/lib/mcp/tools/index.ts new file mode 100644 index 0000000..6edd990 --- /dev/null +++ b/lib/mcp/tools/index.ts @@ -0,0 +1,23 @@ +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import type { AppiumSession } from '../session.js'; +import { registerSessionTools } from './session.js'; +import { registerFindTools } from './find.js'; +import { registerInteractTools } from './interact.js'; +import { registerInspectTools } from './inspect.js'; +import { registerWindowTools } from './window.js'; +import { registerAdvancedTools } from './advanced.js'; +import { registerPatternTools } from './patterns.js'; +import { registerAppTools } from './app.js'; +import { registerClipboardTools } from './clipboard.js'; + +export function registerAllTools(server: McpServer, session: AppiumSession): void { + 
/** One locator suggestion returned to the caller, with a reliability rating and a usage note. */
interface SuggestedSelector {
  strategy: string;
  selector: string;
  reliability: 'high' | 'medium' | 'low';
  note: string;
}

/** Key UIA properties of an element plus the ranked selector suggestions derived from them. */
interface ElementInfo {
  elementId: string;
  name: string | null;
  automationId: string | null;
  className: string | null;
  controlType: string | null;
  isEnabled: string | null;
  suggestedSelectors: SuggestedSelector[];
}

/**
 * Render `value` as an XPath 1.0 string literal.
 *
 * XPath 1.0 literals have no escape mechanism: a literal is delimited by either
 * double or single quotes and cannot contain its own delimiter. Double quotes are
 * preferred; single quotes are used when the value itself contains a double quote.
 *
 * @returns the quoted literal, or `null` when the value contains both quote kinds
 *          and therefore cannot be written as a single literal.
 */
function toXPathLiteral(value: string): string | null {
  if (!value.includes('"')) {
    return `"${value}"`;
  }
  if (!value.includes("'")) {
    return `'${value}'`;
  }
  return null;
}

/**
 * Build a ranked list of suggested selectors for test automation.
 * Priority: accessibility id (AutomationId) > name > xpath combos > class name.
 *
 * A property only produces suggestions when it is non-empty after trimming.
 * XPath suggestions are skipped when the attribute value cannot be expressed as
 * an XPath string literal (it contains both `"` and `'`), so that no
 * syntactically invalid selector is ever suggested.
 *
 * @param props UIA properties of the element (any of them may be null).
 * @returns suggestions ordered from most to least preferred; empty when no property is usable.
 */
function buildSuggestedSelectors(
  props: Pick<ElementInfo, 'name' | 'automationId' | 'className' | 'controlType'>
): SuggestedSelector[] {
  const suggestions: SuggestedSelector[] = [];
  const { automationId, name, className } = props;

  // Derive the XPath tag name from ControlType (strip "ControlType." prefix if present)
  const tag = props.controlType?.replace(/^ControlType\./, '') ?? null;

  // 1. AutomationId via accessibility id — most reliable
  if (automationId?.trim()) {
    suggestions.push({
      strategy: 'accessibility id',
      selector: automationId,
      reliability: 'high',
      note: 'AutomationId — stable across locales and UI layout changes. Preferred for .NET: driver.FindElement(MobileBy.AccessibilityId("' + automationId + '"))',
    });
  }

  // 2. XPath with AutomationId — explicit type + stable id
  if (tag && automationId?.trim()) {
    const literal = toXPathLiteral(automationId);
    if (literal !== null) {
      suggestions.push({
        strategy: 'xpath',
        selector: `//${tag}[@AutomationId=${literal}]`,
        reliability: 'high',
        note: 'XPath using AutomationId — use when you also want to assert the control type',
      });
    }
  }

  // 3. Name via name strategy — medium reliability (may change with locale)
  if (name?.trim()) {
    suggestions.push({
      strategy: 'name',
      selector: name,
      reliability: 'medium',
      note: 'Element Name — may change with localization or dynamic text. .NET: driver.FindElement(MobileBy.Name("' + name + '"))',
    });
  }

  // 4. XPath with Name + ControlType
  if (tag && name?.trim()) {
    const literal = toXPathLiteral(name);
    if (literal !== null) {
      suggestions.push({
        strategy: 'xpath',
        selector: `//${tag}[@Name=${literal}]`,
        reliability: 'medium',
        note: 'XPath using Name — readable but locale-sensitive',
      });
    }
  }

  // 5. ClassName as fallback (rarely unique on its own)
  if (className?.trim()) {
    suggestions.push({
      strategy: 'class name',
      selector: className,
      reliability: 'low',
      note: 'ClassName — often shared by many elements; combine with other strategies in XPath',
    });
  }

  return suggestions;
}
/**
 * Register the element-interaction MCP tools (click, set/clear value, text,
 * attribute and state queries) on `server`.
 *
 * Every tool resolves the element from its ID on the active session's driver,
 * performs one action and reports the outcome as a single text content item.
 * Any thrown error (including failure to obtain the driver) is converted into an
 * `isError` result via `formatError` instead of propagating.
 *
 * @param server  MCP server to register the tools on.
 * @param session Session wrapper providing the driver through `getDriver()`.
 */
export function registerInteractTools(server: McpServer, session: AppiumSession): void {
  /** Element type produced by awaiting `driver.$(...)` on the session's driver. */
  type ResolvedElement = Awaited<ReturnType<ReturnType<AppiumSession['getDriver']>['$']>>;

  /**
   * Shared handler body: look the element up, run `action` on it and wrap the
   * returned string as the tool result; map any failure to an error result.
   */
  const withElement = async (
    elementId: string,
    action: (el: ResolvedElement) => Promise<string>
  ) => {
    try {
      const driver = session.getDriver();
      const el = await driver.$({ [ELEMENT_KEY]: elementId });
      const text = await action(el);
      return { content: [{ type: 'text' as const, text }] };
    } catch (err) {
      return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] };
    }
  };

  server.registerTool(
    'click_element',
    {
      description: 'Click a UI element by its element ID.',
      inputSchema: { elementId: elementIdSchema },
      annotations: { destructiveHint: false },
    },
    ({ elementId }) =>
      withElement(elementId, async (el) => {
        await el.click();
        return 'clicked';
      })
  );

  server.registerTool(
    'set_value',
    {
      description: 'Set the text value of an input element (clears first then types).',
      inputSchema: {
        elementId: elementIdSchema,
        value: z.string().describe('The text value to set'),
      },
      annotations: { destructiveHint: false },
    },
    ({ elementId, value }) =>
      withElement(elementId, async (el) => {
        await el.setValue(value);
        return 'value set';
      })
  );

  server.registerTool(
    'clear_element',
    {
      description: 'Clear the text content of an input element.',
      inputSchema: { elementId: elementIdSchema },
      annotations: { destructiveHint: false },
    },
    ({ elementId }) =>
      withElement(elementId, async (el) => {
        await el.clearValue();
        return 'cleared';
      })
  );

  server.registerTool(
    'get_text',
    {
      description: 'Get the visible text content of a UI element.',
      inputSchema: { elementId: elementIdSchema },
      annotations: { readOnlyHint: true },
    },
    ({ elementId }) => withElement(elementId, (el) => el.getText())
  );

  server.registerTool(
    'get_attribute',
    {
      description: 'Get an attribute or property of a UI element. Common attributes: Name, AutomationId, ClassName, IsEnabled, IsOffscreen, ControlType, Value.Value. Returns an empty string when the attribute is absent.',
      inputSchema: {
        elementId: elementIdSchema,
        attribute: z.string().min(1).describe('Attribute name, e.g. "Name", "IsEnabled", "ControlType"'),
      },
      annotations: { readOnlyHint: true },
    },
    ({ elementId, attribute }) =>
      // A missing attribute is reported as an empty string, not as an error.
      withElement(elementId, async (el) => (await el.getAttribute(attribute)) ?? '')
  );

  server.registerTool(
    'is_element_displayed',
    {
      description: 'Check whether a UI element is visible on screen (not off-screen).',
      inputSchema: { elementId: elementIdSchema },
      annotations: { readOnlyHint: true },
    },
    ({ elementId }) => withElement(elementId, async (el) => String(await el.isDisplayed()))
  );

  server.registerTool(
    'is_element_enabled',
    {
      description: 'Check whether a UI element is enabled and interactable.',
      inputSchema: { elementId: elementIdSchema },
      annotations: { readOnlyHint: true },
    },
    ({ elementId }) => withElement(elementId, async (el) => String(await el.isEnabled()))
  );
}
annotations: { idempotentHint: true }, + }, + async ({ elementId }) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: collapse', [{ elementId }]); + return { content: [{ type: 'text' as const, text: 'collapsed' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'toggle_element', + { + description: 'Toggle a checkbox or toggle button via the UIA Toggle pattern. To confirm the resulting state, call get_toggle_state after this.', + inputSchema: elementIdInput, + }, + async ({ elementId }) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: toggle', [{ elementId }]); + return { content: [{ type: 'text' as const, text: 'toggled' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'get_toggle_state', + { + description: 'Get the toggle state of a checkbox or toggle button via the UIA Toggle pattern. Returns "On", "Off", or "Indeterminate".', + inputSchema: elementIdInput, + annotations: { readOnlyHint: true }, + }, + async ({ elementId }) => { + try { + const driver = session.getDriver(); + const result = await driver.executeScript('windows: getToggleState', [{ elementId }]); + return { content: [{ type: 'text' as const, text: String(result) }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'set_element_value', + { + description: 'Set the value of an element via the UIA Value or RangeValue pattern (e.g. 
sliders, spin boxes).', + inputSchema: { + elementId: elementIdSchema, + value: z.string().describe('The value to set'), + }, + annotations: { destructiveHint: false }, + }, + async ({ elementId, value }) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: setValue', [{ elementId, value }]); + return { content: [{ type: 'text' as const, text: 'value set' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'get_element_value', + { + description: 'Get the value of an element via the UIA Value pattern.', + inputSchema: elementIdInput, + annotations: { readOnlyHint: true }, + }, + async ({ elementId }) => { + try { + const driver = session.getDriver(); + const result = await driver.executeScript('windows: getValue', [{ elementId }]); + return { content: [{ type: 'text' as const, text: String(result) }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'focus_element', + { + description: 'Set focus to an element via the UIA Focus pattern (windows: setFocus). Required before keyboard-driven interactions such as send_keys on a specific control.', + inputSchema: elementIdInput, + annotations: { idempotentHint: true }, + }, + async ({ elementId }) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: setFocus', [{ elementId }]); + return { content: [{ type: 'text' as const, text: 'focused' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'select_item', + { + description: 'Select an item in a list box, tab control, or combo box via the UIA SelectionItem pattern (windows: select). 
/**
 * Register the session lifecycle MCP tools on `server`:
 * `create_session`, `delete_session` and `get_session_status`.
 *
 * Failures from `session.create` / `session.delete` are reported as `isError`
 * results (formatted with `formatError`) rather than thrown.
 *
 * @param server  MCP server to register the tools on.
 * @param session Session wrapper whose `create`, `delete` and `isActive` are used.
 */
export function registerSessionTools(server: McpServer, session: AppiumSession): void {
  // Result builders shared by the three handlers below.
  const textResult = (text: string) => ({ content: [{ type: 'text' as const, text }] });
  const errorResult = (err: unknown) => ({
    isError: true,
    content: [{ type: 'text' as const, text: formatError(err) }],
  });

  // Fresh "integer >= 0" schema for each millisecond-valued option.
  const nonNegativeInt = () => z.number().int().min(0);

  const createDescription =
    'Start an Appium session by launching a Windows application. Must be called before any other tool. ' +
    'Provide either an executable path (e.g. "C:\\\\Windows\\\\notepad.exe") or a UWP App ID (e.g. "Microsoft.WindowsCalculator_8wekyb3d8bbwe!App").';

  const appDescription =
    'Executable path (e.g. "C:\\\\Windows\\\\notepad.exe") or UWP App ID ' +
    '(e.g. "Microsoft.WindowsCalculator_8wekyb3d8bbwe!App") or "Root" to attach to the desktop root.';

  server.registerTool(
    'create_session',
    {
      description: createDescription,
      annotations: { destructiveHint: true },
      inputSchema: {
        app: z.string().min(1).describe(appDescription),
        appArguments: z.string().optional().describe('Command-line arguments to pass to the app'),
        appWorkingDir: z.string().optional().describe('Working directory for the app process'),
        waitForAppLaunch: nonNegativeInt().optional().describe('Milliseconds to wait after app launch before interacting'),
        shouldCloseApp: z.boolean().optional().default(true).describe('Whether to close the app when delete_session is called'),
        implicitTimeout: nonNegativeInt().optional().default(1500).describe('Implicit element wait timeout in milliseconds'),
        delayAfterClick: nonNegativeInt().optional().describe('Milliseconds to wait after each click'),
        delayBeforeClick: nonNegativeInt().optional().describe('Milliseconds to wait before each click'),
        smoothPointerMove: z.string().optional().describe('Easing function name for smooth pointer movement'),
      },
    },
    async (options) => {
      try {
        await session.create(options);
      } catch (err) {
        return errorResult(err);
      }
      return textResult(`Session created. App "${options.app}" is ready for interaction.`);
    }
  );

  server.registerTool(
    'delete_session',
    {
      description: 'End the current Appium session. Closes the app (unless shouldCloseApp was set to false when creating the session). Call this when testing is complete.',
      annotations: { destructiveHint: true },
    },
    async () => {
      try {
        // Deleting without an active session is reported as a normal (non-error) outcome.
        if (session.isActive()) {
          await session.delete();
          return textResult('Session deleted successfully.');
        }
        return textResult('No active session to delete.');
      } catch (err) {
        return errorResult(err);
      }
    }
  );

  server.registerTool(
    'get_session_status',
    {
      description: 'Check whether a session is currently active.',
      annotations: { readOnlyHint: true },
    },
    async () =>
      textResult(
        session.isActive()
          ? 'Session is active.'
          : 'No active session. Call create_session to start one.'
      )
  );
}
Useful for understanding the app structure before deciding what to interact with.', + annotations: { readOnlyHint: true }, + }, + async () => { + try { + const driver = session.getDriver(); + const source = await driver.getPageSource(); + return { content: [{ type: 'text' as const, text: source }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'get_window_rect', + { + description: 'Get the position and size of the current app window.', + annotations: { readOnlyHint: true }, + }, + async () => { + try { + const driver = session.getDriver(); + const rect = await driver.getWindowRect(); + return { content: [{ type: 'text' as const, text: JSON.stringify(rect) }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'get_window_handles', + { + description: 'Get all available window handles for the current session.', + annotations: { readOnlyHint: true }, + }, + async () => { + try { + const driver = session.getDriver(); + const handles = await driver.getWindowHandles(); + return { content: [{ type: 'text' as const, text: JSON.stringify(handles) }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'switch_to_window', + { + description: 'Switch focus to a different window by its handle.', + inputSchema: { + handle: z.string().min(1).describe('Window handle to switch to (from get_window_handles)'), + }, + }, + async ({ handle }) => { + try { + const driver = session.getDriver(); + await driver.switchToWindow(handle); + return { content: [{ type: 'text' as const, text: `Switched to window: ${handle}` }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + // Window-pattern tools (UIA Window pattern) — 
operate on a window element ID + + server.registerTool( + 'maximize_window', + { + description: 'Maximize a window element via the UIA Window pattern.', + inputSchema: elementIdInput, + annotations: { idempotentHint: true }, + }, + async ({ elementId }) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: maximize', [{ elementId }]); + return { content: [{ type: 'text' as const, text: 'maximized' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'minimize_window', + { + description: 'Minimize a window element via the UIA Window pattern.', + inputSchema: elementIdInput, + annotations: { idempotentHint: true }, + }, + async ({ elementId }) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: minimize', [{ elementId }]); + return { content: [{ type: 'text' as const, text: 'minimized' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'restore_window', + { + description: 'Restore a minimized or maximized window to its normal state via the UIA Window pattern.', + inputSchema: elementIdInput, + annotations: { idempotentHint: true }, + }, + async ({ elementId }) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: restore', [{ elementId }]); + return { content: [{ type: 'text' as const, text: 'restored' }] }; + } catch (err) { + return { isError: true, content: [{ type: 'text' as const, text: formatError(err) }] }; + } + } + ); + + server.registerTool( + 'close_window', + { + description: 'Close a window element via the UIA Window pattern.', + inputSchema: elementIdInput, + annotations: { destructiveHint: true }, + }, + async ({ elementId }) => { + try { + const driver = session.getDriver(); + await driver.executeScript('windows: close', [{ elementId }]); + 
/**
 * Heuristically decide whether `appId` is a UWP application ID rather than a
 * file-system path.
 *
 * The string is treated as a UWP ID when it contains both `!` and `_` and
 * contains no path separator (`/` or `\`).
 *
 * @param appId Application identifier or executable path to classify.
 * @returns `true` when the string matches the UWP ID heuristic.
 */
export function isUwpAppId(appId: string): boolean {
  // Anything containing a path separator is a path, never an app ID.
  if (appId.includes('/') || appId.includes('\\')) {
    return false;
  }
  return appId.includes('!') && appId.includes('_');
}
*/ relative?: boolean, - /** Set to screen resolution [width, height] when the mouse move is absolute. */ - screenResolutionAndRefreshRate?: ReturnType; } ): MouseEvent[] { - const { x, y, wheel, relative, screenResolutionAndRefreshRate} = args; + const { x, y, wheel, relative } = args; if (wheel) { const mouseEvents: MouseEvent[] = []; @@ -457,18 +455,18 @@ function makeMouseMoveEvents(args: { const mouseEvent: MouseEvent = makeEmptyMouseEvent(); - if (!screenResolutionAndRefreshRate) { - throw new errors.InvalidArgumentError('screenResolution parameter must be set for absolute mouse move.'); - } - - const [screenWidth, screenHeight] = screenResolutionAndRefreshRate; - - mouseEvent.u.mi.dx = relative ? Math.trunc(x) : Math.trunc((x * UINT16_MAX) / screenWidth); - mouseEvent.u.mi.dy = relative ? Math.trunc(y) : Math.trunc((y * UINT16_MAX) / screenHeight); - mouseEvent.u.mi.dwFlags = MouseEventFlags.MOUSEEVENTF_MOVE; - - if (!relative) { - mouseEvent.u.mi.dwFlags |= MouseEventFlags.MOUSEEVENTF_ABSOLUTE; + if (relative) { + mouseEvent.u.mi.dx = Math.trunc(x); + mouseEvent.u.mi.dy = Math.trunc(y); + mouseEvent.u.mi.dwFlags = MouseEventFlags.MOUSEEVENTF_MOVE; + } else { + const virt = getVirtualScreenBounds(); + mouseEvent.u.mi.dx = Math.trunc(((x - virt.left) * UINT16_MAX) / virt.width); + mouseEvent.u.mi.dy = Math.trunc(((y - virt.top) * UINT16_MAX) / virt.height); + mouseEvent.u.mi.dwFlags = + MouseEventFlags.MOUSEEVENTF_MOVE | + MouseEventFlags.MOUSEEVENTF_ABSOLUTE | + MouseEventFlags.MOUSEEVENTF_VIRTUALDESK; } return [mouseEvent]; @@ -651,8 +649,7 @@ function sendMouseButtonInput(button: number, down: boolean) { async function sendMouseMoveInput(args: { x: number, y: number, relative: boolean, duration: number, easingFunction?: string }): Promise { const { duration } = args; let { x, y, easingFunction, relative } = args; - const screenResolutionAndRefreshRate = getScreenResolutionAndRefreshRate(); - const [, , refreshRate] = screenResolutionAndRefreshRate; + const 
/**
 * Read the display refresh rate (Hz) from the `dmDisplayFrequency` field of the
 * DEVMODEA structure filled in by `EnumDisplaySettingsA`.
 *
 * The reported frequency may be 0 or 1, which stands for "hardware default"
 * rather than a real rate, and the zero-initialised buffer also reads as 0 if the
 * call does not fill it in. Callers divide by the result to derive a frame
 * interval, so such values are replaced with a 60 Hz fallback.
 *
 * The result is memoized for roughly one second by temporarily replacing this
 * function with a closure returning the cached value; the first call made after
 * that window restores the real implementation (and still returns the cached
 * value), so the value is re-read on the call after that.
 *
 * @returns the refresh rate in Hz, always greater than 1.
 */
function getRefreshRate(): number {
    // Used when the driver reports 0/1 ("hardware default") or nothing at all.
    const FALLBACK_REFRESH_RATE_HZ = 60;

    const buffer = Buffer.alloc(sizeof(DEVMODEA));
    // -1 is presumably ENUM_CURRENT_SETTINGS (current mode of the default display) — confirm against the binding.
    // NOTE(review): dmSize is not initialised before the call; the Win32 docs ask for it — confirm this is fine here.
    EnumDisplaySettingsA(null, -1, buffer);
    // Byte offset 120 is where this code reads dmDisplayFrequency from the DEVMODEA buffer.
    const reportedRate = (buffer.readUInt32LE(120) as DeviceModeAnsi['dmDisplayFrequency']);
    const refreshRate = reportedRate > 1 ? reportedRate : FALLBACK_REFRESH_RATE_HZ;

    const nonMemoizedMethod = getRefreshRate;
    const currentTime = new Date().getTime();

    // @ts-expect-error memoizing the function to prevent repeated calls that might crash Node.js
    getRefreshRate = () => {
        if (new Date().getTime() - currentTime > 1000) {
            // @ts-expect-error reset memoization after 1 second
            getRefreshRate = nonMemoizedMethod;
        }
        return refreshRate;
    };

    return refreshRate;
}
'LANDSCAPE' : 'PORTRAIT'; } diff --git a/package.json b/package.json index 619b160..6b96d06 100644 --- a/package.json +++ b/package.json @@ -1,45 +1,53 @@ { - "name": "appium-novawindows-driver", - "version": "1.3.1", + "name": "appium-desktop-driver", + "version": "1.4.1", "description": "Appium driver for Windows", "keywords": [ "appium", - "novawindows", + "desktop", "uiautomation", "powershell", "automated testing", - "windows" + "windows", + "verisoft" ], "main": "build/lib/driver.js", + "bin": { + "desktop-driver-mcp": "build/lib/mcp/index.js" + }, "scripts": { "build": "tsc -b", "watch": "tsc -b --watch", "lint": "eslint .", "test": "npx vitest run", - "test:e2e": "npx vitest run --config vitest.e2e.config.ts" + "test:e2e": "npx vitest run --config vitest.e2e.config.ts", + "mcp:start": "node build/lib/mcp/index.js" }, - "author": "Automate The Planet", + "author": "VeriSoft", "license": "Apache-2.0", "repository": { "type": "git", - "url": "https://github.com/AutomateThePlanet/appium-novawindows-driver.git" + "url": "https://github.com/verisoft-ai/appium-desktop-driver.git" }, "bugs": { - "url": "https://github.com/AutomateThePlanet/appium-novawindows-driver/issues" + "url": "https://github.com/verisoft-ai/appium-desktop-driver/issues" }, "peerDependencies": { "appium": "^3.0.0-rc.2" }, "dependencies": { "@appium/base-driver": "^10.1.0", + "@modelcontextprotocol/sdk": "^1.27.1", "bezier-easing": "^2.1.0", "ffmpeg-static": "^5.2.0", "koffi": "^2.14.1", - "xpath-analyzer": "^3.0.1" + "webdriverio": "^9.0.0", + "xpath-analyzer": "^3.0.1", + "zod": "^4.3.6" }, "appium": { - "driverName": "novawindows", - "automationName": "NovaWindows", + "driverName": "desktopdriver", + "automationName": "DesktopDriver", "platformNames": [ "Windows" ], @@ -58,7 +66,6 @@ "semantic-release": "^25.0.1", "typescript": "^5.9.3", "typescript-eslint": "^8.46.1", - "vitest": "^2.1.0", - "webdriverio": "^9.0.0" + "vitest": "^2.1.0" } } diff --git a/test/commands/app/app.test.ts 
b/test/commands/app/app.test.ts index 4e44a49..06225c4 100644 --- a/test/commands/app/app.test.ts +++ b/test/commands/app/app.test.ts @@ -125,9 +125,23 @@ describe('setWindow', () => { it('throws NoSuchWindowError when window is not found after retries', async () => { const driver = createMockDriver() as any; + driver.caps['ms:windowSwitchRetries'] = 1; + driver.caps['ms:windowSwitchInterval'] = 0; // All calls return empty (window not found) driver.sendPowerShellCommand.mockResolvedValue(''); await expect(setWindow.call(driver, 'NonExistentWindow')).rejects.toThrow('No window was found'); - }, 10000); + }); + + it('respects ms:windowSwitchRetries cap', async () => { + const driver2 = createMockDriver() as any; + driver2.caps['ms:windowSwitchRetries'] = 2; + driver2.caps['ms:windowSwitchInterval'] = 0; + driver2.sendPowerShellCommand.mockResolvedValue(''); + + // Use a numeric handle string so both the handle search and name search run per iteration (2 PS calls each) + await expect(setWindow.call(driver2, '99999')).rejects.toThrow('No window was found'); + // 2 retries × 2 PS commands (handle search + name search) = 4 calls + expect(driver2.sendPowerShellCommand.mock.calls.length).toBe(4); + }); }); diff --git a/test/commands/device.test.ts b/test/commands/device.test.ts index 0ec804e..cfccbe5 100644 --- a/test/commands/device.test.ts +++ b/test/commands/device.test.ts @@ -2,7 +2,17 @@ * Unit tests for lib/commands/device.ts */ import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { getDeviceTime } from '../../lib/commands/device'; +import { + getDeviceTime, + pushFile, + pullFile, + pullFolder, + hideKeyboard, + isKeyboardShown, + activateApp, + terminateApp, + isAppInstalled, +} from '../../lib/commands/device'; import { createMockDriver } from '../fixtures/driver'; /** Decode base64 Invoke-Expression wrappers to reveal the underlying PS command. 
*/ @@ -42,3 +52,249 @@ describe('getDeviceTime', () => { expect(cmd).toContain('ToString'); }); }); + +describe('pushFile', () => { + beforeEach(() => vi.clearAllMocks()); + + it('asserts MODIFY_FS_FEATURE and calls PS with path and data', async () => { + const driver = createMockDriver() as any; + await pushFile.call(driver, 'C:\\temp\\test.txt', 'aGVsbG8='); + expect(driver.assertFeatureEnabled).toHaveBeenCalledTimes(1); + expect(driver.sendPowerShellCommand).toHaveBeenCalledTimes(1); + const cmd = decodeCommand(driver.sendPowerShellCommand.mock.calls[0][0]); + expect(cmd).toContain('WriteAllBytes'); + expect(cmd).toContain('FromBase64String'); + expect(cmd).toContain('CreateDirectory'); + }); + + it('throws InvalidArgumentError when path is empty', async () => { + const driver = createMockDriver() as any; + await expect(pushFile.call(driver, '', 'aGVsbG8=')).rejects.toThrow("'path' must be provided."); + }); + + it('throws InvalidArgumentError when data is empty', async () => { + const driver = createMockDriver() as any; + await expect(pushFile.call(driver, 'C:\\temp\\test.txt', '')).rejects.toThrow("'data' must be provided."); + }); +}); + +describe('pullFile', () => { + beforeEach(() => vi.clearAllMocks()); + + it('asserts MODIFY_FS_FEATURE and returns base64 string', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand.mockResolvedValue('aGVsbG8='); + const result = await pullFile.call(driver, 'C:\\temp\\test.txt'); + expect(driver.assertFeatureEnabled).toHaveBeenCalledTimes(1); + expect(result).toBe('aGVsbG8='); + const cmd = decodeCommand(driver.sendPowerShellCommand.mock.calls[0][0]); + expect(cmd).toContain('ReadAllBytes'); + expect(cmd).toContain('ToBase64String'); + }); + + it('throws InvalidArgumentError when path is empty', async () => { + const driver = createMockDriver() as any; + await expect(pullFile.call(driver, '')).rejects.toThrow("'path' must be provided."); + }); +}); + +describe('pullFolder', () => { + 
beforeEach(() => vi.clearAllMocks()); + + it('asserts MODIFY_FS_FEATURE and returns base64 zip', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand.mockResolvedValue('UEsDBA=='); + const result = await pullFolder.call(driver, 'C:\\temp\\mydir'); + expect(driver.assertFeatureEnabled).toHaveBeenCalledTimes(1); + expect(result).toBe('UEsDBA=='); + const cmd = decodeCommand(driver.sendPowerShellCommand.mock.calls[0][0]); + expect(cmd).toContain('Compress-Archive'); + expect(cmd).toContain('-LiteralPath'); + expect(cmd).toContain('ToBase64String'); + expect(cmd).toContain('Remove-Item'); + }); + + it('throws InvalidArgumentError when path is empty', async () => { + const driver = createMockDriver() as any; + await expect(pullFolder.call(driver, '')).rejects.toThrow("'path' must be provided."); + }); +}); + +describe('hideKeyboard', () => { + beforeEach(() => vi.clearAllMocks()); + + it('sends PS command without throwing', async () => { + const driver = createMockDriver() as any; + await hideKeyboard.call(driver); + expect(driver.sendPowerShellCommand).toHaveBeenCalledTimes(1); + const cmd = decodeCommand(driver.sendPowerShellCommand.mock.calls[0][0]); + expect(cmd).toContain('TabTip'); + expect(cmd).toContain('TextInputHost'); + }); + + it('accepts optional strategy/key/keyCode/keyName without error', async () => { + const driver = createMockDriver() as any; + await expect(hideKeyboard.call(driver, 'pressKey', 'Done', undefined, undefined)).resolves.not.toThrow(); + }); +}); + +describe('isKeyboardShown', () => { + beforeEach(() => vi.clearAllMocks()); + + it('returns true when PS outputs "true"', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand.mockResolvedValue('true\n'); + const result = await isKeyboardShown.call(driver); + expect(result).toBe(true); + }); + + it('returns false when PS outputs "false"', async () => { + const driver = createMockDriver() as any; + 
driver.sendPowerShellCommand.mockResolvedValue('false\n'); + const result = await isKeyboardShown.call(driver); + expect(result).toBe(false); + }); + + it('sends a command that checks TabTip and TextInputHost', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand.mockResolvedValue('false'); + await isKeyboardShown.call(driver); + const cmd = decodeCommand(driver.sendPowerShellCommand.mock.calls[0][0]); + expect(cmd).toContain('TabTip'); + expect(cmd).toContain('IsOffscreenProperty'); + }); +}); + +describe('activateApp', () => { + beforeEach(() => vi.clearAllMocks()); + + it('throws InvalidArgumentError when appId is empty', async () => { + const driver = createMockDriver() as any; + await expect(activateApp.call(driver, '')).rejects.toThrow("'appId' or 'bundleId' must be provided."); + }); + + it('calls changeRootElement directly for UWP app IDs', async () => { + const driver = createMockDriver() as any; + driver.changeRootElement = vi.fn().mockResolvedValue(undefined); + await activateApp.call(driver, 'Microsoft.WindowsCalculator_8wekyb3d8bbwe!App'); + expect(driver.changeRootElement).toHaveBeenCalledWith('Microsoft.WindowsCalculator_8wekyb3d8bbwe!App'); + expect(driver.sendPowerShellCommand).not.toHaveBeenCalled(); + }); + + it('attaches to existing classic process window when already running', async () => { + const driver = createMockDriver() as any; + driver.changeRootElement = vi.fn().mockResolvedValue(undefined); + driver.attachToApplicationWindow = vi.fn().mockResolvedValue(undefined); + driver.sendPowerShellCommand + .mockResolvedValueOnce('1234') // Get-Process PID + .mockResolvedValueOnce('5678'); // MainWindowHandle + await activateApp.call(driver, 'C:\\Windows\\System32\\notepad.exe'); + expect(driver.changeRootElement).toHaveBeenCalledWith(5678); + expect(driver.sendPowerShellCommand).toHaveBeenCalledTimes(2); + }); + + it('launches via changeRootElement when process is not running', async () => { + const driver = 
createMockDriver() as any; + driver.changeRootElement = vi.fn().mockResolvedValue(undefined); + driver.sendPowerShellCommand.mockResolvedValue(''); // no existing PID + await activateApp.call(driver, 'C:\\Windows\\System32\\notepad.exe'); + expect(driver.changeRootElement).toHaveBeenCalledWith('C:\\Windows\\System32\\notepad.exe'); + }); +}); + +describe('terminateApp', () => { + beforeEach(() => vi.clearAllMocks()); + + it('throws InvalidArgumentError when appId is empty', async () => { + const driver = createMockDriver() as any; + await expect(terminateApp.call(driver, '')).rejects.toThrow("'appId' or 'bundleId' must be provided."); + }); + + it('returns false when classic process is not running', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand + .mockResolvedValueOnce('none') // check query — not running + .mockResolvedValueOnce(''); // $rootElement = $null + const result = await terminateApp.call(driver, 'C:\\Windows\\System32\\notepad.exe'); + expect(result).toBe(false); + }); + + it('returns true when classic process was terminated', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand + .mockResolvedValueOnce('1234') // check query — PID list + .mockResolvedValueOnce('') // Stop-Process (void) + .mockResolvedValueOnce('false') // poll — process is gone + .mockResolvedValueOnce(''); // $rootElement = $null + const result = await terminateApp.call(driver, 'C:\\Windows\\System32\\notepad.exe'); + expect(result).toBe(true); + }); + + it('uses UWP path and searches by PackageFamilyName for UWP apps', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand + .mockResolvedValueOnce('1234') // check query — PID list + .mockResolvedValueOnce('') // Stop-Process (void) + .mockResolvedValueOnce('false') // poll — process is gone + .mockResolvedValueOnce(''); // $rootElement = $null + const result = await terminateApp.call(driver, 
'Microsoft.WindowsCalculator_8wekyb3d8bbwe!App'); + expect(result).toBe(true); + const cmd = decodeCommand(driver.sendPowerShellCommand.mock.calls[0][0]); + expect(cmd).toContain('Get-AppxPackage'); + expect(cmd).toContain('PackageFamilyName'); + }); + + it('resets rootElement regardless of result', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand + .mockResolvedValueOnce('none') // check query — not running + .mockResolvedValueOnce(''); // $rootElement = $null + await terminateApp.call(driver, 'notepad.exe'); + const lastCall = driver.sendPowerShellCommand.mock.calls[driver.sendPowerShellCommand.mock.calls.length - 1][0]; + expect(lastCall).toContain('$rootElement = $null'); + }); +}); + +describe('isAppInstalled', () => { + beforeEach(() => vi.clearAllMocks()); + + it('throws InvalidArgumentError when appId is empty', async () => { + const driver = createMockDriver() as any; + await expect(isAppInstalled.call(driver, '')).rejects.toThrow("'appId' or 'bundleId' must be provided."); + }); + + it('returns true for UWP app when package is found', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand.mockResolvedValue('true\n'); + const result = await isAppInstalled.call(driver, 'Microsoft.WindowsCalculator_8wekyb3d8bbwe!App'); + expect(result).toBe(true); + const cmd = decodeCommand(driver.sendPowerShellCommand.mock.calls[0][0]); + expect(cmd).toContain('Get-AppxPackage'); + }); + + it('returns false for UWP app when package is not found', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand.mockResolvedValue('false\n'); + const result = await isAppInstalled.call(driver, 'NonExistent_app!App'); + expect(result).toBe(false); + }); + + it('uses Test-Path for full file paths', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand.mockResolvedValue('true\n'); + const result = await isAppInstalled.call(driver, 
'C:\\Windows\\System32\\notepad.exe'); + expect(result).toBe(true); + const cmd = decodeCommand(driver.sendPowerShellCommand.mock.calls[0][0]); + expect(cmd).toContain('Test-Path'); + expect(cmd).toContain('-LiteralPath'); + }); + + it('uses Get-Command for bare process names', async () => { + const driver = createMockDriver() as any; + driver.sendPowerShellCommand.mockResolvedValue('true\n'); + const result = await isAppInstalled.call(driver, 'calc.exe'); + expect(result).toBe(true); + const cmd = decodeCommand(driver.sendPowerShellCommand.mock.calls[0][0]); + expect(cmd).toContain('Get-Command'); + }); +}); diff --git a/test/e2e/device-app-management.e2e.ts b/test/e2e/device-app-management.e2e.ts new file mode 100644 index 0000000..8c3aa82 --- /dev/null +++ b/test/e2e/device-app-management.e2e.ts @@ -0,0 +1,178 @@ +import { describe, it, beforeAll, afterAll, afterEach, expect } from 'vitest'; +import type { Browser } from 'webdriverio'; +import { + createCalculatorSession, + createNotepadSession, + createRootSession, + quitSession, + closeAllTestApps, + CALCULATOR_APP_ID, + NOTEPAD_APP_PATH, +} from './helpers/session.js'; + +// ─── isAppInstalled ─────────────────────────────────────────────────────────── +// Uses a shared Root session — these checks are read-only and don't affect app state. 
+ +describe('isAppInstalled', () => { + let driver: Browser; + + beforeAll(async () => { + driver = await createRootSession(); + }); + + afterAll(async () => { + await quitSession(driver); + }); + + it('returns true for Notepad by full path', async () => { + expect(await driver.isAppInstalled(NOTEPAD_APP_PATH)).toBe(true); + }); + + it('returns true for Calculator UWP app by bundle ID', async () => { + expect(await driver.isAppInstalled(CALCULATOR_APP_ID)).toBe(true); + }); + + it('returns false for a non-existent full path', async () => { + expect(await driver.isAppInstalled('C:\\nonexistent\\definitely-fake.exe')).toBe(false); + }); + + it('returns true for bare executable name "notepad.exe" (Get-Command lookup)', async () => { + expect(await driver.isAppInstalled('notepad.exe')).toBe(true); + }); + + it('returns false for a non-existent bare name', async () => { + expect(await driver.isAppInstalled('definitelyfakeapp12345.exe')).toBe(false); + }); +}); + +// ─── terminateApp ──────────────────────────────────────────────────────────── +// Each test manages its own session — terminateApp mutates running process state. 
+ +describe('terminateApp', () => { + afterEach(() => closeAllTestApps()); + + it('returns false when Notepad is not already running', async () => { + closeAllTestApps(); // ensure it is not running before test + const driver = await createRootSession(); + try { + const result = await driver.terminateApp(NOTEPAD_APP_PATH); + expect(result).toBe(false); + } finally { + await quitSession(driver); + } + }); + + it('returns true and kills Notepad when it is running', async () => { + const notepadDriver = await createNotepadSession(); + const rootDriver = await createRootSession(); + try { + const result = await rootDriver.terminateApp(NOTEPAD_APP_PATH); + expect(result).toBe(true); + } finally { + await quitSession(rootDriver); + await quitSession(notepadDriver); + } + }); + + it('returns false on a second call after app has already been terminated', async () => { + const notepadDriver = await createNotepadSession(); + const rootDriver = await createRootSession(); + try { + await rootDriver.terminateApp(NOTEPAD_APP_PATH); // first kill + const result = await rootDriver.terminateApp(NOTEPAD_APP_PATH); // already gone + expect(result).toBe(false); + } finally { + await quitSession(rootDriver); + await quitSession(notepadDriver); + } + }); + + it('returns true and kills Calculator UWP when it is running', async () => { + const calcDriver = await createCalculatorSession(); + const rootDriver = await createRootSession(); + try { + const result = await rootDriver.terminateApp(CALCULATOR_APP_ID); + expect(result).toBe(true); + } finally { + await quitSession(rootDriver); + await quitSession(calcDriver); + } + }); +}); + +// ─── activateApp ───────────────────────────────────────────────────────────── +// Each test manages its own session — activateApp mutates $rootElement. 
+ +describe('activateApp', () => { + afterEach(() => closeAllTestApps()); + + it('launches Notepad when it is not already running and sets it as root', async () => { + closeAllTestApps(); + const driver = await createRootSession(); + try { + await driver.activateApp(NOTEPAD_APP_PATH); + const titleText = await driver.getTitle(); + expect(titleText).toMatch(/notepad/i); + } finally { + await quitSession(driver); + } + }); + + it('focuses an already-running Notepad instance without launching a second one', async () => { + const notepadDriver = await createNotepadSession(); + const rootDriver = await createRootSession(); + try { + await rootDriver.activateApp(NOTEPAD_APP_PATH); + const titleText = await rootDriver.getTitle(); + expect(titleText).toMatch(/notepad/i); + } finally { + await quitSession(rootDriver); + await quitSession(notepadDriver); + } + }); + + it('launches and attaches to Calculator UWP app', async () => { + closeAllTestApps(); + const driver = await createRootSession(); + try { + await driver.activateApp(CALCULATOR_APP_ID); + const titleText = await driver.getTitle(); + expect(titleText).toMatch(/calc/i); + } finally { + await quitSession(driver); + } + }); +}); + +// ─── hideKeyboard / isKeyboardShown ────────────────────────────────────────── +// The Windows touch keyboard (TabTip / TextInputHost) is typically not visible on +// a non-touch development machine, so we verify command semantics rather than +// asserting a specific visibility state. 
+ +describe('hideKeyboard / isKeyboardShown', () => { + let driver: Browser; + + beforeAll(async () => { + driver = await createCalculatorSession(); + }); + + afterAll(async () => { + await quitSession(driver); + }); + + it('isKeyboardShown returns a boolean', async () => { + const result = await driver.isKeyboardShown(); + expect(typeof result).toBe('boolean'); + }); + + it('hideKeyboard does not throw when keyboard is not visible', async () => { + await expect(driver.hideKeyboard()).resolves.not.toThrow(); + }); + + it('isKeyboardShown returns false on a standard desktop (no touch keyboard)', async () => { + // On a headless / non-touch machine TabTip and TextInputHost are not running. + // If this fails on a touch device, remove the assertion and keep only the type check above. + const result = await driver.isKeyboardShown(); + expect(result).toBe(false); + }); +}); diff --git a/test/e2e/device-file-transfer.e2e.ts b/test/e2e/device-file-transfer.e2e.ts new file mode 100644 index 0000000..dd4de02 --- /dev/null +++ b/test/e2e/device-file-transfer.e2e.ts @@ -0,0 +1,160 @@ +import { describe, it, beforeAll, afterAll, afterEach, expect } from 'vitest'; +import { existsSync, mkdirSync, writeFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import type { Browser } from 'webdriverio'; +import { createCalculatorSession, quitSession } from './helpers/session.js'; + +// pushFile / pullFile / pullFolder use assertFeatureEnabled(MODIFY_FS_FEATURE). +// The Appium server must be started with --allow-insecure modify_fs. 
+ +describe('pushFile / pullFile / pullFolder', () => { + let driver: Browser; + + beforeAll(async () => { + driver = await createCalculatorSession(); + }); + + afterAll(async () => { + await quitSession(driver); + }); + + // ─── pushFile ──────────────────────────────────────────────────────────── + + describe('pushFile', () => { + let cleanup: string = ''; + + afterEach(() => { + if (cleanup && existsSync(cleanup)) { + try { rmSync(cleanup, { recursive: true, force: true }); } catch { /* noop */ } + } + cleanup = ''; + }); + + it('writes a base64-encoded text file to disk', async () => { + cleanup = join(tmpdir(), `nova-push-${Date.now()}.txt`); + await driver.pushFile(cleanup, Buffer.from('Hello, NovaWindows!').toString('base64')); + expect(existsSync(cleanup)).toBe(true); + }); + + it('round-trips text content via pushFile → pullFile', async () => { + cleanup = join(tmpdir(), `nova-roundtrip-${Date.now()}.txt`); + const content = 'round-trip test content'; + await driver.pushFile(cleanup, Buffer.from(content, 'utf8').toString('base64')); + const pulled = await driver.pullFile(cleanup); + expect(Buffer.from(pulled, 'base64').toString('utf8')).toBe(content); + }); + + it('round-trips binary data without corruption', async () => { + cleanup = join(tmpdir(), `nova-binary-${Date.now()}.bin`); + const bytes = Buffer.from([0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD]); + await driver.pushFile(cleanup, bytes.toString('base64')); + const pulled = await driver.pullFile(cleanup); + expect(Buffer.from(pulled, 'base64')).toEqual(bytes); + }); + + it('creates missing parent directories automatically', async () => { + const parentDir = join(tmpdir(), `nova-nested-${Date.now()}`); + const filePath = join(parentDir, 'sub', 'dir', 'file.txt'); + cleanup = parentDir; + await driver.pushFile(filePath, Buffer.from('nested').toString('base64')); + expect(existsSync(filePath)).toBe(true); + }); + + it('overwrites an existing file', async () => { + cleanup = join(tmpdir(), 
`nova-overwrite-${Date.now()}.txt`); + writeFileSync(cleanup, 'old content', 'utf8'); + await driver.pushFile(cleanup, Buffer.from('new content').toString('base64')); + const pulled = await driver.pullFile(cleanup); + expect(Buffer.from(pulled, 'base64').toString('utf8')).toBe('new content'); + }); + }); + + // ─── pullFile ──────────────────────────────────────────────────────────── + + describe('pullFile', () => { + let cleanup: string = ''; + + afterEach(() => { + if (cleanup && existsSync(cleanup)) { + try { rmSync(cleanup); } catch { /* noop */ } + } + cleanup = ''; + }); + + it('returns the base64-encoded content of an existing file', async () => { + cleanup = join(tmpdir(), `nova-pull-${Date.now()}.txt`); + const content = 'pull this!'; + writeFileSync(cleanup, content, 'utf8'); + const result = await driver.pullFile(cleanup); + expect(Buffer.from(result, 'base64').toString('utf8')).toBe(content); + }); + + it('result is a valid base64 string', async () => { + cleanup = join(tmpdir(), `nova-b64-${Date.now()}.txt`); + writeFileSync(cleanup, 'base64 check', 'utf8'); + const result = await driver.pullFile(cleanup); + expect(() => Buffer.from(result, 'base64')).not.toThrow(); + }); + + it('throws when the file does not exist', async () => { + const missing = join(tmpdir(), `nova-missing-${Date.now()}.txt`); + await expect(driver.pullFile(missing)).rejects.toThrow(); + }); + }); + + // ─── pullFolder ────────────────────────────────────────────────────────── + + describe('pullFolder', () => { + let cleanup: string = ''; + + afterEach(() => { + if (cleanup && existsSync(cleanup)) { + try { rmSync(cleanup, { recursive: true }); } catch { /* noop */ } + } + cleanup = ''; + }); + + it('returns a base64-encoded ZIP with valid PK header', async () => { + cleanup = join(tmpdir(), `nova-folder-${Date.now()}`); + mkdirSync(cleanup); + writeFileSync(join(cleanup, 'a.txt'), 'file a'); + writeFileSync(join(cleanup, 'b.txt'), 'file b'); + + const result = await 
driver.pullFolder(cleanup); + const bytes = Buffer.from(result, 'base64'); + + // ZIP local file header: PK\x03\x04 + expect(bytes[0]).toBe(0x50); // P + expect(bytes[1]).toBe(0x4B); // K + expect(bytes[2]).toBe(0x03); + expect(bytes[3]).toBe(0x04); + }); + + it('ZIP is larger when folder has more content', async () => { + const smallDir = join(tmpdir(), `nova-small-${Date.now()}`); + const largeDir = join(tmpdir(), `nova-large-${Date.now()}`); + cleanup = smallDir; // afterEach cleans one; we clean largeDir here manually + + mkdirSync(smallDir); + writeFileSync(join(smallDir, 'tiny.txt'), 'x'); + + mkdirSync(largeDir); + writeFileSync(join(largeDir, 'big.txt'), 'x'.repeat(10_000)); + + const smallZip = await driver.pullFolder(smallDir); + const largeZip = await driver.pullFolder(largeDir); + + expect(Buffer.from(largeZip, 'base64').length).toBeGreaterThan( + Buffer.from(smallZip, 'base64').length + ); + + rmSync(largeDir, { recursive: true, force: true }); + }); + + it('throws when the directory does not exist', async () => { + const missing = join(tmpdir(), `nova-missing-dir-${Date.now()}`); + await expect(driver.pullFolder(missing)).rejects.toThrow(); + }); + }); +}); diff --git a/test/e2e/extension-multimonitor.e2e.ts b/test/e2e/extension-multimonitor.e2e.ts new file mode 100644 index 0000000..9d54a87 --- /dev/null +++ b/test/e2e/extension-multimonitor.e2e.ts @@ -0,0 +1,143 @@ +import { describe, it, beforeAll, afterAll, beforeEach, expect } from 'vitest'; +import type { Browser } from 'webdriverio'; +import { + createCalculatorSession, + quitSession, + resetCalculator, +} from './helpers/session.js'; + +describe('windows: getMonitors extension command', () => { + let calc: Browser; + + beforeAll(async () => { + calc = await createCalculatorSession(); + }); + + afterAll(async () => { + await quitSession(calc); + }); + + beforeEach(async () => { + await resetCalculator(calc); + }); + + describe('getMonitors — response shape', () => { + it('returns a non-empty 
array', async () => { + const monitors = await calc.executeScript('windows: getMonitors', []) as any[]; + expect(Array.isArray(monitors)).toBe(true); + expect(monitors.length).toBeGreaterThanOrEqual(1); + }); + + it('each monitor has required numeric index and non-empty deviceName', async () => { + const monitors = await calc.executeScript('windows: getMonitors', []) as any[]; + for (const monitor of monitors) { + expect(typeof monitor.index).toBe('number'); + expect(typeof monitor.deviceName).toBe('string'); + expect(monitor.deviceName.length).toBeGreaterThan(0); + } + }); + + it('each monitor has a boolean primary field', async () => { + const monitors = await calc.executeScript('windows: getMonitors', []) as any[]; + for (const monitor of monitors) { + expect(typeof monitor.primary).toBe('boolean'); + } + }); + + it('exactly one monitor is marked as primary', async () => { + const monitors = await calc.executeScript('windows: getMonitors', []) as any[]; + const primaries = monitors.filter((m: any) => m.primary); + expect(primaries).toHaveLength(1); + }); + + it('each monitor has bounds with positive width and height', async () => { + const monitors = await calc.executeScript('windows: getMonitors', []) as any[]; + for (const monitor of monitors) { + expect(typeof monitor.bounds.x).toBe('number'); + expect(typeof monitor.bounds.y).toBe('number'); + expect(monitor.bounds.width).toBeGreaterThan(0); + expect(monitor.bounds.height).toBeGreaterThan(0); + } + }); + + it('each monitor has workingArea with positive width and height', async () => { + const monitors = await calc.executeScript('windows: getMonitors', []) as any[]; + for (const monitor of monitors) { + expect(typeof monitor.workingArea.x).toBe('number'); + expect(typeof monitor.workingArea.y).toBe('number'); + expect(monitor.workingArea.width).toBeGreaterThan(0); + expect(monitor.workingArea.height).toBeGreaterThan(0); + } + }); + + it('workingArea is contained within bounds for each monitor', async () => { 
+ const monitors = await calc.executeScript('windows: getMonitors', []) as any[]; + for (const monitor of monitors) { + expect(monitor.workingArea.x).toBeGreaterThanOrEqual(monitor.bounds.x); + expect(monitor.workingArea.y).toBeGreaterThanOrEqual(monitor.bounds.y); + expect(monitor.workingArea.width).toBeLessThanOrEqual(monitor.bounds.width); + expect(monitor.workingArea.height).toBeLessThanOrEqual(monitor.bounds.height); + } + }); + + it('monitor indices are sequential starting from 0', async () => { + const monitors = await calc.executeScript('windows: getMonitors', []) as any[]; + const indices = monitors.map((m: any) => m.index).sort((a: number, b: number) => a - b); + for (let i = 0; i < indices.length; i++) { + expect(indices[i]).toBe(i); + } + }); + + it('primary monitor bounds origin is at the Windows virtual origin (0, 0)', async () => { + const monitors = await calc.executeScript('windows: getMonitors', []) as any[]; + const primary = monitors.find((m: any) => m.primary); + expect(primary.bounds.x).toBe(0); + expect(primary.bounds.y).toBe(0); + }); + }); + + describe('virtual-screen absolute click regression', () => { + it('clicking Calculator "9" button by absolute screen coordinates shows 9 in display', async () => { + const btn = await calc.$('~num9Button'); + const loc = await btn.getLocation(); + const size = await btn.getSize(); + const windowRect = await calc.getWindowRect(); + + const x = Math.round(windowRect.x + loc.x + size.width / 2); + const y = Math.round(windowRect.y + loc.y + size.height / 2); + + await calc.executeScript('windows: click', [{ x, y }]); + + const display = await calc.$('~CalculatorResults'); + expect(await display.getText()).toContain('9'); + }); + + it('clicking Calculator "5" button by absolute screen coordinates shows 5 in display', async () => { + const btn = await calc.$('~num5Button'); + const loc = await btn.getLocation(); + const size = await btn.getSize(); + const windowRect = await calc.getWindowRect(); + + const 
x = Math.round(windowRect.x + loc.x + size.width / 2); + const y = Math.round(windowRect.y + loc.y + size.height / 2); + + await calc.executeScript('windows: click', [{ x, y }]); + + const display = await calc.$('~CalculatorResults'); + expect(await display.getText()).toContain('5'); + }); + + it('absolute coordinates derived from getMonitors primary bounds contain the Calculator window', async () => { + const monitors = await calc.executeScript('windows: getMonitors', []) as any[]; + const primary = monitors.find((m: any) => m.primary); + const windowRect = await calc.getWindowRect(); + + // Calculator window should fall within primary monitor bounds + // (it was launched without any monitor preference, so it opens on primary) + expect(windowRect.x).toBeGreaterThanOrEqual(primary.bounds.x); + expect(windowRect.y).toBeGreaterThanOrEqual(primary.bounds.y); + expect(windowRect.x + windowRect.width).toBeLessThanOrEqual(primary.bounds.x + primary.bounds.width); + expect(windowRect.y + windowRect.height).toBeLessThanOrEqual(primary.bounds.y + primary.bounds.height); + }); + }); +}); diff --git a/test/e2e/helpers/session.ts b/test/e2e/helpers/session.ts index 9a293f3..168fad1 100644 --- a/test/e2e/helpers/session.ts +++ b/test/e2e/helpers/session.ts @@ -19,7 +19,7 @@ export async function createCalculatorSession(extraCaps?: Record): ...APPIUM_SERVER, capabilities: { platformName: 'Windows', - 'appium:automationName': 'NovaWindows', + 'appium:automationName': 'DesktopDriver', 'appium:app': NOTEPAD_APP_PATH, ...extraCaps, } as Caps, @@ -47,7 +47,7 @@ export async function createTodoSession(extraCaps?: Record): Pr ...APPIUM_SERVER, capabilities: { platformName: 'Windows', - 'appium:automationName': 'NovaWindows', + 'appium:automationName': 'DesktopDriver', 'appium:app': TODO_APP_ID, ...extraCaps, } as Caps, @@ -61,7 +61,7 @@ export async function createRootSession(extraCaps?: Record): Pr ...APPIUM_SERVER, capabilities: { platformName: 'Windows', - 'appium:automationName': 
'NovaWindows', + 'appium:automationName': 'DesktopDriver', 'appium:app': 'Root', ...extraCaps, } as Caps, diff --git a/test/e2e/session.e2e.ts b/test/e2e/session.e2e.ts index 788340f..9304a10 100644 --- a/test/e2e/session.e2e.ts +++ b/test/e2e/session.e2e.ts @@ -1,4 +1,4 @@ -import { existsSync, unlinkSync } from 'node:fs'; +import { existsSync, readFileSync, unlinkSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { beforeEach, describe, expect, it } from 'vitest'; @@ -136,6 +136,23 @@ describe('Session creation and capabilities', () => { if (existsSync(markerPath)) { unlinkSync(markerPath); } }); + it('passes appEnvironment variables into the PowerShell session', async () => { + const markerPath = join(tmpdir(), `novawindows-session-env-${Date.now()}.txt`); + const driver = await createRootSession({ + 'appium:appEnvironment': { NOVA_TEST_VAR: 'hello_from_env' }, + 'appium:prerun': { + // The path sits inside a PowerShell single-quoted literal; double any apostrophes (e.g. a profile named O'Brien) so the literal is not terminated early. + script: `[System.IO.File]::WriteAllText('${markerPath.replace(/'/g, "''")}', $env:NOVA_TEST_VAR)`, + }, + }); + try { + expect(readFileSync(markerPath, 'utf8')).toBe('hello_from_env'); + } finally { + await quitSession(driver); + if (existsSync(markerPath)) { unlinkSync(markerPath); } + } + }); + it('throws when an unknown automationName is specified', async () => { const { remote } = await import('webdriverio'); await expect( diff --git a/test/e2e/window.e2e.ts b/test/e2e/window.e2e.ts index d80891e..9956d03 100644 --- a/test/e2e/window.e2e.ts +++ b/test/e2e/window.e2e.ts @@ -1,19 +1,23 @@ import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import type { Browser } from 'webdriverio'; -import { createCalculatorSession, createRootSession, quitSession } from './helpers/session.js'; +import { closeAllTestApps, createCalculatorSession, createRootSession, quitSession } from './helpers/session.js'; describe('Window and app management commands', () => { let calc: Browser; + let calcAllHandles: Browser; let root: Browser; beforeAll(async () => {
calc = await createCalculatorSession(); + calcAllHandles = await createCalculatorSession({ 'appium:returnAllWindowHandles': true }); root = await createRootSession(); }); afterAll(async () => { await quitSession(calc); + await quitSession(calcAllHandles); await quitSession(root); + closeAllTestApps(); }); describe('getWindowHandle', () => { @@ -30,13 +34,39 @@ describe('Window and app management commands', () => { }); describe('getWindowHandles', () => { - it('returns an array from the Root session', async () => { - const handles = await root.getWindowHandles(); - expect(Array.isArray(handles)).toBe(true); + it('(app session, default) returns only the app windows — not all desktop windows', async () => { + const appHandles = await calc.getWindowHandles(); + expect(appHandles.length).toBeGreaterThanOrEqual(1); + }); + + it('(app session, default) includes the current window handle', async () => { + const current = await calc.getWindowHandle(); + const handles = await calc.getWindowHandles(); + expect(handles).toContain(current); + }); + + it('(app session, default) all returned handles match the 0x hex format', async () => { + const handles = await calc.getWindowHandles(); + for (const h of handles) { + expect(h).toMatch(/^0x[0-9a-fA-F]{8}$/); + } }); - it('returns at least one window handle from the desktop', async () => { + it('(returnAllWindowHandles=true) returns all desktop windows, same count as root session', async () => { + const appAllHandles = await calcAllHandles.getWindowHandles(); + const rootHandles = await root.getWindowHandles(); + expect(appAllHandles.length).toBe(rootHandles.length); + }); + + it('(returnAllWindowHandles=true) includes the current app window handle', async () => { + const current = await calc.getWindowHandle(); + const appAllHandles = await calcAllHandles.getWindowHandles(); + expect(appAllHandles).toContain(current); + }); + + it('(root session) returns an array of at least one window handle', async () => { const handles = await 
root.getWindowHandles(); + expect(Array.isArray(handles)).toBe(true); expect(handles.length).toBeGreaterThanOrEqual(1); }); }); diff --git a/test/fixtures/driver.ts b/test/fixtures/driver.ts index 3aa19bd..1eb2be7 100644 --- a/test/fixtures/driver.ts +++ b/test/fixtures/driver.ts @@ -8,6 +8,8 @@ export interface MockDriver { sendPowerShellCommand: ReturnType<typeof vi.fn>; log: { debug: ReturnType<typeof vi.fn>; info?: ReturnType<typeof vi.fn> }; assertFeatureEnabled: ReturnType<typeof vi.fn>; + appProcessIds: number[]; + caps: Record<string, unknown>; } export function createMockDriver(overrides?: Partial<MockDriver>): MockDriver { @@ -18,6 +20,8 @@ export function createMockDriver(overrides?: Partial<MockDriver>): MockDriver { sendPowerShellCommand, log, assertFeatureEnabled, + appProcessIds: [], + caps: {}, ...overrides, }; return driver; diff --git a/test/mcp/fixtures/server.ts b/test/mcp/fixtures/server.ts new file mode 100644 index 0000000..3752366 --- /dev/null +++ b/test/mcp/fixtures/server.ts @@ -0,0 +1,17 @@ +import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; + +/** Minimal McpServer stand-in: registerTool() stores each handler by tool name; call() invokes the stored handler and throws if the name was never registered. */ +export function createMockServer() { + const handlers = new Map<string, (params: unknown) => Promise<unknown>>(); + const server = { + registerTool: (_name: string, _config: unknown, handler: (params: unknown) => Promise<unknown>) => { + handlers.set(_name, handler); + }, + call: (name: string, params: unknown = {}) => { + const handler = handlers.get(name); + if (!handler) { throw new Error(`Tool not registered: ${name}`); } + return handler(params); + }, + }; + return server as unknown as McpServer & typeof server; +} diff --git a/test/mcp/fixtures/session.ts b/test/mcp/fixtures/session.ts new file mode 100644 index 0000000..18cb1f9 --- /dev/null +++ b/test/mcp/fixtures/session.ts @@ -0,0 +1,58 @@ +import { vi } from 'vitest'; +import type { AppiumSession } from '../../../lib/mcp/session.js'; + +const ELEMENT_KEY = 'element-6066-11e4-a52e-4f735466cecf'; + +export function createMockChildElement() { + return { + elementId: 'child-element-id', + isExisting: vi.fn().mockResolvedValue(true), + }; +} + +export function
createMockElement() { + const child = createMockChildElement(); + return { + elementId: 'mock-element-id', + click: vi.fn().mockResolvedValue(undefined), + setValue: vi.fn().mockResolvedValue(undefined), + clearValue: vi.fn().mockResolvedValue(undefined), + getText: vi.fn().mockResolvedValue('mock text'), + getAttribute: vi.fn().mockResolvedValue('mock-value'), + isDisplayed: vi.fn().mockResolvedValue(true), + isEnabled: vi.fn().mockResolvedValue(true), + isExisting: vi.fn().mockResolvedValue(true), + $: vi.fn().mockResolvedValue(child), + _child: child, + }; +} + +export function createMockBrowser() { + const el = createMockElement(); + const browser = { + $: vi.fn().mockResolvedValue(el), + $$: vi.fn().mockResolvedValue([el]), + findElement: vi.fn().mockResolvedValue({ [ELEMENT_KEY]: 'mock-element-id' }), + findElements: vi.fn().mockResolvedValue([{ [ELEMENT_KEY]: 'mock-element-id' }]), + findElementFromElement: vi.fn().mockResolvedValue({ [ELEMENT_KEY]: 'child-element-id' }), + executeScript: vi.fn().mockResolvedValue(undefined), + takeScreenshot: vi.fn().mockResolvedValue('base64screenshot'), + getPageSource: vi.fn().mockResolvedValue(''), + getWindowRect: vi.fn().mockResolvedValue({ x: 0, y: 0, width: 1024, height: 768 }), + getWindowHandles: vi.fn().mockResolvedValue(['handle-1', 'handle-2']), + switchToWindow: vi.fn().mockResolvedValue(undefined), + _element: el, + }; + return browser; +} + +export function createMockSession() { + const mockBrowser = createMockBrowser(); + const session = { + getDriver: vi.fn().mockReturnValue(mockBrowser), + isActive: vi.fn().mockReturnValue(true), + create: vi.fn().mockResolvedValue(undefined), + delete: vi.fn().mockResolvedValue(undefined), + } as unknown as AppiumSession; + return { session, mockBrowser, mockElement: mockBrowser._element }; +} diff --git a/test/mcp/tools/advanced.test.ts b/test/mcp/tools/advanced.test.ts new file mode 100644 index 0000000..831a6f0 --- /dev/null +++ b/test/mcp/tools/advanced.test.ts @@ 
-0,0 +1,133 @@ +import { describe, it, expect, vi } from 'vitest'; +import { registerAdvancedTools } from '../../../lib/mcp/tools/advanced.js'; +import { createMockServer } from '../fixtures/server.js'; +import { createMockSession } from '../fixtures/session.js'; + +describe('advanced tools', () => { + describe('advanced_click', () => { + it('calls driver.executeScript("windows: click") with args and returns "clicked"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerAdvancedTools(server, session); + + const params = { elementId: 'el-1', button: 'right', modifierKeys: ['ctrl'], durationMs: 0, times: 1, interClickDelayMs: 100 }; + const result = await server.call('advanced_click', params) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: click', [params]); + expect(result.content[0].text).toBe('clicked'); + expect(result.isError).toBeUndefined(); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('click failed')); + registerAdvancedTools(server, session); + + const result = await server.call('advanced_click', { button: 'left', modifierKeys: [], durationMs: 0, times: 1, interClickDelayMs: 100 }) as any; + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('click failed'); + }); + }); + + describe('send_keys', () => { + it('calls driver.executeScript("windows: keys") with args and returns "keys sent"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerAdvancedTools(server, session); + + const params = { actions: [{ text: 'Hello' }, { virtualKeyCode: 13 }], forceUnicode: false }; + const result = await server.call('send_keys', params) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: keys', 
[params]); + expect(result.content[0].text).toBe('keys sent'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('keys failed')); + registerAdvancedTools(server, session); + + const result = await server.call('send_keys', { actions: [], forceUnicode: false }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('hover', () => { + it('calls driver.executeScript("windows: hover") with args and returns "hovered"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerAdvancedTools(server, session); + + const params = { endX: 100, endY: 200, modifierKeys: [], durationMs: 500 }; + const result = await server.call('hover', params) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: hover', [params]); + expect(result.content[0].text).toBe('hovered'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('hover failed')); + registerAdvancedTools(server, session); + + const result = await server.call('hover', { modifierKeys: [], durationMs: 500 }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('scroll', () => { + it('calls driver.executeScript("windows: scroll") with args and returns "scrolled"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerAdvancedTools(server, session); + + const params = { elementId: 'el-1', deltaX: 0, deltaY: 3, modifierKeys: [] }; + const result = await server.call('scroll', params) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: scroll', [params]); + expect(result.content[0].text).toBe('scrolled'); + 
}); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('scroll failed')); + registerAdvancedTools(server, session); + + const result = await server.call('scroll', { deltaX: 0, deltaY: 0, modifierKeys: [] }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('click_and_drag', () => { + it('calls driver.executeScript("windows: clickAndDrag") with args and returns "drag completed"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerAdvancedTools(server, session); + + const params = { startX: 10, startY: 10, endX: 100, endY: 100, modifierKeys: [], durationMs: 500, button: 'left' }; + const result = await server.call('click_and_drag', params) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: clickAndDrag', [params]); + expect(result.content[0].text).toBe('drag completed'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('drag failed')); + registerAdvancedTools(server, session); + + const result = await server.call('click_and_drag', { modifierKeys: [], durationMs: 500, button: 'left' }) as any; + + expect(result.isError).toBe(true); + }); + }); +}); diff --git a/test/mcp/tools/app.test.ts b/test/mcp/tools/app.test.ts new file mode 100644 index 0000000..38c5d36 --- /dev/null +++ b/test/mcp/tools/app.test.ts @@ -0,0 +1,120 @@ +import { describe, it, expect, vi } from 'vitest'; +import { registerAppTools } from '../../../lib/mcp/tools/app.js'; +import { createMockServer } from '../fixtures/server.js'; +import { createMockSession } from '../fixtures/session.js'; + +const ELEMENT_KEY = 'element-6066-11e4-a52e-4f735466cecf'; + +describe('app 
tools', () => { + describe('get_window_element', () => { + it('calls executeScript("windows: getWindowElement") and extracts element ID from result', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockResolvedValue({ [ELEMENT_KEY]: 'window-el-id' }); + registerAppTools(server, session); + + const result = await server.call('get_window_element') as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: getWindowElement', [{}]); + expect(result.content[0].text).toBe('window-el-id'); + expect(result.isError).toBeUndefined(); + }); + + it('falls back to ELEMENT key if element-6066 key is absent', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockResolvedValue({ ELEMENT: 'legacy-el-id' }); + registerAppTools(server, session); + + const result = await server.call('get_window_element') as any; + + expect(result.content[0].text).toBe('legacy-el-id'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('getWindowElement failed')); + registerAppTools(server, session); + + const result = await server.call('get_window_element') as any; + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('getWindowElement failed'); + }); + }); + + describe('launch_app', () => { + it('calls executeScript("windows: launchApp") and returns "app launched"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerAppTools(server, session); + + const result = await server.call('launch_app') as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: launchApp', [{}]); + expect(result.content[0].text).toBe('app launched'); 
+ expect(result.isError).toBeUndefined(); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('launch failed')); + registerAppTools(server, session); + + const result = await server.call('launch_app') as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('close_app', () => { + it('calls executeScript("windows: closeApp") and returns "app closed"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerAppTools(server, session); + + const result = await server.call('close_app') as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: closeApp', [{}]); + expect(result.content[0].text).toBe('app closed'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('close failed')); + registerAppTools(server, session); + + const result = await server.call('close_app') as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('get_device_time', () => { + it('calls executeScript("windows: getDeviceTime") and returns stringified result', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockResolvedValue('2026-03-13T10:00:00'); + registerAppTools(server, session); + + const result = await server.call('get_device_time') as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: getDeviceTime', [{}]); + expect(result.content[0].text).toBe('2026-03-13T10:00:00'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript 
= vi.fn().mockRejectedValue(new Error('time failed')); + registerAppTools(server, session); + + const result = await server.call('get_device_time') as any; + + expect(result.isError).toBe(true); + }); + }); +}); diff --git a/test/mcp/tools/clipboard.test.ts b/test/mcp/tools/clipboard.test.ts new file mode 100644 index 0000000..9545380 --- /dev/null +++ b/test/mcp/tools/clipboard.test.ts @@ -0,0 +1,74 @@ +import { describe, it, expect, vi } from 'vitest'; +import { registerClipboardTools } from '../../../lib/mcp/tools/clipboard.js'; +import { createMockServer } from '../fixtures/server.js'; +import { createMockSession } from '../fixtures/session.js'; + +describe('clipboard tools', () => { + describe('get_clipboard', () => { + it('calls executeScript("windows: getClipboard", [contentType]) and returns stringified result', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockResolvedValue('aGVsbG8='); + registerClipboardTools(server, session); + + const result = await server.call('get_clipboard', { contentType: 'plaintext' }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: getClipboard', [{ contentType: 'plaintext' }]); + expect(result.content[0].text).toBe('aGVsbG8='); + expect(result.isError).toBeUndefined(); + }); + + it('supports image content type', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockResolvedValue('imagebase64data'); + registerClipboardTools(server, session); + + const result = await server.call('get_clipboard', { contentType: 'image' }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: getClipboard', [{ contentType: 'image' }]); + expect(result.content[0].text).toBe('imagebase64data'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, 
mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('clipboard read failed')); + registerClipboardTools(server, session); + + const result = await server.call('get_clipboard', { contentType: 'plaintext' }) as any; + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('clipboard read failed'); + }); + }); + + describe('set_clipboard', () => { + it('calls executeScript("windows: setClipboard", [{b64Content, contentType}]) and returns "clipboard set"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerClipboardTools(server, session); + + const result = await server.call('set_clipboard', { b64Content: 'aGVsbG8=', contentType: 'plaintext' }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith( + 'windows: setClipboard', + [{ b64Content: 'aGVsbG8=', contentType: 'plaintext' }] + ); + expect(result.content[0].text).toBe('clipboard set'); + expect(result.isError).toBeUndefined(); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('clipboard write failed')); + registerClipboardTools(server, session); + + const result = await server.call('set_clipboard', { b64Content: 'abc', contentType: 'plaintext' }) as any; + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('clipboard write failed'); + }); + }); +}); diff --git a/test/mcp/tools/find.test.ts b/test/mcp/tools/find.test.ts new file mode 100644 index 0000000..881d677 --- /dev/null +++ b/test/mcp/tools/find.test.ts @@ -0,0 +1,207 @@ +import { describe, it, expect, vi } from 'vitest'; +import { registerFindTools } from '../../../lib/mcp/tools/find.js'; +import { createMockServer } from '../fixtures/server.js'; +import { createMockSession } from '../fixtures/session.js'; + 
+const ELEMENT_KEY = 'element-6066-11e4-a52e-4f735466cecf'; + +describe('find tools', () => { + describe('find_element', () => { + it('calls driver.findElement() with accessibility id and returns elementId', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.findElement = vi.fn().mockResolvedValue({ [ELEMENT_KEY]: 'found-el-id' }); + registerFindTools(server, session); + + const result = await server.call('find_element', { strategy: 'accessibility id', selector: 'MyButton' }) as any; + + expect(mockBrowser.findElement).toHaveBeenCalledWith('accessibility id', 'MyButton'); + expect(result.content[0].text).toBe('found-el-id'); + expect(result.isError).toBeUndefined(); + }); + + it('maps "id" strategy to "accessibility id"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerFindTools(server, session); + + await server.call('find_element', { strategy: 'id', selector: 'MyId' }); + + expect(mockBrowser.findElement).toHaveBeenCalledWith('accessibility id', 'MyId'); + }); + + it('calls driver.findElement() with xpath strategy directly', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerFindTools(server, session); + + await server.call('find_element', { strategy: 'xpath', selector: '//Button[@Name="OK"]' }); + + expect(mockBrowser.findElement).toHaveBeenCalledWith('xpath', '//Button[@Name="OK"]'); + }); + + it('calls driver.findElement() with name strategy directly (no CSS hack)', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerFindTools(server, session); + + await server.call('find_element', { strategy: 'name', selector: 'Submit' }); + + expect(mockBrowser.findElement).toHaveBeenCalledWith('name', 'Submit'); + }); + + it('calls driver.findElement() with class name strategy directly (no CSS hack)', async 
() => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerFindTools(server, session); + + await server.call('find_element', { strategy: 'class name', selector: 'Button' }); + + expect(mockBrowser.findElement).toHaveBeenCalledWith('class name', 'Button'); + }); + + it('returns isError when driver throws (element not found)', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.findElement = vi.fn().mockRejectedValue(new Error('no such element')); + registerFindTools(server, session); + + const result = await server.call('find_element', { strategy: 'accessibility id', selector: 'Missing' }) as any; + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('no such element'); + }); + }); + + describe('find_elements', () => { + it('calls driver.findElements() and returns JSON array of element IDs', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.findElements = vi.fn().mockResolvedValue([ + { [ELEMENT_KEY]: 'el-1' }, + { [ELEMENT_KEY]: 'el-2' }, + ]); + registerFindTools(server, session); + + const result = await server.call('find_elements', { strategy: 'xpath', selector: '//Button' }) as any; + + expect(mockBrowser.findElements).toHaveBeenCalledWith('xpath', '//Button'); + const ids = JSON.parse(result.content[0].text); + expect(ids).toEqual(['el-1', 'el-2']); + }); + + it('returns empty array when no elements found', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.findElements = vi.fn().mockResolvedValue([]); + registerFindTools(server, session); + + const result = await server.call('find_elements', { strategy: 'xpath', selector: '//Button' }) as any; + + expect(JSON.parse(result.content[0].text)).toEqual([]); + }); + + it('returns isError on failure', async () => { + const server = 
createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.findElements = vi.fn().mockRejectedValue(new Error('find failed')); + registerFindTools(server, session); + + const result = await server.call('find_elements', { strategy: 'xpath', selector: '//Button' }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('find_child_element', () => { + it('calls driver.findElementFromElement() with parent id and strategy', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.findElementFromElement = vi.fn().mockResolvedValue({ [ELEMENT_KEY]: 'child-id' }); + registerFindTools(server, session); + + const result = await server.call('find_child_element', { + parentElementId: 'parent-id', + strategy: 'name', + selector: 'ChildItem', + }) as any; + + expect(mockBrowser.findElementFromElement).toHaveBeenCalledWith('parent-id', 'name', 'ChildItem'); + expect(result.content[0].text).toBe('child-id'); + }); + + it('returns isError when child element not found', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.findElementFromElement = vi.fn().mockRejectedValue(new Error('no such element')); + registerFindTools(server, session); + + const result = await server.call('find_child_element', { + parentElementId: 'p', + strategy: 'accessibility id', + selector: 'Missing', + }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('wait_for_element', () => { + it('returns elementId immediately when element is found on first try', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.findElement = vi.fn().mockResolvedValue({ [ELEMENT_KEY]: 'waited-el-id' }); + registerFindTools(server, session); + + const result = await server.call('wait_for_element', { + strategy: 'accessibility id', + selector: 'MyBtn', + timeoutMs: 
3000, + }) as any; + + expect(mockBrowser.findElement).toHaveBeenCalledWith('accessibility id', 'MyBtn'); + expect(result.content[0].text).toBe('waited-el-id'); + expect(result.isError).toBeUndefined(); + }); + + it('returns isError after timeout when element never appears', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.findElement = vi.fn().mockRejectedValue(new Error('no such element')); + registerFindTools(server, session); + + const result = await server.call('wait_for_element', { + strategy: 'accessibility id', + selector: 'NeverAppears', + timeoutMs: 0, + pollIntervalMs: 50, + }) as any; + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('NeverAppears'); + }); + + it('retries until element appears', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + let attempts = 0; + mockBrowser.findElement = vi.fn().mockImplementation(() => { + attempts++; + if (attempts < 3) { return Promise.reject(new Error('no such element')); } + return Promise.resolve({ [ELEMENT_KEY]: 'late-el-id' }); + }); + registerFindTools(server, session); + + const result = await server.call('wait_for_element', { + strategy: 'accessibility id', + selector: 'LateElement', + timeoutMs: 5000, + pollIntervalMs: 50, + }) as any; + + expect(attempts).toBe(3); + expect(result.content[0].text).toBe('late-el-id'); + expect(result.isError).toBeUndefined(); + }); + }); +}); diff --git a/test/mcp/tools/inspect.test.ts b/test/mcp/tools/inspect.test.ts new file mode 100644 index 0000000..f71a36b --- /dev/null +++ b/test/mcp/tools/inspect.test.ts @@ -0,0 +1,76 @@ +import { describe, it, expect, vi } from 'vitest'; +import { registerInspectTools } from '../../../lib/mcp/tools/inspect.js'; +import { createMockServer } from '../fixtures/server.js'; +import { createMockSession } from '../fixtures/session.js'; + +const ELEMENT_KEY = 
'element-6066-11e4-a52e-4f735466cecf'; +const ELEM_ID = 'el-inspect-1'; + +describe('inspect tools', () => { + describe('get_element_info', () => { + it('fetches UIA properties and returns structured JSON', async () => { + const server = createMockServer(); + const { session, mockBrowser, mockElement } = createMockSession(); + mockElement.getAttribute = vi.fn() + .mockResolvedValueOnce('OK') // Name + .mockResolvedValueOnce('OkButton') // AutomationId + .mockResolvedValueOnce('Button') // ClassName + .mockResolvedValueOnce('ControlType.Button') // ControlType + .mockResolvedValueOnce('True'); // IsEnabled + registerInspectTools(server, session); + + const result = await server.call('get_element_info', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.$).toHaveBeenCalledWith({ [ELEMENT_KEY]: ELEM_ID }); + expect(mockElement.getAttribute).toHaveBeenCalledWith('Name'); + expect(mockElement.getAttribute).toHaveBeenCalledWith('AutomationId'); + expect(mockElement.getAttribute).toHaveBeenCalledWith('ClassName'); + expect(mockElement.getAttribute).toHaveBeenCalledWith('ControlType'); + expect(mockElement.getAttribute).toHaveBeenCalledWith('IsEnabled'); + + const info = JSON.parse(result.content[0].text); + expect(info.elementId).toBe(ELEM_ID); + expect(info.name).toBe('OK'); + expect(info.automationId).toBe('OkButton'); + expect(info.className).toBe('Button'); + expect(info.controlType).toBe('ControlType.Button'); + expect(info.isEnabled).toBe('True'); + expect(Array.isArray(info.suggestedSelectors)).toBe(true); + expect(result.isError).toBeUndefined(); + }); + + it('includes accessibility id selector when AutomationId is present', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.getAttribute = vi.fn() + .mockResolvedValueOnce('Submit') + .mockResolvedValueOnce('SubmitBtn') + .mockResolvedValueOnce(null) + .mockResolvedValueOnce('ControlType.Button') + .mockResolvedValueOnce('True'); + 
registerInspectTools(server, session); + + const result = await server.call('get_element_info', { elementId: ELEM_ID }) as any; + const info = JSON.parse(result.content[0].text); + + const accessibilityIdSuggestion = info.suggestedSelectors.find( + (s: any) => s.strategy === 'accessibility id' + ); + expect(accessibilityIdSuggestion).toBeDefined(); + expect(accessibilityIdSuggestion.selector).toBe('SubmitBtn'); + expect(accessibilityIdSuggestion.reliability).toBe('high'); + }); + + it('returns isError when driver throws', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.$ = vi.fn().mockRejectedValue(new Error('No such element')); + registerInspectTools(server, session); + + const result = await server.call('get_element_info', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('No such element'); + }); + }); +}); diff --git a/test/mcp/tools/interact.test.ts b/test/mcp/tools/interact.test.ts new file mode 100644 index 0000000..ed95be4 --- /dev/null +++ b/test/mcp/tools/interact.test.ts @@ -0,0 +1,186 @@ +import { describe, it, expect, vi } from 'vitest'; +import { registerInteractTools } from '../../../lib/mcp/tools/interact.js'; +import { createMockServer } from '../fixtures/server.js'; +import { createMockSession } from '../fixtures/session.js'; + +const ELEMENT_KEY = 'element-6066-11e4-a52e-4f735466cecf'; +const ELEM_ID = 'abc-123'; + +describe('interact tools', () => { + describe('click_element', () => { + it('calls driver.$({elementKey: id}).click() and returns "clicked"', async () => { + const server = createMockServer(); + const { session, mockBrowser, mockElement } = createMockSession(); + registerInteractTools(server, session); + + const result = await server.call('click_element', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.$).toHaveBeenCalledWith({ [ELEMENT_KEY]: ELEM_ID }); + 
expect(mockElement.click).toHaveBeenCalled(); + expect(result.content[0].text).toBe('clicked'); + expect(result.isError).toBeUndefined(); + }); + + it('returns isError when click throws', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.click = vi.fn().mockRejectedValue(new Error('Element stale')); + registerInteractTools(server, session); + + const result = await server.call('click_element', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Element stale'); + }); + }); + + describe('set_value', () => { + it('calls el.setValue() with the provided value', async () => { + const server = createMockServer(); + const { session, mockBrowser, mockElement } = createMockSession(); + registerInteractTools(server, session); + + const result = await server.call('set_value', { elementId: ELEM_ID, value: 'hello world' }) as any; + + expect(mockBrowser.$).toHaveBeenCalledWith({ [ELEMENT_KEY]: ELEM_ID }); + expect(mockElement.setValue).toHaveBeenCalledWith('hello world'); + expect(result.content[0].text).toBe('value set'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.setValue = vi.fn().mockRejectedValue(new Error('setValue failed')); + registerInteractTools(server, session); + + const result = await server.call('set_value', { elementId: ELEM_ID, value: 'x' }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('clear_element', () => { + it('calls el.clearValue() and returns "cleared"', async () => { + const server = createMockServer(); + const { session, mockBrowser, mockElement } = createMockSession(); + registerInteractTools(server, session); + + const result = await server.call('clear_element', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.$).toHaveBeenCalledWith({ [ELEMENT_KEY]: ELEM_ID }); + 
expect(mockElement.clearValue).toHaveBeenCalled(); + expect(result.content[0].text).toBe('cleared'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.clearValue = vi.fn().mockRejectedValue(new Error('clearValue failed')); + registerInteractTools(server, session); + + const result = await server.call('clear_element', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('get_text', () => { + it('calls el.getText() and returns the text', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.getText = vi.fn().mockResolvedValue('Hello World'); + registerInteractTools(server, session); + + const result = await server.call('get_text', { elementId: ELEM_ID }) as any; + + expect(mockElement.getText).toHaveBeenCalled(); + expect(result.content[0].text).toBe('Hello World'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.getText = vi.fn().mockRejectedValue(new Error('getText failed')); + registerInteractTools(server, session); + + const result = await server.call('get_text', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('get_attribute', () => { + it('calls el.getAttribute() with the attribute name and returns the value', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.getAttribute = vi.fn().mockResolvedValue('MyButton'); + registerInteractTools(server, session); + + const result = await server.call('get_attribute', { elementId: ELEM_ID, attribute: 'Name' }) as any; + + expect(mockElement.getAttribute).toHaveBeenCalledWith('Name'); + expect(result.content[0].text).toBe('MyButton'); + }); + + it('returns empty string when 
attribute value is null', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.getAttribute = vi.fn().mockResolvedValue(null); + registerInteractTools(server, session); + + const result = await server.call('get_attribute', { elementId: ELEM_ID, attribute: 'AutomationId' }) as any; + + expect(result.content[0].text).toBe(''); + }); + }); + + describe('is_element_displayed', () => { + it('calls el.isDisplayed() and returns "true"', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.isDisplayed = vi.fn().mockResolvedValue(true); + registerInteractTools(server, session); + + const result = await server.call('is_element_displayed', { elementId: ELEM_ID }) as any; + + expect(mockElement.isDisplayed).toHaveBeenCalled(); + expect(result.content[0].text).toBe('true'); + }); + + it('returns "false" when not displayed', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.isDisplayed = vi.fn().mockResolvedValue(false); + registerInteractTools(server, session); + + const result = await server.call('is_element_displayed', { elementId: ELEM_ID }) as any; + + expect(result.content[0].text).toBe('false'); + }); + }); + + describe('is_element_enabled', () => { + it('calls el.isEnabled() and returns "true"', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.isEnabled = vi.fn().mockResolvedValue(true); + registerInteractTools(server, session); + + const result = await server.call('is_element_enabled', { elementId: ELEM_ID }) as any; + + expect(mockElement.isEnabled).toHaveBeenCalled(); + expect(result.content[0].text).toBe('true'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockElement } = createMockSession(); + mockElement.isEnabled = 
vi.fn().mockRejectedValue(new Error('isEnabled failed')); + registerInteractTools(server, session); + + const result = await server.call('is_element_enabled', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + }); + }); +}); diff --git a/test/mcp/tools/patterns.test.ts b/test/mcp/tools/patterns.test.ts new file mode 100644 index 0000000..72475da --- /dev/null +++ b/test/mcp/tools/patterns.test.ts @@ -0,0 +1,194 @@ +import { describe, it, expect, vi } from 'vitest'; +import { registerPatternTools } from '../../../lib/mcp/tools/patterns.js'; +import { createMockServer } from '../fixtures/server.js'; +import { createMockSession } from '../fixtures/session.js'; + +const ELEM_ID = 'pattern-el-1'; + +describe('pattern tools', () => { + describe('invoke_element', () => { + it('calls driver.executeScript("windows: invoke", [{elementId}]) and returns "invoked"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerPatternTools(server, session); + + const result = await server.call('invoke_element', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: invoke', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('invoked'); + expect(result.isError).toBeUndefined(); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('invoke failed')); + registerPatternTools(server, session); + + const result = await server.call('invoke_element', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('expand_element', () => { + it('calls driver.executeScript("windows: expand") and returns "expanded"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerPatternTools(server, session); + 
+ const result = await server.call('expand_element', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: expand', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('expanded'); + }); + }); + + describe('collapse_element', () => { + it('calls driver.executeScript("windows: collapse") and returns "collapsed"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerPatternTools(server, session); + + const result = await server.call('collapse_element', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: collapse', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('collapsed'); + }); + }); + + describe('toggle_element', () => { + it('calls driver.executeScript("windows: toggle") and returns "toggled"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerPatternTools(server, session); + + const result = await server.call('toggle_element', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: toggle', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('toggled'); + }); + }); + + describe('set_element_value', () => { + it('calls driver.executeScript("windows: setValue", [{elementId, value}]) and returns "value set"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerPatternTools(server, session); + + const result = await server.call('set_element_value', { elementId: ELEM_ID, value: '42' }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: setValue', [{ elementId: ELEM_ID, value: '42' }]); + expect(result.content[0].text).toBe('value set'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = 
createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('setValue failed')); + registerPatternTools(server, session); + + const result = await server.call('set_element_value', { elementId: ELEM_ID, value: '1' }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('get_element_value', () => { + it('calls driver.executeScript("windows: getValue") and returns stringified result', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockResolvedValue(75); + registerPatternTools(server, session); + + const result = await server.call('get_element_value', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: getValue', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('75'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('getValue failed')); + registerPatternTools(server, session); + + const result = await server.call('get_element_value', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('get_toggle_state', () => { + it('calls driver.executeScript("windows: getToggleState") and returns state string', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockResolvedValue('On'); + registerPatternTools(server, session); + + const result = await server.call('get_toggle_state', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: getToggleState', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('On'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, 
mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('getToggleState failed')); + registerPatternTools(server, session); + + const result = await server.call('get_toggle_state', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('focus_element', () => { + it('calls driver.executeScript("windows: setFocus") and returns "focused"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerPatternTools(server, session); + + const result = await server.call('focus_element', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: setFocus', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('focused'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('setFocus failed')); + registerPatternTools(server, session); + + const result = await server.call('focus_element', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('select_item', () => { + it('calls driver.executeScript("windows: select") and returns "selected"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerPatternTools(server, session); + + const result = await server.call('select_item', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: select', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('selected'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('select failed')); + registerPatternTools(server, 
session); + + const result = await server.call('select_item', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + }); + }); +}); diff --git a/test/mcp/tools/session.test.ts b/test/mcp/tools/session.test.ts new file mode 100644 index 0000000..fbff336 --- /dev/null +++ b/test/mcp/tools/session.test.ts @@ -0,0 +1,82 @@ +import { describe, it, expect, vi } from 'vitest'; +import { registerSessionTools } from '../../../lib/mcp/tools/session.js'; +import { createMockServer } from '../fixtures/server.js'; +import { createMockSession } from '../fixtures/session.js'; + +describe('session tools', () => { + describe('create_session', () => { + it('calls session.create() with params and returns success message', async () => { + const server = createMockServer(); + const { session } = createMockSession(); + registerSessionTools(server, session); + + const result = await server.call('create_session', { app: 'notepad.exe' }) as any; + + expect(session.create).toHaveBeenCalledWith({ app: 'notepad.exe' }); + expect(result.content[0].text).toContain('notepad.exe'); + expect(result.isError).toBeUndefined(); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session } = createMockSession(); + vi.mocked(session.create).mockRejectedValue(new Error('Connection refused')); + registerSessionTools(server, session); + + const result = await server.call('create_session', { app: 'notepad.exe' }) as any; + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Connection refused'); + }); + }); + + describe('delete_session', () => { + it('calls session.delete() when session is active', async () => { + const server = createMockServer(); + const { session } = createMockSession(); + registerSessionTools(server, session); + + const result = await server.call('delete_session') as any; + + expect(session.isActive).toHaveBeenCalled(); + expect(session.delete).toHaveBeenCalled(); + 
expect(result.content[0].text).toContain('deleted'); + expect(result.isError).toBeUndefined(); + }); + + it('returns no-op message when no session is active', async () => { + const server = createMockServer(); + const { session } = createMockSession(); + vi.mocked(session.isActive).mockReturnValue(false); + registerSessionTools(server, session); + + const result = await server.call('delete_session') as any; + + expect(session.delete).not.toHaveBeenCalled(); + expect(result.content[0].text).toContain('No active session'); + }); + }); + + describe('get_session_status', () => { + it('returns active message when session is active', async () => { + const server = createMockServer(); + const { session } = createMockSession(); + registerSessionTools(server, session); + + const result = await server.call('get_session_status') as any; + + expect(result.content[0].text).toContain('active'); + }); + + it('returns inactive message when no session', async () => { + const server = createMockServer(); + const { session } = createMockSession(); + vi.mocked(session.isActive).mockReturnValue(false); + registerSessionTools(server, session); + + const result = await server.call('get_session_status') as any; + + expect(result.content[0].text).toContain('No active session'); + }); + }); +}); diff --git a/test/mcp/tools/window.test.ts b/test/mcp/tools/window.test.ts new file mode 100644 index 0000000..e7850b4 --- /dev/null +++ b/test/mcp/tools/window.test.ts @@ -0,0 +1,201 @@ +import { describe, it, expect, vi } from 'vitest'; +import { registerWindowTools } from '../../../lib/mcp/tools/window.js'; +import { createMockServer } from '../fixtures/server.js'; +import { createMockSession } from '../fixtures/session.js'; + +describe('window tools', () => { + describe('take_screenshot', () => { + it('calls driver.takeScreenshot() and returns image content', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.takeScreenshot = 
vi.fn().mockResolvedValue('base64encodedpng'); + registerWindowTools(server, session); + + const result = await server.call('take_screenshot') as any; + + expect(mockBrowser.takeScreenshot).toHaveBeenCalled(); + expect(result.content[0].type).toBe('image'); + expect(result.content[0].data).toBe('base64encodedpng'); + expect(result.content[0].mimeType).toBe('image/png'); + expect(result.isError).toBeUndefined(); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.takeScreenshot = vi.fn().mockRejectedValue(new Error('screenshot failed')); + registerWindowTools(server, session); + + const result = await server.call('take_screenshot') as any; + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('screenshot failed'); + }); + }); + + describe('get_page_source', () => { + it('calls driver.getPageSource() and returns XML string', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.getPageSource = vi.fn().mockResolvedValue(''); + registerWindowTools(server, session); + + const result = await server.call('get_page_source') as any; + + expect(mockBrowser.getPageSource).toHaveBeenCalled(); + expect(result.content[0].text).toContain(' { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.getPageSource = vi.fn().mockRejectedValue(new Error('page source failed')); + registerWindowTools(server, session); + + const result = await server.call('get_page_source') as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('get_window_rect', () => { + it('calls driver.getWindowRect() and returns JSON', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + const rect = { x: 10, y: 20, width: 800, height: 600 }; + mockBrowser.getWindowRect = 
vi.fn().mockResolvedValue(rect); + registerWindowTools(server, session); + + const result = await server.call('get_window_rect') as any; + + expect(mockBrowser.getWindowRect).toHaveBeenCalled(); + expect(JSON.parse(result.content[0].text)).toEqual(rect); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.getWindowRect = vi.fn().mockRejectedValue(new Error('rect failed')); + registerWindowTools(server, session); + + const result = await server.call('get_window_rect') as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('get_window_handles', () => { + it('calls driver.getWindowHandles() and returns JSON array', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.getWindowHandles = vi.fn().mockResolvedValue(['h1', 'h2', 'h3']); + registerWindowTools(server, session); + + const result = await server.call('get_window_handles') as any; + + expect(mockBrowser.getWindowHandles).toHaveBeenCalled(); + expect(JSON.parse(result.content[0].text)).toEqual(['h1', 'h2', 'h3']); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.getWindowHandles = vi.fn().mockRejectedValue(new Error('handles failed')); + registerWindowTools(server, session); + + const result = await server.call('get_window_handles') as any; + + expect(result.isError).toBe(true); + }); + }); + + describe('switch_to_window', () => { + it('calls driver.switchToWindow() with the handle', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerWindowTools(server, session); + + const result = await server.call('switch_to_window', { handle: 'h2' }) as any; + + expect(mockBrowser.switchToWindow).toHaveBeenCalledWith('h2'); + 
expect(result.content[0].text).toContain('h2'); + expect(result.isError).toBeUndefined(); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.switchToWindow = vi.fn().mockRejectedValue(new Error('switch failed')); + registerWindowTools(server, session); + + const result = await server.call('switch_to_window', { handle: 'bad' }) as any; + + expect(result.isError).toBe(true); + }); + }); + + const ELEM_ID = 'win-el-1'; + + describe('maximize_window', () => { + it('calls driver.executeScript("windows: maximize") and returns "maximized"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerWindowTools(server, session); + + const result = await server.call('maximize_window', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: maximize', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('maximized'); + }); + }); + + describe('minimize_window', () => { + it('calls driver.executeScript("windows: minimize") and returns "minimized"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerWindowTools(server, session); + + const result = await server.call('minimize_window', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: minimize', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('minimized'); + }); + }); + + describe('restore_window', () => { + it('calls driver.executeScript("windows: restore") and returns "restored"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerWindowTools(server, session); + + const result = await server.call('restore_window', { elementId: ELEM_ID }) as any; + + 
expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: restore', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('restored'); + }); + }); + + describe('close_window', () => { + it('calls driver.executeScript("windows: close") and returns "closed"', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + registerWindowTools(server, session); + + const result = await server.call('close_window', { elementId: ELEM_ID }) as any; + + expect(mockBrowser.executeScript).toHaveBeenCalledWith('windows: close', [{ elementId: ELEM_ID }]); + expect(result.content[0].text).toBe('closed'); + }); + + it('returns isError on failure', async () => { + const server = createMockServer(); + const { session, mockBrowser } = createMockSession(); + mockBrowser.executeScript = vi.fn().mockRejectedValue(new Error('close failed')); + registerWindowTools(server, session); + + const result = await server.call('close_window', { elementId: ELEM_ID }) as any; + + expect(result.isError).toBe(true); + }); + }); +}); diff --git a/test/util.test.ts b/test/util.test.ts index 758adef..077eca4 100644 --- a/test/util.test.ts +++ b/test/util.test.ts @@ -2,7 +2,34 @@ * Unit tests for lib/util.ts */ import { describe, it, expect } from 'vitest'; -import { assertSupportedEasingFunction, $ } from '../lib/util'; +import { assertIntegerCap, assertSupportedEasingFunction, $ } from '../lib/util'; + +describe('assertIntegerCap', () => { + it('accepts value equal to min', () => { + expect(() => assertIntegerCap('x', 0, 0)).not.toThrow(); + expect(() => assertIntegerCap('x', 1, 1)).not.toThrow(); + }); + + it('accepts value above min', () => { + expect(() => assertIntegerCap('x', 5, 1)).not.toThrow(); + expect(() => assertIntegerCap('x', 100, 0)).not.toThrow(); + }); + + it('throws for value below min', () => { + expect(() => assertIntegerCap('ms:windowSwitchRetries', 0, 1)).toThrow('must be an integer >= 1'); + expect(() => 
assertIntegerCap('ms:windowSwitchInterval', -1, 0)).toThrow('must be an integer >= 0'); + }); + + it('throws for floats', () => { + expect(() => assertIntegerCap('x', 1.5, 1)).toThrow('must be an integer'); + expect(() => assertIntegerCap('x', 0.1, 0)).toThrow('must be an integer'); + }); + + it('includes cap name and received value in error message', () => { + expect(() => assertIntegerCap('ms:windowSwitchRetries', -3, 1)).toThrow("'ms:windowSwitchRetries'"); + expect(() => assertIntegerCap('ms:windowSwitchRetries', -3, 1)).toThrow('got -3'); + }); +}); describe('assertSupportedEasingFunction', () => { it.each(['linear', 'ease', 'ease-in', 'ease-out', 'ease-in-out'])(