Skip to content

Commit 342c37d

Browse files
authored
Add new step to automatically verify if the deployed container app started successfully (#909)
1 parent b2a9b9a commit 342c37d

File tree

14 files changed

+370
-7255
lines changed

14 files changed

+370
-7255
lines changed

package-lock.json

Lines changed: 39 additions & 7234 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -858,12 +858,13 @@
858858
"@azure/arm-resources": "^5.2.0",
859859
"@azure/container-registry": "1.0.0-beta.5",
860860
"@azure/core-rest-pipeline": "1.10.3",
861+
"@azure/monitor-query": "^1.2.0",
861862
"@azure/storage-blob": "^12.4.1",
862863
"@fluentui/react-components": "^9.56.2",
863864
"@fluentui/react-icons": "^2.0.265",
864865
"@microsoft/vscode-azext-azureutils": "^3.3.3",
865866
"@microsoft/vscode-azext-github": "^1.0.2",
866-
"@microsoft/vscode-azext-utils": "^3.2.1",
867+
"@microsoft/vscode-azext-utils": "^3.3.0",
867868
"@microsoft/vscode-azureresources-api": "^2.0.2",
868869
"buffer": "^6.0.3",
869870
"dayjs": "^1.11.3",

src/commands/CommandAttributes.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
6+
import { type ActivityAttributes } from "@microsoft/vscode-azext-utils";
7+
8+
/**
 * Catalog of the activity attributes (description + troubleshooting hints) attached to
 * command activities.
 *
 * Every property read returns a brand-new object. Consumers attach per-run data to the
 * attributes object they are handed (for example `azureResource` or `logs` entries), so
 * handing out one shared static instance would leak that data from one command invocation
 * into the next.
 *
 * Note: continuation lines of the template literals are kept flush-left on purpose so that
 * no source indentation ends up inside the emitted text.
 */
export class CommandAttributes {

    /**
     * Attributes for deploying an existing registry image to a container app.
     * @returns a fresh, independently mutable attributes object.
     */
    static get DeployContainerAppContainerRegistry(): ActivityAttributes {
        return {
            description: `Deploys an existing image from a container registry to a target Azure Container App.
The container registry and image must already be available for pulling.
Supports public images from any registry, and both public and private images from Azure Container Registry (ACR).
For private image deployment from other third party registries, we support deployment through the 'vscode-containers' extension
via the command titled "Container Registries: Deploy Image to Azure Container Apps...".`,
            troubleshooting: [
                `If a container app resource envelope is provided in attributes, do not confuse null secrets as missing container app secrets. This is because secrets are not typically
copied over with the core resource metadata. Any issues with secrets will require inspecting the remote resource directly.`,
            ],
        };
    }

    /**
     * Attributes for deploying a workspace project (Dockerfile based) to a container app.
     * @returns a fresh, independently mutable attributes object.
     */
    static get DeployWorkspaceProjectInternal(): ActivityAttributes {
        return {
            description: `Takes a workspace project with a Dockerfile and deploys it to an Azure Container App.
Automatically creates any required resources (resource group, managed environment, container registry, container app, log analytics workspace).
Supports single repo and monorepo, with deployment settings saved and reused via local VS Code settings (.vscode/settings.json).
Deployment settings are saved under "containerApps.deploymentConfigurations".
Deployment is agnostic to project runtime and language.`,
            troubleshooting: [
                `When ACR build errors are present, try to inspect the Dockerfile and ACR build logs.
When an error is related to the Dockerfile, offer to make direct fixes for the user.`,
                `If a container app resource envelope is provided in attributes, do not confuse empty secrets as missing container app secrets. This is because secrets are not typically
copied over with the core resource metadata. Any issues with secrets will require inspecting the remote resource directly.`,
            ],
        };
    }
}

src/commands/EXECUTE_PRIORITY.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ When creating or updating resources, execute steps should occupy certain priorit
6969

7070
- QuickStartImageConfigureStep: 610
7171
- ContainerAppCreateStep: 620
72-
- ContainerAppUpdateStep: 650
72+
- ContainerAppUpdateStep: 680
73+
- ContainerAppStartVerificationStep: 690
7374

7475
### 6. Ingress
7576

src/commands/createContainerApp/ContainerAppCreateStep.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55

66
import { KnownActiveRevisionsMode, type ContainerAppsAPIClient, type Ingress } from "@azure/arm-appcontainers";
77
import { LocationListStep } from "@microsoft/vscode-azext-azureutils";
8-
import { AzureWizardExecuteStepWithActivityOutput, nonNullProp, nonNullValueAndProp } from "@microsoft/vscode-azext-utils";
8+
import { AzureWizardExecuteStepWithActivityOutput, nonNullProp, nonNullValueAndProp, type AzureWizardExecuteStep } from "@microsoft/vscode-azext-utils";
99
import { type Progress } from "vscode";
10-
import { containerAppsWebProvider } from "../../constants";
10+
import { containerAppsWebProvider, ImageSource } from "../../constants";
1111
import { ContainerAppItem } from "../../tree/ContainerAppItem";
1212
import { createContainerAppsAPIClient } from "../../utils/azureClients";
1313
import { localize } from "../../utils/localize";
14+
import { ContainerAppStartVerificationStep } from "../image/imageSource/ContainerAppStartVerificationStep";
1415
import { getContainerNameForImage } from "../image/imageSource/containerRegistry/getContainerNameForImage";
1516
import { enabledIngressDefaults } from "../ingress/enableIngress/EnableIngressStep";
1617
import { type ContainerAppCreateContext } from "./ContainerAppCreateContext";
@@ -59,4 +60,12 @@ export class ContainerAppCreateStep<T extends ContainerAppCreateContext> extends
5960
/**
 * Decides whether this step needs to run.
 * @returns `true` only while the context does not yet hold a container app.
 */
public shouldExecute(context: T): boolean {
    const hasContainerApp: boolean = Boolean(context.containerApp);
    return !hasContainerApp;
}
63+
64+
/**
 * Returns the additional execute steps contributed by this step.
 * @returns an empty list when the image source is the quickstart image; otherwise a single
 * `ContainerAppStartVerificationStep`.
 */
public addExecuteSteps(context: T): AzureWizardExecuteStep<T>[] {
    const usesQuickstartImage: boolean = context.imageSource === ImageSource.QuickstartImage;
    return usesQuickstartImage ? [] : [new ContainerAppStartVerificationStep()];
}
6271
}

src/commands/deployContainerApp/deployContainerApp.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { getVerifyProvidersStep } from "../../utils/getVerifyProvidersStep";
1414
import { localize } from "../../utils/localize";
1515
import { pickContainerApp } from "../../utils/pickItem/pickContainerApp";
1616
import { OpenConfirmationViewStep } from "../../webviews/OpenConfirmationViewStep";
17+
import { CommandAttributes } from "../CommandAttributes";
1718
import { ContainerAppOverwriteConfirmStep } from "../ContainerAppOverwriteConfirmStep";
1819
import { deployWorkspaceProject } from "../deployWorkspaceProject/deployWorkspaceProject";
1920
import { editContainerCommandName } from "../editContainer/editContainer";
@@ -50,6 +51,7 @@ export async function deployContainerApp(context: IActionContext, node?: Contain
5051
containerApp: item.containerApp,
5152
managedEnvironment: await getManagedEnvironmentFromContainerApp(subscriptionActionContext, item.containerApp),
5253
imageSource,
54+
activityAttributes: CommandAttributes.DeployContainerAppContainerRegistry,
5355
};
5456

5557
if (isAzdExtensionInstalled()) {
@@ -79,6 +81,9 @@ export async function deployContainerApp(context: IActionContext, node?: Contain
7981
await wizard.prompt();
8082
wizardContext.activityTitle = localize('deployContainerAppActivityTitle', 'Deploy image to container app "{0}"', wizardContext.containerApp?.name);
8183
await wizard.execute();
84+
85+
wizardContext.activityAttributes ??= {};
86+
wizardContext.activityAttributes.azureResource = wizardContext.containerApp;
8287
}
8388

8489
async function promptImageSource(context: ISubscriptionActionContext): Promise<ImageSource> {

src/commands/deployWorkspaceProject/internal/deployWorkspaceProjectInternal.ts

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { ext } from "../../../extensionVariables";
1212
import { createActivityContext, prependOrInsertAfterLastInfoChild } from "../../../utils/activityUtils";
1313
import { getVerifyProvidersStep } from "../../../utils/getVerifyProvidersStep";
1414
import { localize } from "../../../utils/localize";
15+
import { CommandAttributes } from "../../CommandAttributes";
1516
import { ContainerAppCreateStep } from "../../createContainerApp/ContainerAppCreateStep";
1617
import { LogAnalyticsCreateStep } from "../../createManagedEnvironment/LogAnalyticsCreateStep";
1718
import { ManagedEnvironmentCreateStep } from "../../createManagedEnvironment/ManagedEnvironmentCreateStep";
@@ -95,18 +96,7 @@ export async function deployWorkspaceProjectInternal(
9596
...context,
9697
...activityContext,
9798
...startingConfiguration,
98-
activityAttributes: {
99-
description: `Takes a workspace project with a Dockerfile and deploys it to an Azure Container App.
100-
Automatically creates any required resources (resource group, managed environment, container registry, container app, log analytics workspace).
101-
Supports single repo and monorepo, with deployment settings saved and reused via local VS Code settings (.vscode/settings.json).
102-
Deployment settings are saved under "containerApps.deploymentConfigurations".
103-
Deployment is agnostic to project runtime and language.`,
104-
troubleshooting: [
105-
`When ACR build errors are present, try to inspect the Dockerfile and ACR build logs.
106-
When an error is related to the Dockerfile, offer to make direct fixes for the user.
107-
If the Dockerfile and project dependencies don't match, focus on updating the Dockerfile to reflect the project, not the other way around.`
108-
],
109-
},
99+
activityAttributes: CommandAttributes.DeployWorkspaceProjectInternal,
110100
};
111101

112102
const promptSteps: AzureWizardPromptStep<DeployWorkspaceProjectInternalContext>[] = [
@@ -271,5 +261,8 @@ export async function deployWorkspaceProjectInternal(
271261

272262
ext.branchDataProvider.refresh();
273263

264+
wizardContext.activityAttributes ??= {};
265+
wizardContext.activityAttributes.azureResource = wizardContext.containerApp;
266+
274267
return wizardContext;
275268
}
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
6+
import { KnownRevisionProvisioningState, KnownRevisionRunningState, type ContainerAppsAPIClient, type Revision } from "@azure/arm-appcontainers";
7+
import { LogsQueryResultStatus, type LogsTable } from "@azure/monitor-query";
8+
import { parseAzureResourceId, uiUtils } from "@microsoft/vscode-azext-azureutils";
9+
import { AzureWizardExecuteStepWithActivityOutput, createSubscriptionContext, maskUserInfo, nonNullValueAndProp, parseError, type IParsedError, type LogActivityAttributes } from "@microsoft/vscode-azext-utils";
10+
import { type Progress } from "vscode";
11+
import { ext } from "../../../extensionVariables";
12+
import { type ContainerAppStartVerificationTelemetryProps } from "../../../telemetry/ContainerAppStartVerificationTelemetryProps";
13+
import { type SetTelemetryProps } from "../../../telemetry/SetTelemetryProps";
14+
import { createContainerAppsAPIClient, createLogsQueryClientPublicCloud } from "../../../utils/azureClients";
15+
import { delayWithExponentialBackoff } from "../../../utils/delay";
16+
import { localize } from "../../../utils/localize";
17+
import { type IngressContext } from "../../ingress/IngressContext";
18+
import { type ImageSourceContext } from "./ImageSourceContext";
19+
20+
type ContainerAppStartVerificationContext = ImageSourceContext & IngressContext & SetTelemetryProps<ContainerAppStartVerificationTelemetryProps>;
21+
22+
/**
 * Verifies that the recently deployed container app did not have any startup issues.
 *
 * Note: Sometimes an image builds and deploys successfully but fails to run.
 * This leads to the Azure Container Apps service silently reverting to the last successful revision.
 *
 * Flow: locate the revision that matches the deployed image, poll it until it leaves its
 * transitional states, and fail the step when the final running state is not `Running`.
 * Before failing, a best-effort attempt is made to attach recent console logs to the
 * activity attributes.
 */
export class ContainerAppStartVerificationStep<T extends ContainerAppStartVerificationContext> extends AzureWizardExecuteStepWithActivityOutput<T> {
    public priority: number = 690;
    public stepName: string = 'containerAppStartVerificationStep';

    // Created lazily on first use and then shared by both polling phases.
    private _client: ContainerAppsAPIClient | undefined;

    protected getOutputLogSuccess = (context: T): string => localize('verifyContainerAppSuccess', 'Verified container app "{0}" deployment started successfully.', context.containerApp?.name);
    // Uses its own localization key; the message is specific to verification, not to the update step.
    protected getOutputLogFail = (context: T): string => localize('verifyContainerAppFail', 'Failed to verify container app "{0}" deployment started successfully.', context.containerApp?.name);
    protected getTreeItemLabel = (): string => localize('verifyContainerAppLabel', 'Verify container app deployment started successfully');

    /**
     * Runs the verification.
     *
     * @param context Wizard context holding the deployed container app, image and telemetry.
     * @param progress Progress reporter used to surface a status message.
     * @throws Error when the revision cannot be found in time, when its status does not settle
     * in time, or when it settles in any state other than `Running`.
     */
    public async execute(context: T, progress: Progress<{ message?: string | undefined; increment?: number | undefined }>): Promise<void> {
        progress.report({ message: localize('verifyingContainerApp', 'Verifying container app startup status...') });
        const containerAppName: string = nonNullValueAndProp(context.containerApp, 'name');

        // Estimated time (n=1): 1s
        const revisionId: string | undefined = await this.waitAndGetRevisionId(context, 1000 * 10 /** maxWaitTimeMs */);
        if (!revisionId) {
            throw new Error(localize('revisionCheckTimeout', 'Status check timed out before retrieving the latest deployed container app revision.'));
        }

        // Estimated time (n=1): 20s
        const revisionStatus: string | undefined = await this.waitAndGetRevisionStatus(context, revisionId, containerAppName, 1000 * 60 /** maxWaitTimeMs */);

        const parsedResource = parseAzureResourceId(revisionId);
        if (!revisionStatus) {
            throw new Error(localize('revisionStatusTimeout', 'Status check timed out for the deployed container app revision "{0}".', parsedResource.resourceName));
        }

        if (revisionStatus === KnownRevisionRunningState.Running) {
            return;
        }

        try {
            context.telemetry.properties.targetCloud = context.environment.name;

            // Try to query and provide any logs to the LLM before throwing
            const addedLogs: boolean = await this.tryAddLogAttributes(context, parsedResource.resourceName);
            // Only report 'true' when logs were really attached (the query may have been skipped).
            context.telemetry.properties.addedContainerAppStartLogs = addedLogs ? 'true' : 'false';
        } catch (error) {
            // Log retrieval is best-effort: record the problem, then still raise the start failure below.
            const perr: IParsedError = parseError(error);
            ext.outputChannel.appendLog(localize('logQueryError', 'Error encountered while trying to verify container app revision logs through log query platform.'));
            ext.outputChannel.appendLog(perr.message);
            context.telemetry.properties.addedContainerAppStartLogs = 'false';
            context.telemetry.properties.getLogsQueryError = maskUserInfo(perr.message, []);
        }

        throw new Error(localize(
            'unexpectedRevisionState',
            'The deployed container app revision "{0}" has failed to start. If you are updating an existing container app, the service will try to revert to the previous working revision. Inspect the application logs to check for any known startup issues.',
            parsedResource.resourceName,
        ));
    }

    /**
     * @returns `true` when the context already holds a container app to verify.
     */
    public shouldExecute(context: T): boolean {
        return !!context.containerApp;
    }

    /** Returns the cached Container Apps client, creating it on first use. */
    private async getClient(context: T): Promise<ContainerAppsAPIClient> {
        this._client ??= await createContainerAppsAPIClient([context, createSubscriptionContext(context.subscription)]);
        return this._client;
    }

    /**
     * Polls the container app's revisions until one matches both the app's latest revision
     * name and the deployed image.
     *
     * @param maxWaitTimeMs Polling stops once this much time has elapsed.
     * @returns the matching revision's resource id, or `undefined` if none was found in time.
     */
    private async waitAndGetRevisionId(context: T, maxWaitTimeMs: number): Promise<string | undefined> {
        const client: ContainerAppsAPIClient = await this.getClient(context);

        const resourceGroupName: string = nonNullValueAndProp(context.containerApp, 'resourceGroup');
        const containerAppName: string = nonNullValueAndProp(context.containerApp, 'name');
        const containerIdx: number = context.containersIdx ?? 0;

        let attempt: number = 1;
        const start: number = Date.now();

        while ((Date.now() - start) <= maxWaitTimeMs) {
            await delayWithExponentialBackoff(attempt, 1000 /** baseDelayMs */, maxWaitTimeMs);
            attempt++;

            const revisions: Revision[] = await uiUtils.listAllIterator(client.containerAppsRevisions.listRevisions(resourceGroupName, containerAppName));
            const revision: Revision | undefined = revisions.find(r =>
                r.name === context.containerApp?.latestRevisionName &&
                // `?.image` guards against revisions that have fewer containers than `containerIdx`
                r.template?.containers?.[containerIdx]?.image === context.image
            );

            if (revision) {
                return revision.id;
            }
        }

        return undefined;
    }

    /**
     * Polls a single revision until it is no longer provisioning, deprovisioning, processing
     * or activating.
     *
     * @param revisionId Full resource id of the revision to poll.
     * @param containerAppName Name of the container app that owns the revision.
     * @param maxWaitTimeMs Polling stops once this much time has elapsed.
     * @returns the revision's running state once settled, or `undefined` on timeout.
     */
    private async waitAndGetRevisionStatus(context: T, revisionId: string, containerAppName: string, maxWaitTimeMs: number): Promise<string | undefined> {
        const client: ContainerAppsAPIClient = await this.getClient(context);
        const parsedRevision = parseAzureResourceId(revisionId);

        let attempt: number = 1;
        const start: number = Date.now();

        while ((Date.now() - start) <= maxWaitTimeMs) {
            await delayWithExponentialBackoff(attempt, 1000 /** baseDelayMs */, maxWaitTimeMs);
            attempt++;

            const revision: Revision = await client.containerAppsRevisions.getRevision(parsedRevision.resourceGroup, containerAppName, parsedRevision.resourceName);

            const isTransitioning: boolean =
                revision.provisioningState === KnownRevisionProvisioningState.Deprovisioning ||
                revision.provisioningState === KnownRevisionProvisioningState.Provisioning ||
                revision.runningState === KnownRevisionRunningState.Processing ||
                revision.runningState === 'Activating'; // For some reason this isn't listed in the known enum

            if (!isTransitioning) {
                return revision.runningState;
            }
        }

        return undefined;
    }

    /**
     * Try to query for any logs associated with the revision and add them to the Copilot activity attributes
     *
     * @param revisionName Revision name; it is validated because it is embedded in the query text.
     * @returns `true` when log lines were appended to `context.activityAttributes.logs`;
     * `false` when the query was skipped (no workspace id) or did not fully succeed.
     * @throws Error when the revision name has an unexpected format or the query returned no rows.
     */
    private async tryAddLogAttributes(context: T, revisionName: string): Promise<boolean> {
        // Basic validation check since we're including a name directly in the query
        if (revisionName.length > 54 || !/^[\w-]+$/.test(revisionName)) {
            const invalidName: string = localize('unexpectedRevisionName', 'Internal warning: Encountered an unexpected revision name format "{0}". Skipping log query for the revision status check.', revisionName);
            ext.outputChannel.appendLog(invalidName);
            throw new Error(invalidName);
        }

        const workspaceId = context.managedEnvironment.appLogsConfiguration?.logAnalyticsConfiguration?.customerId;
        if (!workspaceId) {
            return false;
        }

        const logsQueryClient = await createLogsQueryClientPublicCloud(context);
        const query = `
            ContainerAppConsoleLogs_CL
            | where RevisionName_s == "${revisionName}"
            | project TimeGenerated, Stream_s, Log_s
            | order by TimeGenerated desc
        `;

        const queryResult = await logsQueryClient.queryWorkspace(workspaceId, query, {
            // <= 5 min ago (ISO 8601)
            duration: 'PT5M'
        });

        if (queryResult.status !== LogsQueryResultStatus.Success) {
            return false;
        }

        // A missing first table is handled the same way as a table without rows.
        const table: LogsTable | undefined = queryResult.tables[0];
        if (!table?.rows.length) {
            // Note: Often times we will only be able to find logs when the image source was for `RemoteAcrBuild`
            throw new Error(localize('noQueryLogs', 'No query logs were found for revision "{0}".', revisionName));
        }

        const lines: string[] = [table.columnDescriptors.map(c => c.name ?? '{columnName}').join(',')];
        for (const row of table.rows) {
            if (!Array.isArray(row)) {
                continue;
            }
            lines.push(row.map(r => r instanceof Date ? r.toLocaleString() : String(r)).join(' '));
        }

        const logs: LogActivityAttributes = {
            name: 'Container App Console Logs',
            description: `Container runtime logs for revision "${revisionName}" (<= 5 min ago). When a container app update was unsuccessful, these should be inspected to help identify the root cause.`,
            content: lines.join('\n'),
        };

        context.activityAttributes ??= {};
        context.activityAttributes.logs ??= [];
        context.activityAttributes.logs.push(logs);
        return true;
    }
}

0 commit comments

Comments
 (0)