diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts b/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts index 8dd25323a6..87b719a4f1 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts @@ -1,4 +1,5 @@ import { Octokit } from '@octokit/rest'; +import { RequestError } from '@octokit/request-error'; import moment from 'moment'; import nock from 'nock'; @@ -403,6 +404,121 @@ describe('Scale down runners', () => { expect(mockTerminateRunners).toHaveBeenCalledWith(orphanRunner.instanceId); }); + it('Should handle 404 error when checking orphaned runner (JIT) - treat as orphaned', async () => { + // arrange + const orphanRunner = createRunnerTestData( + 'orphan-jit-404', + type, + MINIMUM_BOOT_TIME + 1, + false, + true, + true, // should be terminated when 404 + undefined, + 1234567890, + ); + const runners = [orphanRunner]; + + mockGitHubRunners([]); + mockAwsRunners(runners); + + // Mock 404 error response + const error404 = new RequestError('Runner not found', 404, { + request: { + method: 'GET', + url: 'https://api.github.com/test', + headers: {}, + }, + }); + + if (type === 'Repo') { + mockOctokit.actions.getSelfHostedRunnerForRepo.mockRejectedValueOnce(error404); + } else { + mockOctokit.actions.getSelfHostedRunnerForOrg.mockRejectedValueOnce(error404); + } + + // act + await scaleDown(); + + // assert - should terminate since 404 means runner doesn't exist on GitHub + expect(mockTerminateRunners).toHaveBeenCalledWith(orphanRunner.instanceId); + }); + + it('Should handle 404 error when checking runner busy state - treat as not busy', async () => { + // arrange + const runner = createRunnerTestData( + 'runner-404', + type, + MINIMUM_TIME_RUNNING_IN_MINUTES + 1, + true, + false, + true, // should be terminated since not busy due to 404 + ); + const runners = [runner]; + + mockGitHubRunners(runners); + mockAwsRunners(runners); + + // Mock 404 error response for busy state check + const error404 = new RequestError('Runner not found', 404, { + request: { + method: 'GET', + url: 'https://api.github.com/test', + headers: {}, + }, + }); + + if (type === 'Repo') { + mockOctokit.actions.getSelfHostedRunnerForRepo.mockRejectedValueOnce(error404); + } else { + mockOctokit.actions.getSelfHostedRunnerForOrg.mockRejectedValueOnce(error404); + } + + // act + await scaleDown(); + + // assert - should terminate since 404 means runner is not busy + checkTerminated(runners); + }); + + it('Should re-throw non-404 errors when checking runner state', async () => { + // arrange + const orphanRunner = createRunnerTestData( + 'orphan-error', + type, + MINIMUM_BOOT_TIME + 1, + false, + true, + false, + undefined, + 1234567890, + ); + const runners = [orphanRunner]; + + mockGitHubRunners([]); + mockAwsRunners(runners); + + // Mock non-404 error response + const error500 = new RequestError('Internal server error', 500, { + request: { + method: 'GET', + url: 'https://api.github.com/test', + headers: {}, + }, + }); + + if (type === 'Repo') { + mockOctokit.actions.getSelfHostedRunnerForRepo.mockRejectedValueOnce(error500); + } else { + mockOctokit.actions.getSelfHostedRunnerForOrg.mockRejectedValueOnce(error500); + } + + // act & assert - should not throw because error handling is in terminateOrphan + await expect(scaleDown()).resolves.not.toThrow(); + + // Should not terminate since the error was not a 404 + expect(terminateRunner).not.toHaveBeenCalledWith(orphanRunner.instanceId); + }); + it(`Should ignore errors when termination orphan fails.`, async () => { // setup const orphanRunner = createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true); diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-down.ts b/lambdas/functions/control-plane/src/scale-runners/scale-down.ts index 8f5cbd42d4..1e5e712a24 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-down.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-down.ts @@ -1,5 +1,6 @@ import { Octokit } from '@octokit/rest'; import { Endpoints } from '@octokit/types'; +import { RequestError } from '@octokit/request-error'; import { createChildLogger } from '@aws-github-runner/aws-powertools-util'; import moment from 'moment'; @@ -55,25 +56,39 @@ async function getGitHubSelfHostedRunnerState( client: Octokit, ec2runner: RunnerInfo, runnerId: number, -): Promise { - const state = - ec2runner.type === 'Org' - ? await client.actions.getSelfHostedRunnerForOrg({ - runner_id: runnerId, - org: ec2runner.owner, - }) - : await client.actions.getSelfHostedRunnerForRepo({ - runner_id: runnerId, - owner: ec2runner.owner.split('/')[0], - repo: ec2runner.owner.split('/')[1], - }); - metricGitHubAppRateLimit(state.headers); - - return state.data; +): Promise { + try { + const state = + ec2runner.type === 'Org' + ? await client.actions.getSelfHostedRunnerForOrg({ + runner_id: runnerId, + org: ec2runner.owner, + }) + : await client.actions.getSelfHostedRunnerForRepo({ + runner_id: runnerId, + owner: ec2runner.owner.split('/')[0], + repo: ec2runner.owner.split('/')[1], + }); + metricGitHubAppRateLimit(state.headers); + + return state.data; + } catch (error) { + if (error instanceof RequestError && error.status === 404) { + logger.info(`Runner '${ec2runner.instanceId}' with GitHub Runner ID '${runnerId}' not found on GitHub (404)`); + return null; + } + throw error; + } } async function getGitHubRunnerBusyState(client: Octokit, ec2runner: RunnerInfo, runnerId: number): Promise { const state = await getGitHubSelfHostedRunnerState(client, ec2runner, runnerId); + if (state === null) { + logger.info( + `Runner '${ec2runner.instanceId}' - GitHub Runner ID '${runnerId}' - Not found on GitHub, treating as not busy`, + ); + return false; + } logger.info(`Runner '${ec2runner.instanceId}' - GitHub Runner ID '${runnerId}' - Busy: ${state.busy}`); return state.busy; } @@ -227,12 +242,18 @@ async function lastChanceCheckOrphanRunner(runner: RunnerList): Promise const ec2Instance = runner as RunnerInfo; const state = await getGitHubSelfHostedRunnerState(client, ec2Instance, runnerId); let isOrphan = false; - logger.debug( - `Runner '${runner.instanceId}' is '${state.status}' and is currently '${state.busy ? 'busy' : 'idle'}'.`, - ); - const isOfflineAndBusy = state.status === 'offline' && state.busy; - if (isOfflineAndBusy) { + + if (state === null) { + logger.debug(`Runner '${runner.instanceId}' not found on GitHub, treating as orphaned.`); isOrphan = true; + } else { + logger.debug( + `Runner '${runner.instanceId}' is '${state.status}' and is currently '${state.busy ? 'busy' : 'idle'}'.`, + ); + const isOfflineAndBusy = state.status === 'offline' && state.busy; + if (isOfflineAndBusy) { + isOrphan = true; + } } logger.info(`Runner '${runner.instanceId}' is judged to ${isOrphan ? 'be' : 'not be'} orphaned.`); return isOrphan;