Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Octokit } from '@octokit/rest';
import { RequestError } from '@octokit/request-error';
import moment from 'moment';
import nock from 'nock';

Expand Down Expand Up @@ -403,6 +404,121 @@ describe('Scale down runners', () => {
expect(mockTerminateRunners).toHaveBeenCalledWith(orphanRunner.instanceId);
});

it('Should handle 404 error when checking orphaned runner (JIT) - treat as orphaned', async () => {
// arrange
const orphanRunner = createRunnerTestData(
'orphan-jit-404',
type,
MINIMUM_BOOT_TIME + 1,
false,
true,
true, // should be terminated when 404
undefined,
1234567890,
);
const runners = [orphanRunner];

mockGitHubRunners([]);
mockAwsRunners(runners);

// Mock 404 error response
const error404 = new RequestError('Runner not found', 404, {
request: {
method: 'GET',
url: 'https://api.github.com/test',
headers: {},
},
});

if (type === 'Repo') {
mockOctokit.actions.getSelfHostedRunnerForRepo.mockRejectedValueOnce(error404);
} else {
mockOctokit.actions.getSelfHostedRunnerForOrg.mockRejectedValueOnce(error404);
}

// act
await scaleDown();

// assert - should terminate since 404 means runner doesn't exist on GitHub
expect(mockTerminateRunners).toHaveBeenCalledWith(orphanRunner.instanceId);
});

it('Should handle 404 error when checking runner busy state - treat as not busy', async () => {
// arrange
const runner = createRunnerTestData(
'runner-404',
type,
MINIMUM_TIME_RUNNING_IN_MINUTES + 1,
true,
false,
true, // should be terminated since not busy due to 404
);
const runners = [runner];

mockGitHubRunners(runners);
mockAwsRunners(runners);

// Mock 404 error response for busy state check
const error404 = new RequestError('Runner not found', 404, {
request: {
method: 'GET',
url: 'https://api.github.com/test',
headers: {},
},
});

if (type === 'Repo') {
mockOctokit.actions.getSelfHostedRunnerForRepo.mockRejectedValueOnce(error404);
} else {
mockOctokit.actions.getSelfHostedRunnerForOrg.mockRejectedValueOnce(error404);
}

// act
await scaleDown();

// assert - should terminate since 404 means runner is not busy
checkTerminated(runners);
});

it('Should re-throw non-404 errors when checking runner state', async () => {
// arrange
const orphanRunner = createRunnerTestData(
'orphan-error',
type,
MINIMUM_BOOT_TIME + 1,
false,
true,
false,
undefined,
1234567890,
);
const runners = [orphanRunner];

mockGitHubRunners([]);
mockAwsRunners(runners);

// Mock non-404 error response
const error500 = new RequestError('Internal server error', 500, {
request: {
method: 'GET',
url: 'https://api.github.com/test',
headers: {},
},
});

if (type === 'Repo') {
mockOctokit.actions.getSelfHostedRunnerForRepo.mockRejectedValueOnce(error500);
} else {
mockOctokit.actions.getSelfHostedRunnerForOrg.mockRejectedValueOnce(error500);
}

// act & assert - should not throw because error handling is in terminateOrphan
await expect(scaleDown()).resolves.not.toThrow();

// Should not terminate since the error was not a 404
expect(terminateRunner).not.toHaveBeenCalledWith(orphanRunner.instanceId);
});

it(`Should ignore errors when termination orphan fails.`, async () => {
// setup
const orphanRunner = createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true);
Expand Down
61 changes: 41 additions & 20 deletions lambdas/functions/control-plane/src/scale-runners/scale-down.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { Octokit } from '@octokit/rest';
import { Endpoints } from '@octokit/types';
import { RequestError } from '@octokit/request-error';
import { createChildLogger } from '@aws-github-runner/aws-powertools-util';
import moment from 'moment';

Expand Down Expand Up @@ -55,25 +56,39 @@ async function getGitHubSelfHostedRunnerState(
client: Octokit,
ec2runner: RunnerInfo,
runnerId: number,
): Promise<RunnerState> {
const state =
ec2runner.type === 'Org'
? await client.actions.getSelfHostedRunnerForOrg({
runner_id: runnerId,
org: ec2runner.owner,
})
: await client.actions.getSelfHostedRunnerForRepo({
runner_id: runnerId,
owner: ec2runner.owner.split('/')[0],
repo: ec2runner.owner.split('/')[1],
});
metricGitHubAppRateLimit(state.headers);

return state.data;
): Promise<RunnerState | null> {
try {
const state =
ec2runner.type === 'Org'
? await client.actions.getSelfHostedRunnerForOrg({
runner_id: runnerId,
org: ec2runner.owner,
})
: await client.actions.getSelfHostedRunnerForRepo({
runner_id: runnerId,
owner: ec2runner.owner.split('/')[0],
repo: ec2runner.owner.split('/')[1],
});
metricGitHubAppRateLimit(state.headers);

return state.data;
} catch (error) {
if (error instanceof RequestError && error.status === 404) {
logger.info(`Runner '${ec2runner.instanceId}' with GitHub Runner ID '${runnerId}' not found on GitHub (404)`);
return null;
}
throw error;
}
}

async function getGitHubRunnerBusyState(client: Octokit, ec2runner: RunnerInfo, runnerId: number): Promise<boolean> {
const state = await getGitHubSelfHostedRunnerState(client, ec2runner, runnerId);
if (state === null) {
logger.info(
`Runner '${ec2runner.instanceId}' - GitHub Runner ID '${runnerId}' - Not found on GitHub, treating as not busy`,
);
return false;
}
logger.info(`Runner '${ec2runner.instanceId}' - GitHub Runner ID '${runnerId}' - Busy: ${state.busy}`);
return state.busy;
}
Expand Down Expand Up @@ -227,12 +242,18 @@ async function lastChanceCheckOrphanRunner(runner: RunnerList): Promise<boolean>
const ec2Instance = runner as RunnerInfo;
const state = await getGitHubSelfHostedRunnerState(client, ec2Instance, runnerId);
let isOrphan = false;
logger.debug(
`Runner '${runner.instanceId}' is '${state.status}' and is currently '${state.busy ? 'busy' : 'idle'}'.`,
);
const isOfflineAndBusy = state.status === 'offline' && state.busy;
if (isOfflineAndBusy) {

if (state === null) {
logger.debug(`Runner '${runner.instanceId}' not found on GitHub, treating as orphaned.`);
isOrphan = true;
} else {
logger.debug(
`Runner '${runner.instanceId}' is '${state.status}' and is currently '${state.busy ? 'busy' : 'idle'}'.`,
);
const isOfflineAndBusy = state.status === 'offline' && state.busy;
if (isOfflineAndBusy) {
isOrphan = true;
}
}
logger.info(`Runner '${runner.instanceId}' is judged to ${isOrphan ? 'be' : 'not be'} orphaned.`);
return isOrphan;
Expand Down