Skip to content

Commit 99c86b9

Browse files
scbeddbenbp
andauthored
Discover Affected Recordings (#8391)
* adjustments to Assets.Maintenance.Tool to allow a bit more flexibility as to which commits are pulled down * add a script which can retrieve multiple assets tags at a time, then scan them for contents Co-authored-by: Ben Broderick Phillips <ben@benbp.net>
1 parent 25aa714 commit 99c86b9

6 files changed

Lines changed: 241 additions & 30 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,8 +485,10 @@ local.settings.json
485485
# Java tooling
486486
*.iml
487487

488+
# generated by test-proxy related scripts and packages when testing
488489
.assets
489490
.testruns
491+
.results
490492

491493
# oav converter
492494
tools/oav-traffic-converter/input-example/

tools/assets-automation/assets-maintenance-tool/Azure.Sdk.Tools.Assets.MaintenanceTool/Model/RepoConfiguration.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,20 @@ public RepoConfiguration() {
2424
/// almost arbitrarily. Official test-proxy began supporting external assets in late November of 2022, so we don't
2525
/// need to go further back than that when examining the SHAs in the language repos. There is no possibility of an
2626
/// assets.json past this date!
27+
///
28+
/// If provided with "latest" argument, only the most recent commit on each considered branch will be included.
2729
/// </summary>
28-
public DateTime ScanStartDate { get; set; } = DateTime.Parse("2022-12-01");
30+
public string ScanStartDate { get; set; } = "2022-12-01";
2931

3032
/// <summary>
3133
/// The set of branches that we will examine. Defaults to just 'main'.
3234
/// </summary>
3335
public List<string> Branches { get; set; } = new List<string> { "main" };
36+
37+
/// <summary>
38+
/// The folder patterns that are used to filter the repo. Functionally, these strings
39+
/// will be combined with **/assets.json while searching for assets. Non-presence indicates
40+
/// the intent to scan the entire repository.
41+
/// </summary>
42+
public List<string> ScanFolders { get; set; } = new List<string>{};
3443
}

tools/assets-automation/assets-maintenance-tool/Azure.Sdk.Tools.Assets.MaintenanceTool/Scan/AssetsScanner.cs

Lines changed: 47 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -85,36 +85,29 @@ private List<AssetsResult> ScanRepo(RepoConfiguration config, AssetsResultSet? p
8585
}
8686

8787
var targetRepoUri = $"https://{authString}github.com/{config.LanguageRepo}.git";
88-
var workingDirectory = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());
88+
var workingDirectory = Path.Combine(WorkingDirectory, config.LanguageRepo.Replace("/", "_"));
8989
var results = new List<AssetsResult>();
9090

91-
try
91+
if (!Directory.Exists(workingDirectory))
9292
{
93-
if (!Directory.Exists(workingDirectory))
94-
{
95-
Directory.CreateDirectory(workingDirectory);
96-
}
93+
Directory.CreateDirectory(workingDirectory);
94+
}
9795

98-
foreach (var branch in config.Branches)
99-
{
100-
var commitsOnBranch = GetBranchCommits(targetRepoUri, branch, config.ScanStartDate, workingDirectory);
101-
var unretrievedCommits = ResolveUnhandledCommits(commitsOnBranch, previousOutput);
96+
foreach (var branch in config.Branches)
97+
{
98+
var commitsOnBranch = GetBranchCommits(targetRepoUri, branch, config.ScanStartDate, workingDirectory);
99+
var unretrievedCommits = ResolveUnhandledCommits(commitsOnBranch, previousOutput);
102100

103-
results.AddRange(GetAssetsResults(config.LanguageRepo, unretrievedCommits, workingDirectory));
101+
results.AddRange(GetAssetsResults(config.LanguageRepo, unretrievedCommits, workingDirectory, config.ScanFolders));
104102

105-
if (previousOutput != null)
103+
if (previousOutput != null)
104+
{
105+
foreach (var commit in commitsOnBranch.Where(commit => !unretrievedCommits.Contains(commit)))
106106
{
107-
foreach (var commit in commitsOnBranch.Where(commit => !unretrievedCommits.Contains(commit)))
108-
{
109-
results.AddRange(previousOutput.ByOriginSHA[commit]);
110-
}
107+
results.AddRange(previousOutput.ByOriginSHA[commit]);
111108
}
112109
}
113110
}
114-
finally
115-
{
116-
CleanupWorkingDirectory(workingDirectory);
117-
}
118111

119112
return results;
120113
}
@@ -123,9 +116,10 @@ private List<AssetsResult> ScanRepo(RepoConfiguration config, AssetsResultSet? p
123116
/// Clones a specific branch, then returns all commit shas newer than our targeted date.
124117
/// </summary>
125118
/// <returns>A list of commits (limited to after a startdate) from the targeted branch.</returns>
126-
private List<string> GetBranchCommits(string uri, string branch, DateTime since, string workingDirectory)
119+
private List<string> GetBranchCommits(string uri, string branch, string since, string workingDirectory)
127120
{
128121
var commitSHAs = new List<string>();
122+
129123
try
130124
{
131125
// if git is already initialized, we just need to checkout a specific branch
@@ -141,7 +135,15 @@ private List<string> GetBranchCommits(string uri, string branch, DateTime since,
141135
Cleanup(workingDirectory);
142136
}
143137

144-
var tagResult = handler.Run($"log --since={since.ToString("yyyy-MM-dd")} --format=format:%H", workingDirectory);
138+
CommandResult tagResult;
139+
if (since == "latest")
140+
{
141+
tagResult = handler.Run($"log -n 1 --format=format:%H", workingDirectory);
142+
}
143+
else
144+
{
145+
tagResult = handler.Run($"log --since={since} --format=format:%H", workingDirectory);
146+
}
145147
commitSHAs.AddRange(tagResult.StdOut.Split(Environment.NewLine).Select(x => x.Trim()).Where(x => !string.IsNullOrWhiteSpace(x)));
146148
}
147149
catch (GitProcessException gitException)
@@ -207,11 +209,23 @@ public Assets()
207209
/// Find all assets.jsons beneath a targeted folder.
208210
/// </summary>
209211
/// <returns>AssetsResults for each discovered assets.json, populating other metadata as necessary.</returns>
210-
private List<AssetsResult> ScanDirectory(string repo, string commit, string workingDirectory)
212+
private List<AssetsResult> ScanDirectory(string repo, string commit, string workingDirectory, List<string> scanFolders)
211213
{
212214
Matcher matcher = new();
213215
List<AssetsResult> locatedAssets = new List<AssetsResult>();
214-
matcher.AddIncludePatterns(new[] { "**/assets.json" });
216+
217+
if (scanFolders.Count > 0)
218+
{
219+
foreach (string folder in scanFolders)
220+
{
221+
matcher.AddIncludePatterns(new[] { Path.Combine(folder, "**/assets.json") });
222+
}
223+
}
224+
else
225+
{
226+
matcher.AddIncludePatterns(new[] { "**/assets.json" });
227+
}
228+
215229
IEnumerable<string> assetsJsons = matcher.GetResultsInFullPath(workingDirectory);
216230

217231
foreach (var assetsJson in assetsJsons)
@@ -233,14 +247,14 @@ private List<AssetsResult> ScanDirectory(string repo, string commit, string work
233247
/// Walks a set of targeted commits, extracting all available assets.jsons from each.
234248
/// </summary>
235249
/// <returns>A list of AssetsResults reflecting all discovered assets.jsons from each targeted commit.</returns>
236-
private List<AssetsResult> GetAssetsResults(string repo, List<string> commits, string workingDirectory)
250+
private List<AssetsResult> GetAssetsResults(string repo, List<string> commits, string workingDirectory, List<string> folderGlobs)
237251
{
238252
var allResults = new List<AssetsResult>();
239253
foreach (var commit in commits)
240254
{
241255
handler.Run($"checkout {commit}", workingDirectory);
242256
Cleanup(workingDirectory);
243-
allResults.AddRange(ScanDirectory(repo, commit, workingDirectory));
257+
allResults.AddRange(ScanDirectory(repo, commit, workingDirectory, folderGlobs));
244258
}
245259

246260
return allResults;
@@ -275,7 +289,7 @@ private void Cleanup(string workingDirectory)
275289
/// This is necessary because certain `.pack` files created by git cannot be deleted without
276290
/// adjusting these permissions.
277291
/// </summary>
278-
private void SetPermissionsAndDelete(string gitfolder)
292+
public static void SetPermissionsAndDelete(string gitfolder)
279293
{
280294
File.SetAttributes(gitfolder, FileAttributes.Normal);
281295

@@ -300,7 +314,7 @@ private void SetPermissionsAndDelete(string gitfolder)
300314
/// The .git folder's .pack files can be super finicky to delete from code.
301315
/// This function abstracts the necessary permissions update and cleans that folder for us.
302316
/// </summary>
303-
private void CleanupWorkingDirectory(string workingDirectory)
317+
public static void CleanupGitDirectory(string workingDirectory)
304318
{
305319
var gitDir = Path.Combine(workingDirectory, ".git");
306320

@@ -319,7 +333,11 @@ public void Save(AssetsResultSet newResults)
319333
{
320334
using (var stream = System.IO.File.OpenWrite(ResultsFile))
321335
{
322-
stream.Write(Encoding.UTF8.GetBytes(JsonSerializer.Serialize(newResults.Results)));
336+
var options = new JsonSerializerOptions
337+
{
338+
WriteIndented = true
339+
};
340+
stream.Write(Encoding.UTF8.GetBytes(JsonSerializer.Serialize(newResults.Results, options: options)));
323341
}
324342
}
325343
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
<#
2+
.SYNOPSIS
3+
Used to retrieve a list of references to test recordings that "contain" a string in their contents.
4+
5+
.DESCRIPTION
6+
Uses the Azure.Sdk.Tools.Assets.MaintenanceTool to retrieve a copy of all the assets files, then grep each tag's contents
7+
for the specified string. Intermediary results are stored in the .results folder, and the final results will be output to the
8+
console on successful completion of the script.
9+
10+
This will enable easy access to the test recordings that are affected by a change in the SDK, and will help to identify
11+
which tests need to be updated.
12+
13+
PreReqs:
14+
- Azure.Sdk.Tools.Assets.MaintenanceTool must be available on the PATH
15+
- git is available on the PATH
16+
- Running powershell core
17+
- ripgrep installed on the machine. Run 'choco install ripgrep' from an elevated terminal
18+
19+
.PARAMETER ConfigFilePath
20+
The query configuration file, which contains targeted repos, branches, and paths.
21+
22+
.PARAMETER SearchString
23+
The regex string to search for in the contents of the test recordings.
24+
#>
25+
param(
26+
[Parameter(Mandatory = $true)]
27+
[string]$ConfigFilePath,
28+
[Parameter(Mandatory = $true)]
29+
[string]$SearchString
30+
)
31+
32+
Set-StrictMode -Version 4
33+
$ErrorActionPreference = "Stop"
34+
. (Join-Path $PSScriptRoot ".." "utilities.ps1")
35+
36+
if (!(Test-Path $ConfigFilePath -PathType Leaf)) {
37+
Write-Error "Config file not found: $ConfigFilePath"
38+
exit 1
39+
}
40+
41+
$ResultsFolder = Create-If-Not-Exists (Join-Path $PSScriptRoot ".results")
42+
$TagsFolder = Create-If-Not-Exists (Join-Path $ResultsFolder "tags")
43+
$ScanOutputJson = Join-Path $ResultsFolder "output.json"
44+
45+
try {
46+
Push-Location $ResultsFolder
47+
48+
if (!(Test-Path $ScanOutputJson)) {
49+
Azure.Sdk.Tools.Assets.MaintenanceTool scan --config $ConfigFilePath
50+
if ($LASTEXITCODE) { exit $LASTEXITCODE }
51+
}
52+
else {
53+
Write-Host "Skipping scan, using cached results"
54+
}
55+
}
56+
finally {
57+
Pop-Location
58+
}
59+
60+
$DiscoveredAssets = Get-Content $ScanOutputJson | ConvertFrom-Json
61+
62+
foreach ($asset in $DiscoveredAssets) {
63+
$TagFolder = Get-AssetsRepoSlice -Tag $asset.Tag -WorkDirectory $ResultsFolder
64+
}
65+
66+
Write-Host "rg $SearchString -g `"*.json`" $TagsFolder"
67+
rg $SearchString -g "*.json" $TagsFolder
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
{
2+
"LanguageRepos": [
3+
{
4+
"LanguageRepo": "Azure/azure-sdk-for-java",
5+
"ScanStartDate": "latest",
6+
"ScanFolders": [
7+
"sdk/containerregistry"
8+
]
9+
},
10+
{
11+
"LanguageRepo": "Azure/azure-sdk-for-python",
12+
"ScanStartDate": "latest",
13+
"ScanFolders": [
14+
"sdk/containerregistry"
15+
]
16+
},
17+
{
18+
"LanguageRepo": "Azure/azure-sdk-for-go",
19+
"ScanStartDate": "latest",
20+
"ScanFolders": [
21+
"sdk/containers"
22+
]
23+
},
24+
{
25+
"LanguageRepo": "Azure/azure-sdk-for-net",
26+
"ScanStartDate": "latest",
27+
"ScanFolders": [
28+
"sdk/containerregistry"
29+
]
30+
},
31+
{
32+
"LanguageRepo": "Azure/azure-sdk-for-js",
33+
"ScanStartDate": "latest",
34+
"ScanFolders": [
35+
"sdk/containerregistry"
36+
]
37+
}
38+
]
39+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
Set-StrictMode -Version 4
2+
3+
function Create-If-Not-Exists {
4+
param(
5+
[string]$Path
6+
)
7+
8+
if (!(Test-Path $Path)) {
9+
New-Item -ItemType Directory -Path $Path -Force | Out-Null
10+
}
11+
12+
return $Path
13+
}
14+
15+
<#
16+
.SYNOPSIS
17+
Retrieve a specific tag from an assets repo and store it on disk within the work directory.
18+
19+
.DESCRIPTION
20+
Clones a specific tag from an assets repo (defaults to azure-sdk-assets) and stores it on disk
21+
within the work directory under a folder with pattern:
22+
23+
.results <-- this should be WorkDirectory arg
24+
tags/
25+
<tagname>/
26+
<tagged repo contents>
27+
<tagname>/
28+
<tagged repo contents>
29+
<tagname>/
30+
<tagged repo contents>
31+
...
32+
33+
Returns the location of the folder after the work is complete.
34+
35+
.PARAMETER Tag
36+
The tag to retrieve from the assets repo.
37+
38+
.PARAMETER WorkDirectory
39+
The path to the .results directory within which this script will operate.
40+
41+
.PARAMETER TargetRepo
42+
Defaults to "Azure/azure-sdk-assets". This is the repo that will be cloned from.
43+
#>
44+
function Get-AssetsRepoSlice {
45+
param(
46+
[Parameter(Mandatory=$true)]
47+
[string]$Tag,
48+
[Parameter(Mandatory=$true)]
49+
[string]$WorkDirectory,
50+
[string]$TargetRepo = "Azure/azure-sdk-assets"
51+
)
52+
$CloneUri = "https://github.com/$TargetRepo.git"
53+
$TagFolderName = Join-Path $WorkDirectory "tags" $Tag.Replace("/", "-")
54+
55+
$TagFolder = Create-If-Not-Exists -Path $TagFolderName
56+
57+
Write-Host "TagFolder is $TagFolder"
58+
59+
if (Test-Path $TagFolder/.git) {
60+
Write-Host "TagFolder already exists, skipping clone, returning $TagFolder"
61+
return $TagFolder
62+
}
63+
else {
64+
try {
65+
Push-Location $TagFolder
66+
git clone -c core.longpaths=true --no-checkout --filter=tree:0 $CloneUri .
67+
git fetch origin "refs/tags/$($Tag):refs/tags/$Tag"
68+
git checkout $Tag
69+
}
70+
finally {
71+
Pop-Location
72+
}
73+
74+
return $TagFolder
75+
}
76+
}

0 commit comments

Comments
 (0)