Skip to content

Commit 42a6ce3

Browse files
authored
improve performance by only looking up git information once (#191)
* performance: cache git information when possible in headers command - only find the repository's first commit date once instead of on every file read - parallelize checking files on startup by moving file.isDir() check to worker thread - headers: only calculate repository root once instead of on every file * improve perf by using a cached git index * rename method to buildRepositoryCache * fix lint errors * only stat the file if it isn't in ignore patterns * run gofmt * remove stray comments * remove stray comment * revert filtering logic back to main thread * prefix year in git log to disambiguate between files and dates * restore API compatibility for library consumers
1 parent 3fa4dc0 commit 42a6ce3

2 files changed

Lines changed: 135 additions & 38 deletions

File tree

cmd/headers.go

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ config, see the "copywrite init" command.`,
9898
".github/dependabot.yml",
9999
"**/node_modules/**",
100100
".copywrite.hcl",
101+
".git/**/*.pack",
101102
}
102103
ignoredPatterns := lo.Union(conf.Project.HeaderIgnore, autoSkippedPatterns)
103104

@@ -181,12 +182,18 @@ func init() {
181182
// updateExistingHeaders walks through files and updates copyright headers based on config and git history
182183
// Returns the count of updated files, a boolean indicating if any file was updated, and the LICENSE file path (if found)
183184
func updateExistingHeaders(cmd *cobra.Command, ignoredPatterns []string, dryRun bool) (int, bool, string) {
185+
_ = licensecheck.InitializeGitCache(".")
184186
targetHolder := conf.Project.CopyrightHolder
185187
if targetHolder == "" {
186188
targetHolder = "IBM Corp."
187189
}
188190

189191
configYear := conf.Project.CopyrightYear
192+
repoFirstYear, _ := licensecheck.GetRepoFirstCommitYear(".")
193+
194+
// Open git repository once for all file operations
195+
repoRoot, _ := licensecheck.GetRepoRoot(".")
196+
190197
updatedCount := 0
191198
anyFileUpdated := false
192199
var licensePath string
@@ -211,6 +218,7 @@ func updateExistingHeaders(cmd *cobra.Command, ignoredPatterns []string, dryRun
211218
for i := 0; i < workers; i++ {
212219
go func() {
213220
defer wg.Done()
221+
214222
for path := range ch {
215223
// capture base and skip LICENSE files here as well
216224
base := filepath.Base(path)
@@ -224,14 +232,14 @@ func updateExistingHeaders(cmd *cobra.Command, ignoredPatterns []string, dryRun
224232
}
225233

226234
if !dryRun {
227-
updated, err := licensecheck.UpdateCopyrightHeader(path, targetHolder, configYear, false)
235+
updated, err := licensecheck.UpdateCopyrightHeaderWithCache(path, targetHolder, configYear, false, repoFirstYear, repoRoot)
228236
if err == nil && updated {
229237
cmd.Printf(" %s\n", path)
230238
atomic.AddInt64(&updatedCount64, 1)
231239
atomic.StoreInt32(&anyFileUpdatedFlag, 1)
232240
}
233241
} else {
234-
needsUpdate, err := licensecheck.NeedsUpdate(path, targetHolder, configYear, false)
242+
needsUpdate, err := licensecheck.NeedsUpdateWithCache(path, targetHolder, configYear, false, repoFirstYear, repoRoot)
235243
if err == nil && needsUpdate {
236244
cmd.Printf(" %s\n", path)
237245
atomic.AddInt64(&updatedCount64, 1)
@@ -244,13 +252,13 @@ func updateExistingHeaders(cmd *cobra.Command, ignoredPatterns []string, dryRun
244252

245253
// Producer: walk the tree and push files onto the channel
246254
go func() {
247-
_ = filepath.Walk(".", func(path string, info os.FileInfo, err error) error {
248-
if err != nil || info.IsDir() {
255+
_ = filepath.WalkDir(".", func(path string, d os.DirEntry, err error) error {
256+
// Check if file should be ignored
257+
if addlicense.FileMatches(path, ignoredPatterns) {
249258
return nil
250259
}
251260

252-
// Check if file should be ignored
253-
if addlicense.FileMatches(path, ignoredPatterns) {
261+
if err != nil || d.IsDir() {
254262
return nil
255263
}
256264

@@ -284,16 +292,20 @@ func updateLicenseFile(cmd *cobra.Command, licensePath string, anyFileUpdated bo
284292
targetHolder = "IBM Corp."
285293
}
286294

295+
repoFirstYear, _ := licensecheck.GetRepoFirstCommitYear(".")
287296
configYear := conf.Project.CopyrightYear
288297

298+
// Open git repository for LICENSE file operations
299+
repoRoot, _ := licensecheck.GetRepoRoot(".")
300+
289301
// Update LICENSE file, forcing current year if any file was updated
290302
if !dryRun {
291-
updated, err := licensecheck.UpdateCopyrightHeader(licensePath, targetHolder, configYear, anyFileUpdated)
303+
updated, err := licensecheck.UpdateCopyrightHeaderWithCache(licensePath, targetHolder, configYear, anyFileUpdated, repoFirstYear, repoRoot)
292304
if err == nil && updated {
293305
cmd.Printf("\nUpdated LICENSE file: %s\n", licensePath)
294306
}
295307
} else {
296-
needsUpdate, err := licensecheck.NeedsUpdate(licensePath, targetHolder, configYear, anyFileUpdated)
308+
needsUpdate, err := licensecheck.NeedsUpdateWithCache(licensePath, targetHolder, configYear, anyFileUpdated, repoFirstYear, repoRoot)
297309
if err == nil && needsUpdate {
298310
cmd.Printf("\n[DRY RUN] Would update LICENSE file: %s\n", licensePath)
299311
}

licensecheck/update.go

Lines changed: 115 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"regexp"
1313
"strconv"
1414
"strings"
15+
"sync"
1516
"time"
1617
)
1718

@@ -72,6 +73,8 @@ func extractAllCopyrightInfo(filePath string) ([]*CopyrightInfo, error) {
7273
return copyrights, scanner.Err()
7374
}
7475

76+
// contentStartsWithCopyright matches text that begins with the word "copyright"
// (case-insensitive). It is compiled once at package scope so that
// parseCopyrightLine does not recompile the pattern for every line it checks.
var contentStartsWithCopyright = regexp.MustCompile(`(?i)^copyright\b`)
77+
7578
// parseCopyrightLine extracts copyright details from a line
7679
// parseCopyrightLine extracts copyright details from a line
7780
// inHbsCommentBlock indicates if we're inside a {{! ... }} block (for .hbs files)
@@ -130,7 +133,7 @@ func parseCopyrightLine(line string, lineNum int, filePath string, inHbsCommentB
130133
// Validate content starts with "Copyright"
131134
// Normalize content for the check
132135
content = strings.TrimSpace(content)
133-
if !regexp.MustCompile(`(?i)^copyright\b`).MatchString(content) {
136+
if !contentStartsWithCopyright.MatchString(content) {
134137
return nil
135138
}
136139

@@ -394,8 +397,8 @@ func calculateYearUpdates(
394397
return shouldUpdate, newStartYear, newEndYear
395398
}
396399

397-
// getRepoRoot finds the git repository root from a given directory
398-
func getRepoRoot(workingDir string) (string, error) {
400+
// GetRepoRoot finds the git repository root from a given directory
401+
func GetRepoRoot(workingDir string) (string, error) {
399402
repoRootOutput, err := executeGitCommand(
400403
workingDir,
401404
"rev-parse", "--show-toplevel",
@@ -406,15 +409,77 @@ func getRepoRoot(workingDir string) (string, error) {
406409
return strings.TrimSpace(string(repoRootOutput)), nil
407410
}
408411

409-
// getFileLastCommitYear returns the year of the last commit that modified a file
410-
func getFileLastCommitYear(filePath string) (int, error) {
411-
absPath, err := filepath.Abs(filePath)
412+
// gitLogYearPrefix marks the per-commit year line in the `git log` output so
// that it cannot be confused with a file whose name looks like a year.
const gitLogYearPrefix = "__CW_YEAR__="

// buildRepositoryCache runs a single `git log --name-only` in repoRoot and
// derives per-file and repository-wide commit years from its output.
//
// Returns:
//   - A map of file paths (as printed by git) to their last commit years for
//     all files that appear in the log
//   - The earliest commit year seen in the log (or 0 if not found)
//   - An error if the git command fails
func buildRepositoryCache(repoRoot string) (map[string]int, int, error) {
	cmd := exec.Command(
		"git", "log",
		// The marker must be part of the format string: parseGitLogYears only
		// treats lines carrying it as commit years.
		"--format=format:"+gitLogYearPrefix+"%ad",
		"--date=format:%Y",
		"--name-only",
	)
	cmd.Dir = repoRoot
	output, err := cmd.Output()
	if err != nil {
		return nil, 0, err
	}

	lastCommitYears, firstYear := parseGitLogYears(string(output))
	return lastCommitYears, firstYear, nil
}

// parseGitLogYears parses output consisting of gitLogYearPrefix-marked year
// lines, each followed by the file names touched by that commit. It returns
// the year of the first (topmost) occurrence of every file name and the
// smallest valid year encountered (0 if none).
func parseGitLogYears(output string) (map[string]int, int) {
	lastCommitYears := make(map[string]int)
	currentYear := 0
	firstYear := 0

	for _, line := range strings.Split(output, "\n") {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}

		if strings.HasPrefix(line, gitLogYearPrefix) {
			year, err := strconv.Atoi(strings.TrimPrefix(line, gitLogYearPrefix))
			// Only accept plausible 4-digit years.
			if err == nil && year > 1900 && year < 2100 {
				currentYear = year
				if firstYear == 0 || year < firstYear {
					firstYear = year
				}
			}
			// A marker line is never a file name, even if its year is unusable.
			continue
		}

		// File names seen before any valid year line cannot be attributed.
		if currentYear == 0 {
			continue
		}

		// Only the first occurrence is stored; git log lists commits
		// newest-first by default, so that is the most recent commit.
		if _, seen := lastCommitYears[line]; !seen {
			lastCommitYears[line] = currentYear
		}
	}

	return lastCommitYears, firstYear
}
415453

416-
// Find repository root
417-
repoRoot, err := getRepoRoot(filepath.Dir(absPath))
454+
var (
455+
lastCommitYearsCache map[string]int
456+
firstCommitYearCached = 0
457+
once sync.Once
458+
)
459+
460+
func InitializeGitCache(repoRoot string) error {
461+
once.Do(func() {
462+
cache, firstYear, err := buildRepositoryCache(repoRoot)
463+
if err != nil {
464+
lastCommitYearsCache = make(map[string]int)
465+
} else {
466+
lastCommitYearsCache = cache
467+
firstCommitYearCached = firstYear
468+
}
469+
})
470+
return nil
471+
}
472+
473+
func getCachedFileLastCommitYear(filePath string, repoRoot string) (int, error) {
474+
if year, exists := lastCommitYearsCache[filePath]; exists {
475+
return year, nil
476+
}
477+
return 0, fmt.Errorf("file not found in git cache")
478+
}
479+
480+
// getFileLastCommitYear returns the year of the last commit that modified a file
481+
func getFileLastCommitYear(filePath string, repoRoot string) (int, error) {
482+
absPath, err := filepath.Abs(filePath)
418483
if err != nil {
419484
return 0, err
420485
}
@@ -425,43 +490,36 @@ func getFileLastCommitYear(filePath string) (int, error) {
425490
return 0, fmt.Errorf("failed to calculate relative path: %w", err)
426491
}
427492

428-
// Run git log from repo root with relative path
429-
output, err := executeGitCommand(
430-
repoRoot,
431-
"log", "-1", "--format=%ad", "--date=format:%Y", "--", relPath,
432-
)
433-
if err != nil {
434-
return 0, err
493+
if cachedYear, err := getCachedFileLastCommitYear(relPath, repoRoot); err == nil {
494+
return cachedYear, nil
495+
} else {
496+
return 0, nil
435497
}
436498

437-
return parseYearFromGitOutput(output, false)
438499
}
439500

440501
// GetRepoFirstCommitYear returns the year of the first commit in the repository
441502
func GetRepoFirstCommitYear(workingDir string) (int, error) {
442503
// Find repository root for consistency
443-
repoRoot, err := getRepoRoot(workingDir)
504+
repoRoot, err := GetRepoRoot(workingDir)
444505
if err != nil {
445506
return 0, err
446507
}
447508

448-
output, err := executeGitCommand(repoRoot, "log", "--reverse", "--format=%ad", "--date=format:%Y")
449-
if err != nil {
450-
return 0, err
451-
}
509+
_ = InitializeGitCache(repoRoot)
452510

453-
return parseYearFromGitOutput(output, true)
511+
return firstCommitYearCached, nil
454512
}
455513

456514
// GetRepoLastCommitYear returns the year of the last commit in the repository
457515
func GetRepoLastCommitYear(workingDir string) (int, error) {
458516
// Find repository root for consistency
459-
repoRoot, err := getRepoRoot(workingDir)
517+
repoRoot, err := GetRepoRoot(workingDir)
460518
if err != nil {
461519
return 0, err
462520
}
463521

464-
output, err := executeGitCommand(repoRoot, "log", "-1", "--format=%ad", "--date=format:%Y")
522+
output, err := executeGitCommand(repoRoot, "log", "-1", "--format=__CW_YEAR__=%ad", "--date=format:%Y")
465523
if err != nil {
466524
return 0, err
467525
}
@@ -527,6 +585,16 @@ func evaluateCopyrightUpdates(
527585
// If forceCurrentYear is true, forces end year to current year regardless of git history
528586
// Returns true if the file was modified
529587
func UpdateCopyrightHeader(filePath string, targetHolder string, configYear int, forceCurrentYear bool) (bool, error) {
588+
repoRoot, _ := GetRepoRoot(filepath.Dir(filePath))
589+
repoFirstYear, _ := GetRepoFirstCommitYear(filepath.Dir(filePath))
590+
return UpdateCopyrightHeaderWithCache(filePath, targetHolder, configYear, forceCurrentYear, repoFirstYear, repoRoot)
591+
}
592+
593+
// UpdateCopyrightHeaderWithCache updates all copyright headers in a file if needed
594+
// If forceCurrentYear is true, forces end year to current year regardless of git history
595+
// repoFirstYear and repoRoot can be provided to avoid repeated git lookups when processing multiple files
596+
// Returns true if the file was modified
597+
func UpdateCopyrightHeaderWithCache(filePath string, targetHolder string, configYear int, forceCurrentYear bool, repoFirstYear int, repoRoot string) (bool, error) {
530598
// Skip .copywrite.hcl config file
531599
if filepath.Base(filePath) == ".copywrite.hcl" {
532600
return false, nil
@@ -555,8 +623,13 @@ func UpdateCopyrightHeader(filePath string, targetHolder string, configYear int,
555623
}
556624

557625
currentYear := time.Now().Year()
558-
lastCommitYear, _ := getFileLastCommitYear(filePath)
559-
repoFirstYear, _ := GetRepoFirstCommitYear(filepath.Dir(filePath))
626+
627+
// Try to get the last commit year from git if repo is available
628+
lastCommitYear := 0
629+
if repoRoot != "" {
630+
lastCommitYear, _ = getFileLastCommitYear(filePath, repoRoot)
631+
}
632+
// repoFirstYear, _ := GetRepoFirstCommitYear(filepath.Dir(filePath))
560633

561634
// Evaluate which copyrights need updating
562635
updates := evaluateCopyrightUpdates(
@@ -615,6 +688,16 @@ func UpdateCopyrightHeader(filePath string, targetHolder string, configYear int,
615688
// If forceCurrentYear is true, forces end year to current year regardless of git history
616689
// Returns true if the file has copyrights matching targetHolder that need year updates
617690
func NeedsUpdate(filePath string, targetHolder string, configYear int, forceCurrentYear bool) (bool, error) {
691+
repoRoot, _ := GetRepoRoot(filepath.Dir(filePath))
692+
repoFirstYear, _ := GetRepoFirstCommitYear(filepath.Dir(filePath))
693+
return NeedsUpdateWithCache(filePath, targetHolder, configYear, forceCurrentYear, repoFirstYear, repoRoot)
694+
}
695+
696+
// NeedsUpdateWithCache checks if a file would be updated without actually modifying it
697+
// If forceCurrentYear is true, forces end year to current year regardless of git history
698+
// repoFirstYear and repoRoot can be provided to avoid repeated git lookups when processing multiple files
699+
// Returns true if the file has copyrights matching targetHolder that need year updates
700+
func NeedsUpdateWithCache(filePath string, targetHolder string, configYear int, forceCurrentYear bool, repoFirstCommitYear int, repoRoot string) (bool, error) {
618701
// Skip .copywrite.hcl config file
619702
if filepath.Base(filePath) == ".copywrite.hcl" {
620703
return false, nil
@@ -642,12 +725,14 @@ func NeedsUpdate(filePath string, targetHolder string, configYear int, forceCurr
642725
}
643726

644727
currentYear := time.Now().Year()
645-
lastCommitYear, _ := getFileLastCommitYear(filePath)
646-
repoFirstYear, _ := GetRepoFirstCommitYear(filepath.Dir(filePath))
728+
lastCommitYear := 0
729+
if !forceCurrentYear {
730+
lastCommitYear, _ = getFileLastCommitYear(filePath, repoRoot)
731+
}
647732

648733
// Evaluate which copyrights need updating
649734
updates := evaluateCopyrightUpdates(
650-
copyrights, targetHolder, configYear, lastCommitYear, currentYear, forceCurrentYear, repoFirstYear,
735+
copyrights, targetHolder, configYear, lastCommitYear, currentYear, forceCurrentYear, repoFirstCommitYear,
651736
)
652737

653738
return len(updates) > 0, nil

0 commit comments

Comments
 (0)