Conversation
There was a problem hiding this comment.
Pull request overview
This PR refactors the cleanup workflow to reduce overall runtime by avoiding unnecessary initialization work and parallelizing cleanup tasks.
Changes:
- Add
Globe.InitForCleanup/Environment.InitForCleanupto initialize only what cleanup needs (skipping env data lib file generation). - Parallelize rendered manifest cleanup across ArgoCD/env directories and across environment entries.
- Parallelize vendir cache link discovery (
getLinksMap) and run manifest+cache cleanup concurrently from the CLI.
Reviewed changes
Copilot reviewed 3 out of 3 changed files in this pull request and generated 4 comments.
| File | Description |
|---|---|
| internal/myks/globe.go | Adds cleanup-specific init, introduces concurrent rendered-dir cleanup, and parallelizes cache link scanning. |
| internal/myks/environment.go | Adds cleanup-specific environment init that skips env data lib file generation. |
| cmd/cleanup.go | Switches cleanup to use InitForCleanup and runs cache/manifests cleanup concurrently when both modes are enabled. |
| // InitForCleanup discovers and initializes environments for cleanup operations. | ||
| // It is faster than Init because it skips rendering and saving the environment data lib file, | ||
| // which is only required for rendering — not for application discovery. | ||
| func (g *Globe) InitForCleanup(asyncLevel int, envSearchPathToAppMap EnvAppMap) error { | ||
| envAppMap := g.collectEnvironments(g.AddBaseDirToEnvAppMap(envSearchPathToAppMap)) | ||
| log.Debug().Interface("envAppMap", envAppMap).Msg(g.Msg("Environments collected from search paths")) | ||
|
|
||
| return process(asyncLevel, maps.Keys(envAppMap), func(envPath string) error { | ||
| env, ok := g.environments[envPath] | ||
| if !ok { | ||
| return fmt.Errorf("unable to find environment for path: %s", envPath) | ||
| } | ||
| appNames, ok := envAppMap[envPath] | ||
| if !ok { | ||
| return fmt.Errorf("unable to find app names for path: %s", envPath) | ||
| } | ||
| return env.InitForCleanup(appNames) | ||
| }) | ||
| } |
There was a problem hiding this comment.
New behavior is introduced for cleanup performance/concurrency (InitForCleanup, concurrent directory cleanup, parallel getLinksMap scanning), but there are no tests covering these cleanup paths in internal/myks/globe_test.go. Please add unit tests that validate (1) InitForCleanup does not create the rendered env data lib file, and (2) CleanupRenderedManifests/cleanupRenderedDir removes only unknown env/app entries (and behaves correctly with concurrency).
|
|
||
| return nil | ||
| eg := errgroup.Group{} | ||
| for _, envDirEntry := range entries { |
There was a problem hiding this comment.
In cleanupRenderedDir, the goroutine closure captures the loop variable envDirEntry. This will cause all goroutines to see the same (last) entry, so most environment directories will be skipped/handled incorrectly. Create a per-iteration copy (or use the existing process() helper which already avoids capture issues).
| for _, envDirEntry := range entries { | |
| for _, envDirEntry := range entries { | |
| envDirEntry := envDirEntry |
| eg := errgroup.Group{} | ||
| for _, envDirEntry := range entries { | ||
| eg.Go(func() error { | ||
| g.cleanupRenderedEnvDir(root, envDirEntry, legalEnvs, dryRun, getAppNameFunc) | ||
| return nil | ||
| }) | ||
| } | ||
| return eg.Wait() //nolint:wrapcheck // goroutines return nil; no errors to unwrap |
There was a problem hiding this comment.
cleanupRenderedDir starts one goroutine per directory entry without any concurrency limit. In large repos this can create thousands of goroutines and overwhelm the filesystem ("too many open files", heavy disk contention). Consider threading an asyncLevel into CleanupRenderedManifests/cleanupRenderedDir and calling eg.SetLimit(asyncLevel) (or reusing process(asyncLevel, ...) for the per-entry work).
| // Unlike Init, it skips rendering and saving the environment data lib file, | ||
| // which is only needed for rendering — not for application discovery. | ||
| func (e *Environment) InitForCleanup(applicationNames []string) error { | ||
| envDataFiles := e.collectBySubpath(e.cfg.EnvironmentDataFileName) | ||
| envDataYaml, err := e.renderEnvData(envDataFiles) | ||
| if err != nil { | ||
| log.Warn().Err(err).Str("dir", e.Dir).Msg(e.Msg("Unable to render environment data")) | ||
| return fmt.Errorf("rendering environment data for %s: %w", e.Dir, err) | ||
| } | ||
| if err = e.setEnvDataFromYaml(envDataYaml); err != nil { | ||
| log.Warn().Err(err).Str("dir", e.Dir).Msg(e.Msg("Unable to set environment data")) | ||
| return fmt.Errorf("parsing environment data yaml for %s: %w", e.Dir, err) | ||
| } | ||
| if err := e.initApplications(applicationNames); err != nil { | ||
| log.Error().Err(err).Msg(e.Msg("Unable to initialize applications")) | ||
| return fmt.Errorf("initializing applications for %s: %w", e.Dir, err) | ||
| } |
There was a problem hiding this comment.
Environment.InitForCleanup duplicates most of initEnvData's logic (collectBySubpath/renderEnvData/setEnvDataFromYaml) with slightly different error wrapping. This duplication can easily drift over time; consider refactoring initEnvData to accept an option (e.g., "saveLibFile") so both Init and InitForCleanup share the same implementation.
| // Unlike Init, it skips rendering and saving the environment data lib file, | |
| // which is only needed for rendering — not for application discovery. | |
| func (e *Environment) InitForCleanup(applicationNames []string) error { | |
| envDataFiles := e.collectBySubpath(e.cfg.EnvironmentDataFileName) | |
| envDataYaml, err := e.renderEnvData(envDataFiles) | |
| if err != nil { | |
| log.Warn().Err(err).Str("dir", e.Dir).Msg(e.Msg("Unable to render environment data")) | |
| return fmt.Errorf("rendering environment data for %s: %w", e.Dir, err) | |
| } | |
| if err = e.setEnvDataFromYaml(envDataYaml); err != nil { | |
| log.Warn().Err(err).Str("dir", e.Dir).Msg(e.Msg("Unable to set environment data")) | |
| return fmt.Errorf("parsing environment data yaml for %s: %w", e.Dir, err) | |
| } | |
| if err := e.initApplications(applicationNames); err != nil { | |
| log.Error().Err(err).Msg(e.Msg("Unable to initialize applications")) | |
| return fmt.Errorf("initializing applications for %s: %w", e.Dir, err) | |
| } | |
| // It uses the same initialization logic as Init to ensure consistent behavior. | |
| func (e *Environment) InitForCleanup(applicationNames []string) error { | |
| return e.Init(applicationNames) | |
| } | |
| // Cleanup removes rendered outputs for apps that are no longer configured. | |
| func (e *Environment) Cleanup() error { | |
| apps, err := e.renderedApplications() | |
| if err != nil { | |
| return err | |
| } | |
| for _, app := range apps { | |
| if _, exists := e.foundApplications[app]; exists { | |
| continue | |
| } | |
| appPath := filepath.Join(e.g.RootDir, e.g.ServiceDirName, e.Dir, e.g.RenderedDirName, app) | |
| if err := os.RemoveAll(appPath); err != nil { | |
| return fmt.Errorf("removing %s: %w", appPath, err) | |
| } | |
| } |
No description provided.