Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions internal/licenses/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@ package licenses

import (
"context"
"errors"
)

type licenseScannerKey struct{}

var ctxKey = licenseScannerKey{}

var ErrNoLicenseScanner = errors.New("no license scanner set in context")

func SetContextLicenseScanner(ctx context.Context, s Scanner) context.Context {
return context.WithValue(ctx, ctxKey, s)
}
Expand All @@ -18,8 +21,9 @@ func IsContextLicenseScannerSet(ctx context.Context) bool {
}

func ContextLicenseScanner(ctx context.Context) (Scanner, error) {
if s, ok := ctx.Value(ctxKey).(Scanner); ok {
return s, nil
s, ok := ctx.Value(ctxKey).(Scanner)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is still an unnecessary change here. There is a scanner created before cataloging happens, and that scanner is passed in the context to all users.

if !ok {
return nil, ErrNoLicenseScanner
}
return NewDefaultScanner()
return s, nil
}
11 changes: 5 additions & 6 deletions internal/licenses/context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,15 @@ func TestContextLicenseScanner(t *testing.T) {
scanner := testScanner()
ctx := SetContextLicenseScanner(context.Background(), scanner)
s, err := ContextLicenseScanner(ctx)
if err != nil || s != scanner {
t.Fatal("expected scanner from context")
}
require.NoError(t, err)
require.Equal(t, scanner, s)
})

t.Run("without scanner", func(t *testing.T) {
ctx := context.Background()
s, err := ContextLicenseScanner(ctx)
if err != nil || s == nil {
t.Fatal("expected default scanner")
}
require.Error(t, err)
require.ErrorIs(t, err, ErrNoLicenseScanner)
require.Nil(t, s)
})
}
33 changes: 22 additions & 11 deletions syft/pkg/cataloger/internal/dependency/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@ package dependency
import (
"sort"

"github.com/scylladb/go-set/strset"

"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

// pairKey is a struct used to track seen relationship pairs without string concatenation
type pairKey struct {
from, to artifact.ID
}

// Specification holds strings that indicate abstract resources that a package provides for other packages and
// requires for itself. These strings can represent anything from file paths, package names, or any other concept
// that is useful for dependency resolution within that packing ecosystem.
Expand Down Expand Up @@ -64,15 +67,15 @@ func Resolve(specifier Specifier, pkgs []pkg.Package) (relationships []artifact.
specsByPkg[id] = allProvides(pkgsProvidingResource, id, specifier(p))
}

seen := strset.New()
seen := make(map[pairKey]struct{})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a good change -- we should avoid creating temporary strings like this as much as possible, but using the map[]struct{} is always cumbersome. This is a change we could accept, though I hope at some point we add a generic set. We have a library to share across our projects for this purpose, and added a suggestion here: anchore/go-collections#5

for _, dependantPkg := range pkgs {
specs := specsByPkg[dependantPkg.ID()]
for _, spec := range specs {
for _, resource := range deduplicate(spec.Requires) {
for providingPkgID := range pkgsProvidingResource[resource] {
// prevent creating duplicate relationships
pairKey := string(providingPkgID) + "-" + string(dependantPkg.ID())
if seen.Has(pairKey) {
// prevent creating duplicate relationships using struct key instead of string concatenation
key := pairKey{from: providingPkgID, to: dependantPkg.ID()}
if _, exists := seen[key]; exists {
continue
}

Expand All @@ -86,7 +89,7 @@ func Resolve(specifier Specifier, pkgs []pkg.Package) (relationships []artifact.
},
)

seen.Add(pairKey)
seen[key] = struct{}{}
}
}
}
Expand All @@ -112,8 +115,16 @@ func allProvides(pkgsProvidingResource map[string]internal.Set[artifact.ID], id

func deduplicate(ss []string) []string {
// note: we sort the set such that multiple invocations of this function will be deterministic
set := strset.New(ss...)
list := set.List()
sort.Strings(list)
return list
// use map for O(1) lookups without strset overhead
unique := make(map[string]struct{}, len(ss))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change might be good, but it's sacrificing readability for what is probably a small gain.

Tangentially, I question whether this needs to be sorted.

result := make([]string, 0, len(unique))

for _, s := range ss {
if _, exists := unique[s]; !exists {
unique[s] = struct{}{}
result = append(result, s)
}
}
sort.Strings(result)
return result
}
26 changes: 17 additions & 9 deletions syft/pkg/cataloger/javascript/parse_pnpm_lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,21 @@ func (p *pnpmV6LockYaml) Parse(version float64, data []byte) ([]pnpmPackage, err
log.WithFields("key", key).Trace("unable to parse pnpm package key")
continue
}
pkgKey := name + "@" + ver
pkgKey := strings.Join([]string{name, ver}, "@")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still concatenating strings together and is less efficient -- up to half as efficient. This was my intuition, so I wrote a benchmark to verify this; I'll skip to the results:

% go test -bench=. -benchmem
...
Benchmark_strcat-12             24962109                41.53 ns/op            0 B/op          0 allocs/op
Benchmark_stringsJoin-12        18459672                62.24 ns/op           16 B/op          1 allocs/op

Test:

package main

import (
	"math/rand"
	"strings"
	"testing"
	"time"
)

func Benchmark_strcat(b *testing.B) {
	for b.Loop() {
		s1, s2 := samples()
		got := s1 + "/" + s2
		if len(got) > 100 {
			return
		}
	}
}

func Benchmark_stringsJoin(b *testing.B) {
	for b.Loop() {
		s1, s2 := samples()
		got := strings.Join([]string{s1, s2}, "/")
		if len(got) > 100 {
			return
		}

	}
}

var input = []string{
	"dsfgdsg",
	"34gg",
	"dfsgdfg",
	"asqwfdf",
	"qwefqwef",
	"agsah",
	"adgsadf",
	"wergerwg",
}

var ilen = len(input)
var r = rand.New(rand.NewSource(time.Now().UnixMilli()))

func samples() (s1, s2 string) {
	i := r.Intn(ilen)
	return input[i], input[(i+1)%ilen]
}

To be sure there wasn't too much being optimized out, I took the example further and it continued to show the concat form uses fewer allocations and less time.


integrity := ""
if value, ok := pkgInfo.Resolution["integrity"]; ok {
integrity = value
}

dependencies := make(map[string]string)
dependencies := make(map[string]string, len(pkgInfo.Dependencies))
for depName, depVersion := range pkgInfo.Dependencies {
var normalizedVersion = strings.SplitN(depVersion, "(", 2)[0]
dependencies[depName] = normalizedVersion
// Use strings.Cut for more efficient splitting
if normalizedVersion, _, ok := strings.Cut(depVersion, "("); ok {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of these changes to Cut seem like they have very limited impact, neither function is allocating and this change makes it more verbose. Additionally, the Split(..)[0] pattern is used throughout Syft whereas the other form is not; without a demonstrated benefit here, I think the should not be changed.

dependencies[depName] = normalizedVersion
} else {
dependencies[depName] = depVersion
}
}

packages[pkgKey] = pnpmPackage{Name: name, Version: ver, Integrity: integrity, Dependencies: dependencies, Dev: pkgInfo.Dev}
Expand All @@ -143,7 +147,7 @@ func (p *pnpmV9LockYaml) Parse(_ float64, data []byte) ([]pnpmPackage, error) {
log.WithFields("key", key).Trace("unable to parse pnpm v9 package key")
continue
}
pkgKey := name + "@" + ver
pkgKey := strings.Join([]string{name, ver}, "@")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the same less efficient change.

packages[pkgKey] = pnpmPackage{Name: name, Version: ver, Integrity: entry.Resolution["integrity"], Dev: entry.Dev}
}

Expand All @@ -153,12 +157,16 @@ func (p *pnpmV9LockYaml) Parse(_ float64, data []byte) ([]pnpmPackage, error) {
log.WithFields("key", key).Trace("unable to parse pnpm v9 package snapshot key")
continue
}
pkgKey := name + "@" + ver
pkgKey := strings.Join([]string{name, ver}, "@")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another instance.

if pkg, ok := packages[pkgKey]; ok {
pkg.Dependencies = make(map[string]string)
pkg.Dependencies = make(map[string]string, len(snapshotInfo.Dependencies))
for name, versionSpecifier := range snapshotInfo.Dependencies {
var normalizedVersion = strings.SplitN(versionSpecifier, "(", 2)[0]
pkg.Dependencies[name] = normalizedVersion
// Use strings.Cut for more efficient splitting
if normalizedVersion, _, ok := strings.Cut(versionSpecifier, "("); ok {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another Cut change.

pkg.Dependencies[name] = normalizedVersion
} else {
pkg.Dependencies[name] = versionSpecifier
}
}
packages[pkgKey] = pkg
} else {
Expand Down
60 changes: 42 additions & 18 deletions syft/pkg/cataloger/javascript/parse_yarn_lock.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package javascript

import (
"bufio"
"bytes"
"context"
"fmt"
Expand Down Expand Up @@ -71,20 +72,26 @@
}
}

func parseYarnV1LockFile(reader io.ReadCloser) ([]yarnPackage, error) {
content, err := io.ReadAll(reader)
if err != nil {
return nil, fmt.Errorf("failed to read yarn.lock file: %w", err)
// isYarnV1Lockfile checks if the reader contains a v1 yarn lockfile by peeking at the first few bytes
func isYarnV1Lockfile(reader *bufio.Reader) (bool, error) {
// Peek at first 100 bytes to check for v1 marker
peek, err := reader.Peek(100)
if err != nil && err != io.EOF {
return false, err
}
return bytes.Contains(peek, []byte("# yarn lockfile v1")), nil
}

re := regexp.MustCompile(`\r?\n`)
lines := re.Split(string(content), -1)
// parseYarnV1LockFile parses a v1 yarn.lock file using line-by-line scanning
func parseYarnV1LockFile(reader io.Reader) ([]yarnPackage, error) {
scanner := bufio.NewScanner(reader)
var pkgs []yarnPackage
var pkg = yarnPackage{}
var seenPkgs = strset.New()
dependencies := make(map[string]string)

for _, line := range lines {
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "#") {
continue
}
Expand All @@ -101,8 +108,7 @@
// The first line of a package entry is the name of the package with no
// leading spaces
if !strings.HasPrefix(line, " ") {
name := line
pkg.Name = findPackageName(name)
pkg.Name = findPackageName(line)
continue
}
if strings.HasPrefix(line, " ") && !strings.HasPrefix(line, " ") {
Expand Down Expand Up @@ -133,6 +139,11 @@
dependencies[dependencyName] = dependencyVersion
}
}

if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("failed to read yarn.lock file: %w", err)
}

// If the last package in the list is not the same as the current package, add the current package
// to the list. In case there was no trailing new line before we hit EOF.
if len(pkg.Name) > 0 && !seenPkgs.Has(pkg.Name+"@"+pkg.Version) {
Expand Down Expand Up @@ -171,21 +182,34 @@
return nil, nil, nil
}

data, err := io.ReadAll(reader)
// Wrap reader in bufio.Reader for peeking at the lockfile version
bufReader := bufio.NewReader(reader)

var yarnPkgs []yarnPackage
var err error

// v1 Yarn lockfiles are not YAML, so we parse them line-by-line for memory efficiency.
// v2+ lockfiles are YAML and require loading the entire file.
// We peek at the first few bytes to determine the version.
isV1, err := isYarnV1Lockfile(bufReader)
if err != nil {
return nil, nil, fmt.Errorf("failed to load yarn.lock file: %w", err)
return nil, nil, fmt.Errorf("failed to determine yarn.lock version: %w", err)
}
// Reset the reader to the beginning of the file
reader.ReadCloser = io.NopCloser(bytes.NewBuffer(data))

var yarnPkgs []yarnPackage
// v1 Yarn lockfiles are not YAML, so we need to parse them as a special case. They typically
// include a comment line that indicates the version. I.e. "# yarn lockfile v1"
if strings.Contains(string(data), "# yarn lockfile v1") {
yarnPkgs, err = parseYarnV1LockFile(reader)
if isV1 {
// For v1, parse line-by-line without loading entire file into memory
yarnPkgs, err = parseYarnV1LockFile(bufReader)
} else {
// For v2+, we need to load the entire file as YAML
data, err := io.ReadAll(bufReader)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this should just be able to pass the bufReader to an io.NopCloser and not have to io.ReadAll, either

if err != nil {
return nil, nil, fmt.Errorf("failed to load yarn.lock file: %w", err)
}
// Reset the reader to the beginning of the file
reader.ReadCloser = io.NopCloser(bytes.NewBuffer(data))
yarnPkgs, err = parseYarnLockYaml(reader)

Check failure on line 210 in syft/pkg/cataloger/javascript/parse_yarn_lock.go

View workflow job for this annotation

GitHub Actions / Static analysis

SA4006: this value of err is never used (staticcheck)

Check failure on line 210 in syft/pkg/cataloger/javascript/parse_yarn_lock.go

View workflow job for this annotation

GitHub Actions / Static analysis

ineffectual assignment to err (ineffassign)
}

if err != nil {
return nil, nil, fmt.Errorf("failed to parse yarn.lock file: %w", err)
}
Expand Down
Loading