Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add repo dependency ingester #5030

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions docs/docs/ref/proto.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ require (
github.com/openfga/openfga v1.8.0
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c
github.com/prometheus/client_golang v1.20.5
github.com/protobom/protobom v0.5.0
github.com/puzpuzpuz/xsync/v3 v3.4.0
github.com/robfig/cron/v3 v3.0.1
github.com/rs/zerolog v1.33.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,8 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/protobom/protobom v0.5.0 h1:jJYqGpdHq99zwh0/n1SOPl1aickCBZdA8pHS9V/f+XQ=
github.com/protobom/protobom v0.5.0/go.mod h1:HL47tggz7SXYXgNm3WjQQrWB6iOirYnrATsXAEyTUkI=
github.com/puzpuzpuz/xsync v1.5.2 h1:yRAP4wqSOZG+/4pxJ08fPTwrfL0IzE/LKQ/cw509qGY=
github.com/puzpuzpuz/xsync v1.5.2/go.mod h1:K98BYhX3k1dQ2M63t1YNVDanbwUPmBCAhNmVrrxfiGg=
github.com/puzpuzpuz/xsync/v3 v3.4.0 h1:DuVBAdXuGFHv8adVXjWWZ63pJq+NRXOWVXlKDBZ+mJ4=
Expand Down
2 changes: 1 addition & 1 deletion internal/auth/keycloak/client/client.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

213 changes: 213 additions & 0 deletions internal/engine/ingester/deps/deps.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
// SPDX-FileCopyrightText: Copyright 2024 The Minder Authors
// SPDX-License-Identifier: Apache-2.0

// Package deps provides the deps rule data ingest engine
package deps

import (
"context"
"errors"
"fmt"

"github.com/go-git/go-billy/v5"
"github.com/go-git/go-billy/v5/helper/iofs"
"github.com/go-viper/mapstructure/v2"
scalibr "github.com/google/osv-scalibr"
"github.com/google/osv-scalibr/extractor/filesystem/list"
scalibr_fs "github.com/google/osv-scalibr/fs"
scalibr_plugin "github.com/google/osv-scalibr/plugin"
"github.com/google/uuid"
"github.com/protobom/protobom/pkg/sbom"
"google.golang.org/protobuf/reflect/protoreflect"

engerrors "github.com/mindersec/minder/internal/engine/errors"
pb "github.com/mindersec/minder/pkg/api/protobuf/go/minder/v1"
"github.com/mindersec/minder/pkg/engine/v1/interfaces"
"github.com/mindersec/minder/pkg/entities/v1/checkpoints"
provifv1 "github.com/mindersec/minder/pkg/providers/v1"
)

const (
// DepsRuleDataIngestType is the type of the deps rule data ingest engine
DepsRuleDataIngestType = "deps"
// defaultBranch is the last-resort branch name, used when neither the user
// parameters, the rule-type configuration, nor the repository provide one.
defaultBranch = "main"
)

// Deps is the engine for a rule type that uses deps data ingest
type Deps struct {
// cfg is the rule-type ingest configuration. It is returned by GetConfig
// and consulted for a branch override when choosing the branch to clone.
cfg *pb.DepsType
// gitprov is the git provider used to clone the repository being scanned.
gitprov provifv1.Git
}

// Config is the set of parameters to the deps rule data ingest engine.
// It is decoded from the rule's params map with mapstructure.
type Config struct {
// Branch optionally names the branch to clone and scan. When non-empty it
// takes precedence over every other source of the branch name.
Branch string `json:"branch" yaml:"branch" mapstructure:"branch"`
}

// NewDepsIngester builds a deps rule data ingest engine on top of the given
// git provider.
//
// A nil gitprov is rejected with an error. A nil cfg is tolerated and replaced
// by an empty DepsType so the returned engine always carries a configuration.
func NewDepsIngester(cfg *pb.DepsType, gitprov provifv1.Git) (*Deps, error) {
	if gitprov == nil {
		return nil, fmt.Errorf("provider is nil")
	}

	ingester := &Deps{cfg: cfg, gitprov: gitprov}
	if ingester.cfg == nil {
		ingester.cfg = &pb.DepsType{}
	}
	return ingester, nil
}

// GetType returns the type of the deps rule data ingest engine
func (*Deps) GetType() string {
return DepsRuleDataIngestType
}

// GetConfig returns the config for the deps rule data ingest engine
func (gi *Deps) GetConfig() protoreflect.ProtoMessage {
return gi.cfg
}

// Ingest performs the data ingestion for a rule type: the repository is
// cloned and its contents are scanned for dependencies with scalibr.
//
// Only *pb.Repository entities are accepted; any other entity type yields an
// error.
func (gi *Deps) Ingest(ctx context.Context, ent protoreflect.ProtoMessage, params map[string]any) (*interfaces.Result, error) {
	repo, isRepo := ent.(*pb.Repository)
	if !isRepo {
		return nil, fmt.Errorf("deps is only supported for repositories")
	}
	return gi.ingestRepository(ctx, repo, params)
}
// ingestRepository clones repo at the resolved branch through the git
// provider, scans the cloned worktree for dependencies, and returns them
// together with a checkpoint recording the branch and the HEAD commit hash.
//
// params is decoded into a Config; its Branch, when set, overrides every other
// branch source. A missing branch is reported as ErrEvaluationFailed and an
// empty repository as ErrEvaluationSkipped; any other clone error is returned
// unchanged.
func (gi *Deps) ingestRepository(ctx context.Context, repo *pb.Repository, params map[string]any) (*interfaces.Result, error) {
	var userCfg Config
	if err := mapstructure.Decode(params, &userCfg); err != nil {
		// NOTE(review): the message says "git ingester" although this is the
		// deps ingester — presumably carried over from the git ingester;
		// confirm before rewording.
		return nil, fmt.Errorf("failed to read git ingester configuration from params: %w", err)
	}

	cloneURL := repo.GetCloneUrl()
	if cloneURL == "" {
		return nil, fmt.Errorf("could not get clone url")
	}

	branch := gi.getBranch(repo, userCfg.Branch)

	// The clone is consumed purely through the go-billy filesystem
	// abstraction rather than the host filesystem, so that this can run in a
	// sandboxed environment without access to the underlying filesystem.
	clone, err := gi.gitprov.Clone(ctx, cloneURL, branch)
	switch {
	case err == nil:
		// cloned successfully; carry on below
	case errors.Is(err, provifv1.ErrProviderGitBranchNotFound):
		return nil, fmt.Errorf("%w: %s: branch %s", engerrors.ErrEvaluationFailed,
			provifv1.ErrProviderGitBranchNotFound, branch)
	case errors.Is(err, provifv1.ErrRepositoryEmpty):
		return nil, fmt.Errorf("%w: %s", engerrors.ErrEvaluationSkipped, provifv1.ErrRepositoryEmpty)
	default:
		return nil, err
	}

	worktree, err := clone.Worktree()
	if err != nil {
		return nil, fmt.Errorf("could not get worktree: %w", err)
	}

	deps, err := scanFs(ctx, worktree.Filesystem)
	if err != nil {
		return nil, fmt.Errorf("could not scan filesystem: %w", err)
	}

	head, err := clone.Head()
	if err != nil {
		return nil, fmt.Errorf("could not get head: %w", err)
	}
	commit := head.Hash()

	result := &interfaces.Result{
		Object: deps,
		Checkpoint: checkpoints.NewCheckpointV1Now().
			WithBranch(branch).
			WithCommitHash(commit.String()),
	}
	return result, nil
}

// getBranch resolves the branch to clone. Sources are tried in order of
// precedence and the first non-empty one wins:
//  1. branch, the value the user supplied in the rule parameters;
//  2. the branch set in the rule-type ingest configuration;
//  3. the default branch recorded on the repository;
//  4. the package-level defaultBranch.
func (gi *Deps) getBranch(repo *pb.Repository, branch string) string {
	// If the user has specified a branch, use that
	if branch != "" {
		return branch
	}

	// If the branch is provided in the rule-type configuration, use that.
	// Only the generated getters are used here because they tolerate nil
	// receivers: the configuration may have no repo section at all
	// (NewDepsIngester substitutes an empty DepsType for a nil config), in
	// which case GetRepo() returns nil and reading its Branch field directly
	// would panic.
	if cfgBranch := gi.cfg.GetRepo().GetBranch(); cfgBranch != "" {
		return cfgBranch
	}

	// Otherwise prefer the default branch recorded on the repository
	if repoBranch := repo.GetDefaultBranch(); repoBranch != "" {
		return repoBranch
	}

	// Nothing was configured anywhere; fall back to the built-in default
	return defaultBranch
}

// scanFs runs the scalibr filesystem extractors over memFS and converts the
// reported inventories into a protobom NodeList.
//
// Each inventory becomes one PACKAGE node with a freshly generated UUID as its
// id, the inventory name and version, a PURL identifier, and one "sourceFile"
// property per reported location.
//
// An error is returned when memFS cannot be adapted to the filesystem
// interface scalibr requires, when the scan yields no result or status, or
// when the scan status is anything other than ScanStatusSucceeded.
func scanFs(ctx context.Context, memFS billy.Filesystem) (*sbom.NodeList, error) {
// have to down-cast here, because scalibr needs multiple io/fs types
wrapped, ok := iofs.New(memFS).(scalibr_fs.FS)
if !ok {
return nil, fmt.Errorf("error converting filesystem to ReadDirFS")
}

// Capabilities are used twice below: to filter the extractor list and as
// the capabilities declared in the scan configuration.
desiredCaps := scalibr_plugin.Capabilities{
OS: scalibr_plugin.OSLinux,
Network: true,
DirectFS: false,
RunningSystem: false,
}

scalibrFs := scalibr_fs.ScanRoot{FS: wrapped}
scanConfig := scalibr.ScanConfig{
ScanRoots: []*scalibr_fs.ScanRoot{&scalibrFs},
// All includes Ruby, Dotnet which we're not ready to test yet, so use the more limited Default set.
FilesystemExtractors: list.FilterByCapabilities(list.Default, &desiredCaps),
Capabilities: &desiredCaps,
}

scanner := scalibr.New()
scanResults := scanner.Scan(ctx, &scanConfig)

// Scan returns no error value; failure is detected from the result status.
if scanResults == nil || scanResults.Status == nil {
return nil, fmt.Errorf("error scanning files: no results")
}
if scanResults.Status.Status != scalibr_plugin.ScanStatusSucceeded {
return nil, fmt.Errorf("error scanning files: %s", scanResults.Status)
}

res := sbom.NewNodeList()
for _, inv := range scanResults.Inventories {
node := &sbom.Node{
Type: sbom.Node_PACKAGE,
Id: uuid.New().String(),
Name: inv.Name,
Version: inv.Version,
Identifiers: map[int32]string{
// NOTE(review): presumably ToPURL can return nil for inventories an
// extractor cannot express as a PURL — confirm against scalibr; if so,
// calling String() on the result needs a nil guard.
int32(sbom.SoftwareIdentifierType_PURL): inv.Extractor.ToPURL(inv).String(),
// TODO: scalibr returns a _list_ of CPEs, but protobom will store one.
// use the first?
Copy link
Contributor

@puerco puerco Nov 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally the most specific one. Now that package URL has official support for ranges, the one-purl-per-package rule is now obsolete. This means that, perhaps, we should create a new identifier list to capture more than one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm slightly confused about what you're suggesting here. I can see why PURLs supporting ranges would be useful in general, but I'm not sure if you're suggesting:

  1. That upstream protobom needs a change in the identifiers type.
  2. That Scalibr needs a change in the ToPURL function.
  3. That this code should be parsing the purl / Inventory and creating multiple Node objects when the Inventory contains certain types of data.

Looking at the scalibr code, it looks like CPEs are only reported from the SPDX and CDX extractors, so multiple (or > 0) CPEs seems moot for our current use cases. :puzzled:

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant number 1, but there are use cases when you may also want to return purls with ranges, more than one purl, and/or more than one CPE. It all depends on what you are trying to match.

// int32(sbom.SoftwareIdentifierType_CPE23): inv.Extractor.ToCPEs(inv),
},
}
// Record every file the package was found in as a "sourceFile" property.
for _, l := range inv.Locations {
node.Properties = append(node.Properties, &sbom.Property{
Name: "sourceFile",
Data: l,
})
}
res.AddNode(node)
}

return res, nil
}
7 changes: 7 additions & 0 deletions internal/engine/ingester/ingester.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/mindersec/minder/internal/engine/ingester/artifact"
"github.com/mindersec/minder/internal/engine/ingester/builtin"
"github.com/mindersec/minder/internal/engine/ingester/deps"
"github.com/mindersec/minder/internal/engine/ingester/diff"
"github.com/mindersec/minder/internal/engine/ingester/git"
"github.com/mindersec/minder/internal/engine/ingester/rest"
Expand Down Expand Up @@ -65,6 +66,12 @@ func NewRuleDataIngest(rt *pb.RuleType, provider provinfv1.Provider) (interfaces
return nil, errors.New("provider does not implement github trait")
}
return diff.NewDiffIngester(ing.GetDiff(), client)
case deps.DepsRuleDataIngestType:
client, err := provinfv1.As[provinfv1.Git](provider)
if err != nil {
return nil, errors.New("provider does not implement git trait")
}
return deps.NewDepsIngester(ing.GetDeps(), client)
default:
return nil, fmt.Errorf("unsupported rule type engine: %s", rt.Def.Ingest.Type)
}
Expand Down
2 changes: 1 addition & 1 deletion internal/proto/internal.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 25 additions & 3 deletions pkg/api/openapi/minder/v1/minder.swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading