diff --git a/api/v1/models.go b/api/v1/models.go index dce3c3658..0e62ac5e3 100644 --- a/api/v1/models.go +++ b/api/v1/models.go @@ -143,7 +143,7 @@ func LayerFromDatabaseModel(db database.Datastore, dbLayer database.Layer, linea layer.Features = append(layer.Features, *feature) } if !uncertifiedRHEL && namespaces.IsRHELNamespace(layer.NamespaceName) { - certified, err := addRHELv2Vulns(db, &layer) + certified, err := addRHELv2Vulns(db, &layer, lineage) if err != nil { return layer, notes, err } @@ -212,7 +212,7 @@ func ComponentsFromDatabaseModel(db database.Datastore, dbLayer *database.Layer, if !uncertifiedRHEL && namespaces.IsRHELNamespace(namespaceName) { var certified bool var err error - rhelv2PkgEnvs, certified, err = getRHELv2PkgEnvs(db, dbLayer.Name) + rhelv2PkgEnvs, certified, err = getRHELv2PkgEnvs(db, dbLayer.Name, lineage) if err != nil { return nil, err } diff --git a/api/v1/models_rhelv2.go b/api/v1/models_rhelv2.go index 47ec47759..58c5cd9d3 100644 --- a/api/v1/models_rhelv2.go +++ b/api/v1/models_rhelv2.go @@ -29,8 +29,8 @@ const ( // certified as part of Red Hat's Scanner Certification Program. // The returned bool indicates if full certified scanning was performed. // This is typically only `false` for images without proper CPE information. -func addRHELv2Vulns(db database.Datastore, layer *Layer) (bool, error) { - pkgEnvs, cpesExist, err := getRHELv2PkgEnvs(db, layer.Name) +func addRHELv2Vulns(db database.Datastore, layer *Layer, lineage string) (bool, error) { + pkgEnvs, cpesExist, err := getRHELv2PkgEnvs(db, layer.Name, lineage) if err != nil { return false, err } @@ -197,8 +197,8 @@ func shareCPEs(layers []*database.RHELv2Layer) bool { } // getRHELv2PkgEnvs returns a map from package ID to package environment and a bool to indicate CPEs exist in the image. -func getRHELv2PkgEnvs(db database.Datastore, layerName string) (map[int]*database.RHELv2PackageEnv, bool, error) { - layers, err := db.GetRHELv2Layers(layerName) +func getRHELv2PkgEnvs(db database.Datastore, layerName, layerLineage string) (map[int]*database.RHELv2PackageEnv, bool, error) { + layers, err := db.GetRHELv2Layers(layerName, layerLineage) if err != nil { return nil, false, err } diff --git a/api/v1/models_rhelv2_test.go b/api/v1/models_rhelv2_test.go index 9ae3eb7bf..96730736f 100644 --- a/api/v1/models_rhelv2_test.go +++ b/api/v1/models_rhelv2_test.go @@ -29,7 +29,7 @@ func newMockRHELv2Datastore() *mockRHELv2Datastore { layers: make(map[string][]*database.RHELv2Layer), vulns: make(map[int][]*database.RHELv2Vulnerability), } - db.FctGetRHELv2Layers = func(layer string) ([]*database.RHELv2Layer, error) { + db.FctGetRHELv2Layers = func(layer, lineage string) ([]*database.RHELv2Layer, error) { return db.layers[layer], nil } db.FctGetRHELv2Vulnerabilities = func(records []*database.RHELv2Record) (map[int][]*database.RHELv2Vulnerability, error) { diff --git a/database/database.go b/database/database.go index 3a4faa229..c7bab884f 100644 --- a/database/database.go +++ b/database/database.go @@ -145,7 +145,7 @@ type Datastore interface { // GetRHELv2Layers retrieves the corresponding layers for the image // represented by the given layer. // The returned slice is sorted in order from base layer to top. - GetRHELv2Layers(layer string) ([]*RHELv2Layer, error) + GetRHELv2Layers(layer, lineage string) ([]*RHELv2Layer, error) // GetRHELv2Vulnerabilities retrieves RHELv2 vulnerabilities based on the given records. // The returned value maps package ID to the related vulnerabilities. diff --git a/database/mock.go b/database/mock.go index af6a1cc60..4b656ed03 100644 --- a/database/mock.go +++ b/database/mock.go @@ -31,7 +31,7 @@ type MockDatastore struct { FctFindLayer func(name, lineage string, opts *DatastoreOptions) (Layer, error) FctDeleteLayer func(name string) error FctInsertRHELv2Layer func(*RHELv2Layer) error - FctGetRHELv2Layers func(layer string) ([]*RHELv2Layer, error) + FctGetRHELv2Layers func(layer, lineage string) ([]*RHELv2Layer, error) FctGetRHELv2Vulnerabilities func(records []*RHELv2Record) (map[int][]*RHELv2Vulnerability, error) FctListVulnerabilities func(namespaceName string, limit int, page int) ([]Vulnerability, int, error) FctInsertVulnerabilities func(vulnerabilities []Vulnerability) error @@ -82,9 +82,9 @@ func (mds *MockDatastore) InsertRHELv2Layer(layer *RHELv2Layer) error { panic("required mock function not implemented") } -func (mds *MockDatastore) GetRHELv2Layers(layer string) ([]*RHELv2Layer, error) { +func (mds *MockDatastore) GetRHELv2Layers(layer, layerLineage string) ([]*RHELv2Layer, error) { if mds.FctGetRHELv2Layers != nil { - return mds.FctGetRHELv2Layers(layer) + return mds.FctGetRHELv2Layers(layer, layerLineage) } panic("required mock function not implemented") } diff --git a/database/models.go b/database/models.go index 52b7475da..c3bb9d6c8 100644 --- a/database/models.go +++ b/database/models.go @@ -205,11 +205,13 @@ func (p *RHELv2Package) GetPackageVersion() string { type RHELv2Layer struct { Model - Hash string - ParentHash string - Dist string - Pkgs []*RHELv2Package - CPEs []string + Hash string + ParentHash string + Dist string + Pkgs []*RHELv2Package + CPEs []string + Lineage string + ParentLineage string } // RHELv2Components defines the RHELv2 components found in a layer. diff --git a/database/pgsql/migrations/00021_rhel_layer_lineage.go b/database/pgsql/migrations/00021_rhel_layer_lineage.go new file mode 100644 index 000000000..7b1c1c246 --- /dev/null +++ b/database/pgsql/migrations/00021_rhel_layer_lineage.go @@ -0,0 +1,24 @@ +package migrations + +import "github.com/remind101/migrate" + +func init() { + RegisterMigration(migrate.Migration{ + ID: 21, + Up: migrate.Queries([]string{ + + // The lineage column mimics the existing `layer` table. The parent_lineage column is used + // instead of equivalent parent_id column from the 'layer' table to avoid an extra query + // on insert (which would be necessary to determine the parent id). + `ALTER TABLE rhelv2_layer ADD COLUMN IF NOT EXISTS lineage varchar;`, + `ALTER TABLE rhelv2_layer ADD COLUMN IF NOT EXISTS parent_lineage varchar`, + + // Create a new unique constraint that includes lineage (and drop the old constraint) + `ALTER TABLE rhelv2_layer ADD CONSTRAINT rhelv2_layer_hash_lineage_key UNIQUE (hash, lineage)`, + `ALTER TABLE rhelv2_layer DROP CONSTRAINT IF EXISTS rhelv2_layer_hash_key`, + + // Create additional index to improve performance when recursively traversing parents. + `CREATE INDEX IF NOT EXISTS rhelv2_layer_parent_idx on rhelv2_layer (parent_hash, parent_lineage)`, + }), + }) +} diff --git a/database/pgsql/queries.go b/database/pgsql/queries.go index ed93f338e..c7fe1ad41 100644 --- a/database/pgsql/queries.go +++ b/database/pgsql/queries.go @@ -264,9 +264,9 @@ const ( deleteStaleRHELv2Vulns = `DELETE FROM vuln_v2 WHERE name = ANY($1::text[]) and package_name = ANY($2::text[]) and cpe = ANY($3::text[]) and package_module = $4;` insertRHELv2Layer = ` - INSERT INTO rhelv2_layer (hash, parent_hash, dist, cpes) - VALUES ($1, $2, $3, $4) - ON CONFLICT (hash) DO NOTHING;` + INSERT INTO rhelv2_layer (hash, parent_hash, dist, cpes, lineage, parent_lineage) + VALUES ($1, $2, $3, $4, $5, $6) + ON CONFLICT (hash, lineage) DO NOTHING;` // Inside the `WITH RECURSIVE`, the base case is the top query, and the // recursive case is the bottom query. @@ -276,15 +276,17 @@ const ( // This query looks for all the layers in the given layer's hierarchy. searchRHELv2Layers = ` WITH RECURSIVE layers AS ( - SELECT id, hash, parent_hash, dist, cpes + SELECT id, hash, parent_hash, dist, cpes, lineage, parent_lineage FROM rhelv2_layer - WHERE hash = $1 - UNION - SELECT l.id, l.hash, l.parent_hash, l.dist, l.cpes + WHERE hash = $1 + AND lineage = $2 + UNION + SELECT l.id, l.hash, l.parent_hash, l.dist, l.cpes, l.lineage, l.parent_lineage FROM layers ll, rhelv2_layer l WHERE ll.parent_hash = l.hash + AND ll.parent_lineage = l.lineage ) - SELECT id, hash, dist, cpes + SELECT id, hash, dist, cpes, lineage FROM layers;` insertRHELv2Package = ` @@ -304,7 +306,8 @@ const ( layer AS ( SELECT id AS layer_id FROM rhelv2_layer - WHERE rhelv2_layer.hash = $5 + WHERE rhelv2_layer.hash = $5 + AND rhelv2_layer.lineage = $6 ) INSERT INTO rhelv2_package_scanartifact (layer_id, package_id) @@ -324,7 +327,8 @@ const ( rhelv2_package_scanartifact LEFT JOIN rhelv2_package ON rhelv2_package_scanartifact.package_id = rhelv2_package.id - JOIN rhelv2_layer ON rhelv2_layer.hash = $1 + JOIN rhelv2_layer ON rhelv2_layer.hash = $1 + AND rhelv2_layer.lineage = $2 WHERE rhelv2_package_scanartifact.layer_id = rhelv2_layer.id;` diff --git a/database/pgsql/rhelv2_layer.go b/database/pgsql/rhelv2_layer.go index 6b05d8cad..7bfda747d 100644 --- a/database/pgsql/rhelv2_layer.go +++ b/database/pgsql/rhelv2_layer.go @@ -30,7 +30,7 @@ func (pgSQL *pgSQL) InsertRHELv2Layer(layer *database.RHELv2Layer) error { return err } - if err := pgSQL.insertRHELv2Packages(tx, layer.Hash, layer.Pkgs); err != nil { + if err := pgSQL.insertRHELv2Packages(tx, layer.Hash, layer.Pkgs, layer.Lineage); err != nil { utils.IgnoreError(tx.Rollback) return err } @@ -46,11 +46,11 @@ func (pgSQL *pgSQL) InsertRHELv2Layer(layer *database.RHELv2Layer) error { func (pgSQL *pgSQL) insertRHELv2Layer(tx *sql.Tx, layer *database.RHELv2Layer) error { defer metrics.ObserveQueryTime("insertRHELv2Layer", "layer", time.Now()) - _, err := tx.Exec(insertRHELv2Layer, layer.Hash, layer.ParentHash, layer.Dist, pq.Array(layer.CPEs)) + _, err := tx.Exec(insertRHELv2Layer, layer.Hash, layer.ParentHash, layer.Dist, pq.Array(layer.CPEs), layer.Lineage, layer.ParentLineage) return err } -func (pgSQL *pgSQL) insertRHELv2Packages(tx *sql.Tx, layer string, pkgs []*database.RHELv2Package) error { +func (pgSQL *pgSQL) insertRHELv2Packages(tx *sql.Tx, layer string, pkgs []*database.RHELv2Package, lineage string) error { // Sort packages to avoid potential deadlock. // Sort by the unique index (name, version, module, arch). sort.SliceStable(pkgs, func(i, j int) bool { @@ -91,6 +91,7 @@ func (pgSQL *pgSQL) insertRHELv2Packages(tx *sql.Tx, layer string, pkgs []*datab pkg.Module, pkg.Arch, layer, + lineage, ) if err != nil { @@ -101,7 +102,7 @@ func (pgSQL *pgSQL) insertRHELv2Packages(tx *sql.Tx, layer string, pkgs []*datab return nil } -func (pgSQL *pgSQL) GetRHELv2Layers(layerHash string) ([]*database.RHELv2Layer, error) { +func (pgSQL *pgSQL) GetRHELv2Layers(layerHash, layerLineage string) ([]*database.RHELv2Layer, error) { defer metrics.ObserveQueryTime("getRHELv2Layers", "all", time.Now()) tx, err := pgSQL.BeginTx(context.Background(), &sql.TxOptions{ @@ -111,7 +112,7 @@ func (pgSQL *pgSQL) GetRHELv2Layers(layerHash string) ([]*database.RHELv2Layer, return nil, handleError("GetRHELv2Layers.Begin()", err) } - rows, err := tx.Query(searchRHELv2Layers, layerHash) + rows, err := tx.Query(searchRHELv2Layers, layerHash, layerLineage) if err != nil { return nil, err } @@ -124,7 +125,7 @@ func (pgSQL *pgSQL) GetRHELv2Layers(layerHash string) ([]*database.RHELv2Layer, rhelv2Layer database.RHELv2Layer cpes []string ) - if err := rows.Scan(&rhelv2Layer.ID, &rhelv2Layer.Hash, &rhelv2Layer.Dist, pq.Array(&cpes)); err != nil { + if err := rows.Scan(&rhelv2Layer.ID, &rhelv2Layer.Hash, &rhelv2Layer.Dist, pq.Array(&cpes), &rhelv2Layer.Lineage); err != nil { utils.IgnoreError(tx.Rollback) return nil, err } @@ -176,7 +177,7 @@ func (pgSQL *pgSQL) populatePackages(tx *sql.Tx, layers []*database.RHELv2Layer) func (pgSQL *pgSQL) getPackagesByLayer(tx *sql.Tx, layer *database.RHELv2Layer) error { defer metrics.ObserveQueryTime("getRHELv2Layers", "packagesByLayer", time.Now()) - rows, err := tx.Query(searchRHELv2Package, layer.Hash) + rows, err := tx.Query(searchRHELv2Package, layer.Hash, layer.Lineage) if err != nil { return err } diff --git a/database/pgsql/rhelv2_layer_test.go b/database/pgsql/rhelv2_layer_test.go index 68a44e970..1da1ba5c3 100644 --- a/database/pgsql/rhelv2_layer_test.go +++ b/database/pgsql/rhelv2_layer_test.go @@ -8,6 +8,7 @@ import ( "github.com/stackrox/scanner/database" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestInsertRHELv2Layer(t *testing.T) { @@ -123,7 +124,7 @@ func TestGetRHELv2Layers(t *testing.T) { assert.NoError(t, err) // Get parent layer. - layers, err := datastore.GetRHELv2Layers("sha256:howdy") + layers, err := datastore.GetRHELv2Layers("sha256:howdy", "") assert.NoError(t, err) assert.Len(t, layers, 1) layer = layers[0] @@ -132,7 +133,7 @@ func TestGetRHELv2Layers(t *testing.T) { assert.Empty(t, layer.CPEs) // Get 2 layered-image. - layers, err = datastore.GetRHELv2Layers("sha256:howdyhowdy") + layers, err = datastore.GetRHELv2Layers("sha256:howdyhowdy", "") assert.NoError(t, err) assert.Len(t, layers, 2) layer = layers[1] @@ -176,21 +177,17 @@ func TestGetRHELv2Layers(t *testing.T) { assert.NoError(t, err) // Get 3 layered-image. - layers, err = datastore.GetRHELv2Layers("sha256:howdyhowdyhowdy") + layers, err = datastore.GetRHELv2Layers("sha256:howdyhowdyhowdy", "") assert.NoError(t, err) assert.Len(t, layers, 3) layer = layers[2] assert.Equal(t, "sha256:howdyhowdyhowdy", layer.Hash) - for _, pkg := range layer.Pkgs { - pkg.ID = 0 - } + resetPackageIDs(layer) assert.Equal(t, layer2Pkgs, layer.Pkgs) assert.Equal(t, layer2CPEs, layer.CPEs) layer = layers[1] assert.Equal(t, "sha256:howdyhowdy", layer.Hash) - for _, pkg := range layer.Pkgs { - pkg.ID = 0 - } + resetPackageIDs(layer) assert.Equal(t, layer1Pkgs, layer.Pkgs) assert.Equal(t, layer1CPEs, layer.CPEs) layer = layers[0] @@ -198,3 +195,115 @@ func TestGetRHELv2Layers(t *testing.T) { assert.Empty(t, layer.Pkgs) assert.Empty(t, layer.CPEs) } + +// TestRHELv2LayerLineage verifies that data for duplicate layers with different parent +// layers (lineage) is pulled correctly. +func TestRHELv2LayerLineage(t *testing.T) { + + datastore, err := openDatabaseForTest("RHELv2LayerLineage", false) + if err != nil { + t.Error(err) + return + } + defer datastore.Close() + + // Two 'fake' images will be created, each with 3 layers, the DB will resemble: + // id | hash | parent_hash | dist | cpes | lineage | parent_lineage + // ----+-----------------+-----------------+--------+----------------+-----------+---------------- + // 1 | sha256:base | | rhel:8 | | | + // 2 | sha256:layer1-a | sha256:base | rhel:8 | {cpe-a,cpe2-a} | lineage | + // 3 | sha256:layer1-b | sha256:base | rhel:8 | {cpe-b,cpe2-b} | lineage | + // 4 | sha256:leaf | sha256:layer1-a | rhel:8 | | lineage-a | lineage + // 5 | sha256:leaf | sha256:layer1-b | rhel:8 | | lineage-b | lineage + + // base layers + base := &database.RHELv2Layer{ + Hash: "sha256:base", + Dist: "rhel:8", + } + + err = datastore.InsertRHELv2Layer(base) + require.NoError(t, err) + + layer1a := &database.RHELv2Layer{ + Hash: "sha256:layer1-a", + Lineage: "lineage", + ParentHash: "sha256:base", + ParentLineage: "", + Dist: "rhel:8", + Pkgs: []*database.RHELv2Package{ + {Name: "pkg", Version: "v1-a", Arch: "x86_64"}, + {Name: "pkg2", Version: "v2-a", Module: "module", Arch: "i686"}, + }, + CPEs: []string{"cpe-a", "cpe2-a"}, + } + + layer1b := &database.RHELv2Layer{ + Hash: "sha256:layer1-b", + Lineage: "lineage", + ParentHash: "sha256:base", + ParentLineage: "", + Dist: "rhel:8", + Pkgs: []*database.RHELv2Package{ + {Name: "pkg", Version: "v1-b", Arch: "x86_64"}, + {Name: "pkg2", Version: "v2-b", Module: "module", Arch: "i686"}, + }, + CPEs: []string{"cpe-b", "cpe2-b"}, + } + + err = datastore.InsertRHELv2Layer(layer1a) + require.NoError(t, err) + err = datastore.InsertRHELv2Layer(layer1b) + require.NoError(t, err) + + leafa := &database.RHELv2Layer{ + Hash: "sha256:leaf", // for this test all leafs should have same digest + Lineage: "lineage-a", // lineage is specific to layer A + ParentHash: "sha256:layer1-a", + ParentLineage: "lineage", + Dist: "rhel:8", + } + + var leafb = new(database.RHELv2Layer) + *leafb = *leafa + leafb.Lineage = "lineage-b" + leafb.ParentHash = "sha256:layer1-b" + + err = datastore.InsertRHELv2Layer(leafa) + require.NoError(t, err) + err = datastore.InsertRHELv2Layer(leafb) + require.NoError(t, err) + + assertLayersEqual := func(t *testing.T, expected, actual *database.RHELv2Layer) { + resetPackageIDs(actual) + assert.Equal(t, expected.Hash, actual.Hash, "Hash mismatch") + assert.Equal(t, expected.Lineage, actual.Lineage, "Lineage mismatch") + assert.Equal(t, expected.CPEs, actual.CPEs, "CPEs mistmatch") + assert.Equal(t, expected.Pkgs, actual.Pkgs, "Pkgs mismatch") + } + + layers, err := datastore.GetRHELv2Layers("sha256:leaf", "lineage-a") + require.NoError(t, err) + require.Len(t, layers, 3) + + assertLayersEqual(t, base, layers[0]) + assertLayersEqual(t, layer1a, layers[1]) + assertLayersEqual(t, leafa, layers[2]) + + layers, err = datastore.GetRHELv2Layers("sha256:leaf", "lineage-b") + require.NoError(t, err) + require.Len(t, layers, 3) + + assertLayersEqual(t, base, layers[0]) + assertLayersEqual(t, layer1b, layers[1]) + assertLayersEqual(t, leafb, layers[2]) +} + +// resetPackageIDs sets all package IDs to 0. Package IDs are DB sequence numbers +// that will not be deterministic (depending on how tests are written), therefore +// set the IDs to 0 to allow tests pass. +func resetPackageIDs(layer *database.RHELv2Layer) { + for _, pkg := range layer.Pkgs { + pkg.ID = 0 + } +} diff --git a/e2etests/testcase_test.go b/e2etests/testcase_test.go index 323752bf9..3868a0dd0 100644 --- a/e2etests/testcase_test.go +++ b/e2etests/testcase_test.go @@ -4773,4 +4773,47 @@ All OpenShift Container Platform 4.10 users are advised to upgrade to these upda }, }, }, + // START: Lineage Tests + // The order of the next tests is important, the intent is to reproduce the conditions described in ROX-26604 + // in which differing parent layers were not properly handled leading to inaccuracies. These images share top + // and bottom layers, middle layers differ. + // Dockerfiles at: github.com/stackrox/stackrox/qa-tests-backend/test-images/lineage + { + image: "quay.io/rhacs-eng/qa:lineage-jdk-17.0.11", + registry: "https://quay.io", + source: "Red Hat", + username: os.Getenv("QUAY_RHACS_ENG_RO_USERNAME"), + password: os.Getenv("QUAY_RHACS_ENG_RO_PASSWORD"), + onlyCheckSpecifiedVulns: true, + namespace: "rhel:8", + expectedFeatures: []apiV1.Feature{ + { + Name: "java-17-openjdk-headless", + NamespaceName: "rhel:8", + VersionFormat: "rpm", + Version: "1:17.0.11.0.9-2.el8.x86_64", + FixedBy: "1:17.0.13.0.11-3.el8", + AddedBy: "sha256:06c7a3d491f551a56296ccb9bee8a68c83776991e73a9005e8b5ebb533002097", + }, + }, + }, + { + image: "quay.io/rhacs-eng/qa:lineage-jdk-17.0.13", + registry: "https://quay.io", + source: "Red Hat", + username: os.Getenv("QUAY_RHACS_ENG_RO_USERNAME"), + password: os.Getenv("QUAY_RHACS_ENG_RO_PASSWORD"), + onlyCheckSpecifiedVulns: true, + namespace: "rhel:8", + expectedFeatures: []apiV1.Feature{ + { + Name: "java-17-openjdk-headless", + NamespaceName: "rhel:8", + VersionFormat: "rpm", + Version: "1:17.0.13.0.11-3.el8.x86_64", + AddedBy: "sha256:2f7b9495af5ddc85b0be7ca9411fddb54f37999ea73b03cbf1115dd0c5bd4f95", + }, + }, + }, + // END: Lineage Tests } diff --git a/worker.go b/worker.go index 33238c8ca..db4d67d62 100644 --- a/worker.go +++ b/worker.go @@ -123,11 +123,13 @@ func ProcessLayerFromReader(datastore database.Datastore, imageFormat, name, lin parentHash = layer.Parent.Name } rhelv2Layer := &database.RHELv2Layer{ - Hash: layer.Name, - Dist: rhelv2Components.Dist, - Pkgs: rhelv2Components.Packages, - CPEs: rhelv2Components.CPEs, - ParentHash: parentHash, + Hash: layer.Name, + Dist: rhelv2Components.Dist, + Pkgs: rhelv2Components.Packages, + CPEs: rhelv2Components.CPEs, + ParentHash: parentHash, + Lineage: lineage, + ParentLineage: parentLineage, } if err := datastore.InsertRHELv2Layer(rhelv2Layer); err != nil {