Skip to content

Commit

Permalink
feat(tagger): implement remote External Data resolution
Browse files Browse the repository at this point in the history
Signed-off-by: Wassim DHIF <[email protected]>
  • Loading branch information
wdhif committed Dec 8, 2024
1 parent 512bca5 commit 1134007
Show file tree
Hide file tree
Showing 20 changed files with 732 additions and 162 deletions.
4 changes: 4 additions & 0 deletions comp/api/api/apiimpl/grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ func (s *serverSecure) TaggerStreamEntities(req *pb.StreamTagsRequest, srv pb.Ag
return s.taggerServer.TaggerStreamEntities(req, srv)
}

// TaggerGenerateContainerIDFromExternalData forwards the request to the
// tagger server held by serverSecure and returns its response and error
// unchanged.
func (s *serverSecure) TaggerGenerateContainerIDFromExternalData(ctx context.Context, req *pb.GenerateContainerIDFromExternalDataRequest) (*pb.GenerateContainerIDFromExternalDataResponse, error) {
return s.taggerServer.TaggerGenerateContainerIDFromExternalData(ctx, req)
}

// TaggerFetchEntity forwards the request to the tagger server held by
// serverSecure and returns its response and error unchanged.
func (s *serverSecure) TaggerFetchEntity(ctx context.Context, req *pb.FetchEntityRequest) (*pb.FetchEntityResponse, error) {
return s.taggerServer.TaggerFetchEntity(ctx, req)
}
Expand Down
2 changes: 2 additions & 0 deletions comp/core/tagger/def/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package tagger
import (
"context"

"github.com/DataDog/datadog-agent/comp/core/tagger/origindetection"
"github.com/DataDog/datadog-agent/comp/core/tagger/telemetry"
"github.com/DataDog/datadog-agent/comp/core/tagger/types"
taggertypes "github.com/DataDog/datadog-agent/pkg/tagger/types"
Expand Down Expand Up @@ -37,6 +38,7 @@ type Component interface {
// integrations using the tagger
LegacyTag(entity string, cardinality types.TagCardinality) ([]string, error)
Tag(entityID types.EntityID, cardinality types.TagCardinality) ([]string, error)
GenerateContainerIDFromExternalData(externalData origindetection.ExternalData) (string, error)
AccumulateTagsFor(entityID types.EntityID, cardinality types.TagCardinality, tb tagset.TagsAccumulator) error
Standard(entityID types.EntityID) ([]string, error)
List() types.TaggerListResponse
Expand Down
7 changes: 7 additions & 0 deletions comp/core/tagger/impl-noop/tagger.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"context"

tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
"github.com/DataDog/datadog-agent/comp/core/tagger/origindetection"
"github.com/DataDog/datadog-agent/comp/core/tagger/telemetry"
"github.com/DataDog/datadog-agent/comp/core/tagger/types"
taggertypes "github.com/DataDog/datadog-agent/pkg/tagger/types"
Expand Down Expand Up @@ -49,6 +50,12 @@ func (n *noopTagger) LegacyTag(string, types.TagCardinality) ([]string, error) {
return nil, nil
}

// GenerateContainerIDFromExternalData generates a container ID from the external data.
// This is a no-op for the noop tagger: the external data is ignored and an
// empty container ID is always returned together with a nil error.
func (n *noopTagger) GenerateContainerIDFromExternalData(origindetection.ExternalData) (string, error) {
return "", nil
}

// AccumulateTagsFor is a no-op for the noop tagger: all arguments are ignored
// and nil is always returned.
func (n *noopTagger) AccumulateTagsFor(types.EntityID, types.TagCardinality, tagset.TagsAccumulator) error {
return nil
}
Expand Down
68 changes: 68 additions & 0 deletions comp/core/tagger/impl-remote/remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/DataDog/datadog-agent/comp/core/config"
log "github.com/DataDog/datadog-agent/comp/core/log/def"
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
"github.com/DataDog/datadog-agent/comp/core/tagger/origindetection"
"github.com/DataDog/datadog-agent/comp/core/tagger/telemetry"
"github.com/DataDog/datadog-agent/comp/core/tagger/types"
"github.com/DataDog/datadog-agent/comp/core/tagger/utils"
Expand All @@ -47,6 +48,8 @@ const (

var (
	// errTaggerStreamNotStarted is the sentinel error reporting that the
	// tagger stream has not been started.
	errTaggerStreamNotStarted = errors.New("tagger stream not started")

	// errTaggerFailedGenerateContainerIDFromExternalData is the sentinel error
	// reporting that no container ID could be generated from external data.
	errTaggerFailedGenerateContainerIDFromExternalData = errors.New("tagger failed to generate container ID from external data")
)

// Requires defines the dependencies for the remote tagger.
type Requires struct {
compdef.In
Expand Down Expand Up @@ -82,6 +85,9 @@ type remoteTagger struct {
streamCancel context.CancelFunc
filter *types.Filter

queryCtx context.Context
queryCancel context.CancelFunc

ctx context.Context
cancel context.CancelFunc

Expand Down Expand Up @@ -250,6 +256,68 @@ func (t *remoteTagger) LegacyTag(entity string, cardinality types.TagCardinality
return t.Tag(entityID, cardinality)
}

// GenerateContainerIDFromExternalData asks the remote tagger, over gRPC, to
// resolve the given external data into a container ID.
//
// Each attempt fetches a fresh auth token and sends it as a Bearer token in
// the "authorization" metadata of the outgoing request. An attempt is retried
// with an exponential backoff (500ms initial interval, 1s maximum interval,
// 15s maximum elapsed time) when the token cannot be fetched, when the gRPC
// call fails, or when the response is nil. If the tagger context is already
// done, no attempt is made and
// errTaggerFailedGenerateContainerIDFromExternalData is reported as a
// permanent (non-retried) error.
//
// It returns the resolved container ID, or an empty string and the error that
// stopped the retries.
func (t *remoteTagger) GenerateContainerIDFromExternalData(externalData origindetection.ExternalData) (string, error) {
	expBackoff := backoff.NewExponentialBackOff()
	expBackoff.InitialInterval = 500 * time.Millisecond
	expBackoff.MaxInterval = 1 * time.Second
	expBackoff.MaxElapsedTime = 15 * time.Second

	var containerID string

	err := backoff.Retry(func() error {
		// Non-blocking check: flush telemetry if a tick is pending, and give up
		// for good once the tagger context is done.
		// NOTE(review): this receives from the shared telemetry ticker; confirm
		// that taking a tick here does not starve another reader of that channel.
		select {
		case <-t.telemetryTicker.C:
			t.store.collectTelemetry()
		case <-t.ctx.Done():
			return &backoff.PermanentError{Err: errTaggerFailedGenerateContainerIDFromExternalData}
		default:
		}

		// Counts every attempt, including retries.
		// NOTE(review): confirm both ExternalData counters are initialised by
		// the telemetry store constructor.
		t.telemetryStore.ExternalDataRequests.Inc()

		// Fetch the auth token
		token, err := t.options.TokenFetcher()
		if err != nil {
			_ = t.log.Errorf("unable to fetch auth token, will possibly retry: %s", err)
			return err
		}

		// Create the context with the auth token. The context is kept local to
		// this attempt (not stored on the tagger) so that concurrent calls do not
		// overwrite each other's context, and it is always released when the
		// attempt ends.
		queryCtx, queryCancel := context.WithCancel(
			metadata.NewOutgoingContext(t.ctx, metadata.MD{
				"authorization": []string{fmt.Sprintf("Bearer %s", token)},
			}),
		)
		defer queryCancel()

		// Call the GRPC method to get the container ID from the external data
		containerIDResponse, err := t.client.TaggerGenerateContainerIDFromExternalData(queryCtx, &pb.GenerateContainerIDFromExternalDataRequest{
			Init:          externalData.Init,
			ContainerName: externalData.ContainerName,
			PodUID:        externalData.PodUID,
		})
		if err != nil {
			_ = t.log.Errorf("unable to generate container ID from external data, will retry: %s", err)
			return err
		}

		if containerIDResponse == nil {
			// err is nil on this path, so there is no underlying error to format.
			_ = t.log.Warnf("unable to generate container ID from external data, will retry: response is nil")
			return errors.New("containerIDResponse is nil")
		}
		containerID = containerIDResponse.ContainerID

		// Only successful resolutions are counted here; attempts are counted above.
		t.telemetryStore.ExternalDataSuccess.Inc()
		t.log.Debugf("Container ID generated successfully from external data %+v: %s", externalData, containerID)
		return nil
	}, expBackoff)

	if err != nil {
		return "", err
	}
	return containerID, nil
}

// AccumulateTagsFor returns tags for a given entity at the desired cardinality.
func (t *remoteTagger) AccumulateTagsFor(entityID types.EntityID, cardinality types.TagCardinality, tb tagset.TagsAccumulator) error {
tags, err := t.Tag(entityID, cardinality)
Expand Down
7 changes: 7 additions & 0 deletions comp/core/tagger/impl/local_tagger.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/DataDog/datadog-agent/comp/core/config"
"github.com/DataDog/datadog-agent/comp/core/tagger/collectors"
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
"github.com/DataDog/datadog-agent/comp/core/tagger/origindetection"
"github.com/DataDog/datadog-agent/comp/core/tagger/tagstore"
"github.com/DataDog/datadog-agent/comp/core/tagger/telemetry"
"github.com/DataDog/datadog-agent/comp/core/tagger/types"
Expand Down Expand Up @@ -99,6 +100,12 @@ func (t *localTagger) Tag(entityID types.EntityID, cardinality types.TagCardinal
return tags.Copy(), nil
}

// GenerateContainerIDFromExternalData generates a container ID from the external data.
// This is a no-op for the local tagger: the external data is ignored and an
// empty container ID is always returned together with a nil error.
func (t *localTagger) GenerateContainerIDFromExternalData(_ origindetection.ExternalData) (string, error) {
return "", nil
}

// LegacyTag has the same behaviour as the Tag method, but it receives the entity id as a string and parses it.
// If possible, avoid using this function, and use the Tag method instead.
// This function exists in order not to break backward compatibility with rtloader and python
Expand Down
7 changes: 7 additions & 0 deletions comp/core/tagger/impl/replay_tagger.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
"github.com/DataDog/datadog-agent/comp/core/tagger/origindetection"
"github.com/DataDog/datadog-agent/comp/core/tagger/tagstore"
"github.com/DataDog/datadog-agent/comp/core/tagger/telemetry"
"github.com/DataDog/datadog-agent/comp/core/tagger/types"
Expand Down Expand Up @@ -77,6 +78,12 @@ func (t *replayTagger) LegacyTag(entity string, cardinality types.TagCardinality
return t.Tag(entityID, cardinality)
}

// GenerateContainerIDFromExternalData generates a container ID from the external data.
// This is a no-op for the replay tagger: the external data is ignored and an
// empty container ID is always returned together with a nil error.
func (t *replayTagger) GenerateContainerIDFromExternalData(origindetection.ExternalData) (string, error) {
return "", nil
}

// AccumulateTagsFor returns tags for a given entity at the desired cardinality.
func (t *replayTagger) AccumulateTagsFor(entityID types.EntityID, cardinality types.TagCardinality, tb tagset.TagsAccumulator) error {
tags := t.store.LookupHashed(entityID, cardinality)
Expand Down
7 changes: 7 additions & 0 deletions comp/core/tagger/impl/tagger.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
log "github.com/DataDog/datadog-agent/comp/core/log/def"
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
taggermock "github.com/DataDog/datadog-agent/comp/core/tagger/mock"
"github.com/DataDog/datadog-agent/comp/core/tagger/origindetection"
"github.com/DataDog/datadog-agent/comp/core/tagger/telemetry"
"github.com/DataDog/datadog-agent/comp/core/tagger/types"
"github.com/DataDog/datadog-agent/comp/core/tagger/utils"
Expand Down Expand Up @@ -538,6 +539,12 @@ func (t *TaggerWrapper) EnrichTags(tb tagset.TagsAccumulator, originInfo taggert
}
}

// GenerateContainerIDFromExternalData resolves the external data into a
// container ID. It obtains the meta collector from the metrics provider built
// on the wrapper's workloadmeta handle and looks the container up by pod UID,
// container name and init flag, passing time.Second as the final argument.
func (t *TaggerWrapper) GenerateContainerIDFromExternalData(externalData origindetection.ExternalData) (string, error) {
	wmetaOption := optional.NewOption(t.wmeta)
	collector := metrics.GetProvider(wmetaOption).GetMetaCollector()

	return collector.ContainerIDForPodUIDAndContName(
		externalData.PodUID,
		externalData.ContainerName,
		externalData.Init,
		time.Second,
	)
}

// generateContainerIDFromExternalData generates a container ID from the external data
func (t *TaggerWrapper) generateContainerIDFromExternalData(e externalData, metricsProvider provider.ContainerIDForPodUIDAndContNameRetriever) (string, error) {
return metricsProvider.ContainerIDForPodUIDAndContName(e.podUID, e.containerName, e.init, time.Second)
Expand Down
6 changes: 6 additions & 0 deletions comp/core/tagger/mock/fake_tagger.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"strconv"

tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
"github.com/DataDog/datadog-agent/comp/core/tagger/origindetection"
"github.com/DataDog/datadog-agent/comp/core/tagger/tagstore"
"github.com/DataDog/datadog-agent/comp/core/tagger/telemetry"
"github.com/DataDog/datadog-agent/comp/core/tagger/types"
Expand Down Expand Up @@ -118,6 +119,11 @@ func (f *FakeTagger) LegacyTag(entity string, cardinality types.TagCardinality)
return f.Tag(entityID, cardinality)
}

// GenerateContainerIDFromExternalData fake implementation: the external data
// is ignored and an empty container ID is always returned with a nil error.
func (f *FakeTagger) GenerateContainerIDFromExternalData(origindetection.ExternalData) (string, error) {
return "", nil
}

// GlobalTags fake implementation
func (f *FakeTagger) GlobalTags(cardinality types.TagCardinality) ([]string, error) {
return f.Tag(types.GetGlobalEntityID(), cardinality)
Expand Down
3 changes: 3 additions & 0 deletions comp/core/tagger/origindetection/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module github.com/DataDog/datadog-agent/comp/core/tagger/origindetection

go 1.22.0
69 changes: 69 additions & 0 deletions comp/core/tagger/origindetection/origindetection.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2016-present Datadog, Inc.

package origindetection

import (
"strconv"
"strings"
)

// ProductOrigin is the origin of the product that sent the entity.
type ProductOrigin int

// Known ProductOrigin values, numbered sequentially from 0.
const (
	// ProductOriginDogStatsDLegacy is the ProductOrigin for DogStatsD in Legacy mode.
	// TODO: remove this when dogstatsd_origin_detection_unified is enabled by default
	ProductOriginDogStatsDLegacy ProductOrigin = iota
	// ProductOriginDogStatsD is the ProductOrigin for DogStatsD.
	ProductOriginDogStatsD
	// ProductOriginAPM is the ProductOrigin for APM.
	ProductOriginAPM
)

// External Data Prefixes
// These prefixes are used to build the External Data Environment Variable.
const (
	// ExternalDataInitPrefix is the prefix of the item carrying the init flag.
	ExternalDataInitPrefix = "it-"
	// ExternalDataContainerNamePrefix is the prefix of the item carrying the container name.
	ExternalDataContainerNamePrefix = "cn-"
	// ExternalDataPodUIDPrefix is the prefix of the item carrying the pod UID.
	ExternalDataPodUIDPrefix = "pu-"
)

// OriginInfo contains the Origin Detection information.
// It groups the origin identifiers available for an entity together with the
// requested cardinality and the product that sent them.
type OriginInfo struct {
ContainerIDFromSocket string // ContainerIDFromSocket is the origin resolved using Unix Domain Socket.
PodUID string // PodUID is the origin resolved from the Kubernetes Pod UID.
ContainerID string // ContainerID is the origin resolved from the container ID.
ExternalData ExternalData // ExternalData is the parsed External Data (init flag, container name and pod UID).
Cardinality string // Cardinality is the cardinality of the resolved origin.
ProductOrigin ProductOrigin // ProductOrigin is the product that sent the origin information.
}

// ExternalData contains the parsed external data items.
// ParseExternalData fills it from a comma-separated list of prefixed items.
type ExternalData struct {
// Init is the boolean parsed from the item prefixed with ExternalDataInitPrefix.
// NOTE(review): presumably tells whether the container is an init container - confirm.
Init bool
// ContainerName is the value of the item prefixed with ExternalDataContainerNamePrefix.
ContainerName string
// PodUID is the value of the item prefixed with ExternalDataPodUIDPrefix.
PodUID string
}

// GenerateContainerIDFromExternalData is the signature of a function that
// takes parsed ExternalData and returns a container ID or an error.
type GenerateContainerIDFromExternalData func(externalData ExternalData) (string, error)

// ParseExternalData parses the External Data string into an ExternalData.
//
// externalEnv is a comma-separated list of items, each recognised by its prefix:
//   - ExternalDataInitPrefix: the remainder is parsed with strconv.ParseBool
//     into Init;
//   - ExternalDataContainerNamePrefix: the remainder becomes ContainerName;
//   - ExternalDataPodUIDPrefix: the remainder becomes PodUID.
//
// Items with any other prefix are ignored. When a prefix appears more than
// once, the last occurrence sets the field. An empty externalEnv yields a zero
// ExternalData and a nil error.
//
// If an init item cannot be parsed as a boolean, parsing continues and the
// first such error is returned together with the fields that could be parsed.
// The error is no longer discarded when a later init item parses correctly.
func ParseExternalData(externalEnv string) (ExternalData, error) {
	if externalEnv == "" {
		return ExternalData{}, nil
	}

	var (
		externalData ExternalData
		parsingError error
	)
	for _, item := range strings.Split(externalEnv, ",") {
		switch {
		case strings.HasPrefix(item, ExternalDataInitPrefix):
			isInit, err := strconv.ParseBool(strings.TrimPrefix(item, ExternalDataInitPrefix))
			// ParseBool returns false on failure, so Init is reset by a malformed item.
			externalData.Init = isInit
			if err != nil && parsingError == nil {
				// Keep the first failure so a later valid item cannot mask it.
				parsingError = err
			}
		case strings.HasPrefix(item, ExternalDataContainerNamePrefix):
			externalData.ContainerName = strings.TrimPrefix(item, ExternalDataContainerNamePrefix)
		case strings.HasPrefix(item, ExternalDataPodUIDPrefix):
			externalData.PodUID = strings.TrimPrefix(item, ExternalDataPodUIDPrefix)
		}
	}
	return externalData, parsingError
}
17 changes: 17 additions & 0 deletions comp/core/tagger/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/google/uuid"

tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
"github.com/DataDog/datadog-agent/comp/core/tagger/origindetection"
"github.com/DataDog/datadog-agent/comp/core/tagger/proto"
"github.com/DataDog/datadog-agent/comp/core/tagger/types"
pb "github.com/DataDog/datadog-agent/pkg/proto/pbgo/core"
Expand Down Expand Up @@ -165,3 +166,19 @@ func (s *Server) TaggerFetchEntity(_ context.Context, in *pb.FetchEntityRequest)
Tags: tags,
}, nil
}

// TaggerGenerateContainerIDFromExternalData requests the generation of a
// container ID from external data from the Tagger.
//
// The request fields are copied into an origindetection.ExternalData and
// handed to the tagger component. Any error it returns is reported to the
// caller with the InvalidArgument gRPC status code.
func (s *Server) TaggerGenerateContainerIDFromExternalData(_ context.Context, in *pb.GenerateContainerIDFromExternalDataRequest) (*pb.GenerateContainerIDFromExternalDataResponse, error) {
	externalData := origindetection.ExternalData{
		Init:          in.Init,
		ContainerName: in.ContainerName,
		PodUID:        in.PodUID,
	}

	containerID, err := s.taggerComponent.GenerateContainerIDFromExternalData(externalData)
	if err != nil {
		return nil, status.Errorf(codes.InvalidArgument, "%s", err)
	}

	response := &pb.GenerateContainerIDFromExternalDataResponse{ContainerID: containerID}
	return response, nil
}
7 changes: 7 additions & 0 deletions comp/core/tagger/telemetry/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ type Store struct {
// notification with a group of events.
Receives telemetry.Counter

// ExternalDataRequests tracks the number of requests to the tagger
// to generate a container ID from external data.
ExternalDataRequests telemetry.Counter

// ExternalDataSuccess tracks the number of successful resolutions of ExternalData.
ExternalDataSuccess telemetry.Counter

LowCardinalityQueries CardinalityTelemetry
OrchestratorCardinalityQueries CardinalityTelemetry
HighCardinalityQueries CardinalityTelemetry
Expand Down
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ replace (
github.com/DataDog/datadog-agent/comp/core/secrets => ./comp/core/secrets
github.com/DataDog/datadog-agent/comp/core/status => ./comp/core/status
github.com/DataDog/datadog-agent/comp/core/status/statusimpl => ./comp/core/status/statusimpl
github.com/DataDog/datadog-agent/comp/core/tagger/origindetection => ./comp/core/tagger/origindetection
github.com/DataDog/datadog-agent/comp/core/tagger/tags => ./comp/core/tagger/tags
github.com/DataDog/datadog-agent/comp/core/tagger/types => ./comp/core/tagger/types
github.com/DataDog/datadog-agent/comp/core/tagger/utils => ./comp/core/tagger/utils
Expand Down Expand Up @@ -612,6 +613,7 @@ require (
github.com/kouhin/envflag v0.0.0-20150818174321-0e9a86061649
github.com/lorenzosaino/go-sysctl v0.3.1
github.com/open-telemetry/opentelemetry-collector-contrib/pkg/datadog v0.114.0
go.opentelemetry.io/collector/component/componenttest v0.114.0
go.opentelemetry.io/collector/config/configtelemetry v0.114.0
)

Expand Down Expand Up @@ -651,6 +653,7 @@ require (
github.com/DataDog/datadog-agent/comp/core/secrets v0.59.0
github.com/DataDog/datadog-agent/comp/core/status v0.59.0-rc.6
github.com/DataDog/datadog-agent/comp/core/status/statusimpl v0.56.0-rc.3
github.com/DataDog/datadog-agent/comp/core/tagger/origindetection v0.56.0-rc.3
github.com/DataDog/datadog-agent/comp/core/tagger/tags v0.0.0-00010101000000-000000000000
github.com/DataDog/datadog-agent/comp/core/tagger/types v0.59.0
github.com/DataDog/datadog-agent/comp/core/telemetry v0.59.0
Expand Down
2 changes: 2 additions & 0 deletions modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ modules:
comp/core/status:
used_by_otel: true
comp/core/status/statusimpl: default
comp/core/tagger/origindetection:
used_by_otel: true
comp/core/tagger/tags:
used_by_otel: true
comp/core/tagger/types:
Expand Down
16 changes: 16 additions & 0 deletions pkg/proto/datadog/api/v1/api.proto
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,22 @@ service AgentSecure {
};
};

// Generates a container ID from External Data.
// Can be called through the HTTP gateway, and the container ID will be returned as JSON:
// $ curl -H "authorization: Bearer $(cat /etc/datadog-agent/auth_token)" \
// -XPOST -k -H "Content-Type: application/json" \
// --data '{"init": false,"podUID": "54383382-cea3-49e3-9dda-325436ddd5b8","containerName": "dd-trace-py"}' \
// https://localhost:5001/v1/grpc/tagger/generate_container_id_from_external_data
// {
// "containerID":"7e61cd59e787ba99bd8420df93eabdea9307c436c0a27cfcbe2235aa0c80e36f"
// }
rpc TaggerGenerateContainerIDFromExternalData(datadog.model.v1.GenerateContainerIDFromExternalDataRequest) returns (datadog.model.v1.GenerateContainerIDFromExternalDataResponse) {
option (google.api.http) = {
post: "/v1/grpc/tagger/generate_container_id_from_external_data"
body: "*"
};
};

// fetches an entity from the Tagger with the desired cardinality tags.
// can be called through the HTTP gateway, and entity will be returned as JSON:
// $ curl -H "authorization: Bearer $(cat /etc/datadog-agent/auth_token)" \
Expand Down
Loading

0 comments on commit 1134007

Please sign in to comment.