Skip to content

Commit

Permalink
Auto Multiline v2 - Add Legacy Regex Support (#29573)
Browse files Browse the repository at this point in the history
  • Loading branch information
gh123man authored and grantseltzer committed Oct 2, 2024
1 parent f68aa2a commit 01b950d
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 11 deletions.
52 changes: 41 additions & 11 deletions pkg/logs/internal/decoder/auto_multiline_detection/user_samples.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
package automultilinedetection

import (
"regexp"

"github.com/DataDog/datadog-agent/pkg/config/model"
"github.com/DataDog/datadog-agent/pkg/logs/internal/decoder/auto_multiline_detection/tokens"
"github.com/DataDog/datadog-agent/pkg/util/log"
Expand All @@ -22,6 +24,8 @@ type UserSample struct {
// From a user perspective, this is how similar the log has to be to the sample to be considered a match.
// Optional - Default value is 0.75.
MatchThreshold *float64 `mapstructure:"match_threshold,omitempty"`
// Regex is a pattern used to aggregate logs. NOTE that you can use either a sample or a regex, but not both:
// when both are set, the sample is used and the regex is ignored.
Regex string `mapstructure:"regex,omitempty"`
// Label is the label to apply to the log message if it matches the sample.
// Optional - Default value is "start_group".
Label *string `mapstructure:"label,omitempty"`
Expand All @@ -30,6 +34,7 @@ type UserSample struct {
tokens []tokens.Token
matchThreshold float64
label Label
compiledRegex *regexp.Regexp
}

// UserSamples is a heuristic that represents a collection of user-defined samples for auto multi-line aggregation.
Expand All @@ -50,21 +55,40 @@ func NewUserSamples(config model.Reader) *UserSamples {
}
}

legacyAdditionalPatterns := config.GetStringSlice("logs_config.auto_multi_line_extra_patterns")
if len(legacyAdditionalPatterns) > 0 {
log.Warn("Found deprecated logs_config.auto_multi_line_extra_patterns converting to logs_config.auto_multi_line_detection_custom_samples")
for _, pattern := range legacyAdditionalPatterns {
s = append(s, &UserSample{
Regex: pattern,
})
}
}

parsedSamples := make([]*UserSample, 0, len(s))
for _, sample := range s {
if sample.Sample == "" {
log.Warn("Sample was empty, skipping sample")
continue
}
sample.tokens, _ = tokenizer.tokenize([]byte(sample.Sample))
if sample.MatchThreshold != nil {
if *sample.MatchThreshold <= 0 || *sample.MatchThreshold > 1 {
log.Warnf("Invalid match threshold %f, skipping sample", *sample.MatchThreshold)
if sample.Sample != "" {
sample.tokens, _ = tokenizer.tokenize([]byte(sample.Sample))

if sample.MatchThreshold != nil {
if *sample.MatchThreshold <= 0 || *sample.MatchThreshold > 1 {
log.Warnf("Invalid match threshold %f, skipping sample", *sample.MatchThreshold)
continue
}
sample.matchThreshold = *sample.MatchThreshold
} else {
sample.matchThreshold = defaultMatchThreshold
}
} else if sample.Regex != "" {
compiled, err := regexp.Compile("^" + sample.Regex)
if err != nil {
log.Warn(sample.Regex, " is not a valid regular expression - skipping")
continue
}
sample.matchThreshold = *sample.MatchThreshold
sample.compiledRegex = compiled
} else {
sample.matchThreshold = defaultMatchThreshold
log.Warn("Sample and regex was empty, skipping")
continue
}

if sample.Label != nil {
Expand Down Expand Up @@ -100,7 +124,13 @@ func (j *UserSamples) ProcessAndContinue(context *messageContext) bool {
}

for _, sample := range j.samples {
if isMatch(sample.tokens, context.tokens, sample.matchThreshold) {
if sample.compiledRegex != nil {
if sample.compiledRegex.Match(context.rawMessage) {
context.label = sample.label
context.labelAssignedBy = "user_sample"
return false
}
} else if isMatch(sample.tokens, context.tokens, sample.matchThreshold) {
context.label = sample.label
context.labelAssignedBy = "user_sample"
return false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,80 @@ logs_config:
assert.Equal(t, test.expectedLabel, context.label, "Expected label %v, got %v", test.expectedLabel, context.label)
}
}

// TestUserPatternsRegexProcess checks that regex-based user samples label
// matching messages. It covers both the deprecated
// logs_config.auto_multi_line_extra_patterns setting and the regex field of
// logs_config.auto_multi_line_detection_custom_samples.
func TestUserPatternsRegexProcess(t *testing.T) {
	// Both pattern lists must be nested under logs_config for the config
	// reader to find them at their dotted keys.
	datadogYaml := `
logs_config:
  auto_multi_line_extra_patterns:
    - "le\\wacy"
  auto_multi_line_detection_custom_samples:
    - regex: "(foo|bar)test\\d+"
`

	mockConfig := mock.NewFromYAML(t, datadogYaml)
	samples := NewUserSamples(mockConfig)
	tokenizer := NewTokenizer(60)

	tests := []struct {
		expectedLabel Label
		// shouldStop is compared directly against the return value of
		// samples.ProcessAndContinue; in the cases below it is false exactly
		// when a user sample matched the input.
		shouldStop bool
		input      string
	}{
		{aggregate, true, ""},
		{aggregate, true, "some random log line"},
		{aggregate, true, "2023-03-28T14:33:53.743350Z App started successfully"},
		{startGroup, false, "footest123 some other log line"},
		{startGroup, false, "bartest123 some other log line"},
		{startGroup, false, "legacy pattern should match me"},
		{aggregate, true, "!!![$Not_close_enough%] some other log line"},
	}

	for _, test := range tests {
		context := &messageContext{
			rawMessage: []byte(test.input),
			label:      aggregate,
		}

		assert.True(t, tokenizer.ProcessAndContinue(context))

		// Run the heuristic exactly once per case. Assertion message
		// arguments are evaluated eagerly, so calling ProcessAndContinue
		// inside them would process the same context a second time.
		got := samples.ProcessAndContinue(context)
		assert.Equal(t, test.shouldStop, got, "Expected stop %v, got %v", test.shouldStop, got)
		assert.Equal(t, test.expectedLabel, context.label, "Expected label %v, got %v", test.expectedLabel, context.label)
	}
}

// TestUserPatternsProcessRegexCustomSettings checks that a regex-based custom
// sample configured with an explicit label (no_aggregate) applies that label
// to matching messages and leaves non-matching messages untouched.
func TestUserPatternsProcessRegexCustomSettings(t *testing.T) {
	// The label key belongs to the same list item as the regex, and the list
	// must be nested under logs_config for the config reader to find it.
	datadogYaml := `
logs_config:
  auto_multi_line_detection_custom_samples:
    - regex: "(foo|bar)test\\d+"
      label: no_aggregate
`

	mockConfig := mock.NewFromYAML(t, datadogYaml)
	samples := NewUserSamples(mockConfig)
	tokenizer := NewTokenizer(60)

	tests := []struct {
		expectedLabel Label
		// shouldStop is compared directly against the return value of
		// samples.ProcessAndContinue; in the cases below it is false exactly
		// when the user sample matched the input.
		shouldStop bool
		input      string
	}{
		{aggregate, true, ""},
		{aggregate, true, "some random log line"},
		{aggregate, true, "2023-03-28T14:33:53.743350Z App started successfully"},
		{noAggregate, false, "footest123 some other log line"},
		{noAggregate, false, "bartest123 some other log line"},
	}

	for _, test := range tests {
		context := &messageContext{
			rawMessage: []byte(test.input),
			label:      aggregate,
		}

		assert.True(t, tokenizer.ProcessAndContinue(context))

		// Run the heuristic exactly once per case. Assertion message
		// arguments are evaluated eagerly, so calling ProcessAndContinue
		// inside them would process the same context a second time.
		got := samples.ProcessAndContinue(context)
		assert.Equal(t, test.shouldStop, got, "Expected stop %v, got %v", test.shouldStop, got)
		assert.Equal(t, test.expectedLabel, context.label, "Expected label %v, got %v", test.expectedLabel, context.label)
	}
}

0 comments on commit 01b950d

Please sign in to comment.