Skip to content

Commit

Permalink
Auto-create tracing spans for log groups
Browse files Browse the repository at this point in the history
This will make it easy for CI users to trace and analyze their CI jobs
without requiring additional work or scripting on their part. They can
just look at tracing information on their tracing provider.
  • Loading branch information
goodspark committed Aug 22, 2023
1 parent d3d0341 commit 4ee6553
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 4 deletions.
1 change: 1 addition & 0 deletions agent/agent_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,5 @@ type AgentConfiguration struct {
AcquireJob string
TracingBackend string
TracingServiceName string
TraceLogGroups bool
}
13 changes: 12 additions & 1 deletion clicommand/agent_start.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ type AgentStartConfig struct {
MetricsDatadogDistributions bool `cli:"metrics-datadog-distributions"`
TracingBackend string `cli:"tracing-backend"`
TracingServiceName string `cli:"tracing-service-name"`
TraceLogGroups bool `cli:"trace-log-groups"`

// Global flags
Debug bool `cli:"debug"`
Expand Down Expand Up @@ -582,6 +583,11 @@ var AgentStartCommand = cli.Command{
EnvVar: "BUILDKITE_TRACING_SERVICE_NAME",
Value: "buildkite-agent",
},
cli.BoolFlag{
Name: "trace-log-groups",
Usage: "Automatically creates tracing spans for log groups when tracing is enabled.",
EnvVar: "BUILDKITE_TRACE_LOG_GROUPS",
},
cli.StringFlag{
Name: "job-verification-key-path",
Usage: "Path to a file containing a verification key. Passing this flag enables job verification. For hmac-sha256, the raw file content is used as the shared key",
Expand Down Expand Up @@ -800,9 +806,13 @@ var AgentStartCommand = cli.Command{
})

// Sense check supported tracing backends, we don't want bootstrapped jobs to silently have no tracing
if _, has := tracetools.ValidTracingBackends[cfg.TracingBackend]; !has {
_, tracingEnabled := tracetools.ValidTracingBackends[cfg.TracingBackend]
if !tracingEnabled {
l.Fatal("The given tracing backend %q is not supported. Valid backends are: %q", cfg.TracingBackend, maps.Keys(tracetools.ValidTracingBackends))
}
if !tracingEnabled && cfg.TraceLogGroups {
l.Fatal("Log group tracing cannot be enabled without a tracing backend.")
}

if experiments.IsEnabled(experiments.AgentAPI) {
shutdown := runAgentAPI(ctx, l, cfg.SocketsPath)
Expand Down Expand Up @@ -864,6 +874,7 @@ var AgentStartCommand = cli.Command{
AcquireJob: cfg.AcquireJob,
TracingBackend: cfg.TracingBackend,
TracingServiceName: cfg.TracingServiceName,
TraceLogGroups: cfg.TraceLogGroups,
JobVerificationNoSignatureBehavior: cfg.JobVerificationNoSignatureBehavior,
JobVerificationInvalidSignatureBehavior: cfg.JobVerificationInvalidSignatureBehavior,

Expand Down
7 changes: 7 additions & 0 deletions clicommand/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ type BootstrapConfig struct {
RedactedVars []string `cli:"redacted-vars" normalize:"list"`
TracingBackend string `cli:"tracing-backend"`
TracingServiceName string `cli:"tracing-service-name"`
TraceLogGroups bool `cli:"trace-log-groups"`
}

var BootstrapCommand = cli.Command{
Expand Down Expand Up @@ -353,6 +354,11 @@ var BootstrapCommand = cli.Command{
EnvVar: "BUILDKITE_TRACING_SERVICE_NAME",
Value: "buildkite-agent",
},
cli.BoolFlag{
Name: "trace-log-groups",
Usage: "Automatically creates tracing spans for log groups when tracing is enabled.",
EnvVar: "BUILDKITE_TRACE_LOG_GROUPS",
},
DebugFlag,
LogLevelFlag,
ExperimentsFlag,
Expand Down Expand Up @@ -437,6 +443,7 @@ var BootstrapCommand = cli.Command{
Tag: cfg.Tag,
TracingBackend: cfg.TracingBackend,
TracingServiceName: cfg.TracingServiceName,
TraceLogGroups: cfg.TraceLogGroups,
})

cctx, cancel := context.WithCancel(ctx)
Expand Down
3 changes: 3 additions & 0 deletions internal/job/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ type ExecutorConfig struct {

// Service name to use when reporting traces.
TracingServiceName string

// Whether to auto-create tracing spans for log groups.
TraceLogGroups bool
}

// ReadFromEnvironment reads configuration from the Environment, returns a map
Expand Down
1 change: 1 addition & 0 deletions internal/job/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ func (e *Executor) Run(ctx context.Context) (exitCode int) {
e.shell.Debug = e.ExecutorConfig.Debug
e.shell.InterruptSignal = e.ExecutorConfig.CancelSignal
e.shell.SignalGracePeriod = e.ExecutorConfig.SignalGracePeriod
e.shell.TraceLogGroups = e.ExecutorConfig.TraceLogGroups
}
if experiments.IsEnabled(experiments.KubernetesExec) {
kubernetesClient := &kubernetes.Client{}
Expand Down
37 changes: 34 additions & 3 deletions internal/job/shell/shell.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ type Shell struct {

// Amount of time to wait between sending the InterruptSignal and SIGKILL
SignalGracePeriod time.Duration

// Whether to auto create tracing spans for log groups
TraceLogGroups bool
}

// New returns a new Shell
Expand Down Expand Up @@ -532,6 +535,32 @@ func round(d time.Duration) time.Duration {
}
}

// spanMakerWriter is an io.Writer shim that captures logs and automatically creates trace spans for the log group.
type spanMakerWriter struct {
w io.Writer
ctx context.Context
span opentracing.Span
}

func (s *spanMakerWriter) Write(p []byte) (n int, err error) {
if bytes.HasPrefix(p, []byte("~~~ ")) || bytes.HasPrefix(p, []byte("--- ")) || bytes.HasPrefix(p, []byte("+++ ")) {
s.FinishIfActive()
operationName, _, _ := strings.Cut(string(p[4:]), "\r\n")
// We don't store the context bc we don't need to create child spans (yet). If we stored it, every log group would
// look like a child of the previous log group, where they're all more like siblings under the same parent span,
// since Buildkite itself has no concept of log group hierarchy.
s.span, _ = opentracing.StartSpanFromContext(s.ctx, operationName)
}
return s.w.Write(p)
}

func (s *spanMakerWriter) FinishIfActive() {
if s.span != nil {
s.span.Finish()
s.span = nil
}
}

func (s *Shell) executeCommand(
ctx context.Context,
cmd *command,
Expand All @@ -542,6 +571,8 @@ func (s *Shell) executeCommand(
tracedEnv := env.FromSlice(cmd.Env)
s.injectTraceCtx(ctx, tracedEnv)
cmd.Env = tracedEnv.ToSlice()
logToSpanWriter := &spanMakerWriter{w: w, ctx: ctx, span: nil}
defer func() { logToSpanWriter.FinishIfActive() }()

s.cmdLock.Lock()
s.cmd = cmd
Expand All @@ -561,11 +592,11 @@ func (s *Shell) executeCommand(
// Modify process config based on execution flags
if flags.PTY {
cfg.PTY = true
cfg.Stdout = w
cfg.Stdout = logToSpanWriter
} else {
// Show stdout if requested or via debug
if flags.Stdout {
cfg.Stdout = w
cfg.Stdout = logToSpanWriter
} else if s.Debug {
stdOutStreamer := NewLoggerStreamer(s.Logger)
defer stdOutStreamer.Close()
Expand All @@ -574,7 +605,7 @@ func (s *Shell) executeCommand(

// Show stderr if requested or via debug
if flags.Stderr {
cfg.Stderr = w
cfg.Stderr = logToSpanWriter
} else if s.Debug {
stdErrStreamer := NewLoggerStreamer(s.Logger)
defer stdErrStreamer.Close()
Expand Down

0 comments on commit 4ee6553

Please sign in to comment.