diff --git a/go.work b/go.work index d3bb1d0..4560742 100644 --- a/go.work +++ b/go.work @@ -2,6 +2,7 @@ go 1.19 use ( . - ./godeltaprof/ - ./godeltaprof/compat + godeltaprof + godeltaprof/compat + otelpyroscope ) diff --git a/go.work.sum b/go.work.sum index 3b7ea78..e7e4260 100644 --- a/go.work.sum +++ b/go.work.sum @@ -4,9 +4,13 @@ github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3I github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM= github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI= github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= +github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= +github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= github.com/gobwas/ws v1.2.1 h1:F2aeBZrm2NDsc7vbovKrWSogd4wvfAxg0FQ89/iqOTk= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/ianlancetaylor/demangle v0.0.0-20230524184225-eabc099b10ab h1:BA4a7pe6ZTd9F8kXETBoijjFJ/ntaa//1wiH9BZu4zU= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= @@ -15,9 +19,11 @@ github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhA github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= 
+golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck= golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= -golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= +golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8= golang.org/x/term v0.4.0 h1:O7UWfv5+A2qiuulQk30kVinPoMtoIPeVaKLEgLpVkvg= +golang.org/x/term v0.12.0 h1:/ZfYdc3zq+q02Rv9vGqTeSItdzZTSNDmfTi0mBAuidU= golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 h1:9zdDQZ7Thm29KFXgAX/+yaf3eVbP7djjWp/dXAppNCc= diff --git a/otelpyroscope/README.md b/otelpyroscope/README.md new file mode 100644 index 0000000..211678e --- /dev/null +++ b/otelpyroscope/README.md @@ -0,0 +1,22 @@ +# OpenTelemetry tracing integration + +The package provides means to integrate tracing with profiling. More specifically, a `TracerProvider` implementation, +that annotates profiling data with span IDs: when a new trace span emerges, the tracer adds a `span_id` [pprof tag](https://github.com/google/pprof/blob/master/doc/README.md#tag-filtering) +that points to the span. This makes it possible to filter out a profile of a particular trace span in [Pyroscope](https://pyroscope.io). + +## Example + +You can find a complete example setup in the [Pyroscope repository](https://github.com/grafana/pyroscope/tree/main/examples/tracing/tempo). + +## Other Notes + +Note that the module does not control `pprof` profiler itself – it still needs to be started for profiles to be +collected. This can be done either via `runtime/pprof` package, or using the [Pyroscope client](https://github.com/grafana/pyroscope-go). + +By default, only the root span gets labeled (the first span created locally): such spans are marked with the +`pyroscope.profiling.enabled` attribute. 
Please note that presence of the attribute does not indicate that the +span has a profile: stack trace samples might not be collected, if the actual utilized CPU time is less than the +sample interval (10ms). + +Limitations: + - Only CPU profiling is fully supported at the moment. diff --git a/otelpyroscope/go.mod b/otelpyroscope/go.mod new file mode 100644 index 0000000..e787e49 --- /dev/null +++ b/otelpyroscope/go.mod @@ -0,0 +1,16 @@ +module github.com/grafana/pyroscope-go/otelpyroscope + +go 1.19 + +require ( + go.opentelemetry.io/otel v1.19.0 + go.opentelemetry.io/otel/sdk v1.19.0 + go.opentelemetry.io/otel/trace v1.19.0 +) + +require ( + github.com/go-logr/logr v1.2.4 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + go.opentelemetry.io/otel/metric v1.19.0 // indirect + golang.org/x/sys v0.12.0 // indirect +) diff --git a/otelpyroscope/go.sum b/otelpyroscope/go.sum new file mode 100644 index 0000000..92c4a81 --- /dev/null +++ b/otelpyroscope/go.sum @@ -0,0 +1,20 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= +github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= +go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= +go.opentelemetry.io/otel/metric v1.19.0 
h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= +go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8= +go.opentelemetry.io/otel/sdk v1.19.0 h1:6USY6zH+L8uMH8L3t1enZPR3WFEmSTADlqldyHtJi3o= +go.opentelemetry.io/otel/sdk v1.19.0/go.mod h1:NedEbbS4w3C6zElbLdPJKOpJQOrGUJ+GfzpjUvI0v1A= +go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= +go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= +golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/otelpyroscope/otelpyroscope.go b/otelpyroscope/otelpyroscope.go new file mode 100644 index 0000000..d174f7d --- /dev/null +++ b/otelpyroscope/otelpyroscope.go @@ -0,0 +1,173 @@ +package otelpyroscope + +import ( + "context" + "runtime/pprof" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" +) + +const ( + spanIDLabelName = "span_id" + spanNameLabelName = "span_name" +) + +var profilingEnabledSpanAttributeKey = attribute.Key("pyroscope.profiling.enabled") + +type Option func(*tracerProvider) + +// tracerProvider satisfies open telemetry TracerProvider interface. +type tracerProvider struct { + tp trace.TracerProvider + config config +} + +type config struct { + spanNameScope scope + spanIDScope scope +} + +// NewTracerProvider creates a new tracer provider that annotates pprof +// samples with span_id label. This allows to establish a relationship +// between pprof profiles and reported tracing spans. 
+func NewTracerProvider(tp trace.TracerProvider, options ...Option) trace.TracerProvider {
+	p := tracerProvider{
+		tp: tp,
+		config: config{
+			spanNameScope: scopeRootSpan,
+			spanIDScope:   scopeRootSpan,
+		},
+	}
+	for _, o := range options {
+		o(&p)
+	}
+	return &p
+}
+
+func (w *tracerProvider) Tracer(name string, opts ...trace.TracerOption) trace.Tracer {
+	return &profileTracer{p: w, tr: w.tp.Tracer(name, opts...)}
+}
+
+type profileTracer struct {
+	p  *tracerProvider
+	tr trace.Tracer
+}
+
+func (w *profileTracer) Start(ctx context.Context, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
+	ctx, span := w.tr.Start(ctx, spanName, opts...)
+	spanCtx := span.SpanContext()
+	addSpanIDLabel := w.p.config.spanIDScope != scopeNone && spanCtx.IsSampled()
+	addSpanNameLabel := w.p.config.spanNameScope != scopeNone && spanName != ""
+	if !(addSpanIDLabel || addSpanNameLabel) {
+		return ctx, span
+	}
+
+	spanID := spanCtx.SpanID().String()
+	s := spanWrapper{
+		Span: span,
+		ctx:  ctx,
+		p:    w.p,
+	}
+
+	rs, ok := rootSpanFromContext(ctx)
+	if !ok {
+		// No root span in the context: this is the first local span.
+		rs.id = spanID
+		rs.name = spanName
+		ctx = withRootSpan(ctx, rs)
+	}
+
+	// We can't skip labeling goroutines, even if we use the
+	// parent's attributes, because the root span can finish
+	// before all the descendants started (and inherited the
+	// goroutine labels).
+	labels := make([]string, 0, 4)
+	if addSpanNameLabel {
+		if w.p.config.spanNameScope == scopeRootSpan {
+			spanName = rs.name
+		}
+		labels = append(labels, spanNameLabelName, spanName)
+	}
+
+	if addSpanIDLabel {
+		if w.p.config.spanIDScope == scopeRootSpan {
+			spanID = rs.id
+		}
+		labels = append(labels, spanIDLabelName, spanID)
+	}
+
+	// Mark the span with the "pyroscope.profiling.enabled" attribute only
+	// if its own ID was attached to the samples: the local root span (!ok)
+	// in the root scope, or any span in the all-spans scope. The attribute
+	// does not guarantee that any samples were collected for the span.
+	if addSpanIDLabel && (w.p.config.spanIDScope == scopeAllSpans ||
+		(w.p.config.spanIDScope == scopeRootSpan && !ok)) {
+		span.SetAttributes(profilingEnabledSpanAttributeKey.Bool(true))
+	}
+
+	ctx = pprof.WithLabels(ctx, pprof.Labels(labels...))
+	pprof.SetGoroutineLabels(ctx)
+	return ctx, &s
+}
+
+type spanWrapper struct {
+	trace.Span
+	ctx context.Context
+	p   *tracerProvider
+}
+
+func (s spanWrapper) End(options ...trace.SpanEndOption) {
+	s.Span.End(options...)
+	pprof.SetGoroutineLabels(s.ctx)
+}
+
+type rootSpanCtxKey struct{}
+
+type rootSpan struct {
+	id   string
+	name string
+}
+
+func withRootSpan(ctx context.Context, s rootSpan) context.Context {
+	return context.WithValue(ctx, rootSpanCtxKey{}, s)
+}
+
+func rootSpanFromContext(ctx context.Context) (rootSpan, bool) {
+	s, ok := ctx.Value(rootSpanCtxKey{}).(rootSpan)
+	return s, ok
+}
+
+// TODO(kolesnikovae): Make options public.
+
+// withSpanNameLabelScope specifies whether the current span name should be
+// added to the profile labels. If the name is dynamic, i.e. includes
+// span-specific identifiers, such as URL or SQL query, this may significantly
+// deteriorate performance.
+//
+// By default, only the local root span name is recorded. Samples collected
+// during the child span execution will be included into the root span profile.
+func withSpanNameLabelScope(scope scope) Option {
+	return func(tp *tracerProvider) {
+		tp.config.spanNameScope = scope
+	}
+}
+
+// withSpanIDScope specifies whether the current span ID should be added to
+// the profile labels.
+//
+// By default, only the local root span ID is recorded. Samples collected
+// during the child span execution will be included into the root span profile.
+func withSpanIDScope(scope scope) Option {
+	return func(tp *tracerProvider) {
+		tp.config.spanIDScope = scope
+	}
+}
+
+type scope uint
+
+const (
+	scopeNone scope = iota
+	scopeRootSpan
+	scopeAllSpans
+)
diff --git a/otelpyroscope/otelpyroscope_test.go b/otelpyroscope/otelpyroscope_test.go
new file mode 100644
index 0000000..5a62072
--- /dev/null
+++ b/otelpyroscope/otelpyroscope_test.go
@@ -0,0 +1,82 @@
+package otelpyroscope
+
+import (
+	"context"
+	"runtime/pprof"
+	"testing"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/sdk/trace"
+)
+
+func Test_tracerProvider(t *testing.T) {
+	otel.SetTracerProvider(NewTracerProvider(trace.NewTracerProvider()))
+
+	tracer := otel.Tracer("")
+	labels := make(map[string]string)
+
+	ctx, spanR := tracer.Start(context.Background(), "RootSpan")
+	pprof.ForLabels(ctx, func(key, value string) bool {
+		labels[key] = value
+		return true
+	})
+	spanID, ok := labels[spanIDLabelName]
+	if !ok {
+		t.Fatal("span ID label not found")
+	}
+	if len(spanID) != 16 {
+		t.Fatalf("invalid span ID: %q", spanID)
+	}
+	name, ok := labels[spanNameLabelName]
+	if !ok {
+		t.Fatal("span name label not found")
+	}
+	if name != "RootSpan" {
+		t.Fatalf("invalid span name: %q", name)
+	}
+
+	// Nested child span has the same labels.
+	ctx, spanA := tracer.Start(ctx, "SpanA")
+	pprof.ForLabels(ctx, func(key, value string) bool {
+		if v, ok := labels[key]; !ok || v != value {
+			t.Fatalf("nested span labels mismatch: %q=%q", key, value)
+		}
+		return true
+	})
+
+	spanA.End()
+	spanR.End()
+
+	// Child span created after the root span end using its context.
+	ctx, spanB := tracer.Start(ctx, "SpanB")
+	pprof.ForLabels(ctx, func(key, value string) bool {
+		if v, ok := labels[key]; !ok || v != value {
+			t.Fatalf("nested span labels mismatch: %q=%q", key, value)
+		}
+		return true
+	})
+	spanB.End()
+
+	// A new root span.
+	ctx, spanC := tracer.Start(context.Background(), "SpanC")
+	pprof.ForLabels(ctx, func(key, value string) bool {
+		if v, ok := labels[key]; !ok || v == value {
+			t.Fatalf("unexpected match: %q=%q", key, value)
+		}
+		return true
+	})
+	spanC.End()
+}
+
+// Test_withSpanIDScope verifies that the option changes the span ID scope
+// and leaves the span name scope untouched.
+func Test_withSpanIDScope(t *testing.T) {
+	var p tracerProvider
+	withSpanIDScope(scopeAllSpans)(&p)
+	if p.config.spanIDScope != scopeAllSpans {
+		t.Fatalf("span ID scope: got %d, want %d", p.config.spanIDScope, scopeAllSpans)
+	}
+	if p.config.spanNameScope != scopeNone {
+		t.Fatalf("span name scope: got %d, want %d", p.config.spanNameScope, scopeNone)
+	}
+}