In observability, (distributed) tracing with its spans covering a range of time, has quite a significant overlap with logging. This is especially true if you buy into wide events where each span or log line carries a large amount of extra information in structured attributes.
All this got me wondering, why should I log everything two or more times? For example, in Go you pass the error to tracing, to logs, and maybe pass it back up the stack (whether you both log and pass back up is debatable):
1func foo(ctx context.Context) error {
2 ctx, span := tracer.Start(ctx, "foo")
3 defer span.End()
4
5 err := bar("fizzz")
6 if err != nil {
7 span.RecordError(err, trace.WithAttributes(
8 attribute.String("arg", "fizzz")
9 ))
10 span.SetStatus(codes.Error, "doing bar failed")
11 slog.Debug("doing bar failed", "err", err, slog.String("arg", "fizzz"))
12 return fmt.Errorf("doing bar failed: %w", err)
13 }
14
15 return nil
16}
What I also found to be an inconvenience was the differing attribute systems between tracing with OpenTelemetry and logging with log/slog. So it occurred to me that if I pass the same information to both, I could drive the lower fidelty logging from the information I pass to tracing.
The first point of decision is do we make a span exporter or processor? It didn't take too much thinking to conclude that a processor was more appropriate since it was somewhat closer to realtime rather than receiving batched events for export. This was something I considered important with logging as you want logs at the point of a crash.
The second point was do we take a slog.Logger or slog.Handler? I started with Logger, but soon realized I needed use the time recorded from events, so switched to Handler.
And finally, what level do we log at? I went with debug for most, info for span ends, and warn/error when a span was marked for error.
I think the output most works, but I did see some issues. First was the (potentially) out of order output of events in parallel: events are only processed on span end, so they may be delayed by quite a bit before they are output. Second was it was unclear how to log singular events, e.g. I like to log the port when an http server starts, but since an http server blocks until the end, where do I put a span. Plus, all the other systems that expect a simple logger, what do I give them (so far, it ends up being we still need a normal logger).
1package spanlog
2
3import (
4 "context"
5 "log/slog"
6 "path"
7
8 "go.opentelemetry.io/otel/attribute"
9 "go.opentelemetry.io/otel/codes"
10 "go.opentelemetry.io/otel/sdk/trace"
11 semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
12)
13
14var _ trace.SpanProcessor = &Processor{}
15
16// Processor is a [go.opentelemetry.io/otel/sdk/trace.SpanProcessor]
17// that converts span start/end and events into log lines.
18// Span start is [log/slog.LevelDebug].
19// Span end is [log/slog.LevelInfo] or [log/slog.LevelError] if the span status is
20// [go.opentelemetry.io/otel/codes.Error].
21// Events are normally [log/slog.LevelDebug], and [log/slog.LevelWarn] if the span status is
22// [go.opentelemetry.io/otel/codes.Error],
23// and [log/slog.LevelError] if created by [go.opentelemetry.io/otel/trace.Span.RecordError].
24type Processor struct {
25 // LogID controls whether span_id and trace_id are logged
26 LogID bool
27 // Handler is where the output is sent.
28 // Span metadata such as trace_id, span_id, span type, duration, etc.
29 // are grouped under "span".
30 // Span names are used as the message,
31 // span attributes grouped under the instrumentation scope base name.
32 Handler slog.Handler
33}
34
35func (p *Processor) appendCommon(attrs []slog.Attr, s trace.ReadOnlySpan) []slog.Attr {
36 if p.LogID {
37 attrs = append(attrs,
38 slog.String("trace_id", s.SpanContext().TraceID().String()),
39 slog.String("span_id", s.SpanContext().SpanID().String()),
40 )
41 }
42 return attrs
43}
44
45func (p *Processor) OnStart(parent context.Context, s trace.ReadWriteSpan) {
46 ctx := context.Background()
47 // span start: debug
48 if p.Handler.Enabled(ctx, slog.LevelDebug) {
49 attrs := p.appendCommon(make([]slog.Attr, 0, 10), s)
50 attrs = append(attrs,
51 slog.Group("span", slog.String("type", "start")),
52 slog.Any(path.Base(s.InstrumentationScope().Name), slog.GroupValue(attr2attr(s.Attributes())...)),
53 )
54 rec := slog.NewRecord(s.StartTime(), slog.LevelDebug, s.Name(), 0)
55 rec.AddAttrs(attrs...)
56 p.Handler.Handle(ctx, rec)
57 }
58}
59
60func (p *Processor) OnEnd(s trace.ReadOnlySpan) {
61 ctx := context.Background()
62
63 // events: debug or warn
64 level := slog.LevelDebug
65 if s.Status().Code == codes.Error {
66 level = slog.LevelWarn
67 }
68 if p.Handler.Enabled(ctx, level) {
69 attrs := p.appendCommon(make([]slog.Attr, 0, 10), s)
70 attrs = append(attrs, slog.Group("span", slog.String("type", "event")))
71 attrs = p.appendCommon(attrs, s)
72 h := p.Handler.WithAttrs(attrs).WithGroup(path.Base(s.InstrumentationScope().Name))
73 for _, event := range s.Events() {
74 level := level
75 if event.Name == semconv.ExceptionEventName {
76 level = slog.LevelError
77 }
78 rec := slog.NewRecord(event.Time, level, event.Name, 0)
79
80 rec.AddAttrs(attr2attr(event.Attributes)...)
81 h.Handle(ctx, rec)
82 }
83 }
84
85 // span end: info or error
86 level = slog.LevelInfo
87 if s.Status().Code == codes.Error {
88 level = slog.LevelError
89 }
90 if p.Handler.Enabled(ctx, level) {
91 spanAttrs := make([]slog.Attr, 0, 3)
92 spanAttrs = append(spanAttrs,
93 slog.String("type", "end"),
94 slog.Duration("duration", s.EndTime().Sub(s.StartTime())),
95 )
96 if s.Status().Code != codes.Unset {
97 spanAttrs = append(spanAttrs, slog.Group("status",
98 slog.String("code", s.Status().Code.String()),
99 slog.String("description", s.Status().Description),
100 ))
101 }
102
103 attrs := p.appendCommon(make([]slog.Attr, 0, 10), s)
104 attrs = append(attrs,
105 slog.Any("span", slog.GroupValue(spanAttrs...)),
106 slog.Any(path.Base(s.InstrumentationScope().Name), slog.GroupValue(attr2attr(s.Attributes())...)),
107 )
108 rec := slog.NewRecord(s.EndTime(), level, s.Name(), 0)
109 rec.AddAttrs(attrs...)
110 p.Handler.Handle(ctx, rec)
111 }
112}
113
114func (p *Processor) Shutdown(ctx context.Context) error { return nil }
115func (p *Processor) ForceFlush(ctx context.Context) error { return nil }
116
117func attr2attr(attrs []attribute.KeyValue) []slog.Attr {
118 out := make([]slog.Attr, 0, len(attrs))
119 for _, attr := range attrs {
120 switch attr.Value.Type() {
121 case attribute.BOOL:
122 out = append(out, slog.Bool(string(attr.Key), attr.Value.AsBool()))
123 case attribute.INT64:
124 out = append(out, slog.Int64(string(attr.Key), attr.Value.AsInt64()))
125 case attribute.FLOAT64:
126 out = append(out, slog.Float64(string(attr.Key), attr.Value.AsFloat64()))
127 case attribute.STRING:
128 out = append(out, slog.String(string(attr.Key), attr.Value.AsString()))
129 default:
130 out = append(out, slog.Any(string(attr.Key), attr.Value.AsInterface()))
131 }
132 }
133 return out
134}