SEANK.H.LIAO

metrics and tracing

metrics and tracing with prometheus, jaeger, opentracing, opencensus, and opentelemetry

metrics and tracing

So you created a mess of services that talk to each other and now you want to know what actually happens.

defs

metrics are...

counters providing an aggregate view of events

tracing is...

records of individual executions

parts

there are 2 parts:

collectors

These are separate applications that run to collect metrics/traces. They are usually the things with name recognition, e.g. Prometheus for metrics and Jaeger for tracing.

TODO: look at opentelemetry collector for collecting all metrics / traces and reexport them to collectors

sdk/client libraries

These run as part of the application code and generate/expose metrics/traces. They are the subject of ongoing standardization, e.g. OpenTracing, OpenCensus, and OpenTelemetry.

OpenCensus and OpenTelemetry implement metrics and tracing themselves and just expose tracing / metrics in a way the collectors can understand, no official clients required (might not always be true)

examples

with different library combinations:

note: there are 3 main methods of trace propagation

Prometheus / Jaeger libraries

using prometheus/client_golang and jaegertracing/jaeger-client-go

jaeger officially uses opentracing/opentracing-go

only way to push metrics to prometheus pushgateway

 1package main
 2
 3import (
 4        "net/http"
 5
 6        "github.com/opentracing/opentracing-go"
 7        "github.com/opentracing/opentracing-go/ext"
 8        "github.com/prometheus/client_golang/prometheus"
 9        "github.com/prometheus/client_golang/prometheus/promauto"
10        "github.com/prometheus/client_golang/prometheus/promhttp"
11        "github.com/prometheus/client_golang/prometheus/push"
12        "github.com/uber/jaeger-client-go"
13        jaegercfg "github.com/uber/jaeger-client-go/config"
14        jprom "github.com/uber/jaeger-lib/metrics/prometheus"
15)
16
17func ExamplePrometheusJaeger() {
18        // stats: prometheus exporter
19        http.Handle("/metrics", promhttp.Handler())
20
21        // stats: custom
22        counter0 := promauto.NewCounter(prometheus.CounterOpts{
23                Name: "myapp_processed_ops_total",
24                Help: "The total number of processed events",
25        })
26
27        // trace: jaeger exporter
28        // or FromEnv()
29        cfg := jaegercfg.Configuration{
30                ServiceName: "service",
31                Sampler: &jaegercfg.SamplerConfig{
32                        Type:  jaeger.SamplerTypeConst,
33                        Param: 1,
34                },
35        }
36        tracer, closer, _ := cfg.NewTracer(jaegercfg.Metrics(jprom.New()))
37        defer closer.Close()
38        opentracing.InitGlobalTracer(tracer)
39
40        http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
41                // extract span from incoming context
42                spanctx, _ := tracer.Extract(opentracing.HTTPHeaders, opentracing.HTTPHeadersCarrier(r.Header))
43                span := tracer.StartSpan("child", ext.RPCServerOption(spanctx))
44                defer span.Finish()
45
46                // update metrics
47                counter0.Inc()
48
49                // propagate trace into outgoing request
50                req, _ := http.NewRequest("GET", "https://opencensus.io/", nil)
51                tracer.Inject(span.Context(), opentracing.HTTPHeaders, opentracing.HTTPHeadersCarrier(req.Header))
52
53                http.DefaultClient.Do(req)
54        })
55
56        // serve
57        http.ListenAndServe(":8080", nil)
58
59        // or push metrics to pushgateway
60        pusher := push.New("push-gateway:1234", "job-name").Gatherer(prometheus.DefaultGatherer)
61        pusher.Push()
62}

OpenCensus

using go.opencensus.io

Metrics look clunky without increment/decrement. You also have to replace the http client/server.

 1package main
 2
 3import (
 4        "net/http"
 5
 6        "contrib.go.opencensus.io/exporter/jaeger"
 7        "contrib.go.opencensus.io/exporter/prometheus"
 8        "go.opencensus.io/plugin/ochttp"
 9        "go.opencensus.io/stats"
10        "go.opencensus.io/stats/view"
11        "go.opencensus.io/trace"
12)
13
14func ExampleOpenCensus() {
15        // stats: prometheus exporter
16        promexp, _ := prometheus.NewExporter(prometheus.Options{})
17        http.Handle("/metrics", promexp)
18
19        // stats: ochttp defaults
20        view.Register(ochttp.DefaultClientViews...)
21        view.Register(ochttp.DefaultServerViews...)
22
23        // stats: custom
24        float0 := stats.Float64("float0", "A float", "ms")
25
26        // trace: jaeger exporter
27        jaegerexp, _ := jaeger.NewExporter(jaeger.Options{
28                AgentEndpoint: "localhost:6831",
29                ServiceName:   "demo",
30        })
31        trace.RegisterExporter(jaegerexp)
32        trace.ApplyConfig(trace.Config{DefaultSampler: trace.AlwaysSample()})
33
34        // propagation
35        client := &http.Client{Transport: &ochttp.Transport{}}
36        handler := &ochttp.Handler{Handler: http.DefaultServeMux}
37
38        http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
39                // extract span from incoming request
40                ctx, span := trace.StartSpan(r.Context(), "span-name")
41                defer span.End()
42
43                // update metrics
44                stats.Record(ctx, float0.M(1.2))
45
46                // propagate trace into outgoing request
47                req, _ := http.NewRequest("GET", "https://opencensus.io/", nil)
48                req = req.WithContext(ctx)
49
50                client.Do(req)
51        })
52
53        http.ListenAndServe(":8080", handler)
54}

OpenTelemetry

using go.opentelemetry.io/otel

Okay-ish, but the API feels unstable.

 1package main
 2
 3import (
 4        "net/http"
 5
 6        "go.opentelemetry.io/otel/api/global"
 7        "go.opentelemetry.io/otel/api/metric"
 8        "go.opentelemetry.io/otel/api/propagation"
 9        "go.opentelemetry.io/otel/exporters/metric/prometheus"
10        "go.opentelemetry.io/otel/exporters/trace/jaeger"
11        sdktrace "go.opentelemetry.io/otel/sdk/trace"
12)
13
14func ExampleOpenTelemetry() {
15        // stats: prometheus exporter
16        controller, promhandle, _ := prometheus.InstallNewPipeline(prometheus.Config{})
17        defer controller.Stop()
18        http.Handle("/metrics", promhandle)
19
20        // stats: custom
21        meter := global.Meter("service")
22        counter0 := metric.Must(meter).NewInt64Counter("counter0")
23
24        // trace: jaeger exporter
25        _, flush, _ := jaeger.NewExportPipeline(
26                jaeger.WithCollectorEndpoint("http://localhost:14268/api/traces"),
27                jaeger.RegisterAsGlobal(),
28                jaeger.WithSDK(&sdktrace.Config{
29                        DefaultSampler: sdktrace.AlwaysSample(),
30                }),
31        )
32        defer flush()
33        tracer := global.Tracer("service")
34
35        http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
36                // extract span from incoming request
37                ctx := propagation.ExtractHTTP(r.Context(), global.Propagators(), r.Header)
38                ctx, span := tracer.Start(ctx, "span")
39                defer span.End()
40
41                // update metrics
42                counter0.Add(ctx, 1)
43
44                // propagate trace into outgoing request
45                req, _ := http.NewRequest("GET", "https://opencensus.io/", nil)
46                propagation.InjectHTTP(ctx, global.Propagators(), req.Header)
47
48                http.DefaultClient.Do(req)
49        })
50
51        http.ListenAndServe(":8080", handler)
52}