Skip to content

Instantly share code, notes, and snippets.

@axw
Last active September 13, 2018 05:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save axw/236938de788a423f64687738aba246d3 to your computer and use it in GitHub Desktop.
Save axw/236938de788a423f64687738aba246d3 to your computer and use it in GitHub Desktop.
Diff adding Elastic APM tracing to CockroachDB
diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html
index 68d2949eaa..84d0f67f56 100644
--- a/docs/generated/settings/settings.html
+++ b/docs/generated/settings/settings.html
@@ -77,6 +77,7 @@
<tr><td><code>timeseries.storage.30m_resolution_ttl</code></td><td>duration</td><td><code>2160h0m0s</code></td><td>the maximum age of time series data stored at the 30 minute resolution. Data older than this is subject to deletion.</td></tr>
<tr><td><code>timeseries.storage.enabled</code></td><td>boolean</td><td><code>true</code></td><td>if set, periodic timeseries data is stored within the cluster; disabling is not recommended unless you are storing the data elsewhere</td></tr>
<tr><td><code>trace.debug.enable</code></td><td>boolean</td><td><code>false</code></td><td>if set, traces for recent requests can be seen in the /debug page</td></tr>
+<tr><td><code>trace.elastic.server</code></td><td>string</td><td><code></code></td><td>if set, traces go to the given Elastic APM server; ignored if trace.lightstep.token or trace.zipkin.collector are set.</td></tr>
<tr><td><code>trace.lightstep.token</code></td><td>string</td><td><code></code></td><td>if set, traces go to Lightstep using this token</td></tr>
<tr><td><code>trace.zipkin.collector</code></td><td>string</td><td><code></code></td><td>if set, traces go to the given Zipkin instance (example: '127.0.0.1:9411'); ignored if trace.lightstep.token is set.</td></tr>
<tr><td><code>version</code></td><td>custom validation</td><td><code>2.0-13</code></td><td>set the active cluster version in the format '<major>.<minor>'.</td></tr>
diff --git a/pkg/util/tracing/shadow.go b/pkg/util/tracing/shadow.go
index 1239cc1232..813734b525 100644
--- a/pkg/util/tracing/shadow.go
+++ b/pkg/util/tracing/shadow.go
@@ -25,6 +25,10 @@ import (
"fmt"
"os"
+ "github.com/cockroachdb/cockroach/pkg/build"
+ elasticapm "github.com/elastic/apm-agent-go"
+ "github.com/elastic/apm-agent-go/module/apmot"
+ "github.com/elastic/apm-agent-go/transport"
lightstep "github.com/lightstep/lightstep-tracer-go"
opentracing "github.com/opentracing/opentracing-go"
zipkin "github.com/openzipkin/zipkin-go-opentracing"
@@ -57,6 +61,18 @@ func (m *zipkinManager) Close(tr opentracing.Tracer) {
_ = m.collector.Close()
}
+type elasticapmManager struct { // returned by createElasticAPMTracer as its shadowTracerManager
+ t *elasticapm.Tracer // the Elastic APM tracer that Close shuts down
+}
+// Name returns the fixed identifier "elasticapm" for this manager.
+func (elasticapmManager) Name() string {
+ return "elasticapm"
+}
+// Close closes the wrapped Elastic APM tracer; the tr argument is not used.
+func (m elasticapmManager) Close(tr opentracing.Tracer) {
+ m.t.Close()
+}
+
type shadowTracer struct {
opentracing.Tracer
manager shadowTracerManager
@@ -110,6 +126,20 @@ func createLightStepTracer(token string) (shadowTracerManager, opentracing.Trace
})
}
+func createElasticAPMTracer(serverURL string) (shadowTracerManager, opentracing.Tracer) {
+ // Builds an Elastic APM tracer whose HTTP transport targets serverURL; panics if either constructor fails.
+ httpTransport, err := transport.NewHTTPTransport(serverURL, "") // TODO(axw) token config
+ if err != nil {
+ panic(err)
+ }
+ t, err := elasticapm.NewTracer("cockroach", build.GetInfo().Tag)
+ if err != nil {
+ panic(err)
+ }
+ t.Transport = httpTransport // local is not called "transport" so the imported package stays visible
+ return elasticapmManager{t}, apmot.New(apmot.WithTracer(t)) // manager closes t; apmot.New supplies the opentracing.Tracer
+}
+
func createZipkinTracer(collectorAddr string) (shadowTracerManager, opentracing.Tracer) {
// Create our HTTP collector.
collector, err := zipkin.NewHTTPCollector(
diff --git a/pkg/util/tracing/tracer.go b/pkg/util/tracing/tracer.go
index 348c2017a8..dfd0d4bbd3 100644
--- a/pkg/util/tracing/tracer.go
+++ b/pkg/util/tracing/tracer.go
@@ -79,6 +79,12 @@ var zipkinCollector = settings.RegisterStringSetting(
envutil.EnvOrDefaultString("COCKROACH_TEST_ZIPKIN_COLLECTOR", ""),
)
+var elasticapmServer = settings.RegisterStringSetting( // Elastic APM server setting; read in Tracer.Configure after the Lightstep and Zipkin settings.
+ "trace.elastic.server",
+ "if set, traces go to the given Elastic APM server; ignored if trace.lightstep.token or trace.zipkin.collector are set.",
+ envutil.EnvOrDefaultString("COCKROACH_TEST_ELASTIC_APM_SERVER", ""),
+)
+
// Tracer is our own custom implementation of opentracing.Tracer. It supports:
//
// - forwarding events to x/net/trace instances
@@ -134,6 +140,8 @@ func (t *Tracer) Configure(sv *settings.Values) {
t.setShadowTracer(createLightStepTracer(lsToken))
} else if zipkinAddr := zipkinCollector.Get(sv); zipkinAddr != "" {
t.setShadowTracer(createZipkinTracer(zipkinAddr))
+ } else if elasticServerURL := elasticapmServer.Get(sv); elasticServerURL != "" {
+ t.setShadowTracer(createElasticAPMTracer(elasticServerURL))
} else {
t.setShadowTracer(nil, nil)
}
@@ -149,6 +157,7 @@ func (t *Tracer) Configure(sv *settings.Values) {
enableNetTrace.SetOnChange(sv, reconfigure)
lightstepToken.SetOnChange(sv, reconfigure)
zipkinCollector.SetOnChange(sv, reconfigure)
+ elasticapmServer.SetOnChange(sv, reconfigure)
}
func (t *Tracer) useNetTrace() bool {
@axw
Copy link
Author

axw commented Sep 13, 2018

Example trace with two CockroachDB nodes, using httptrace:

httptrace -trace=fe34447cc6ae9812d404a9504abe602c
adopt-job (cockroach)
├── txn coordinator send (cockroach)
│   └── dist sender send (cockroach)
│       └── /cockroach.roachpb.Internal/Batch (cockroach)
│           └── /cockroach.roachpb.Internal/Batch (cockroach)
├── txn coordinator send (cockroach)
│   └── dist sender send (cockroach)
│       └── /cockroach.roachpb.Internal/Batch (cockroach)
│           └── /cockroach.roachpb.Internal/Batch (cockroach)
├── txn coordinator send (cockroach)
│   └── dist sender send (cockroach)
│       └── /cockroach.roachpb.Internal/Batch (cockroach)
│           └── /cockroach.roachpb.Internal/Batch (cockroach)
├── sql txn (cockroach)
│   ├── txn coordinator send (cockroach)
│   │   └── dist sender send (cockroach)
│   │       └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   ├── flow (cockroach)
│   │   └── noop (cockroach)
│   ├── txn coordinator send (cockroach)
│   │   └── dist sender send (cockroach)
│   │       └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   ├── /cockroach.sql.distsqlrun.DistSQL/SetupFlow (cockroach)
│   │   └── /cockroach.sql.distsqlrun.DistSQL/SetupFlow (cockroach)
│   │       └── flow (cockroach)
│   │           ├── table reader (cockroach)
│   │           │   └── txn coordinator send (cockroach)
│   │           │       └── dist sender send (cockroach)
│   │           │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           ├── sortAll (cockroach)
│   │           └── index joiner (cockroach)
│   ├── txn coordinator send (cockroach)
│   │   └── dist sender send (cockroach)
│   │       └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   ├── txn coordinator send (cockroach)
│   ├── txn coordinator send (cockroach)
│   │   └── dist sender send (cockroach)
│   │       └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   └── txn coordinator send (cockroach)
│       └── dist sender send (cockroach)
│           └── /cockroach.roachpb.Internal/Batch (cockroach)
│               └── /cockroach.roachpb.Internal/Batch (cockroach)
├── txn coordinator send (cockroach)
│   └── dist sender send (cockroach)
│       └── /cockroach.roachpb.Internal/Batch (cockroach)
│           └── /cockroach.roachpb.Internal/Batch (cockroach)
├── txn coordinator send (cockroach)
│   └── dist sender send (cockroach)
│       └── /cockroach.roachpb.Internal/Batch (cockroach)
│           └── /cockroach.roachpb.Internal/Batch (cockroach)
└── txn coordinator send (cockroach)

<orphaned>
└── [async] drain (cockroach)

The colour is lost in this plain-text copy, so you can't see it here, but each of those seemingly duplicated spans starting with "/cockroach..." is a gRPC (client-span, transaction) pair.

@axw
Copy link
Author

axw commented Sep 13, 2018

This one's more interesting, as there are spans under the SetupFlow transaction:

httptrace -trace=ec14f2a64d60dead7a674fe1701f0ceb
adopt-job (cockroach)
├── sql txn (cockroach)
│   ├── txn coordinator send (cockroach)
│   │   └── dist sender send (cockroach)
│   │       └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   ├── txn coordinator send (cockroach)
│   │   └── dist sender send (cockroach)
│   │       └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   ├── /cockroach.sql.distsqlrun.DistSQL/SetupFlow (cockroach)
│   │   └── /cockroach.sql.distsqlrun.DistSQL/SetupFlow (cockroach)
│   │       └── flow (cockroach)
│   │           ├── sortAll (cockroach)
│   │           ├── table reader (cockroach)
│   │           │   └── txn coordinator send (cockroach)
│   │           │       └── dist sender send (cockroach)
│   │           │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           └── index joiner (cockroach)
│   ├── txn coordinator send (cockroach)
│   │   └── dist sender send (cockroach)
│   │       └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   ├── txn coordinator send (cockroach)
│   │   └── dist sender send (cockroach)
│   │       └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   ├── txn coordinator send (cockroach)
│   │   └── dist sender send (cockroach)
│   │       └── /cockroach.roachpb.Internal/Batch (cockroach)
│   │           └── /cockroach.roachpb.Internal/Batch (cockroach)
│   ├── flow (cockroach)
│   │   └── noop (cockroach)
│   └── txn coordinator send (cockroach)
├── txn coordinator send (cockroach)
│   └── dist sender send (cockroach)
│       └── /cockroach.roachpb.Internal/Batch (cockroach)
│           └── /cockroach.roachpb.Internal/Batch (cockroach)
├── txn coordinator send (cockroach)
│   └── dist sender send (cockroach)
│       └── /cockroach.roachpb.Internal/Batch (cockroach)
│           └── /cockroach.roachpb.Internal/Batch (cockroach)
├── txn coordinator send (cockroach)
│   └── dist sender send (cockroach)
│       └── /cockroach.roachpb.Internal/Batch (cockroach)
│           └── /cockroach.roachpb.Internal/Batch (cockroach)
├── txn coordinator send (cockroach)
│   └── dist sender send (cockroach)
│       └── /cockroach.roachpb.Internal/Batch (cockroach)
│           └── /cockroach.roachpb.Internal/Batch (cockroach)
├── txn coordinator send (cockroach)
└── txn coordinator send (cockroach)
    └── dist sender send (cockroach)
        └── /cockroach.roachpb.Internal/Batch (cockroach)
            └── /cockroach.roachpb.Internal/Batch (cockroach)

<orphaned>
└── [async] drain (cockroach)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment