From 67d1893281c9c5a455e5c9fbce8224f8039c04d5 Mon Sep 17 00:00:00 2001 From: ale Date: Sun, 14 Apr 2019 08:02:04 +0100 Subject: [PATCH] Add some instrumentation to the statusManager --- node/instrumentation.go | 14 ++++++++++++++ node/status.go | 20 ++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/node/instrumentation.go b/node/instrumentation.go index 3791be9..cca48e3 100644 --- a/node/instrumentation.go +++ b/node/instrumentation.go @@ -52,6 +52,20 @@ var ( }, ) + // Status protocol (gossip) metrics. + gossipNumNodes = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "gossip_peer_count", + Help: "Number of peers seen by the gossip protocol.", + }, + ) + gossipOldestTS = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "gossip_oldest_ts", + Help: "Timestamp of the oldest update from any active peer.", + }, + ) + // Descriptors for the nodeCollector below. numListenersDesc = prometheus.NewDesc( "status_num_listeners", diff --git a/node/status.go b/node/status.go index 014e5cf..3083480 100644 --- a/node/status.go +++ b/node/status.go @@ -149,6 +149,24 @@ func (m *statusManager) mergeRemoteStatuses(remote []*pb.Status) { m.statuses = s } +// Export the number of known nodes, and the timestamp of the oldest +// update seen, to monitoring, so we can have a rough idea of when the +// gossip protocol isn't working. 
+func (m *statusManager) updateMetrics() {
+	m.mx.Lock() // held while m.statuses is scanned and both gauges are set; released explicitly below (no defer)
+	var oldest uint64
+	for _, s := range m.statuses {
+		t := s.Timestamp
+		if oldest == 0 || t < oldest { // keep the minimum; 0 means "unset", so a Timestamp of 0 is treated as unset too
+			oldest = t
+		}
+	}
+
+	gossipNumNodes.Set(float64(len(m.statuses)))
+	gossipOldestTS.Set(float64(oldest)) // stays 0 when m.statuses is empty
+	m.mx.Unlock()
+}
+
 func (m *statusManager) tick(ctx context.Context) {
 	pctx, cancel := context.WithTimeout(ctx, gossipTimeout)
 	defer cancel()
@@ -157,6 +175,8 @@ func (m *statusManager) tick(ctx context.Context) {
 	if err != nil && err != context.Canceled {
 		log.Printf("status: gossip error: %v", err)
 	}
+
+	m.updateMetrics() // runs on every tick, including ticks where the error above was logged
 }
 
 func (m *statusManager) Exchange(ctx context.Context, req *pb.ExchangeRequest) (*pb.ExchangeResponse, error) {
-- 
GitLab