Commit f6076be6 authored by ale: Initial commit
cgroups-exporter
================
A simple standalone daemon that reads cgroup resource accounting data
and exports it to Prometheus.
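The daemon scans the systemd cgroup hierarchy for .service units and exports their CPU, memory (RSS) and block I/O accounting data as the metrics cgroup_cpu_usage, cgroup_memory_usage, cgroup_blkio_bytes and cgroup_blkio_latency_ns, labeled by slice and service. By default it listens on port 3909 (configurable with the -addr flag) and serves the metrics on the standard /metrics endpoint.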
cgroups-exporter (0.1) unstable; urgency=low

  * Initial Release.

 -- ale <ale@incal.net>  Wed, 09 Aug 2017 12:21:15 +0000
Source: cgroups-exporter
Section: admin
Priority: optional
Maintainer: ale <ale@incal.net>
Build-Depends: debhelper (>= 9), dh-golang, golang-go
Standards-Version: 3.9.5

Package: cgroups-exporter
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}
Description: Cgroups metrics exporter
 Exports cgroups resource accounting metrics to Prometheus.
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: cgroups-exporter
Source: <url://example.com>

Files: *
Copyright: <years> <put author's name and email here>
           <years> <likewise for another author>
License: GPL-3.0+

Files: debian/*
Copyright: 2017 ale <ale@desktop.m.investici.org>
License: GPL-3.0+

License: GPL-3.0+
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 .
 This package is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.
 .
 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 .
 On Debian systems, the complete text of the GNU General
 Public License version 3 can be found in "/usr/share/common-licenses/GPL-3".

# Please also look if there are files or directories which have a
# different copyright/license attached and list them here.
# Please avoid picking license terms that are more restrictive than the
# packaged work, as it may make Debian's contributions unacceptable upstream.
#!/usr/bin/make -f

export DH_GOPKG := git.autistici.org/ale/cgroups-exporter

# main packaging script based on dh7 syntax
%:
	dh $@ --buildsystem=golang
package main

import (
	"bufio"
	"bytes"
	"flag"
	"io"
	"io/ioutil"
	"log"
	"net"
	"net/http"
	"os"
	"os/signal"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"golang.org/x/net/context"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

var addr = flag.String("addr", ":3909", "address to listen on")
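// splitServiceName splits a relative cgroup path such as
// "system.slice/foo.service" into its parent slice ("system.slice")
// and the service name ("foo.service").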
func splitServiceName(path string) (string, string) {
	slice, name := filepath.Split(path)
	slice = strings.Trim(slice, "/")
	return slice, name
}
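// parseMapFile parses a cgroup accounting file made of "key value"
// lines (such as cpuacct.stat or memory.stat) into a map. Malformed
// lines are skipped.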
func parseMapFile(path string) (map[string]int64, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	result := make(map[string]int64)
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Bytes()
		parts := bytes.Split(line, []byte(" "))
		if len(parts) != 2 {
			continue
		}
		value, err := strconv.ParseInt(string(parts[1]), 10, 64)
		if err != nil {
			continue
		}
		result[string(parts[0])] = value
	}
	return result, scanner.Err()
}
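// parseBlkioMapFile parses blkio accounting files whose lines have the
// form "device operation value", summing the values of each operation
// type across all devices.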
func parseBlkioMapFile(path string) (map[string]int64, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Aggregate counts by operation type (sum by device).
	result := make(map[string]int64)
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Bytes()
		parts := bytes.Split(line, []byte(" "))
		if len(parts) != 3 {
			continue
		}
		value, err := strconv.ParseInt(string(parts[2]), 10, 64)
		if err != nil {
			continue
		}
		result[string(parts[1])] += value
	}
	return result, scanner.Err()
}
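// parseSingleValueFile reads a cgroup accounting file that contains a
// single integer value.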
func parseSingleValueFile(path string) (int64, error) {
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return 0, err
	}
	// Strip the trailing newline before parsing.
	return strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
}
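// cgroupStatPath returns the absolute path of an accounting file for
// the given cgroup within the named controller hierarchy under
// /sys/fs/cgroup.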
func cgroupStatPath(cgroupPath, collector, path string) string {
	return filepath.Join("/sys/fs/cgroup", collector, cgroupPath, path)
}
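// cpuParser exports user and system CPU usage from the cpuacct
// controller (cpuacct.stat).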
type cpuParser struct {
	desc *prometheus.Desc
}

func newCPUParser() *cpuParser {
	return &cpuParser{
		desc: prometheus.NewDesc(
			"cgroup_cpu_usage",
			"Cgroup CPU usage.",
			[]string{"mode", "slice", "service"},
			nil,
		),
	}
}

func (p *cpuParser) describe(ch chan<- *prometheus.Desc) {
	ch <- p.desc
}

func (p *cpuParser) parse(path string) ([]prometheus.Metric, error) {
	usage, err := parseMapFile(cgroupStatPath(path, "cpu,cpuacct", "cpuacct.stat"))
	if err != nil {
		return nil, err
	}
	slice, name := splitServiceName(path)
	return []prometheus.Metric{
		prometheus.MustNewConstMetric(
			p.desc,
			prometheus.GaugeValue,
			float64(usage["user"]),
			"user", slice, name,
		),
		prometheus.MustNewConstMetric(
			p.desc,
			prometheus.GaugeValue,
			float64(usage["system"]),
			"system", slice, name,
		),
	}, nil
}
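// memoryParser exports the total RSS of a cgroup from the memory
// controller (memory.stat).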
type memoryParser struct {
	desc *prometheus.Desc
}

func newMemoryParser() *memoryParser {
	return &memoryParser{
		desc: prometheus.NewDesc(
			"cgroup_memory_usage",
			"Cgroup memory usage (RSS, in bytes).",
			[]string{"slice", "service"},
			nil,
		),
	}
}

func (p *memoryParser) describe(ch chan<- *prometheus.Desc) {
	ch <- p.desc
}

func (p *memoryParser) parse(path string) ([]prometheus.Metric, error) {
	mstat, err := parseMapFile(cgroupStatPath(path, "memory", "memory.stat"))
	if err != nil {
		return nil, err
	}
	slice, name := splitServiceName(path)
	return []prometheus.Metric{
		prometheus.MustNewConstMetric(
			p.desc,
			prometheus.GaugeValue,
			float64(mstat["total_rss"]),
			slice, name,
		),
	}, nil
}
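// blkioParser exports bytes read/written and the average I/O operation
// latency from the blkio controller.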
type blkioParser struct {
	bytesDesc, latencyDesc *prometheus.Desc
}

func newBlkioParser() *blkioParser {
	return &blkioParser{
		bytesDesc: prometheus.NewDesc(
			"cgroup_blkio_bytes",
			"Bytes read/written by blkio.",
			[]string{"mode", "slice", "service"},
			nil,
		),
		latencyDesc: prometheus.NewDesc(
			"cgroup_blkio_latency_ns",
			"Average blkio operation latency (in nanoseconds).",
			[]string{"mode", "slice", "service"},
			nil,
		),
	}
}

func (p *blkioParser) describe(ch chan<- *prometheus.Desc) {
	ch <- p.bytesDesc
	ch <- p.latencyDesc
}

func (p *blkioParser) parse(path string) ([]prometheus.Metric, error) {
	ops, err := parseBlkioMapFile(cgroupStatPath(path, "blkio", "blkio.io_serviced"))
	if err != nil {
		return nil, err
	}
	times, err := parseBlkioMapFile(cgroupStatPath(path, "blkio", "blkio.io_service_time"))
	if err != nil {
		return nil, err
	}
	totBytes, err := parseBlkioMapFile(cgroupStatPath(path, "blkio", "blkio.io_service_bytes"))
	if err != nil {
		return nil, err
	}
	slice, name := splitServiceName(path)
	m := []prometheus.Metric{
		prometheus.MustNewConstMetric(
			p.bytesDesc,
			prometheus.CounterValue,
			float64(totBytes["Write"]),
			"write", slice, name,
		),
		prometheus.MustNewConstMetric(
			p.bytesDesc,
			prometheus.CounterValue,
			float64(totBytes["Read"]),
			"read", slice, name,
		),
	}
	// This is unfortunately an average.
	if ops["Write"] > 0 {
		m = append(m, prometheus.MustNewConstMetric(
			p.latencyDesc,
			prometheus.GaugeValue,
			float64(times["Write"])/float64(ops["Write"]),
			"write", slice, name,
		))
	}
	if ops["Read"] > 0 {
		m = append(m, prometheus.MustNewConstMetric(
			p.latencyDesc,
			prometheus.GaugeValue,
			float64(times["Read"])/float64(ops["Read"]),
			"read", slice, name,
		))
	}
	return m, nil
}
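// A subsystem turns the accounting files of a single cgroup controller
// into Prometheus metrics for a given cgroup path.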
type subsystem interface {
	parse(string) ([]prometheus.Metric, error)
	describe(chan<- *prometheus.Desc)
}

var subsystems = []subsystem{
	newCPUParser(),
	newMemoryParser(),
	newBlkioParser(),
}
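// walkCGroups scans the systemd cgroup hierarchy for *.service cgroups
// and gathers metrics for each of them from all subsystems.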
func walkCGroups() ([]prometheus.Metric, error) {
	rootDir := "/sys/fs/cgroup/systemd"
	var metrics []prometheus.Metric
	err := filepath.Walk(rootDir, func(path string, info os.FileInfo, err error) error {
		if err != nil || !info.IsDir() || !strings.HasSuffix(path, ".service") {
			return nil
		}
		// Do not track systemd internal services.
		if strings.HasPrefix(info.Name(), "systemd-") {
			return nil
		}
		m, err := walkCGroup(path[len(rootDir)+1:])
		if err != nil {
			return nil
		}
		metrics = append(metrics, m...)
		return nil
	})
	return metrics, err
}
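// walkCGroup collects metrics for a single service cgroup from all
// registered subsystems; per-subsystem errors are logged and skipped.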
func walkCGroup(path string) ([]prometheus.Metric, error) {
	log.Printf("found service %s", path)
	var metrics []prometheus.Metric
	for _, s := range subsystems {
		m, err := s.parse(path)
		if err != nil {
			log.Printf("service %s, subsystem %v: error: %v", path, s, err)
			continue
		}
		metrics = append(metrics, m...)
	}
	return metrics, nil
}
// Keep a pre-rendered snapshot of all the metrics, so that scraping
// and updates can be independent of each other (but still serve a
// coherent view of all the metrics).
type collector struct {
	mx      sync.Mutex
	metrics []prometheus.Metric
}
func (c *collector) Describe(ch chan<- *prometheus.Desc) {
	for _, s := range subsystems {
		s.describe(ch)
	}
}

func (c *collector) update(metrics []prometheus.Metric) {
	c.mx.Lock()
	c.metrics = metrics
	c.mx.Unlock()
}

func (c *collector) Collect(ch chan<- prometheus.Metric) {
	c.mx.Lock()
	defer c.mx.Unlock()
	for _, m := range c.metrics {
		ch <- m
	}
}
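// loop periodically rebuilds the metrics snapshot until the context is
// canceled.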
func (c *collector) loop(ctx context.Context) {
	// Build an initial snapshot right away, then refresh it every minute.
	if m, err := walkCGroups(); err == nil {
		c.update(m)
	}
	ticker := time.NewTicker(60 * time.Second)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			if m, err := walkCGroups(); err == nil {
				c.update(m)
			}
		}
	}
}
func main() {
	log.SetFlags(0)
	flag.Parse()

	// Register the collector on a dedicated registry, so that only the
	// cgroup metrics are exported on /metrics.
	c := &collector{}
	reg := prometheus.NewRegistry()
	reg.MustRegister(c)

	// Create a very simple HTTP server that only exposes the
	// Prometheus metrics handler.
	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/" {
			io.WriteString(w, `<html>
<body><h1>cgroups-exporter</h1><p><a href="/metrics">/metrics</a></p>
</body></html>`)
		} else {
			http.NotFound(w, r)
		}
	})
	// Set up the Listener separately to work around Go 1.7's lack
	// of the http.Server.Close() function.
	l, err := net.Listen("tcp", *addr)
	if err != nil {
		log.Fatal(err)
	}
	srv := &http.Server{
		Handler:        mux,
		ReadTimeout:    10 * time.Second,
		WriteTimeout:   20 * time.Second,
		MaxHeaderBytes: 1 << 20,
	}

	// Create a cancelable Context and cancel it when we receive a
	// termination signal. This will stop the metrics updater.
	ctx, cancel := context.WithCancel(context.Background())
	sigCh := make(chan os.Signal, 1)
	go func() {
		<-sigCh
		cancel()
		l.Close()
	}()
	signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGINT)
	// Run the metrics update loop in a goroutine.
	go c.loop(ctx)

	log.Fatal(srv.Serve(l))
}