Commit 9f9f51ce, authored by ale
Add support for structured logs

All logs (structured and unstructured) are stored in the same logs.syslog table.
parent f9457d7a
Merge request !288 (Draft): Clickhouse as log-collector backend
Pipeline #54569 failed
@@ -2,33 +2,27 @@ CREATE DATABASE IF NOT EXISTS `logs`;
 SET allow_experimental_object_type=1;
 
-CREATE TABLE IF NOT EXISTS logs.structured (
-    timestamp Date,
-    event JSON
-) ENGINE = MergeTree()
-PARTITION BY toYYYYMMDD(timestamp)
-ORDER BY (timestamp);
-
 CREATE TABLE IF NOT EXISTS logs.syslog (
-    timestamp Date,
+    timestamp DateTime,
     facility LowCardinality(String),
     severity LowCardinality(String),
     hostname LowCardinality(String),
     program String,
     tag String,
-    message String
+    message String,
+    data JSON
 ) ENGINE = MergeTree()
 PARTITION BY toYYYYMMDD(timestamp)
 ORDER BY (timestamp);
 
 CREATE TABLE IF NOT EXISTS logs.http (
-    timestamp Date,
+    timestamp DateTime,
     hostname LowCardinality(String),
     method LowCardinality(String),
     vhost String,
     uri String,
-    status Integer,
-    bytes Integer,
+    status UInt16,
+    bytes UInt64,
     referer String,
     user_agent String
 ) ENGINE = MergeTree()
...
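With the dedicated logs.structured table gone, structured events now share logs.syslog with plain syslog messages and carry their parsed attributes in the new data JSON column. A minimal sketch of how such attributes could be queried, assuming a hypothetical "service" key in the ingested JSON:

-- Hypothetical query over the shared table: count structured events per host.
-- "data.service" is a made-up attribute name; any key of the ingested JSON
-- object can be addressed as a subcolumn of the experimental JSON type.
SELECT hostname, count() AS events
FROM logs.syslog
WHERE timestamp >= now() - INTERVAL 1 DAY
  AND data.service = 'sso-server'
GROUP BY hostname
ORDER BY events DESC;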
@@ -45,7 +45,7 @@ module(
     load="omclickhouse"
 )
 template(name="clickhouseSyslog" type="list" option.stdsql="on") {
-    constant(value="INSERT INTO logs.syslog (timestamp, hostname, facility, severity, program, tag, message) VALUES ('")
+    constant(value="INSERT INTO logs.syslog (timestamp, hostname, facility, severity, program, tag, message, data) VALUES ('")
     property(name="timereported" dateFormat="pgsql" date.inUTC="on")
     constant(value="','")
     property(name="hostname")
@@ -59,6 +59,8 @@ template(name="clickhouseSyslog" type="list" option.stdsql="on") {
     property(name="syslogtag")
     constant(value="','")
     property(name="msg")
+    constant(value="','")
+    property(name="$!")
     constant(value="')")
 }
@@ -85,6 +87,10 @@ template(name="clickhouseHTTP" type="list" option.stdsql="on") {
 }
 {% endif %}
 
+module(
+    load="mmrm1stspace"
+)
+
 module(
     load="mmjsonparse"
 )
@@ -109,6 +115,8 @@ include(
 # - autodetect Lumberjack structured logs and parse them
 # - forward everything to Elasticsearch
 ruleset(name="incoming"){
+    action(type="mmrm1stspace")
+
     # Anonymize logs here.
     # TODO: whitelist the log sources that need anonymization (mail services).
     action(type="mmanon"
@@ -129,26 +137,10 @@ ruleset(name="incoming"){
         stop
     }
 
-    if (substring($msg, 1, 5) == "@cee:") then {
-        action(type="mmjsonparse")
-        if ($syslogfacility-text == "auth" and $programname == "audit") then {
-            # Structured audit logs go to a dedicated Elasticsearch index.
-            stop
-        } else {
-            # Extension point for rules applying to structured logs.
-            include(
-                file="/etc/rsyslog-collector/rules-structured.d/*.conf"
-                mode="optional"
-            )
-            # Normal structured log present in the default syslog flow. Send
-            # straight to Elasticsearch, skipping the log normalization step.
-            stop
-        }
-    } else if ($syslogfacility-text == "local3") then {
     # HTTP logs from the front-end. Run it through mmnormalize to
     # convert the standard CommonLog format into JSON, then send it to
-    # Elasticsearch.
+    # indexing.
+    if ($syslogfacility-text == "local3") then {
         action(type="mmnormalize"
             rulebase="/etc/rsyslog-collector-lognorm/http.rb")
 
         # Anonymize sso_login requests by dropping the query string.
@@ -171,11 +163,24 @@ ruleset(name="incoming"){
             queue.mindequeuebatchsize.timeout="3000"
             queue.filename="clickhouse-http"
             action.resumeretrycount="-1")
+        stop
+    }
+
+    # Structured logs and unstructured logs end up in the same indexed table,
+    # they differ just in the way the attributes are generated.
+    if (substring($msg, 0, 5) == "@cee:") then {
+        action(type="mmjsonparse")
+        unset $!msg;
+
+        # Extension point for rules applying to structured logs.
+        include(
+            file="/etc/rsyslog-collector/rules-structured.d/*.conf"
+            mode="optional"
+        )
     } else {
         # Traditional syslog message. Run it through mmnormalize to
         # extract interesting bits of metadata according to user-defined
-        # patterns (a bit like logstash), then send the result as JSON to
-        # Elasticsearch.
+        # patterns (a bit like logstash).
 
         # Apply any blacklists first.
         {% for expr in log_collector_filter_exprs|default([]) %}
@@ -196,13 +201,18 @@ ruleset(name="incoming"){
             rulebase="/etc/rsyslog-collector-lognorm/auth.rb")
         action(type="mmnormalize"
             rulebase="/etc/rsyslog-collector-lognorm/postfix.rb")
+    }
 
     # Drop these fields as they're just duplicating the original message.
     unset $!originalmsg;
     unset $!unparsed-data;
 
     # Slightly silly: we have to set a variable anyway in the
     # resulting JSON otherwise the esTemplate won't be syntactially
     # valid and ES will refuse it.
     # set $!ignore = "1";
 
+    # Send the log to the index.
     action(type="omclickhouse"
         server="127.0.0.1"
         port="9780"
@@ -219,7 +229,6 @@ ruleset(name="incoming"){
             queue.mindequeuebatchsize.timeout="3000"
             queue.filename="clickhouse-syslog"
             action.resumeretrycount="-1")
-    }
 {% endif %}
 }
...
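With the extra separator and property(name="$!") appended to the clickhouseSyslog template, a message that went through mmjsonparse should be rendered into an INSERT roughly like the sketch below; the values are invented and only illustrate how the JSON tree built by mmjsonparse ends up in the data column:

-- Illustrative only: the kind of statement the clickhouseSyslog template is
-- expected to emit for a structured "@cee:" message (all values made up).
INSERT INTO logs.syslog
    (timestamp, hostname, facility, severity, program, tag, message, data)
VALUES
    ('2024-01-01 12:00:00', 'host1', 'authpriv', 'info', 'sso-server',
     'sso-server[1234]:', '@cee: {"user":"bob","status":"ok"}',
     '{"user":"bob","status":"ok"}');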
@@ -88,10 +88,14 @@ log-collector-e2e:
   scheduling_group: all
   containers:
     - name: prober
-      image: registry.git.autistici.org/ai3/tools/dye-injector:master
+      image: registry.git.autistici.org/ai3/tools/dye-injector:clickhouse
       port: 7094
       env:
         ADDR: ":7094"
+        DRIVER: "clickhouse"
+        CLICKHOUSE_ADDR: "log-collector.{{ domain }}:9700"
+        CLICKHOUSE_USER: "clickhouse"
+        CLICKHOUSE_PASSWORD: "{{ clickhouse_password }}"
   monitoring_endpoints:
     - name: log-collector-e2e-prober
       port: 7094
...
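The e2e prober now runs a clickhouse-capable dye-injector build and is pointed at the collector's ClickHouse endpoint through the new environment variables. Presumably the probe injects a marker ("dye") log line and then checks that it was indexed; a verification query along these lines would do it (the marker value and time window are placeholders, the real probe logic lives in the dye-injector tool):

-- Hypothetical end-to-end check: look for the injected dye marker among
-- recently ingested rows.
SELECT count() AS hits
FROM logs.syslog
WHERE timestamp >= now() - INTERVAL 10 MINUTE
  AND message LIKE '%dye-marker-placeholder%';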