diff --git a/README.md b/README.md
index cd86f54a25b81f666bf131472c8f6c76c9259011..38df0aea42386b1cf35e1049ad682174cfb9b19f 100644
--- a/README.md
+++ b/README.md
@@ -7,16 +7,22 @@ Supports receiving reports over HTTPS for the following mechanisms:
 
 * [Browser Reporting API (NEL, CSP, etc)](https://www.w3.org/TR/reporting/)
 * [TLS-RPT](https://tools.ietf.org/html/rfc8460)
-* [DMARC](https://tools.ietf.org/html/rfc7489) **(TODO)**
+* [DMARC](https://tools.ietf.org/html/rfc7489)
 
 The reports are logged in structured denormalized form to standard
 output, the idea being that a log collection pipeline would be in
 place to get them into Elasticsearch or some other index.
 
+The source IP address is not included in the output, but for user
+reports we instead add the ASN, to allow for some meaningful
+non-deanonymizing aggregation of the reports.
+
 Things that are still to do:
 
-* Add support for DMARC reports
-* Augment records with additional information such as AS / GeoIP that
-  would be useful in aggregated views (also drop the user IP at that
-  point)
+* More testing with real-world reports
 * Maybe add support for email ingestion
+
+Right now the server offers a single intake endpoint at */ingest/v1*,
+and then looks at the Content-Type of the request to figure out what
+kind of report it is. It might just be simpler to switch to separate
+endpoints per report type.
diff --git a/browser.go b/browser.go
index 00f90ced8430b5f880df471ac681c67ab1f698d1..36da13e0b63f0a9295ac23544be6fc433e2fe975 100644
--- a/browser.go
+++ b/browser.go
@@ -31,15 +31,18 @@ func (h *ReportHandler) Parse(contentType string, req *http.Request) ([]Event, e
 
 	var events []Event
 	for _, r := range reports {
-		events = append(events, h.eventFromReport(r))
+		events = append(events, h.eventFromReport(req, r))
 	}
 	return events, nil
 }
 
-func (h *ReportHandler) eventFromReport(report *report) Event {
+func (h *ReportHandler) eventFromReport(req *http.Request, report *report) Event {
 	ts := time.Now().Add(time.Duration(-report.Age) * time.Second)
 
 	e := make(Event)
+	if asn, ok := lookupASN(getRemoteIP(req)); ok {
+		e.Set("asn", asn)
+	}
 	e.Set("type", report.Type)
 	e.Set("event_timestamp", ts)
 	e.Set("url", report.URL)
diff --git a/cmd/reports-collector/main.go b/cmd/reports-collector/main.go
index d53649e138ed1ca30f718872b37cac66190882c3..f0cc25a5a9086ef2ae2a405ec9b6483ffeacd326 100644
--- a/cmd/reports-collector/main.go
+++ b/cmd/reports-collector/main.go
@@ -35,6 +35,7 @@ func main() {
 		new(rc.LogSink),
 		new(rc.ReportHandler),
 		new(rc.TLSRPTHandler),
+		new(rc.DMARCHandler),
 	)
 
 	// Create the http.Server.
diff --git a/collector.go b/collector.go
index 21d06a64df9deb4badd99eede2404c9423c8956f..f90447a79aa6f33ef1d5d9968b806a1bd5a327b7 100644
--- a/collector.go
+++ b/collector.go
@@ -76,11 +76,7 @@ hloop:
 
 	// Augment the Events with additional information obtained
 	// from the HTTP request, and send them to the forwarder.
-	ip := getRemoteIP(req)
 	for _, e := range events {
-		if asn, ok := lookupASN(ip); ok {
-			e.Set("asn", asn)
-		}
 		c.sink.Send(e)
 		reportsByType.WithLabelValues(
 			e.GetString("type"), e.GetString("domain")).Inc()
diff --git a/collector_test.go b/collector_test.go
index 74c9cdac11fd0b60a41e84179976d53da61d3548..d4c05aa9192840a5795d97209171fb4570efe463 100644
--- a/collector_test.go
+++ b/collector_test.go
@@ -15,6 +15,37 @@ func (c *countingSink) Send(e Event) {
 	c.counter++
 }
 
+func createTestCollector() (*countingSink, string, func()) {
+	sink := new(countingSink)
+	c := NewCollector(
+		sink,
+		new(ReportHandler),
+		new(TLSRPTHandler),
+		new(DMARCHandler),
+	)
+	srv := httptest.NewServer(c)
+	return sink, srv.URL, func() {
+		srv.Close()
+	}
+}
+
+func doRequest(t *testing.T, uri, contentType, data string) {
+	req, err := http.NewRequest("POST", uri, strings.NewReader(data))
+	if err != nil {
+		t.Fatalf("NewRequest failed: %v", err)
+	}
+	req.Header.Set("Content-Type", contentType)
+	client := new(http.Client)
+	resp, err := client.Do(req)
+	if err != nil {
+		t.Fatalf("request failed: %v", err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode >= 300 {
+		t.Fatalf("request failed with status %d: %s", resp.StatusCode, resp.Status)
+	}
+}
+
 var reportsTestData = `[{
 	"type": "csp",
 	"age": 10,
@@ -59,29 +90,10 @@
 }]`
 
 func TestBrowserReports(t *testing.T) {
-	sink := new(countingSink)
-	c := NewCollector(
-		sink,
-		new(ReportHandler),
-		new(TLSRPTHandler),
-	)
-	srv := httptest.NewServer(c)
-	defer srv.Close()
+	sink, uri, cleanup := createTestCollector()
+	defer cleanup()
 
-	req, err := http.NewRequest("POST", srv.URL, strings.NewReader(reportsTestData))
-	if err != nil {
-		t.Fatalf("NewRequest failed: %v", err)
-	}
-	req.Header.Set("Content-Type", "application/reports+json")
-	client := new(http.Client)
-	resp, err := client.Do(req)
-	if err != nil {
-		t.Fatalf("request failed: %v", err)
-	}
-	if resp.StatusCode >= 300 {
-		t.Fatalf("request failed with status %d: %s", resp.StatusCode, resp.Status)
-	}
-	defer resp.Body.Close()
+	doRequest(t, uri, "application/reports+json", reportsTestData)
 
 	if sink.counter != 3 {
 		t.Fatalf("parsed %d events, expected 3", sink.counter)
@@ -131,32 +143,148 @@ var tlsrptTestData = `{
 	}]
 }`
 
-func TestTLSRPTReports(t *testing.T) {
-	sink := new(countingSink)
-	c := NewCollector(
-		sink,
-		new(ReportHandler),
-		new(TLSRPTHandler),
-	)
-	srv := httptest.NewServer(c)
-	defer srv.Close()
+func TestTLSRPT(t *testing.T) {
+	sink, uri, cleanup := createTestCollector()
+	defer cleanup()
 
-	req, err := http.NewRequest("POST", srv.URL, strings.NewReader(tlsrptTestData))
-	if err != nil {
-		t.Fatalf("NewRequest failed: %v", err)
-	}
-	req.Header.Set("Content-Type", "application/tlsrpt+json")
-	client := new(http.Client)
-	resp, err := client.Do(req)
-	if err != nil {
-		t.Fatalf("request failed: %v", err)
-	}
-	if resp.StatusCode >= 300 {
-		t.Fatalf("request failed with status %d: %s", resp.StatusCode, resp.Status)
-	}
-	defer resp.Body.Close()
+	doRequest(t, uri, "application/tlsrpt+json", tlsrptTestData)
 
 	if sink.counter != 3 {
 		t.Fatalf("parsed %d events, expected 3", sink.counter)
 	}
 }
+
+var dmarcTestData1 = `<?xml version="1.0"?>
+<feedback>
+  <version>0.1</version>
+  <report_metadata>
+    <org_name>AMAZON-SES</org_name>
+    <email>postmaster@amazonses.com</email>
+    <report_id>bf9cb2b6-cdd5-49c3-bfb4-38cc8c29b8e4</report_id>
+    <date_range>
+      <begin>1602806400</begin>
+      <end>1602892800</end>
+    </date_range>
+  </report_metadata>
+  <policy_published>
+    <domain>example.com</domain>
+    <adkim>r</adkim>
+    <aspf>r</aspf>
+    <p>none</p>
+    <sp>none</sp>
+    <pct>0</pct>
+    <fo>0</fo>
+  </policy_published>
+  <record>
+    <row>
+      <source_ip>1.2.3.4</source_ip>
+      <count>2</count>
+      <policy_evaluated>
+        <disposition>none</disposition>
+        <dkim>fail</dkim>
+        <spf>fail</spf>
+      </policy_evaluated>
+    </row>
+    <identifiers>
+      <envelope_from>example.com</envelope_from>
+      <header_from>example.com</header_from>
+    </identifiers>
+    <auth_results>
+      <spf>
+        <domain>example.com</domain>
+        <result>fail</result>
+      </spf>
+    </auth_results>
+  </record>
+</feedback>`
+
+var dmarcTestData2 = `<?xml version="1.0" encoding="UTF-8" ?>
+<feedback>
+  <report_metadata>
+    <org_name>vodafone.it</org_name>
+    <email>noreply-dmarc-support@vodafone.it</email>
+    <report_id>lists.example.com:1602943261</report_id>
+    <date_range>
+      <begin>1602857214</begin>
+      <end>1602940407</end>
+    </date_range>
+  </report_metadata>
+  <policy_published>
+    <domain>lists.example.com</domain>
+    <adkim>r</adkim>
+    <aspf>r</aspf>
+    <p>none</p>
+    <sp>none</sp>
+    <pct>0</pct>
+  </policy_published>
+  <record>
+    <row>
+      <source_ip>1.2.3.4</source_ip>
+      <count>1</count>
+      <policy_evaluated>
+        <disposition>none</disposition>
+        <dkim>pass</dkim>
+        <spf>pass</spf>
+      </policy_evaluated>
+    </row>
+    <identifiers>
+      <header_from>lists.example.com</header_from>
+    </identifiers>
+    <auth_results>
+      <spf>
+        <domain>lists.example.com</domain>
+        <result>pass</result>
+      </spf>
+      <dkim>
+        <domain>lists.example.com</domain>
+        <result>pass</result>
+      </dkim>
+    </auth_results>
+  </record>
+  <record>
+    <row>
+      <source_ip>1.2.3.4</source_ip>
+      <count>1</count>
+      <policy_evaluated>
+        <disposition>none</disposition>
+        <dkim>pass</dkim>
+        <spf>pass</spf>
+      </policy_evaluated>
+    </row>
+    <identifiers>
+      <header_from>lists.example.com</header_from>
+    </identifiers>
+    <auth_results>
+      <spf>
+        <domain>lists.example.com</domain>
+        <result>pass</result>
+      </spf>
+      <dkim>
+        <domain>lists.example.com</domain>
+        <result>pass</result>
+      </dkim>
+    </auth_results>
+  </record>
+</feedback>`
+
+func TestDMARC(t *testing.T) {
+	sink, uri, cleanup := createTestCollector()
+	defer cleanup()
+
+	doRequest(t, uri, "text/xml", dmarcTestData1)
+
+	if sink.counter != 1 {
+		t.Fatalf("parsed %d events, expected 1", sink.counter)
+	}
+}
+
+func TestDMARC_IgnoreSuccesses(t *testing.T) {
+	sink, uri, cleanup := createTestCollector()
+	defer cleanup()
+
+	doRequest(t, uri, "text/xml", dmarcTestData2)
+
+	if sink.counter != 0 {
+		t.Fatalf("parsed %d events, expected 0", sink.counter)
+	}
+}
diff --git a/dmarc.go b/dmarc.go
new file mode 100644
index 0000000000000000000000000000000000000000..e50d7546e82c8d3f518f98d73de3129aff9da745
--- /dev/null
+++ b/dmarc.go
@@ -0,0 +1,116 @@
+package reportscollector
+
+import (
+	"compress/gzip"
+	"encoding/xml"
+	"io"
+	"net/http"
+	"time"
+)
+
+type dmarcRecord struct {
+	Row struct {
+		SourceIP        string `xml:"source_ip"`
+		Count           int    `xml:"count"`
+		PolicyEvaluated struct {
+			Disposition string `xml:"disposition"`
+			DKIM        string `xml:"dkim"`
+			SPF         string `xml:"spf"`
+		} `xml:"policy_evaluated"`
+	} `xml:"row"`
+	Identifiers struct {
+		EnvelopeFrom string `xml:"envelope_from"`
+		HeaderFrom   string `xml:"header_from"`
+	} `xml:"identifiers"`
+	AuthResults struct {
+		DKIM struct {
+			Domain string `xml:"domain"`
+			Result string `xml:"result"`
+		} `xml:"dkim"`
+		SPF struct {
+			Domain string `xml:"domain"`
+			Result string `xml:"result"`
+		} `xml:"spf"`
+	} `xml:"auth_results"`
+}
+
+func (r *dmarcRecord) isFailure() bool {
+	return (r.AuthResults.DKIM.Result == "fail" ||
+		r.AuthResults.SPF.Result == "fail")
+}
+
+type dmarcPolicy struct {
+	Domain string  `xml:"domain"`
+	ADKIM  string  `xml:"adkim"`
+	ASPF   string  `xml:"aspf"`
+	P      string  `xml:"p"`
+	SP     string  `xml:"sp"`
+	PCT    float64 `xml:"pct"`
+	FO     float64 `xml:"fo"`
+}
+
+type dmarcReport struct {
+	Name xml.Name `xml:"feedback"`
+
+	Version  string `xml:"version"`
+	Metadata struct {
+		ReportID     string `xml:"report_id"`
+		Organization string `xml:"org_name"`
+		Email        string `xml:"email"`
+		DateRange    struct {
+			StartSecs int64 `xml:"begin"`
+			EndSecs   int64 `xml:"end"`
+		} `xml:"date_range"`
+	} `xml:"report_metadata"`
+
+	Policy  *dmarcPolicy   `xml:"policy_published"`
+	Records []*dmarcRecord `xml:"record"`
+}
+
+type DMARCHandler struct{}
+
+func (h *DMARCHandler) Parse(contentType string, req *http.Request) ([]Event, error) {
+	var r io.Reader
+	switch contentType {
+	case "text/xml":
+		r = req.Body
+	case "application/gzip":
+		var err error
+		r, err = gzip.NewReader(req.Body)
+		if err != nil {
+			return nil, err
+		}
+	default:
+		return nil, ErrNoMatch
+	}
+
+	var report dmarcReport
+	if err := xml.NewDecoder(r).Decode(&report); err != nil {
+		return nil, err
+	}
+
+	var events []Event
+	for _, rec := range report.Records {
+		if rec.isFailure() {
+			events = append(events, h.eventFromRecord(&report, rec))
+		}
+	}
+	return events, nil
+}
+
+func (h *DMARCHandler) eventFromRecord(report *dmarcReport, rec *dmarcRecord) Event {
+	e := make(Event)
+	e.Set("type", "dmarc")
+	e.Set("event_timestamp", time.Unix(report.Metadata.DateRange.EndSecs, 0))
+	e.Set("domain", report.Policy.Domain)
+	e.Set("report_id", report.Metadata.ReportID)
+	e.Set("report_organization", report.Metadata.Organization)
+
+	e.Set("dmarc_envelope_from", rec.Identifiers.EnvelopeFrom)
+	e.Set("dmarc_header_from", rec.Identifiers.HeaderFrom)
+	e.Set("dmarc_dkim", rec.AuthResults.DKIM.Result)
+	e.Set("dmarc_spf", rec.AuthResults.SPF.Result)
+	e.Set("failed_session_count", rec.Row.Count)
+
+	return e
+}
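
A quick way to exercise the new DMARC intake path end to end is a small client that gzips an aggregate report and posts it to the collector. The sketch below assumes a collector instance listening on localhost:3995 (hypothetical address) and a report saved as dmarc-report.xml (placeholder filename); the /ingest/v1 path and the application/gzip content type are the ones DMARCHandler.Parse above responds to. Posting the raw XML with Content-Type: text/xml would exercise the uncompressed branch of the same switch.

package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"log"
	"net/http"
	"os"
)

func main() {
	// Read a DMARC aggregate report from disk (placeholder filename).
	xmlReport, err := os.ReadFile("dmarc-report.xml")
	if err != nil {
		log.Fatal(err)
	}

	// Compress the body the way a reporting MTA would before upload.
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	if _, err := zw.Write(xmlReport); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}

	// The Content-Type is what routes the request to the DMARC handler.
	resp, err := http.Post("http://localhost:3995/ingest/v1", "application/gzip", &buf)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	fmt.Println("collector replied:", resp.Status)
}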