Commit e6fbab0e authored by ale's avatar ale

Initial commit

parents
usermetadb
==========
The *User Metadata Database* (`usermetadb`) stores long-term information
about user access patterns in order to detect anomalous behavior and
implement other safety checks. It strives to do so while respecting
the anonymity of the users, focusing on information that is actually
useful to them.
In practical terms, it stores the following information on every
successful login:
* *timestamps*, quantized/fuzzed to a sufficiently large amount (1h?)
to make correlation difficult
* *location*, stored at the country level based on user IP
* *device information* based on long-term cookies
The idea is that this is enough information to provide users with
meaningful summaries such as "you have just logged in from a
new/unknown device" and "you logged in earlier today with Chrome on a
mobile Android device", without storing de-anonymizing information on
the server side.
The cookie-based device detection might present an issue from this
point of view, because it makes it possible to establish a forensic
link between a specific device and an account for anyone in possession
of the server-side log database (only partially mitigated by the fact
that the cookie is encrypted).
## API
The server exports an API over HTTP/HTTPS. All requests should be made
using the POST method and an *application/json* Content-Type. The
request body should contain a JSON-encoded request object. Responses
will similarly be JSON-encoded.
The API is split into two conceptually separate sets, the *log* API
and the *analysis* API.
### Log API
`/api/add_log` (*AddLogRequest*)
Stores a new log entry for a user in the database. The request must
contain a `LogEntry` object in its `log` field. The method returns an
empty response. If the log entry contains device information, the list
of devices for the specified user is updated with that information.
`/api/get_user_logs` (*GetUserLogsRequest*) -> *GetUserLogsResponse*
Returns recent logs for a specific user.
`/api/get_user_devices` (*GetUserDevicesRequest*) -> *GetUserDevicesResponse*
Returns the list of known devices for a user.
### Analysis API
`/api/check_device` (*CheckDeviceRequest*) -> *CheckDeviceResponse*
Returns information about a device: whether we have seen it before,
whether its location information matches the historical trend, etc.
package client
import (
"crypto/tls"
"net/http"
"net/url"
"time"
"git.autistici.org/ai3/go-common/clientutil"
"git.autistici.org/id/auth"
"git.autistici.org/id/usermetadb"
)
// Client is a client for the usermetadb HTTP API. It embeds the
// *http.Client used to perform requests.
type Client struct {
*http.Client
// backendURL is prepended to the API endpoint paths (e.g.
// "/api/check_device") to build request URLs.
backendURL string
}
// Config holds the client settings in a YAML-serializable form.
//
// NOTE(review): New takes a plain URL and a *tls.Config rather than a
// Config; presumably callers convert TLSConfig themselves - confirm.
type Config struct {
// BackendURL is read from the "backend_url" YAML key.
BackendURL string `yaml:"backend_url"`
// TLSConfig is read from the "tls_config" YAML key.
TLSConfig *clientutil.TLSClientConfig `yaml:"tls_config"`
}
// New builds a Client that talks to the server at backendURL.
//
// The URL is parsed only to extract its host, which is handed to
// clientutil.NewTransport together with tlsConfig. Requests made
// through the returned client time out after 10 seconds. An error is
// returned if backendURL cannot be parsed.
func New(backendURL string, tlsConfig *tls.Config) (*Client, error) {
	parsed, err := url.Parse(backendURL)
	if err != nil {
		return nil, err
	}

	backends := []string{parsed.Host}
	httpClient := &http.Client{
		Timeout:   10 * time.Second,
		Transport: clientutil.NewTransport(backends, tlsConfig, nil),
	}

	client := &Client{
		backendURL: backendURL,
		Client:     httpClient,
	}
	return client, nil
}
// CheckDevice calls the /api/check_device endpoint for the given user
// and device, returning the "seen" flag of the response along with any
// error reported by the request.
func (c *Client) CheckDevice(username string, dev *auth.DeviceInfo) (bool, error) {
	var (
		req  usermetadb.CheckDeviceRequest
		resp usermetadb.CheckDeviceResponse
	)
	req.Username = username
	req.DeviceInfo = dev

	endpoint := c.backendURL + "/api/check_device"
	err := clientutil.DoJSONHTTPRequest(c.Client, endpoint, &req, &resp)
	return resp.Seen, err
}
// AddLog sends entry to the /api/add_log endpoint. No response body is
// decoded; only the request error (if any) is returned.
func (c *Client) AddLog(entry *usermetadb.LogEntry) error {
	var req usermetadb.AddLogRequest
	req.Log = entry

	endpoint := c.backendURL + "/api/add_log"
	return clientutil.DoJSONHTTPRequest(c.Client, endpoint, &req, nil)
}
// GetUserDevices calls the /api/get_user_devices endpoint and returns
// the device list from the response, along with any request error.
func (c *Client) GetUserDevices(username string) ([]*usermetadb.MetaDeviceInfo, error) {
	var (
		req  usermetadb.GetUserDevicesRequest
		resp usermetadb.GetUserDevicesResponse
	)
	req.Username = username

	endpoint := c.backendURL + "/api/get_user_devices"
	err := clientutil.DoJSONHTTPRequest(c.Client, endpoint, &req, &resp)
	return resp.Devices, err
}
// GetUserLogs calls the /api/get_user_logs endpoint for username,
// forwarding maxDays and limit unchanged in the request, and returns
// the log entries from the response along with any request error.
func (c *Client) GetUserLogs(username string, maxDays, limit int) ([]*usermetadb.LogEntry, error) {
	var (
		req  usermetadb.GetUserLogsRequest
		resp usermetadb.GetUserLogsResponse
	)
	req.Username = username
	req.MaxDays = maxDays
	req.Limit = limit

	endpoint := c.backendURL + "/api/get_user_logs"
	err := clientutil.DoJSONHTTPRequest(c.Client, endpoint, &req, &resp)
	return resp.Results, err
}
package main
import (
"flag"
"io/ioutil"
"log"
"os"
"strings"
"git.autistici.org/ai3/go-common/serverutil"
"gopkg.in/yaml.v2"
"git.autistici.org/id/usermetadb/server"
)
// Command-line flags. Their defaults can be overridden through
// USERMETADB_* environment variables (see setFlagDefaultsFromEnv).
var (
// addr is the address passed to serverutil.Serve.
addr = flag.String("addr", ":5005", "address to listen on")
// configFile is the path of the YAML file read by loadConfig.
configFile = flag.String("config", "/etc/user-meta-server.yml", "path of config file")
)
// loadConfig reads the file named by the -config flag and decodes its
// YAML contents into a server.Config. Read and decode errors are
// returned to the caller unchanged.
func loadConfig() (*server.Config, error) {
	raw, err := ioutil.ReadFile(*configFile)
	if err != nil {
		return nil, err
	}

	cfg := new(server.Config)
	if err = yaml.Unmarshal(raw, cfg); err != nil {
		return nil, err
	}
	return cfg, nil
}
// setFlagDefaultsFromEnv sets defaults for command-line flags using
// variables from the environment.
//
// For every registered flag, the environment variable named
// USERMETADB_<FLAG>, with the flag name upper-cased and dashes replaced
// by underscores, is looked up. A non-empty value is applied to the
// flag and becomes its advertised default. Values that the flag
// rejects are reported with a log message and ignored, leaving both
// the flag value and its default untouched.
//
// It must be called before flag.Parse so that explicit command-line
// arguments still take precedence.
func setFlagDefaultsFromEnv() {
	flag.VisitAll(func(f *flag.Flag) {
		envVar := "USERMETADB_" + strings.ToUpper(strings.Replace(f.Name, "-", "_", -1))
		value := os.Getenv(envVar)
		if value == "" {
			return
		}

		previous := f.Value.String()
		if err := f.Value.Set(value); err != nil {
			log.Printf("ignoring invalid value %q of environment variable %s: %v", value, envVar, err)
			// Some flag.Value implementations modify the stored value
			// even when Set fails, so put the previous one back.
			_ = f.Value.Set(previous)
			return
		}
		// Only advertise the environment value as the default once we
		// know the flag accepted it.
		f.DefValue = value
	})
}
func main() {
setFlagDefaultsFromEnv()
flag.Parse()
config, err := loadConfig()
if err != nil {
log.Fatal(err)
}
db, err := server.New(config)
if err != nil {
log.Fatal(err)
}
defer db.Close()
if err := serverutil.Serve(db.Handler(), config.TLSConfig, *addr); err != nil {
log.Fatal(err)
}
}
-- Down migration for the initial schema: removes the devices and
-- userlog tables together with the indexes defined on them.
DROP INDEX idx_devices_id_username;
DROP TABLE devices;
DROP INDEX idx_userlog_username;
DROP INDEX idx_userlog_device_id;
DROP TABLE userlog;
-- We store the raw denormalized user logs, because device information
-- might change over time (think a version update, or restoring a
-- backup to a different OS) and it may be useful to see the
-- historical variation. Unique device information is also aggregated
-- incrementally to a separate table, to provide a quick way to obtain
-- the list of devices for a user without a large table scan.
-- Aggregated per-user device table. The unique index below allows at
-- most one row for each (id, username) pair.
CREATE TABLE devices (
id VARCHAR(64) NOT NULL,
username TEXT NOT NULL,
device_browser TEXT,
device_os TEXT,
device_mobile BOOL,
first_seen DATETIME,
last_seen DATETIME,
last_device_remote_zone TEXT,
last_device_user_agent TEXT
);
CREATE UNIQUE INDEX idx_devices_id_username ON devices (id, username);
-- Raw, denormalized user log: device attributes are stored alongside
-- every entry. Indexed by username and by device_id.
-- NOTE(review): device_id is NOT NULL although the Go LogEntry type
-- treats device information as optional - confirm that the insert path
-- always supplies a value (e.g. an empty string).
CREATE TABLE userlog (
username TEXT NOT NULL,
service TEXT NOT NULL,
log_type TEXT NOT NULL,
login_method TEXT,
message TEXT,
device_id VARCHAR(64) NOT NULL,
device_remote_zone TEXT,
device_user_agent TEXT,
device_browser TEXT,
device_os TEXT,
device_mobile BOOL,
timestamp DATETIME
);
CREATE INDEX idx_userlog_username ON userlog (username);
CREATE INDEX idx_userlog_device_id ON userlog (device_id);
// Code generated by go-bindata.
// sources:
// migrations/1_initialize_schema.down.sql
// migrations/1_initialize_schema.up.sql
// DO NOT EDIT!
package migrations
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"time"
)
// asset is an embedded file: its raw contents plus the file metadata
// reported for it.
type asset struct {
bytes []byte
info os.FileInfo
}
// bindataFileInfo is the file metadata recorded for an embedded asset.
// Its methods provide the method set of os.FileInfo.
type bindataFileInfo struct {
name string
size int64
mode os.FileMode
modTime time.Time
}
// Name returns the recorded file name.
func (fi bindataFileInfo) Name() string {
return fi.name
}
// Size returns the recorded file size.
func (fi bindataFileInfo) Size() int64 {
return fi.size
}
// Mode returns the recorded file mode.
func (fi bindataFileInfo) Mode() os.FileMode {
return fi.mode
}
// ModTime returns the recorded modification time.
func (fi bindataFileInfo) ModTime() time.Time {
return fi.modTime
}
// IsDir always reports false.
func (fi bindataFileInfo) IsDir() bool {
return false
}
// Sys always returns nil.
func (fi bindataFileInfo) Sys() interface{} {
return nil
}
// __1_initialize_schemaDownSql holds the embedded contents of
// migrations/1_initialize_schema.down.sql.
var __1_initialize_schemaDownSql = []byte(`DROP INDEX idx_devices_id_username;
DROP TABLE devices;
DROP INDEX idx_userlog_username;
DROP INDEX idx_userlog_device_id;
DROP TABLE userlog;
`)
// _1_initialize_schemaDownSqlBytes returns the embedded bytes of the
// down migration. The returned error is always nil.
func _1_initialize_schemaDownSqlBytes() ([]byte, error) {
return __1_initialize_schemaDownSql, nil
}
// _1_initialize_schemaDownSql builds the asset for the down migration,
// pairing its embedded contents with the recorded file metadata.
func _1_initialize_schemaDownSql() (*asset, error) {
	data, err := _1_initialize_schemaDownSqlBytes()
	if err != nil {
		return nil, err
	}

	return &asset{
		bytes: data,
		info: bindataFileInfo{
			name:    "1_initialize_schema.down.sql",
			size:    144,
			mode:    os.FileMode(420),
			modTime: time.Unix(1511642368, 0),
		},
	}, nil
}
// __1_initialize_schemaUpSql holds the embedded contents of
// migrations/1_initialize_schema.up.sql.
var __1_initialize_schemaUpSql = []byte(`
-- We store the raw denormalized user logs, because device information
-- might change over time (think a version update, or restoring a
-- backup to a different OS) and it may be useful to see the
-- historical variation. Unique device information is also aggregated
-- incrementally to a separate table, to provide a quick way to obtain
-- the list of devices for a user without a large table scan.
CREATE TABLE devices (
id VARCHAR(64) NOT NULL,
username TEXT NOT NULL,
device_browser TEXT,
device_os TEXT,
device_mobile BOOL,
first_seen DATETIME,
last_seen DATETIME,
last_device_remote_zone TEXT,
last_device_user_agent TEXT
);
CREATE UNIQUE INDEX idx_devices_id_username ON devices (id, username);
CREATE TABLE userlog (
username TEXT NOT NULL,
service TEXT NOT NULL,
log_type TEXT NOT NULL,
login_method TEXT,
message TEXT,
device_id VARCHAR(64) NOT NULL,
device_remote_zone TEXT,
device_user_agent TEXT,
device_browser TEXT,
device_os TEXT,
device_mobile BOOL,
timestamp DATETIME
);
CREATE INDEX idx_userlog_username ON userlog (username);
CREATE INDEX idx_userlog_device_id ON userlog (device_id);
`)
// _1_initialize_schemaUpSqlBytes returns the embedded bytes of the up
// migration. The returned error is always nil.
func _1_initialize_schemaUpSqlBytes() ([]byte, error) {
return __1_initialize_schemaUpSql, nil
}
// _1_initialize_schemaUpSql builds the asset for the up migration,
// pairing its embedded contents with the recorded file metadata.
func _1_initialize_schemaUpSql() (*asset, error) {
	data, err := _1_initialize_schemaUpSqlBytes()
	if err != nil {
		return nil, err
	}

	return &asset{
		bytes: data,
		info: bindataFileInfo{
			name:    "1_initialize_schema.up.sql",
			size:    1258,
			mode:    os.FileMode(420),
			modTime: time.Unix(1511675275, 0),
		},
	}, nil
}
// Asset returns the contents of the embedded asset called name.
// Backslashes in name are treated as forward slashes. An error is
// returned when no such asset exists or when it cannot be loaded.
func Asset(name string) ([]byte, error) {
	key := strings.Replace(name, "\\", "/", -1)

	load, found := _bindata[key]
	if !found {
		return nil, fmt.Errorf("Asset %s not found", name)
	}

	a, err := load()
	if err != nil {
		return nil, fmt.Errorf("Asset %s can't read by error: %v", name, err)
	}
	return a.bytes, nil
}
// MustAsset behaves like Asset, except that it panics instead of
// returning an error. It is meant for initializing global variables.
func MustAsset(name string) []byte {
	data, err := Asset(name)
	if err == nil {
		return data
	}
	panic("asset: Asset(" + name + "): " + err.Error())
}
// AssetInfo returns the file metadata of the embedded asset called
// name. Backslashes in name are treated as forward slashes. An error
// is returned when no such asset exists or when it cannot be loaded.
func AssetInfo(name string) (os.FileInfo, error) {
	key := strings.Replace(name, "\\", "/", -1)

	load, found := _bindata[key]
	if !found {
		return nil, fmt.Errorf("AssetInfo %s not found", name)
	}

	a, err := load()
	if err != nil {
		return nil, fmt.Errorf("AssetInfo %s can't read by error: %v", name, err)
	}
	return a.info, nil
}
// AssetNames lists the names of all embedded assets. The order of the
// result follows map iteration and is therefore unspecified.
func AssetNames() []string {
	result := make([]string, len(_bindata))
	next := 0
	for key := range _bindata {
		result[next] = key
		next++
	}
	return result
}
// _bindata maps each asset name to the function that builds the
// corresponding asset. Asset, AssetInfo and AssetNames read from it.
var _bindata = map[string]func() (*asset, error){
"1_initialize_schema.down.sql": _1_initialize_schemaDownSql,
"1_initialize_schema.up.sql": _1_initialize_schemaUpSql,
}
// AssetDir returns the names of the entries directly below the
// embedded directory called name.
//
// For example, if go-bindata was run on data/... and data contains
//
//	data/
//	  foo.txt
//	  img/
//	    a.png
//	    b.png
//
// then AssetDir("data") returns []string{"foo.txt", "img"},
// AssetDir("data/img") returns []string{"a.png", "b.png"},
// AssetDir("foo.txt") and AssetDir("notexist") return an error, and
// AssetDir("") returns []string{"data"}.
func AssetDir(name string) ([]string, error) {
	cur := _bintree
	if name != "" {
		// Walk the tree one path segment at a time.
		segments := strings.Split(strings.Replace(name, "\\", "/", -1), "/")
		for _, segment := range segments {
			if cur = cur.Children[segment]; cur == nil {
				return nil, fmt.Errorf("Asset %s not found", name)
			}
		}
	}

	// A node carrying a loader function is a file, not a directory.
	if cur.Func != nil {
		return nil, fmt.Errorf("Asset %s not found", name)
	}

	entries := make([]string, 0, len(cur.Children))
	for entry := range cur.Children {
		entries = append(entries, entry)
	}
	return entries, nil
}
// bintree is a node of the embedded asset tree. AssetDir treats a node
// with a non-nil Func as a file and any other node as a directory
// whose entries are the keys of Children.
type bintree struct {
Func func() (*asset, error)
Children map[string]*bintree
}
// _bintree is the root of the embedded asset tree walked by AssetDir.
// Both migration files sit directly below the root.
var _bintree = &bintree{nil, map[string]*bintree{
"1_initialize_schema.down.sql": &bintree{_1_initialize_schemaDownSql, map[string]*bintree{}},
"1_initialize_schema.up.sql": &bintree{_1_initialize_schemaUpSql, map[string]*bintree{}},
}}
// RestoreAsset writes the embedded asset called name to disk below
// dir, creating parent directories (mode 0755) as needed. The file is
// written with the asset's recorded mode, and its access and
// modification times are set to the recorded modification time.
func RestoreAsset(dir, name string) error {
	data, err := Asset(name)
	if err != nil {
		return err
	}
	info, err := AssetInfo(name)
	if err != nil {
		return err
	}

	if err := os.MkdirAll(_filePath(dir, filepath.Dir(name)), os.FileMode(0755)); err != nil {
		return err
	}
	if err := ioutil.WriteFile(_filePath(dir, name), data, info.Mode()); err != nil {
		return err
	}
	return os.Chtimes(_filePath(dir, name), info.ModTime(), info.ModTime())
}
// RestoreAssets writes the embedded asset or directory called name to
// disk below dir, descending into directories recursively.
func RestoreAssets(dir, name string) error {
	entries, err := AssetDir(name)
	if err != nil {
		// AssetDir fails for anything that is not a directory, so
		// treat name as a single file.
		return RestoreAsset(dir, name)
	}

	for i := range entries {
		if err := RestoreAssets(dir, filepath.Join(name, entries[i])); err != nil {
			return err
		}
	}
	return nil
}
// _filePath joins dir with the asset name, using the path separator of
// the host operating system. Both backslashes and forward slashes in
// name are treated as separators.
func _filePath(dir, name string) string {
	slashed := strings.Replace(name, "\\", "/", -1)

	parts := []string{dir}
	parts = append(parts, strings.Split(slashed, "/")...)
	return filepath.Join(parts...)
}
package usermetadb
//go:generate go-bindata --nocompress --pkg migrations --ignore \.go$ -o migrations/bindata.go -prefix migrations/ ./migrations
import (
"errors"
"time"
"git.autistici.org/id/auth"
)
// CheckDeviceRequest is the JSON request body of the
// /api/check_device endpoint.
type CheckDeviceRequest struct {
Username string `json:"username"`
DeviceInfo *auth.DeviceInfo `json:"device_info"`
}
// CheckDeviceResponse is the JSON response body of the
// /api/check_device endpoint.
type CheckDeviceResponse struct {
// Seen carries the result of the device check.
Seen bool `json:"seen"`
}
// Log entry types accepted by LogEntry.Validate for the Type field.
const (
LogTypeLogin = "login"
LogTypeLogout = "logout"
LogTypePasswordReset = "password_reset"
LogTypePasswordChange = "password_change"
LogTypeOTPEnabled = "otp_enabled"
LogTypeOTPDisabled = "otp_disabled"
)
// Login method identifiers, presumably the values expected in
// LogEntry.LoginMethod (LogEntry.Validate does not check that field).
const (
LoginMethodPassword = "password"
LoginMethodOTP = "otp"
LoginMethodU2F = "u2f"
)
// LogEntry is a single user log record as exchanged over the JSON API.
// Username and Type are required by Validate; Message, Service,
// LoginMethod and DeviceInfo are omitted from the JSON encoding when
// empty.
type LogEntry struct {
Timestamp time.Time `json:"timestamp"`
Username string `json:"username"`
// Type must be one of the LogType* constants.
Type string `json:"log_type"`
Message string `json:"message,omitempty"`
Service string `json:"service,omitempty"`
LoginMethod string `json:"login_method,omitempty"`
// DeviceInfo is optional; when set, Validate requires a non-empty ID.
DeviceInfo *auth.DeviceInfo `json:"device_info,omitempty"`
}
// Validate checks that the log entry is well-formed. It returns an
// error when the username is empty, when the type is not one of the
// LogType* constants, or when device information is present but lacks
// an ID; otherwise it returns nil.
func (e *LogEntry) Validate() error {
	if e.Username == "" {
		return errors.New("invalid log entry: missing username")
	}

	knownType := e.Type == LogTypeLogin ||
		e.Type == LogTypeLogout ||
		e.Type == LogTypePasswordReset ||
		e.Type == LogTypePasswordChange ||
		e.Type == LogTypeOTPEnabled ||
		e.Type == LogTypeOTPDisabled
	if !knownType {
		return errors.New("invalid log entry: unknown log type")
	}

	if e.DeviceInfo != nil && e.DeviceInfo.ID == "" {
		return errors.New("invalid device info in log entry")
	}
	return nil
}
// AddLogRequest is the JSON request body of the /api/add_log endpoint.
type AddLogRequest struct {
	// Log is the entry to store, encoded under the "log" key. Decoding
	// matches keys case-insensitively, so payloads using "Log" are
	// still accepted.
	Log *LogEntry `json:"log"`
}
// AddLogResponse is the (empty) response of the /api/add_log endpoint.
type AddLogResponse struct{}
// GetUserDevicesRequest is the JSON request body of the
// /api/get_user_devices endpoint.
type GetUserDevicesRequest struct {
Username string `json:"username"`
}
// MetaDeviceInfo pairs a device with usage metadata: first and last
// seen timestamps and a login counter.
// NOTE(review): how NumLogins is computed is not visible here - confirm
// against the server implementation.
type MetaDeviceInfo struct {
DeviceInfo *auth.DeviceInfo `json:"device_info"`
FirstSeen time.Time `json:"first_seen"`
LastSeen time.Time `json:"last_seen"`
NumLogins int `json:"num_logins"`
}
// GetUserDevicesResponse is the JSON response body of the
// /api/get_user_devices endpoint.
type GetUserDevicesResponse struct {
Devices []*MetaDeviceInfo `json:"devices"`
}
// GetUserLogsRequest is the JSON request body of the
// /api/get_user_logs endpoint.
// NOTE(review): MaxDays and Limit presumably bound the age and number
// of returned entries - confirm against the server implementation.
type GetUserLogsRequest struct {
Username string `json:"username"`
MaxDays int `json:"max_days"`
Limit int `json:"limit"`
}
// GetUserLogsResponse is the JSON response body of the
// /api/get_user_logs endpoint.
type GetUserLogsResponse struct {
// Results is encoded under the singular "result" JSON key.
Results []*LogEntry `json:"result"`
}
package server
import (
"database/sql"
"git.autistici.org/id/auth"
)
// analysisStatements maps statement names to the SQL text used by the
// analysis service. It is passed to newStatementMap.
var analysisStatements = map[string]string{
// Yields one row for each devices entry matching (username, id).
"check_device_info": `SELECT 1 FROM devices WHERE username = ? AND id = ?`,
}
// analysisService answers analysis queries (such as CheckDevice) using
// the database handle and the statement map built from
// analysisStatements.
type analysisService struct {
db *sql.DB
stmts statementMap
}
// newAnalysisService creates an analysisService on top of db, building
// its statement map from analysisStatements. The error reported by
// newStatementMap, if any, is returned unchanged.
func newAnalysisService(db *sql.DB) (*analysisService, error) {
	prepared, err := newStatementMap(db, analysisStatements)
	if err != nil {
		return nil, err
	}

	svc := &analysisService{stmts: prepared, db: db}
	return svc, nil
}
// Close closes the service's statement map. It does not close the
// underlying database handle.
func (d *analysisService) Close() {
d.stmts.Close()
}
// CheckDevice reports whether the devices table already has an entry
// for the given user and device ID.
//
// A nil deviceInfo cannot match any stored device, so it yields
// (false, nil) instead of a nil pointer dereference. Database errors
// other than "no rows" are returned to the caller.
func (d *analysisService) CheckDevice(username string, deviceInfo *auth.DeviceInfo) (bool, error) {
	// deviceInfo is decoded from the request and may be missing.
	if deviceInfo == nil {
		return false, nil
	}

	tx, err := d.db.Begin()
	if err != nil {
		return false, err
	}
	// The transaction only reads, so it is always rolled back.
	defer tx.Rollback()

	var seen bool
	err = d.stmts.get(tx, "check_device_info").QueryRow(username, deviceInfo.ID).Scan(&seen)
	if err != nil && err != sql.ErrNoRows {
		return false, err
	}
	// With sql.ErrNoRows, seen keeps its zero value (false).
	return seen, nil
}
package server
import (
"os"
"testing"
)
// TestAnalysis_CheckDevice loads the test logs into a scratch database
// and verifies that CheckDevice reports a device as seen for the user
// it was logged with, and as not seen for an unknown user.
func TestAnalysis_CheckDevice(t *testing.T) {
	const dbPath = "test.db"
	defer os.Remove(dbPath)

	db, err := openDB(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	entry := bulkLoadTestLogs(t, db)

	analysis, err := newAnalysisService(db)
	if err != nil {
		t.Fatal(err)
	}
	defer analysis.Close()

	// Check if we've seen a known device.
	if seen, err := analysis.CheckDevice(entry.Username, entry.DeviceInfo); err != nil {
		t.Fatal("CheckDevice():", err)
	} else if !seen {
		t.Fatal("CheckDevice returned seen=false, expected true")
	}

	// Check an unknown device.
	if seen, err := analysis.CheckDevice("unknown_user", entry.DeviceInfo); err != nil {
		t.Fatal("CheckDevice():", err)
	} else if seen {
		t.Fatal("CheckDevice returned seen=true, expected false")
	}
}
package server
import (
"database/sql"
"log"
"net/http"
"git.autistici.org/ai3/go-common/serverutil"
"git.autistici.org/id/usermetadb"
)
// Config holds the UserMetaServer settings, as read from YAML.
type Config struct {
// DBURI is handed to openDB by New ("db_uri" YAML key).
DBURI string `yaml:"db_uri"`
// TLSConfig is read from the "tls" YAML key.
TLSConfig *serverutil.TLSServerConfig `yaml:"tls"`
}
// UserMetaServer exposes the analysis service and the user metadata
// database over an HTTP API. It holds the database handle together
// with the two services built on top of it.
type UserMetaServer struct {
db *sql.DB
config *Config
analysis *analysisService
userlog *userlogDB
}
// New returns a new UserMetaServer with the given configuration.
func New(config *Config) (*UserMetaServer, error) {
db, err := openDB(config.DBURI)
if err != nil {
return nil, err
}
analysis, err := newAnalysisService(db)
if err != nil {
db.Close()
return nil, err
}
userlog, err := newUserlogDB(db)
if err != nil {