Skip to content

FATAL error when metrics cannot be delivered #320

@simonsparks

Description

@simonsparks

In our deployment scenario, Fabio is configured to deliver metrics to a remote StatsD collector.
When the infrastructure is provisioned, Fabio and other core services are started before the StatsD service, so there is a period of time during which metrics cannot be collected.

The problem we found is that, if Fabio can't find the StatsD endpoint on startup, it logs a fatal error and exits. Presumably this could also happen after startup if the StatsD service became temporarily unavailable. We haven't tested whether this also occurs with the other supported metrics implementations.

I think it would be preferable for Fabio to continue operating without delivering its metrics rather than exiting.

An example log extract of the observed behaviour:

2017/07/17 23:22:26 [INFO] Runtime config
{
    "Proxy": {
        "Strategy": "rnd",
        "Matcher": "prefix",
        "NoRouteStatus": 404,
        "MaxConn": 10000,
        "ShutdownWait": 0,
        "DialTimeout": 30000000000,
        "ResponseHeaderTimeout": 0,
        "KeepAliveTimeout": 0,
        "FlushInterval": 1000000000,
        "LocalIP": "10.180.10.133",
        "ClientIPHeader": "",
        "TLSHeader": "",
        "TLSHeaderValue": "",
        "GZIPContentTypes": null,
        "RequestID": ""
    },
    "Registry": {
        "Backend": "consul",
        "Static": {
            "Routes": ""
        },
        "File": {
            "Path": ""
        },
        "Consul": {
            "Addr": "localhost:8500",
            "Scheme": "http",
            "Token": "",
            "KVPath": "/fabio/config",
            "TagPrefix": "urlprefix-",
            "Register": true,
            "ServiceAddr": ":9998",
            "ServiceName": "fabio",
            "ServiceTags": null,
            "ServiceStatus": [
                "passing"
            ],
            "CheckInterval": 1000000000,
            "CheckTimeout": 3000000000,
            "CheckScheme": "http",
            "CheckTLSSkipVerify": false
        },
        "Timeout": 10000000000,
        "Retry": 500000000
    },
    "Listen": [
        {
            "Addr": ":9999",
            "Proto": "http",
            "ReadTimeout": 0,
            "WriteTimeout": 0,
            "CertSource": {
                "Name": "",
                "Type": "",
                "CertPath": "",
                "KeyPath": "",
                "ClientCAPath": "",
                "CAUpgradeCN": "",
                "Refresh": 0,
                "Header": null
            },
            "StrictMatch": false,
            "TLSMinVersion": 0,
            "TLSMaxVersion": 0,
            "TLSCiphers": null
        },
        {
            "Addr": ":443",
            "Proto": "https",
            "ReadTimeout": 0,
            "WriteTimeout": 0,
            "CertSource": {
                "Name": "public",
                "Type": "path",
                "CertPath": "/etc/fabio.d/certs/server",
                "KeyPath": "",
                "ClientCAPath": "/etc/fabio.d/certs/client",
                "CAUpgradeCN": "ApiGateway",
                "Refresh": 5000000000,
                "Header": null
            },
            "StrictMatch": false,
            "TLSMinVersion": 0,
            "TLSMaxVersion": 0,
            "TLSCiphers": null
        }
    ],
    "Log": {
        "AccessFormat": "common",
        "AccessTarget": "stdout",
        "RoutesFormat": "delta"
    },
    "Metrics": {
        "Target": "statsd",
        "Prefix": "{{clean .Exec}}_{{clean .Hostname}}",
        "Names": "{{clean .Service}}.{{clean .Host}}.{{clean .Path}}.{{clean .TargetURL.Host}}",
        "Interval": 30000000000,
        "GraphiteAddr": "",
        "StatsDAddr": "metrics-statsd.service.consul:9125",
        "Circonus": {
            "APIKey": "",
            "APIApp": "fabio",
            "APIURL": "",
            "CheckID": "",
            "BrokerID": ""
        }
    },
    "UI": {
        "Listen": {
            "Addr": ":9998",
            "Proto": "http",
            "ReadTimeout": 0,
            "WriteTimeout": 0,
            "CertSource": {
                "Name": "",
                "Type": "",
                "CertPath": "",
                "KeyPath": "",
                "ClientCAPath": "",
                "CAUpgradeCN": "",
                "Refresh": 0,
                "Header": null
            },
            "StrictMatch": false,
            "TLSMinVersion": 0,
            "TLSMaxVersion": 0,
            "TLSCiphers": null
        },
        "Color": "teal",
        "Title": "Load Balancer",
        "Access": "rw"
    },
    "Runtime": {
        "GOGC": 800,
        "GOMAXPROCS": 1
    },
    "ProfileMode": "",
    "ProfilePath": "/tmp"
}
2017/07/17 23:22:26 [INFO] Version 1.5.1 starting
2017/07/17 23:22:26 [INFO] Go runtime is go1.8.3
2017/07/17 23:22:26 [INFO] Sending metrics to StatsD on metrics-statsd.service.consul:9125 as "fabio_ip-10-180-10-133"
2017/07/17 23:22:26 [FATAL]  cannot connect to StatsD: lookup metrics-statsd.service.consul on 127.0.0.1:53: no such host

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions