nebula/main.go

421 lines
14 KiB
Go
Raw Normal View History

2019-11-19 10:00:20 -07:00
package nebula
import (
"context"
2019-11-19 10:00:20 -07:00
"encoding/binary"
"fmt"
"net"
"time"
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/iputil"
"github.com/slackhq/nebula/overlay"
"github.com/slackhq/nebula/sshd"
"github.com/slackhq/nebula/udp"
"github.com/slackhq/nebula/util"
"gopkg.in/yaml.v2"
2019-11-19 10:00:20 -07:00
)
type m map[string]interface{}
func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logger, tunFd *int) (retcon *Control, reterr error) {
ctx, cancel := context.WithCancel(context.Background())
// Automatically cancel the context if Main returns an error, to signal all created goroutines to quit.
defer func() {
if reterr != nil {
cancel()
}
}()
2021-03-26 08:46:30 -06:00
l := logger
2019-11-19 10:00:20 -07:00
l.Formatter = &logrus.TextFormatter{
FullTimestamp: true,
}
// Print the config if in test, the exit comes later
if configTest {
b, err := yaml.Marshal(c.Settings)
2019-11-19 10:00:20 -07:00
if err != nil {
2020-09-18 08:20:09 -06:00
return nil, err
2019-11-19 10:00:20 -07:00
}
// Print the final config
2019-11-19 10:00:20 -07:00
l.Println(string(b))
}
err := configLogger(l, c)
2019-11-19 10:00:20 -07:00
if err != nil {
return nil, util.NewContextualError("Failed to configure the logger", nil, err)
2019-11-19 10:00:20 -07:00
}
c.RegisterReloadCallback(func(c *config.C) {
err := configLogger(l, c)
2019-11-19 10:00:20 -07:00
if err != nil {
l.WithError(err).Error("Failed to configure the logger")
}
})
caPool, err := loadCAFromConfig(l, c)
2019-11-19 10:00:20 -07:00
if err != nil {
//The errors coming out of loadCA are already nicely formatted
return nil, util.NewContextualError("Failed to load ca from config", nil, err)
2019-11-19 10:00:20 -07:00
}
2021-03-29 11:10:19 -06:00
l.WithField("fingerprints", caPool.GetFingerprints()).Debug("Trusted CA fingerprints")
2019-11-19 10:00:20 -07:00
cs, err := NewCertStateFromConfig(c)
2019-11-19 10:00:20 -07:00
if err != nil {
//The errors coming out of NewCertStateFromConfig are already nicely formatted
return nil, util.NewContextualError("Failed to load certificate from config", nil, err)
2019-11-19 10:00:20 -07:00
}
l.WithField("cert", cs.certificate).Debug("Client nebula certificate")
fw, err := NewFirewallFromConfig(l, cs.certificate, c)
2019-11-19 10:00:20 -07:00
if err != nil {
return nil, util.NewContextualError("Error while loading firewall rules", nil, err)
2019-11-19 10:00:20 -07:00
}
l.WithField("firewallHash", fw.GetRuleHash()).Info("Firewall started")
// TODO: make sure mask is 4 bytes
tunCidr := cs.certificate.Details.Ips[0]
ssh, err := sshd.NewSSHServer(l.WithField("subsystem", "sshd"))
wireSSHReload(l, ssh, c)
var sshStart func()
if c.GetBool("sshd.enabled", false) {
sshStart, err = configSSH(l, ssh, c)
2019-11-19 10:00:20 -07:00
if err != nil {
return nil, util.NewContextualError("Error while configuring the sshd", nil, err)
2019-11-19 10:00:20 -07:00
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// All non system modifying configuration consumption should live above this line
// tun config, listeners, anything modifying the computer should be below
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
var routines int
// If `routines` is set, use that and ignore the specific values
if routines = c.GetInt("routines", 0); routines != 0 {
if routines < 1 {
routines = 1
}
if routines > 1 {
l.WithField("routines", routines).Info("Using multiple routines")
}
} else {
// deprecated and undocumented
tunQueues := c.GetInt("tun.routines", 1)
udpQueues := c.GetInt("listen.routines", 1)
if tunQueues > udpQueues {
routines = tunQueues
} else {
routines = udpQueues
}
if routines != 1 {
l.WithField("routines", routines).Warn("Setting tun.routines and listen.routines is deprecated. Use `routines` instead")
}
}
// EXPERIMENTAL
// Intentionally not documented yet while we do more testing and determine
// a good default value.
conntrackCacheTimeout := c.GetDuration("firewall.conntrack.routine_cache_timeout", 0)
if routines > 1 && !c.IsSet("firewall.conntrack.routine_cache_timeout") {
// Use a different default if we are running with multiple routines
conntrackCacheTimeout = 1 * time.Second
}
if conntrackCacheTimeout > 0 {
l.WithField("duration", conntrackCacheTimeout).Info("Using routine-local conntrack cache")
}
var tun overlay.Device
if !configTest {
c.CatchHUP(ctx)
tun, err = overlay.NewDeviceFromConfig(c, l, tunCidr, tunFd, routines)
if err != nil {
return nil, util.NewContextualError("Failed to get a tun/tap device", nil, err)
}
2019-11-19 10:00:20 -07:00
}
defer func() {
if reterr != nil {
tun.Close()
}
}()
2019-11-19 10:00:20 -07:00
// set up our UDP listener
udpConns := make([]*udp.Conn, routines)
port := c.GetInt("listen.port", 0)
if !configTest {
for i := 0; i < routines; i++ {
udpServer, err := udp.NewListener(l, c.GetString("listen.host", "0.0.0.0"), port, routines > 1, c.GetInt("listen.batch", 64))
if err != nil {
return nil, util.NewContextualError("Failed to open udp listener", m{"queue": i}, err)
}
udpServer.ReloadConfig(c)
udpConns[i] = udpServer
// If port is dynamic, discover it
if port == 0 {
uPort, err := udpServer.LocalAddr()
if err != nil {
return nil, util.NewContextualError("Failed to get listening port", nil, err)
}
port = int(uPort.Port)
}
}
2019-11-19 10:00:20 -07:00
}
// Set up my internal host map
var preferredRanges []*net.IPNet
rawPreferredRanges := c.GetStringSlice("preferred_ranges", []string{})
2019-11-19 10:00:20 -07:00
// First, check if 'preferred_ranges' is set and fallback to 'local_range'
if len(rawPreferredRanges) > 0 {
for _, rawPreferredRange := range rawPreferredRanges {
_, preferredRange, err := net.ParseCIDR(rawPreferredRange)
if err != nil {
return nil, util.NewContextualError("Failed to parse preferred ranges", nil, err)
2019-11-19 10:00:20 -07:00
}
preferredRanges = append(preferredRanges, preferredRange)
}
}
// local_range was superseded by preferred_ranges. If it is still present,
// merge the local_range setting into preferred_ranges. We will probably
// deprecate local_range and remove in the future.
rawLocalRange := c.GetString("local_range", "")
2019-11-19 10:00:20 -07:00
if rawLocalRange != "" {
_, localRange, err := net.ParseCIDR(rawLocalRange)
if err != nil {
return nil, util.NewContextualError("Failed to parse local_range", nil, err)
2019-11-19 10:00:20 -07:00
}
// Check if the entry for local_range was already specified in
// preferred_ranges. Don't put it into the slice twice if so.
var found bool
for _, r := range preferredRanges {
if r.String() == localRange.String() {
found = true
break
}
}
if !found {
preferredRanges = append(preferredRanges, localRange)
}
}
2021-03-26 08:46:30 -06:00
hostMap := NewHostMap(l, "main", tunCidr, preferredRanges)
hostMap.metricsEnabled = c.GetBool("stats.message_metrics", false)
2019-12-12 09:34:17 -07:00
2019-11-19 10:00:20 -07:00
l.WithField("network", hostMap.vpnCIDR).WithField("preferredRanges", hostMap.preferredRanges).Info("Main HostMap created")
/*
config.SetDefault("promoter.interval", 10)
go hostMap.Promoter(config.GetInt("promoter.interval"))
*/
punchy := NewPunchyFromConfig(c)
if punchy.Punch && !configTest {
2019-11-19 10:00:20 -07:00
l.Info("UDP hole punching enabled")
go hostMap.Punchy(ctx, udpConns[0])
2019-11-19 10:00:20 -07:00
}
amLighthouse := c.GetBool("lighthouse.am_lighthouse", false)
// fatal if am_lighthouse is enabled but we are using an ephemeral port
if amLighthouse && (c.GetInt("listen.port", 0) == 0) {
return nil, util.NewContextualError("lighthouse.am_lighthouse enabled on node but no port number is set in config", nil, nil)
}
// warn if am_lighthouse is enabled but upstream lighthouses exists
rawLighthouseHosts := c.GetStringSlice("lighthouse.hosts", []string{})
if amLighthouse && len(rawLighthouseHosts) != 0 {
l.Warn("lighthouse.am_lighthouse enabled on node but upstream lighthouses exist in config")
}
lighthouseHosts := make([]iputil.VpnIp, len(rawLighthouseHosts))
for i, host := range rawLighthouseHosts {
ip := net.ParseIP(host)
if ip == nil {
return nil, util.NewContextualError("Unable to parse lighthouse host entry", m{"host": host, "entry": i + 1}, nil)
}
if !tunCidr.Contains(ip) {
return nil, util.NewContextualError("lighthouse host is not in our subnet, invalid", m{"vpnIp": ip, "network": tunCidr.String()}, nil)
}
lighthouseHosts[i] = iputil.Ip2VpnIp(ip)
}
if !amLighthouse && len(lighthouseHosts) == 0 {
l.Warn("No lighthouses.hosts configured, this host will only be able to initiate tunnels with static_host_map entries")
}
2019-11-19 10:00:20 -07:00
lightHouse := NewLightHouse(
2021-03-26 08:46:30 -06:00
l,
2019-11-19 10:00:20 -07:00
amLighthouse,
2021-04-01 09:23:31 -06:00
tunCidr,
lighthouseHosts,
2019-11-19 10:00:20 -07:00
//TODO: change to a duration
c.GetInt("lighthouse.interval", 10),
uint32(port),
udpConns[0],
punchy.Respond,
punchy.Delay,
c.GetBool("stats.lighthouse_metrics", false),
2019-11-19 10:00:20 -07:00
)
remoteAllowList, err := NewRemoteAllowListFromConfig(c, "lighthouse.remote_allow_list", "lighthouse.remote_allow_ranges")
Add lighthouse.{remoteAllowList,localAllowList} (#217) These settings make it possible to blacklist / whitelist IP addresses that are used for remote connections. `lighthouse.remoteAllowList` filters which remote IPs are allow when fetching from the lighthouse (or, if you are the lighthouse, which IPs you store and forward to querying hosts). By default, any remote IPs are allowed. You can provide CIDRs here with `true` to allow and `false` to deny. The most specific CIDR rule applies to each remote. If all rules are "allow", the default will be "deny", and vice-versa. If both "allow" and "deny" rules are present, then you MUST set a rule for "0.0.0.0/0" as the default. lighthouse: remoteAllowList: # Example to block IPs from this subnet from being used for remote IPs. "172.16.0.0/12": false # A more complicated example, allow public IPs but only private IPs from a specific subnet "0.0.0.0/0": true "10.0.0.0/8": false "10.42.42.0/24": true `lighthouse.localAllowList` has the same logic as above, but it applies to the local addresses we advertise to the lighthouse. Additionally, you can specify an `interfaces` map of regular expressions to match against interface names. The regexp must match the entire name. All interface rules must be either true or false (and the default rule will be the inverse). CIDR rules are matched after interface name rules. Default is all local IP addresses. lighthouse: localAllowList: # Example to blacklist docker interfaces. interfaces: 'docker.*': false # Example to only advertise IPs in this subnet to the lighthouse. "10.0.0.0/8": true
2020-04-08 13:36:43 -06:00
if err != nil {
return nil, util.NewContextualError("Invalid lighthouse.remote_allow_list", nil, err)
Add lighthouse.{remoteAllowList,localAllowList} (#217) These settings make it possible to blacklist / whitelist IP addresses that are used for remote connections. `lighthouse.remoteAllowList` filters which remote IPs are allow when fetching from the lighthouse (or, if you are the lighthouse, which IPs you store and forward to querying hosts). By default, any remote IPs are allowed. You can provide CIDRs here with `true` to allow and `false` to deny. The most specific CIDR rule applies to each remote. If all rules are "allow", the default will be "deny", and vice-versa. If both "allow" and "deny" rules are present, then you MUST set a rule for "0.0.0.0/0" as the default. lighthouse: remoteAllowList: # Example to block IPs from this subnet from being used for remote IPs. "172.16.0.0/12": false # A more complicated example, allow public IPs but only private IPs from a specific subnet "0.0.0.0/0": true "10.0.0.0/8": false "10.42.42.0/24": true `lighthouse.localAllowList` has the same logic as above, but it applies to the local addresses we advertise to the lighthouse. Additionally, you can specify an `interfaces` map of regular expressions to match against interface names. The regexp must match the entire name. All interface rules must be either true or false (and the default rule will be the inverse). CIDR rules are matched after interface name rules. Default is all local IP addresses. lighthouse: localAllowList: # Example to blacklist docker interfaces. interfaces: 'docker.*': false # Example to only advertise IPs in this subnet to the lighthouse. "10.0.0.0/8": true
2020-04-08 13:36:43 -06:00
}
lightHouse.SetRemoteAllowList(remoteAllowList)
localAllowList, err := NewLocalAllowListFromConfig(c, "lighthouse.local_allow_list")
Add lighthouse.{remoteAllowList,localAllowList} (#217) These settings make it possible to blacklist / whitelist IP addresses that are used for remote connections. `lighthouse.remoteAllowList` filters which remote IPs are allow when fetching from the lighthouse (or, if you are the lighthouse, which IPs you store and forward to querying hosts). By default, any remote IPs are allowed. You can provide CIDRs here with `true` to allow and `false` to deny. The most specific CIDR rule applies to each remote. If all rules are "allow", the default will be "deny", and vice-versa. If both "allow" and "deny" rules are present, then you MUST set a rule for "0.0.0.0/0" as the default. lighthouse: remoteAllowList: # Example to block IPs from this subnet from being used for remote IPs. "172.16.0.0/12": false # A more complicated example, allow public IPs but only private IPs from a specific subnet "0.0.0.0/0": true "10.0.0.0/8": false "10.42.42.0/24": true `lighthouse.localAllowList` has the same logic as above, but it applies to the local addresses we advertise to the lighthouse. Additionally, you can specify an `interfaces` map of regular expressions to match against interface names. The regexp must match the entire name. All interface rules must be either true or false (and the default rule will be the inverse). CIDR rules are matched after interface name rules. Default is all local IP addresses. lighthouse: localAllowList: # Example to blacklist docker interfaces. interfaces: 'docker.*': false # Example to only advertise IPs in this subnet to the lighthouse. "10.0.0.0/8": true
2020-04-08 13:36:43 -06:00
if err != nil {
return nil, util.NewContextualError("Invalid lighthouse.local_allow_list", nil, err)
Add lighthouse.{remoteAllowList,localAllowList} (#217) These settings make it possible to blacklist / whitelist IP addresses that are used for remote connections. `lighthouse.remoteAllowList` filters which remote IPs are allow when fetching from the lighthouse (or, if you are the lighthouse, which IPs you store and forward to querying hosts). By default, any remote IPs are allowed. You can provide CIDRs here with `true` to allow and `false` to deny. The most specific CIDR rule applies to each remote. If all rules are "allow", the default will be "deny", and vice-versa. If both "allow" and "deny" rules are present, then you MUST set a rule for "0.0.0.0/0" as the default. lighthouse: remoteAllowList: # Example to block IPs from this subnet from being used for remote IPs. "172.16.0.0/12": false # A more complicated example, allow public IPs but only private IPs from a specific subnet "0.0.0.0/0": true "10.0.0.0/8": false "10.42.42.0/24": true `lighthouse.localAllowList` has the same logic as above, but it applies to the local addresses we advertise to the lighthouse. Additionally, you can specify an `interfaces` map of regular expressions to match against interface names. The regexp must match the entire name. All interface rules must be either true or false (and the default rule will be the inverse). CIDR rules are matched after interface name rules. Default is all local IP addresses. lighthouse: localAllowList: # Example to blacklist docker interfaces. interfaces: 'docker.*': false # Example to only advertise IPs in this subnet to the lighthouse. "10.0.0.0/8": true
2020-04-08 13:36:43 -06:00
}
lightHouse.SetLocalAllowList(localAllowList)
2019-11-23 16:55:23 -07:00
//TODO: Move all of this inside functions in lighthouse.go
for k, v := range c.GetMap("static_host_map", map[interface{}]interface{}{}) {
ip := net.ParseIP(fmt.Sprintf("%v", k))
vpnIp := iputil.Ip2VpnIp(ip)
if !tunCidr.Contains(ip) {
return nil, util.NewContextualError("static_host_map key is not in our subnet, invalid", m{"vpnIp": vpnIp, "network": tunCidr.String()}, nil)
}
2019-11-19 10:00:20 -07:00
vals, ok := v.([]interface{})
if ok {
for _, v := range vals {
ip, port, err := udp.ParseIPAndPort(fmt.Sprintf("%v", v))
if err != nil {
return nil, util.NewContextualError("Static host address could not be parsed", m{"vpnIp": vpnIp}, err)
2019-11-19 10:00:20 -07:00
}
lightHouse.AddStaticRemote(vpnIp, udp.NewAddr(ip, port))
2019-11-19 10:00:20 -07:00
}
} else {
ip, port, err := udp.ParseIPAndPort(fmt.Sprintf("%v", v))
if err != nil {
return nil, util.NewContextualError("Static host address could not be parsed", m{"vpnIp": vpnIp}, err)
2019-11-19 10:00:20 -07:00
}
lightHouse.AddStaticRemote(vpnIp, udp.NewAddr(ip, port))
2019-11-19 10:00:20 -07:00
}
}
2019-11-23 16:55:23 -07:00
err = lightHouse.ValidateLHStaticEntries()
if err != nil {
2019-11-23 16:55:23 -07:00
l.WithError(err).Error("Lighthouse unreachable")
}
var messageMetrics *MessageMetrics
if c.GetBool("stats.message_metrics", false) {
messageMetrics = newMessageMetrics()
} else {
messageMetrics = newMessageMetricsOnlyRecvError()
}
handshakeConfig := HandshakeConfig{
tryInterval: c.GetDuration("handshakes.try_interval", DefaultHandshakeTryInterval),
retries: c.GetInt("handshakes.retries", DefaultHandshakeRetries),
triggerBuffer: c.GetInt("handshakes.trigger_buffer", DefaultHandshakeTriggerBuffer),
messageMetrics: messageMetrics,
}
2021-03-26 08:46:30 -06:00
handshakeManager := NewHandshakeManager(l, tunCidr, preferredRanges, hostMap, lightHouse, udpConns[0], handshakeConfig)
lightHouse.handshakeTrigger = handshakeManager.trigger
2019-11-19 10:00:20 -07:00
//TODO: These will be reused for psk
//handshakeMACKey := config.GetString("handshake_mac.key", "")
//handshakeAcceptedMACKeys := config.GetStringSlice("handshake_mac.accepted_keys", []string{})
2019-11-19 10:00:20 -07:00
serveDns := false
if c.GetBool("lighthouse.serve_dns", false) {
if c.GetBool("lighthouse.am_lighthouse", false) {
serveDns = true
} else {
l.Warn("DNS server refusing to run because this host is not a lighthouse.")
}
}
checkInterval := c.GetInt("timers.connection_alive_interval", 5)
pendingDeletionInterval := c.GetInt("timers.pending_deletion_interval", 10)
2019-11-19 10:00:20 -07:00
ifConfig := &InterfaceConfig{
HostMap: hostMap,
Inside: tun,
Outside: udpConns[0],
certState: cs,
Cipher: c.GetString("cipher", "aes"),
Firewall: fw,
ServeDns: serveDns,
HandshakeManager: handshakeManager,
lightHouse: lightHouse,
checkInterval: checkInterval,
pendingDeletionInterval: pendingDeletionInterval,
DropLocalBroadcast: c.GetBool("tun.drop_local_broadcast", false),
DropMulticast: c.GetBool("tun.drop_multicast", false),
routines: routines,
MessageMetrics: messageMetrics,
2020-09-18 08:20:09 -06:00
version: buildVersion,
2021-03-29 11:10:19 -06:00
caPool: caPool,
disconnectInvalid: c.GetBool("pki.disconnect_invalid", false),
ConntrackCacheTimeout: conntrackCacheTimeout,
2021-03-26 08:46:30 -06:00
l: l,
2019-11-19 10:00:20 -07:00
}
switch ifConfig.Cipher {
case "aes":
2020-03-30 12:23:55 -06:00
noiseEndianness = binary.BigEndian
2019-11-19 10:00:20 -07:00
case "chachapoly":
2020-03-30 12:23:55 -06:00
noiseEndianness = binary.LittleEndian
2019-11-19 10:00:20 -07:00
default:
2020-09-18 08:20:09 -06:00
return nil, fmt.Errorf("unknown cipher: %v", ifConfig.Cipher)
2019-11-19 10:00:20 -07:00
}
var ifce *Interface
if !configTest {
ifce, err = NewInterface(ctx, ifConfig)
if err != nil {
2020-09-18 08:20:09 -06:00
return nil, fmt.Errorf("failed to initialize interface: %s", err)
}
2019-11-19 10:00:20 -07:00
// TODO: Better way to attach these, probably want a new interface in InterfaceConfig
// I don't want to make this initial commit too far-reaching though
ifce.writers = udpConns
ifce.RegisterConfigChangeCallbacks(c)
2019-11-19 10:00:20 -07:00
go handshakeManager.Run(ctx, ifce)
go lightHouse.LhUpdateWorker(ctx, ifce)
}
2019-11-19 10:00:20 -07:00
// TODO - stats third-party modules start uncancellable goroutines. Update those libs to accept
// a context so that they can exit when the context is Done.
statsStart, err := startStats(l, c, buildVersion, configTest)
2019-11-19 10:00:20 -07:00
if err != nil {
return nil, util.NewContextualError("Failed to start stats emitter", nil, err)
2019-11-19 10:00:20 -07:00
}
if configTest {
2020-09-18 08:20:09 -06:00
return nil, nil
}
2019-11-19 10:00:20 -07:00
//TODO: check if we _should_ be emitting stats
go ifce.emitStats(ctx, c.GetDuration("stats.interval", time.Second*10))
2019-11-19 10:00:20 -07:00
2021-03-26 08:46:30 -06:00
attachCommands(l, ssh, hostMap, handshakeManager.pendingHostMap, lightHouse, ifce)
2019-11-19 10:00:20 -07:00
// Start DNS server last to allow using the nebula IP as lighthouse.dns.host
var dnsStart func()
if amLighthouse && serveDns {
l.Debugln("Starting dns server")
dnsStart = dnsMain(l, hostMap, c)
}
return &Control{ifce, l, cancel, sshStart, statsStart, dnsStart}, nil
2019-11-19 10:00:20 -07:00
}