nebula/interface.go

417 lines
11 KiB
Go
Raw Normal View History

2019-11-19 10:00:20 -07:00
package nebula
import (
"context"
2019-11-19 10:00:20 -07:00
"errors"
"fmt"
"io"
"net"
2019-11-19 10:00:20 -07:00
"os"
"runtime"
2021-11-08 11:36:31 -07:00
"sync/atomic"
2019-11-19 10:00:20 -07:00
"time"
"github.com/rcrowley/go-metrics"
2021-03-26 08:46:30 -06:00
"github.com/sirupsen/logrus"
2021-03-29 11:10:19 -06:00
"github.com/slackhq/nebula/cert"
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/firewall"
"github.com/slackhq/nebula/header"
"github.com/slackhq/nebula/iputil"
"github.com/slackhq/nebula/overlay"
"github.com/slackhq/nebula/udp"
2019-11-19 10:00:20 -07:00
)
// mtu is the size, in bytes, of the packet and output buffers that listenIn
// allocates for each tun reader.
const mtu = 9001
type InterfaceConfig struct {
HostMap *HostMap
Outside *udp.Conn
Inside overlay.Device
certState *CertState
Cipher string
Firewall *Firewall
ServeDns bool
HandshakeManager *HandshakeManager
lightHouse *LightHouse
checkInterval time.Duration
pendingDeletionInterval time.Duration
DropLocalBroadcast bool
DropMulticast bool
routines int
MessageMetrics *MessageMetrics
2020-09-18 08:20:09 -06:00
version string
2021-03-29 11:10:19 -06:00
caPool *cert.NebulaCAPool
disconnectInvalid bool
relayManager *relayManager
punchy *Punchy
ConntrackCacheTimeout time.Duration
2021-03-26 08:46:30 -06:00
l *logrus.Logger
2019-11-19 10:00:20 -07:00
}
type Interface struct {
hostMap *HostMap
outside *udp.Conn
inside overlay.Device
certState atomic.Pointer[CertState]
cipher string
firewall *Firewall
connectionManager *connectionManager
handshakeManager *HandshakeManager
serveDns bool
createTime time.Time
lightHouse *LightHouse
localBroadcast iputil.VpnIp
myVpnIp iputil.VpnIp
dropLocalBroadcast bool
dropMulticast bool
routines int
2021-03-29 11:10:19 -06:00
caPool *cert.NebulaCAPool
disconnectInvalid bool
closed atomic.Bool
relayManager *relayManager
sendRecvErrorConfig sendRecvErrorConfig
// rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
rebindCount int8
version string
2019-11-19 10:00:20 -07:00
conntrackCacheTimeout time.Duration
writers []*udp.Conn
readers []io.ReadWriteCloser
metricHandshakes metrics.Histogram
messageMetrics *MessageMetrics
cachedPacketMetrics *cachedPacketMetrics
l *logrus.Logger
2019-11-19 10:00:20 -07:00
}
// EncWriter declares three operations: sending through a relay, sending a typed
// message to a VPN IP, and triggering a handshake with a VPN IP.
//
// NOTE(review): the implementing types and the exact meaning of the ad, nb and
// out buffers are not visible in this file; confirm against the implementations.
type EncWriter interface {
	SendVia(via *HostInfo, relay *Relay, ad, nb, out []byte, nocopy bool)
	SendMessageToVpnIp(t header.MessageType, st header.MessageSubType, vpnIp iputil.VpnIp, p, nb, out []byte)
	Handshake(vpnIp iputil.VpnIp)
}
// sendRecvErrorConfig selects the policy ShouldSendRecvError applies. The value
// is chosen by reloadSendRecvError from the listen.send_recv_error setting.
type sendRecvErrorConfig uint8

// The three valid policies. The zero value is sendRecvErrorAlways.
const (
	sendRecvErrorAlways sendRecvErrorConfig = iota
	sendRecvErrorNever
	sendRecvErrorPrivate
)

// ShouldSendRecvError reports whether the policy allows a recv error to be sent
// to ip: always, never, or only when ip is a private address. It panics if s is
// not one of the declared policies.
func (s sendRecvErrorConfig) ShouldSendRecvError(ip net.IP) bool {
	if s == sendRecvErrorAlways {
		return true
	}
	if s == sendRecvErrorNever {
		return false
	}
	if s == sendRecvErrorPrivate {
		return ip.IsPrivate()
	}
	panic(fmt.Errorf("invalid sendRecvErrorConfig value: %d", s))
}

// String returns the config-file spelling of the policy, or "invalid(N)" for a
// value outside the declared set.
func (s sendRecvErrorConfig) String() string {
	names := [...]string{
		sendRecvErrorAlways:  "always",
		sendRecvErrorNever:   "never",
		sendRecvErrorPrivate: "private",
	}
	if int(s) < len(names) {
		return names[s]
	}
	return fmt.Sprintf("invalid(%d)", s)
}
func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
2019-11-19 10:00:20 -07:00
if c.Outside == nil {
return nil, errors.New("no outside connection")
}
if c.Inside == nil {
return nil, errors.New("no inside interface (tun)")
}
if c.certState == nil {
return nil, errors.New("no certificate state")
}
if c.Firewall == nil {
return nil, errors.New("no firewall rules")
}
myVpnIp := iputil.Ip2VpnIp(c.certState.certificate.Details.Ips[0].IP)
2019-11-19 10:00:20 -07:00
ifce := &Interface{
hostMap: c.HostMap,
outside: c.Outside,
inside: c.Inside,
cipher: c.Cipher,
firewall: c.Firewall,
serveDns: c.ServeDns,
handshakeManager: c.HandshakeManager,
createTime: time.Now(),
lightHouse: c.lightHouse,
localBroadcast: myVpnIp | ^iputil.Ip2VpnIp(c.certState.certificate.Details.Ips[0].Mask),
dropLocalBroadcast: c.DropLocalBroadcast,
dropMulticast: c.DropMulticast,
routines: c.routines,
2020-09-18 08:20:09 -06:00
version: c.version,
writers: make([]*udp.Conn, c.routines),
readers: make([]io.ReadWriteCloser, c.routines),
2021-03-29 11:10:19 -06:00
caPool: c.caPool,
disconnectInvalid: c.disconnectInvalid,
myVpnIp: myVpnIp,
relayManager: c.relayManager,
2019-11-19 10:00:20 -07:00
conntrackCacheTimeout: c.ConntrackCacheTimeout,
metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
messageMetrics: c.MessageMetrics,
cachedPacketMetrics: &cachedPacketMetrics{
sent: metrics.GetOrRegisterCounter("hostinfo.cached_packets.sent", nil),
dropped: metrics.GetOrRegisterCounter("hostinfo.cached_packets.dropped", nil),
},
l: c.l,
2019-11-19 10:00:20 -07:00
}
ifce.certState.Store(c.certState)
ifce.connectionManager = newConnectionManager(ctx, c.l, ifce, c.checkInterval, c.pendingDeletionInterval, c.punchy)
2019-11-19 10:00:20 -07:00
return ifce, nil
}
// activate creates the interface on the host. After the interface is created, any
// other services that want to bind listeners to its IP may do so successfully. However,
// the interface isn't going to process anything until run() is called.
func (f *Interface) activate() {
2019-11-19 10:00:20 -07:00
// actually turn on tun dev
addr, err := f.outside.LocalAddr()
if err != nil {
2021-03-26 08:46:30 -06:00
f.l.WithError(err).Error("Failed to get udp listen address")
}
f.l.WithField("interface", f.inside.Name()).WithField("network", f.inside.Cidr().String()).
2020-09-18 08:20:09 -06:00
WithField("build", f.version).WithField("udpAddr", addr).
2019-11-19 10:00:20 -07:00
Info("Nebula interface is active")
metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
// Prepare n tun queues
var reader io.ReadWriteCloser = f.inside
for i := 0; i < f.routines; i++ {
if i > 0 {
reader, err = f.inside.NewMultiQueueReader()
if err != nil {
2021-03-26 08:46:30 -06:00
f.l.Fatal(err)
}
}
f.readers[i] = reader
}
if err := f.inside.Activate(); err != nil {
2021-11-08 11:36:31 -07:00
f.inside.Close()
2021-03-26 08:46:30 -06:00
f.l.Fatal(err)
2019-11-19 10:00:20 -07:00
}
}
func (f *Interface) run() {
// Launch n queues to read packets from udp
for i := 0; i < f.routines; i++ {
go f.listenOut(i)
}
// Launch n queues to read packets from tun dev
for i := 0; i < f.routines; i++ {
go f.listenIn(f.readers[i], i)
}
2019-11-19 10:00:20 -07:00
}
func (f *Interface) listenOut(i int) {
runtime.LockOSThread()
2019-11-19 10:00:20 -07:00
var li *udp.Conn
// TODO clean this up with a coherent interface for each outside connection
2019-11-19 10:00:20 -07:00
if i > 0 {
li = f.writers[i]
2019-11-19 10:00:20 -07:00
} else {
li = f.outside
}
lhh := f.lightHouse.NewRequestHandler()
conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
li.ListenOut(readOutsidePackets(f), lhHandleRequest(lhh, f), conntrackCache, i)
2019-11-19 10:00:20 -07:00
}
func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
runtime.LockOSThread()
2019-11-19 10:00:20 -07:00
packet := make([]byte, mtu)
out := make([]byte, mtu)
fwPacket := &firewall.Packet{}
2019-11-19 10:00:20 -07:00
nb := make([]byte, 12, 12)
conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
2019-11-19 10:00:20 -07:00
for {
n, err := reader.Read(packet)
2019-11-19 10:00:20 -07:00
if err != nil {
if errors.Is(err, os.ErrClosed) && f.closed.Load() {
2021-11-08 11:36:31 -07:00
return
}
2021-03-26 08:46:30 -06:00
f.l.WithError(err).Error("Error while reading outbound packet")
2019-11-19 10:00:20 -07:00
// This only seems to happen when something fatal happens to the fd, so exit.
os.Exit(2)
}
2021-03-26 08:46:30 -06:00
f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
2019-11-19 10:00:20 -07:00
}
}
func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
2019-11-19 10:00:20 -07:00
c.RegisterReloadCallback(f.reloadCA)
c.RegisterReloadCallback(f.reloadCertKey)
c.RegisterReloadCallback(f.reloadFirewall)
c.RegisterReloadCallback(f.reloadSendRecvError)
for _, udpConn := range f.writers {
c.RegisterReloadCallback(udpConn.ReloadConfig)
}
2019-11-19 10:00:20 -07:00
}
func (f *Interface) reloadCA(c *config.C) {
2019-11-19 10:00:20 -07:00
// reload and check regardless
// todo: need mutex?
2021-03-26 08:46:30 -06:00
newCAs, err := loadCAFromConfig(f.l, c)
2019-11-19 10:00:20 -07:00
if err != nil {
2021-03-26 08:46:30 -06:00
f.l.WithError(err).Error("Could not refresh trusted CA certificates")
2019-11-19 10:00:20 -07:00
return
}
2021-03-29 11:10:19 -06:00
f.caPool = newCAs
f.l.WithField("fingerprints", f.caPool.GetFingerprints()).Info("Trusted CA certificates refreshed")
2019-11-19 10:00:20 -07:00
}
func (f *Interface) reloadCertKey(c *config.C) {
2019-11-19 10:00:20 -07:00
// reload and check in all cases
cs, err := NewCertStateFromConfig(c)
if err != nil {
2021-03-26 08:46:30 -06:00
f.l.WithError(err).Error("Could not refresh client cert")
2019-11-19 10:00:20 -07:00
return
}
// did IP in cert change? if so, don't set
currentCert := f.certState.Load().certificate
oldIPs := currentCert.Details.Ips
2019-11-19 10:00:20 -07:00
newIPs := cs.certificate.Details.Ips
if len(oldIPs) > 0 && len(newIPs) > 0 && oldIPs[0].String() != newIPs[0].String() {
2021-03-26 08:46:30 -06:00
f.l.WithField("new_ip", newIPs[0]).WithField("old_ip", oldIPs[0]).Error("IP in new cert was different from old")
2019-11-19 10:00:20 -07:00
return
}
f.certState.Store(cs)
2021-03-26 08:46:30 -06:00
f.l.WithField("cert", cs.certificate).Info("Client cert refreshed from disk")
2019-11-19 10:00:20 -07:00
}
func (f *Interface) reloadFirewall(c *config.C) {
2019-11-19 10:00:20 -07:00
//TODO: need to trigger/detect if the certificate changed too
if c.HasChanged("firewall") == false {
2021-03-26 08:46:30 -06:00
f.l.Debug("No firewall config change detected")
2019-11-19 10:00:20 -07:00
return
}
fw, err := NewFirewallFromConfig(f.l, f.certState.Load().certificate, c)
2019-11-19 10:00:20 -07:00
if err != nil {
2021-03-26 08:46:30 -06:00
f.l.WithError(err).Error("Error while creating firewall during reload")
2019-11-19 10:00:20 -07:00
return
}
oldFw := f.firewall
conntrack := oldFw.Conntrack
conntrack.Lock()
defer conntrack.Unlock()
fw.rulesVersion = oldFw.rulesVersion + 1
// If rulesVersion is back to zero, we have wrapped all the way around. Be
// safe and just reset conntrack in this case.
if fw.rulesVersion == 0 {
2021-03-26 08:46:30 -06:00
f.l.WithField("firewallHash", fw.GetRuleHash()).
WithField("oldFirewallHash", oldFw.GetRuleHash()).
WithField("rulesVersion", fw.rulesVersion).
Warn("firewall rulesVersion has overflowed, resetting conntrack")
} else {
fw.Conntrack = conntrack
}
2019-11-19 10:00:20 -07:00
f.firewall = fw
oldFw.Destroy()
2021-03-26 08:46:30 -06:00
f.l.WithField("firewallHash", fw.GetRuleHash()).
2019-11-19 10:00:20 -07:00
WithField("oldFirewallHash", oldFw.GetRuleHash()).
WithField("rulesVersion", fw.rulesVersion).
2019-11-19 10:00:20 -07:00
Info("New firewall has been installed")
}
// reloadSendRecvError refreshes f.sendRecvErrorConfig from the
// listen.send_recv_error setting, on the initial load or when that setting has
// changed. The strings "always", "never" and "private" map to the matching
// policy; any other value is read as a boolean (default true), where true means
// always and false means never. The chosen policy is logged.
func (f *Interface) reloadSendRecvError(c *config.C) {
	if !c.InitialLoad() && !c.HasChanged("listen.send_recv_error") {
		return
	}

	var mode sendRecvErrorConfig
	switch raw := c.GetString("listen.send_recv_error", "always"); {
	case raw == "always":
		mode = sendRecvErrorAlways
	case raw == "never":
		mode = sendRecvErrorNever
	case raw == "private":
		mode = sendRecvErrorPrivate
	case c.GetBool("listen.send_recv_error", true):
		// Not one of the known strings: fall back to a boolean reading.
		mode = sendRecvErrorAlways
	default:
		mode = sendRecvErrorNever
	}
	f.sendRecvErrorConfig = mode

	f.l.WithField("sendRecvError", f.sendRecvErrorConfig.String()).
		Info("Loaded send_recv_error config")
}
func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
2019-11-19 10:00:20 -07:00
ticker := time.NewTicker(i)
defer ticker.Stop()
udpStats := udp.NewUDPStatsEmitter(f.writers)
certExpirationGauge := metrics.GetOrRegisterGauge("certificate.ttl_seconds", nil)
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
f.firewall.EmitStats()
f.handshakeManager.EmitStats()
udpStats()
certExpirationGauge.Update(int64(f.certState.Load().certificate.Details.NotAfter.Sub(time.Now()) / time.Second))
}
2019-11-19 10:00:20 -07:00
}
}
2021-11-08 11:36:31 -07:00
func (f *Interface) Close() error {
f.closed.Store(true)
2021-11-08 11:36:31 -07:00
// Release the tun device
return f.inside.Close()
}