diff --git a/overlay/route.go b/overlay/route.go index 793c8fd..64c624c 100644 --- a/overlay/route.go +++ b/overlay/route.go @@ -1,6 +1,7 @@ package overlay import ( + "bytes" "fmt" "math" "net" @@ -21,6 +22,35 @@ type Route struct { Install bool } +// Equal determines if a route that could be installed in the system route table is equal to another +// Via is ignored since that is only consumed within nebula itself +func (r Route) Equal(t Route) bool { + if !r.Cidr.IP.Equal(t.Cidr.IP) { + return false + } + if !bytes.Equal(r.Cidr.Mask, t.Cidr.Mask) { + return false + } + if r.Metric != t.Metric { + return false + } + if r.MTU != t.MTU { + return false + } + if r.Install != t.Install { + return false + } + return true +} + +func (r Route) String() string { + s := r.Cidr.String() + if r.Metric != 0 { + s += fmt.Sprintf(" metric: %v", r.Metric) + } + return s +} + func makeRouteTree(l *logrus.Logger, routes []Route, allowMTU bool) (*cidr.Tree4[iputil.VpnIp], error) { routeTree := cidr.NewTree4[iputil.VpnIp]() for _, r := range routes { diff --git a/overlay/tun.go b/overlay/tun.go index ca1a64a..cedd7fe 100644 --- a/overlay/tun.go +++ b/overlay/tun.go @@ -10,60 +10,63 @@ import ( const DefaultMTU = 1300 +// TODO: We may be able to remove routines type DeviceFactory func(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error) func NewDeviceFromConfig(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error) { - routes, err := parseRoutes(c, tunCidr) - if err != nil { - return nil, util.NewContextualError("Could not parse tun.routes", nil, err) - } - - unsafeRoutes, err := parseUnsafeRoutes(c, tunCidr) - if err != nil { - return nil, util.NewContextualError("Could not parse tun.unsafe_routes", nil, err) - } - routes = append(routes, unsafeRoutes...) - switch { case c.GetBool("tun.disabled", false): tun := newDisabledTun(tunCidr, c.GetInt("tun.tx_queue", 500), c.GetBool("stats.message_metrics", false), l) return tun, nil default: - return newTun( - l, - c.GetString("tun.dev", ""), - tunCidr, - c.GetInt("tun.mtu", DefaultMTU), - routes, - c.GetInt("tun.tx_queue", 500), - routines > 1, - c.GetBool("tun.use_system_route_table", false), - ) + return newTun(c, l, tunCidr, routines > 1) } } func NewFdDeviceFromConfig(fd *int) DeviceFactory { return func(c *config.C, l *logrus.Logger, tunCidr *net.IPNet, routines int) (Device, error) { - routes, err := parseRoutes(c, tunCidr) - if err != nil { - return nil, util.NewContextualError("Could not parse tun.routes", nil, err) - } - - unsafeRoutes, err := parseUnsafeRoutes(c, tunCidr) - if err != nil { - return nil, util.NewContextualError("Could not parse tun.unsafe_routes", nil, err) - } - routes = append(routes, unsafeRoutes...) - return newTunFromFd( - l, - *fd, - tunCidr, - c.GetInt("tun.mtu", DefaultMTU), - routes, - c.GetInt("tun.tx_queue", 500), - c.GetBool("tun.use_system_route_table", false), - ) - + return newTunFromFd(c, l, *fd, tunCidr) } } + +func getAllRoutesFromConfig(c *config.C, cidr *net.IPNet, initial bool) (bool, []Route, error) { + if !initial && !c.HasChanged("tun.routes") && !c.HasChanged("tun.unsafe_routes") { + return false, nil, nil + } + + routes, err := parseRoutes(c, cidr) + if err != nil { + return true, nil, util.NewContextualError("Could not parse tun.routes", nil, err) + } + + unsafeRoutes, err := parseUnsafeRoutes(c, cidr) + if err != nil { + return true, nil, util.NewContextualError("Could not parse tun.unsafe_routes", nil, err) + } + + routes = append(routes, unsafeRoutes...) + return true, routes, nil +} + +// findRemovedRoutes will return all routes that are not present in the newRoutes list and would affect the system route table. +// Via is not used to evaluate since it does not affect the system route table. +func findRemovedRoutes(newRoutes, oldRoutes []Route) []Route { + var removed []Route + has := func(entry Route) bool { + for _, check := range newRoutes { + if check.Equal(entry) { + return true + } + } + return false + } + + for _, oldEntry := range oldRoutes { + if !has(oldEntry) { + removed = append(removed, oldEntry) + } + } + + return removed +} diff --git a/overlay/tun_android.go b/overlay/tun_android.go index c5c52db..c15827f 100644 --- a/overlay/tun_android.go +++ b/overlay/tun_android.go @@ -8,45 +8,57 @@ import ( "io" "net" "os" + "sync/atomic" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" ) type tun struct { io.ReadWriteCloser fd int cidr *net.IPNet - routeTree *cidr.Tree4[iputil.VpnIp] + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] l *logrus.Logger } -func newTunFromFd(l *logrus.Logger, deviceFd int, cidr *net.IPNet, _ int, routes []Route, _ int, _ bool) (*tun, error) { - routeTree, err := makeRouteTree(l, routes, false) - if err != nil { - return nil, err - } - +func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, cidr *net.IPNet) (*tun, error) { // XXX Android returns an fd in non-blocking mode which is necessary for shutdown to work properly. // Be sure not to call file.Fd() as it will set the fd to blocking mode. file := os.NewFile(uintptr(deviceFd), "/dev/net/tun") - return &tun{ + t := &tun{ ReadWriteCloser: file, fd: deviceFd, cidr: cidr, l: l, - routeTree: routeTree, - }, nil + } + + err := t.reload(c, true) + if err != nil { + return nil, err + } + + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil } -func newTun(_ *logrus.Logger, _ string, _ *net.IPNet, _ int, _ []Route, _ int, _ bool, _ bool) (*tun, error) { +func newTun(_ *config.C, _ *logrus.Logger, _ *net.IPNet, _ bool) (*tun, error) { return nil, fmt.Errorf("newTun not supported in Android") } func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - _, r := t.routeTree.MostSpecificContains(ip) + _, r := t.routeTree.Load().MostSpecificContains(ip) return r } @@ -54,6 +66,27 @@ func (t tun) Activate() error { return nil } +func (t *tun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) + if err != nil { + return err + } + + if !initial && !change { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, false) + if err != nil { + return err + } + + // Teach nebula how to handle the routes + t.Routes.Store(&routes) + t.routeTree.Store(routeTree) + return nil +} + func (t *tun) Cidr() *net.IPNet { return t.cidr } diff --git a/overlay/tun_darwin.go b/overlay/tun_darwin.go index caec580..1c63828 100644 --- a/overlay/tun_darwin.go +++ b/overlay/tun_darwin.go @@ -9,12 +9,15 @@ import ( "io" "net" "os" + "sync/atomic" "syscall" "unsafe" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" netroute "golang.org/x/net/route" "golang.org/x/sys/unix" ) @@ -24,8 +27,9 @@ type tun struct { Device string cidr *net.IPNet DefaultMTU int - Routes []Route - routeTree *cidr.Tree4[iputil.VpnIp] + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] + linkAddr *netroute.LinkAddr l *logrus.Logger // cache out buffer since we need to prepend 4 bytes for tun metadata @@ -69,12 +73,8 @@ type ifreqMTU struct { pad [8]byte } -func newTun(l *logrus.Logger, name string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (*tun, error) { - routeTree, err := makeRouteTree(l, routes, false) - if err != nil { - return nil, err - } - +func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*tun, error) { + name := c.GetString("tun.dev", "") ifIndex := -1 if name != "" && name != "utun" { _, err := fmt.Sscanf(name, "utun%d", &ifIndex) @@ -142,17 +142,27 @@ func newTun(l *logrus.Logger, name string, cidr *net.IPNet, defaultMTU int, rout file := os.NewFile(uintptr(fd), "") - tun := &tun{ + t := &tun{ ReadWriteCloser: file, Device: name, cidr: cidr, - DefaultMTU: defaultMTU, - Routes: routes, - routeTree: routeTree, + DefaultMTU: c.GetInt("tun.mtu", DefaultMTU), l: l, } - return tun, nil + err = t.reload(c, true) + if err != nil { + return nil, err + } + + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil } func (t *tun) deviceBytes() (o [16]byte) { @@ -162,7 +172,7 @@ func (t *tun) deviceBytes() (o [16]byte) { return } -func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*tun, error) { +func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*tun, error) { return nil, fmt.Errorf("newTunFromFd not supported in Darwin") } @@ -260,6 +270,7 @@ func (t *tun) Activate() error { if linkAddr == nil { return fmt.Errorf("unable to discover link_addr for tun interface") } + t.linkAddr = linkAddr copy(routeAddr.IP[:], addr[:]) copy(maskAddr.IP[:], mask[:]) @@ -278,33 +289,48 @@ func (t *tun) Activate() error { } // Unsafe path routes - for _, r := range t.Routes { - if r.Via == nil || !r.Install { - // We don't allow route MTUs so only install routes with a via - continue - } + return t.addRoutes(false) +} - copy(routeAddr.IP[:], r.Cidr.IP.To4()) - copy(maskAddr.IP[:], net.IP(r.Cidr.Mask).To4()) +func (t *tun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) + if err != nil { + return err + } - err = addRoute(routeSock, routeAddr, maskAddr, linkAddr) + if !initial && !change { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, false) + if err != nil { + return err + } + + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) + + if !initial { + // Remove first, if the system removes a wanted route hopefully it will be re-added next + err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) if err != nil { - if errors.Is(err, unix.EEXIST) { - t.l.WithField("route", r.Cidr). - Warnf("unable to add unsafe_route, identical route already exists") - } else { - return err - } + util.LogWithContextIfNeeded("Failed to remove routes", err, t.l) } - // TODO how to set metric + // Ensure any routes we actually want are installed + err = t.addRoutes(true) + if err != nil { + // Catch any stray logs + util.LogWithContextIfNeeded("Failed to add routes", err, t.l) + } } return nil } func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - ok, r := t.routeTree.MostSpecificContains(ip) + ok, r := t.routeTree.Load().MostSpecificContains(ip) if ok { return r } @@ -340,6 +366,88 @@ func getLinkAddr(name string) (*netroute.LinkAddr, error) { return nil, nil } +func (t *tun) addRoutes(logErrors bool) error { + routeSock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC) + if err != nil { + return fmt.Errorf("unable to create AF_ROUTE socket: %v", err) + } + + defer func() { + unix.Shutdown(routeSock, unix.SHUT_RDWR) + err := unix.Close(routeSock) + if err != nil { + t.l.WithError(err).Error("failed to close AF_ROUTE socket") + } + }() + + routeAddr := &netroute.Inet4Addr{} + maskAddr := &netroute.Inet4Addr{} + routes := *t.Routes.Load() + for _, r := range routes { + if r.Via == nil || !r.Install { + // We don't allow route MTUs so only install routes with a via + continue + } + + copy(routeAddr.IP[:], r.Cidr.IP.To4()) + copy(maskAddr.IP[:], net.IP(r.Cidr.Mask).To4()) + + err := addRoute(routeSock, routeAddr, maskAddr, t.linkAddr) + if err != nil { + if errors.Is(err, unix.EEXIST) { + t.l.WithField("route", r.Cidr). + Warnf("unable to add unsafe_route, identical route already exists") + } else { + retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + } else { + return retErr + } + } + } else { + t.l.WithField("route", r).Info("Added route") + } + } + + return nil +} + +func (t *tun) removeRoutes(routes []Route) error { + routeSock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC) + if err != nil { + return fmt.Errorf("unable to create AF_ROUTE socket: %v", err) + } + + defer func() { + unix.Shutdown(routeSock, unix.SHUT_RDWR) + err := unix.Close(routeSock) + if err != nil { + t.l.WithError(err).Error("failed to close AF_ROUTE socket") + } + }() + + routeAddr := &netroute.Inet4Addr{} + maskAddr := &netroute.Inet4Addr{} + + for _, r := range routes { + if !r.Install { + continue + } + + copy(routeAddr.IP[:], r.Cidr.IP.To4()) + copy(maskAddr.IP[:], net.IP(r.Cidr.Mask).To4()) + + err := delRoute(routeSock, routeAddr, maskAddr, t.linkAddr) + if err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") + } + } + return nil +} + func addRoute(sock int, addr, mask *netroute.Inet4Addr, link *netroute.LinkAddr) error { r := netroute.RouteMessage{ Version: unix.RTM_VERSION, @@ -365,6 +473,30 @@ func addRoute(sock int, addr, mask *netroute.Inet4Addr, link *netroute.LinkAddr) return nil } +func delRoute(sock int, addr, mask *netroute.Inet4Addr, link *netroute.LinkAddr) error { + r := netroute.RouteMessage{ + Version: unix.RTM_VERSION, + Type: unix.RTM_DELETE, + Seq: 1, + Addrs: []netroute.Addr{ + unix.RTAX_DST: addr, + unix.RTAX_GATEWAY: link, + unix.RTAX_NETMASK: mask, + }, + } + + data, err := r.Marshal() + if err != nil { + return fmt.Errorf("failed to create route.RouteMessage: %w", err) + } + _, err = unix.Write(sock, data[:]) + if err != nil { + return fmt.Errorf("failed to write route.RouteMessage to socket: %w", err) + } + + return nil +} + func (t *tun) Read(to []byte) (int, error) { buf := make([]byte, len(to)+4) diff --git a/overlay/tun_freebsd.go b/overlay/tun_freebsd.go index 338b8f6..3b1b80f 100644 --- a/overlay/tun_freebsd.go +++ b/overlay/tun_freebsd.go @@ -13,12 +13,15 @@ import ( "os" "os/exec" "strconv" + "sync/atomic" "syscall" "unsafe" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" ) const ( @@ -47,8 +50,8 @@ type tun struct { Device string cidr *net.IPNet MTU int - Routes []Route - routeTree *cidr.Tree4[iputil.VpnIp] + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] l *logrus.Logger io.ReadWriteCloser @@ -76,14 +79,15 @@ func (t *tun) Close() error { return nil } -func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*tun, error) { +func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*tun, error) { return nil, fmt.Errorf("newTunFromFd not supported in FreeBSD") } -func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (*tun, error) { +func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*tun, error) { // Try to open existing tun device var file *os.File var err error + deviceName := c.GetString("tun.dev", "") if deviceName != "" { file, err = os.OpenFile("/dev/"+deviceName, os.O_RDWR, 0) } @@ -144,47 +148,85 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int ioctl(fd, syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&ifrr))) } - routeTree, err := makeRouteTree(l, routes, false) + t := &tun{ + ReadWriteCloser: file, + Device: deviceName, + cidr: cidr, + MTU: c.GetInt("tun.mtu", DefaultMTU), + l: l, + } + + err = t.reload(c, true) if err != nil { return nil, err } - return &tun{ - ReadWriteCloser: file, - Device: deviceName, - cidr: cidr, - MTU: defaultMTU, - Routes: routes, - routeTree: routeTree, - l: l, - }, nil + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil } func (t *tun) Activate() error { var err error // TODO use syscalls instead of exec.Command - t.l.Debug("command: ifconfig", t.Device, t.cidr.String(), t.cidr.IP.String()) - if err = exec.Command("/sbin/ifconfig", t.Device, t.cidr.String(), t.cidr.IP.String()).Run(); err != nil { + cmd := exec.Command("/sbin/ifconfig", t.Device, t.cidr.String(), t.cidr.IP.String()) + t.l.Debug("command: ", cmd.String()) + if err = cmd.Run(); err != nil { return fmt.Errorf("failed to run 'ifconfig': %s", err) } - t.l.Debug("command: route", "-n", "add", "-net", t.cidr.String(), "-interface", t.Device) - if err = exec.Command("/sbin/route", "-n", "add", "-net", t.cidr.String(), "-interface", t.Device).Run(); err != nil { + + cmd = exec.Command("/sbin/route", "-n", "add", "-net", t.cidr.String(), "-interface", t.Device) + t.l.Debug("command: ", cmd.String()) + if err = cmd.Run(); err != nil { return fmt.Errorf("failed to run 'route add': %s", err) } - t.l.Debug("command: ifconfig", t.Device, "mtu", strconv.Itoa(t.MTU)) - if err = exec.Command("/sbin/ifconfig", t.Device, "mtu", strconv.Itoa(t.MTU)).Run(); err != nil { + + cmd = exec.Command("/sbin/ifconfig", t.Device, "mtu", strconv.Itoa(t.MTU)) + t.l.Debug("command: ", cmd.String()) + if err = cmd.Run(); err != nil { return fmt.Errorf("failed to run 'ifconfig': %s", err) } + // Unsafe path routes - for _, r := range t.Routes { - if r.Via == nil || !r.Install { - // We don't allow route MTUs so only install routes with a via - continue + return t.addRoutes(false) +} + +func (t *tun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) + if err != nil { + return err + } + + if !initial && !change { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, false) + if err != nil { + return err + } + + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) + + if !initial { + // Remove first, if the system removes a wanted route hopefully it will be re-added next + err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) + if err != nil { + util.LogWithContextIfNeeded("Failed to remove routes", err, t.l) } - t.l.Debug("command: route", "-n", "add", "-net", r.Cidr.String(), "-interface", t.Device) - if err = exec.Command("/sbin/route", "-n", "add", "-net", r.Cidr.String(), "-interface", t.Device).Run(); err != nil { - return fmt.Errorf("failed to run 'route add' for unsafe_route %s: %s", r.Cidr.String(), err) + // Ensure any routes we actually want are installed + err = t.addRoutes(true) + if err != nil { + // Catch any stray logs + util.LogWithContextIfNeeded("Failed to add routes", err, t.l) } } @@ -192,7 +234,7 @@ func (t *tun) Activate() error { } func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - _, r := t.routeTree.MostSpecificContains(ip) + _, r := t.routeTree.Load().MostSpecificContains(ip) return r } @@ -208,6 +250,46 @@ func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) { return nil, fmt.Errorf("TODO: multiqueue not implemented for freebsd") } +func (t *tun) addRoutes(logErrors bool) error { + routes := *t.Routes.Load() + for _, r := range routes { + if r.Via == nil || !r.Install { + // We don't allow route MTUs so only install routes with a via + continue + } + + cmd := exec.Command("/sbin/route", "-n", "add", "-net", r.Cidr.String(), "-interface", t.Device) + t.l.Debug("command: ", cmd.String()) + if err := cmd.Run(); err != nil { + retErr := util.NewContextualError("failed to run 'route add' for unsafe_route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + } else { + return retErr + } + } + } + + return nil +} + +func (t *tun) removeRoutes(routes []Route) error { + for _, r := range routes { + if !r.Install { + continue + } + + cmd := exec.Command("/sbin/route", "-n", "delete", "-net", r.Cidr.String(), "-interface", t.Device) + t.l.Debug("command: ", cmd.String()) + if err := cmd.Run(); err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") + } + } + return nil +} + func (t *tun) deviceBytes() (o [16]byte) { for i, c := range t.Device { o[i] = byte(c) diff --git a/overlay/tun_ios.go b/overlay/tun_ios.go index ce65b33..ba15d66 100644 --- a/overlay/tun_ios.go +++ b/overlay/tun_ios.go @@ -10,43 +10,78 @@ import ( "net" "os" "sync" + "sync/atomic" "syscall" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" ) type tun struct { io.ReadWriteCloser cidr *net.IPNet - routeTree *cidr.Tree4[iputil.VpnIp] + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] + l *logrus.Logger } -func newTun(_ *logrus.Logger, _ string, _ *net.IPNet, _ int, _ []Route, _ int, _ bool, _ bool) (*tun, error) { +func newTun(_ *config.C, _ *logrus.Logger, _ *net.IPNet, _ bool) (*tun, error) { return nil, fmt.Errorf("newTun not supported in iOS") } -func newTunFromFd(l *logrus.Logger, deviceFd int, cidr *net.IPNet, _ int, routes []Route, _ int, _ bool) (*tun, error) { - routeTree, err := makeRouteTree(l, routes, false) +func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, cidr *net.IPNet) (*tun, error) { + file := os.NewFile(uintptr(deviceFd), "/dev/tun") + t := &tun{ + cidr: cidr, + ReadWriteCloser: &tunReadCloser{f: file}, + l: l, + } + + err := t.reload(c, true) if err != nil { return nil, err } - file := os.NewFile(uintptr(deviceFd), "/dev/tun") - return &tun{ - cidr: cidr, - ReadWriteCloser: &tunReadCloser{f: file}, - routeTree: routeTree, - }, nil + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil } func (t *tun) Activate() error { return nil } +func (t *tun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) + if err != nil { + return err + } + + if !initial && !change { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, false) + if err != nil { + return err + } + + // Teach nebula how to handle the routes + t.Routes.Store(&routes) + t.routeTree.Store(routeTree) + return nil +} + func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - _, r := t.routeTree.MostSpecificContains(ip) + _, r := t.routeTree.Load().MostSpecificContains(ip) return r } diff --git a/overlay/tun_linux.go b/overlay/tun_linux.go index a576bf3..1f6580e 100644 --- a/overlay/tun_linux.go +++ b/overlay/tun_linux.go @@ -15,21 +15,25 @@ import ( "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" "github.com/vishvananda/netlink" "golang.org/x/sys/unix" ) type tun struct { io.ReadWriteCloser - fd int - Device string - cidr *net.IPNet - MaxMTU int - DefaultMTU int - TXQueueLen int + fd int + Device string + cidr *net.IPNet + MaxMTU int + DefaultMTU int + TXQueueLen int + deviceIndex int + ioctlFd uintptr - Routes []Route + Routes atomic.Pointer[[]Route] routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] routeChan chan struct{} useSystemRoutes bool @@ -61,30 +65,20 @@ type ifreqQLEN struct { pad [8]byte } -func newTunFromFd(l *logrus.Logger, deviceFd int, cidr *net.IPNet, defaultMTU int, routes []Route, txQueueLen int, useSystemRoutes bool) (*tun, error) { - routeTree, err := makeRouteTree(l, routes, true) +func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, cidr *net.IPNet) (*tun, error) { + file := os.NewFile(uintptr(deviceFd), "/dev/net/tun") + + t, err := newTunGeneric(c, l, file, cidr) if err != nil { return nil, err } - file := os.NewFile(uintptr(deviceFd), "/dev/net/tun") + t.Device = "tun0" - t := &tun{ - ReadWriteCloser: file, - fd: int(file.Fd()), - Device: "tun0", - cidr: cidr, - DefaultMTU: defaultMTU, - TXQueueLen: txQueueLen, - Routes: routes, - useSystemRoutes: useSystemRoutes, - l: l, - } - t.routeTree.Store(routeTree) return t, nil } -func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, txQueueLen int, multiqueue bool, useSystemRoutes bool) (*tun, error) { +func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, multiqueue bool) (*tun, error) { fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0) if err != nil { return nil, err @@ -95,46 +89,113 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int if multiqueue { req.Flags |= unix.IFF_MULTI_QUEUE } - copy(req.Name[:], deviceName) + copy(req.Name[:], c.GetString("tun.dev", "")) if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil { return nil, err } name := strings.Trim(string(req.Name[:]), "\x00") file := os.NewFile(uintptr(fd), "/dev/net/tun") - - maxMTU := defaultMTU - for _, r := range routes { - if r.MTU == 0 { - r.MTU = defaultMTU - } - - if r.MTU > maxMTU { - maxMTU = r.MTU - } - } - - routeTree, err := makeRouteTree(l, routes, true) + t, err := newTunGeneric(c, l, file, cidr) if err != nil { return nil, err } + t.Device = name + + return t, nil +} + +func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, cidr *net.IPNet) (*tun, error) { t := &tun{ ReadWriteCloser: file, fd: int(file.Fd()), - Device: name, cidr: cidr, - MaxMTU: maxMTU, - DefaultMTU: defaultMTU, - TXQueueLen: txQueueLen, - Routes: routes, - useSystemRoutes: useSystemRoutes, + TXQueueLen: c.GetInt("tun.tx_queue", 500), + useSystemRoutes: c.GetBool("tun.use_system_route_table", false), l: l, } - t.routeTree.Store(routeTree) + + err := t.reload(c, true) + if err != nil { + return nil, err + } + + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + return t, nil } +func (t *tun) reload(c *config.C, initial bool) error { + routeChange, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) + if err != nil { + return err + } + + if !initial && !routeChange && !c.HasChanged("tun.mtu") { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, true) + if err != nil { + return err + } + + oldDefaultMTU := t.DefaultMTU + oldMaxMTU := t.MaxMTU + newDefaultMTU := c.GetInt("tun.mtu", DefaultMTU) + newMaxMTU := newDefaultMTU + for i, r := range routes { + if r.MTU == 0 { + routes[i].MTU = newDefaultMTU + } + + if r.MTU > t.MaxMTU { + newMaxMTU = r.MTU + } + } + + t.MaxMTU = newMaxMTU + t.DefaultMTU = newDefaultMTU + + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) + + if !initial { + if oldMaxMTU != newMaxMTU { + t.setMTU() + t.l.Infof("Set max MTU to %v was %v", t.MaxMTU, oldMaxMTU) + } + + if oldDefaultMTU != newDefaultMTU { + err := t.setDefaultRoute() + if err != nil { + t.l.Warn(err) + } else { + t.l.Infof("Set default MTU to %v was %v", t.DefaultMTU, oldDefaultMTU) + } + } + + // Remove first, if the system removes a wanted route hopefully it will be re-added next + t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) + + // Ensure any routes we actually want are installed + err = t.addRoutes(true) + if err != nil { + // This should never be called since addRoutes should log its own errors in a reload condition + util.LogWithContextIfNeeded("Failed to refresh routes", err, t.l) + } + } + + return nil +} + func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) { fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0) if err != nil { @@ -208,7 +269,7 @@ func (t *tun) Activate() error { if err != nil { return err } - fd := uintptr(s) + t.ioctlFd = uintptr(s) ifra := ifreqAddr{ Name: devName, @@ -219,52 +280,76 @@ func (t *tun) Activate() error { } // Set the device ip address - if err = ioctl(fd, unix.SIOCSIFADDR, uintptr(unsafe.Pointer(&ifra))); err != nil { + if err = ioctl(t.ioctlFd, unix.SIOCSIFADDR, uintptr(unsafe.Pointer(&ifra))); err != nil { return fmt.Errorf("failed to set tun address: %s", err) } // Set the device network ifra.Addr.Addr = mask - if err = ioctl(fd, unix.SIOCSIFNETMASK, uintptr(unsafe.Pointer(&ifra))); err != nil { + if err = ioctl(t.ioctlFd, unix.SIOCSIFNETMASK, uintptr(unsafe.Pointer(&ifra))); err != nil { return fmt.Errorf("failed to set tun netmask: %s", err) } // Set the device name ifrf := ifReq{Name: devName} - if err = ioctl(fd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil { + if err = ioctl(t.ioctlFd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil { return fmt.Errorf("failed to set tun device name: %s", err) } - // Set the MTU on the device - ifm := ifreqMTU{Name: devName, MTU: int32(t.MaxMTU)} - if err = ioctl(fd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil { - // This is currently a non fatal condition because the route table must have the MTU set appropriately as well - t.l.WithError(err).Error("Failed to set tun mtu") - } + // Setup our default MTU + t.setMTU() // Set the transmit queue length ifrq := ifreqQLEN{Name: devName, Value: int32(t.TXQueueLen)} - if err = ioctl(fd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil { + if err = ioctl(t.ioctlFd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil { // If we can't set the queue length nebula will still work but it may lead to packet loss t.l.WithError(err).Error("Failed to set tun tx queue length") } // Bring up the interface ifrf.Flags = ifrf.Flags | unix.IFF_UP - if err = ioctl(fd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil { + if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil { return fmt.Errorf("failed to bring the tun device up: %s", err) } - // Set the routes link, err := netlink.LinkByName(t.Device) if err != nil { return fmt.Errorf("failed to get tun device link: %s", err) } + t.deviceIndex = link.Attrs().Index + if err = t.setDefaultRoute(); err != nil { + return err + } + + // Set the routes + if err = t.addRoutes(false); err != nil { + return err + } + + // Run the interface + ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING + if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil { + return fmt.Errorf("failed to run tun device: %s", err) + } + + return nil +} + +func (t *tun) setMTU() { + // Set the MTU on the device + ifm := ifreqMTU{Name: t.deviceBytes(), MTU: int32(t.MaxMTU)} + if err := ioctl(t.ioctlFd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil { + // This is currently a non fatal condition because the route table must have the MTU set appropriately as well + t.l.WithError(err).Error("Failed to set tun mtu") + } +} + +func (t *tun) setDefaultRoute() error { // Default route dr := &net.IPNet{IP: t.cidr.IP.Mask(t.cidr.Mask), Mask: t.cidr.Mask} nr := netlink.Route{ - LinkIndex: link.Attrs().Index, + LinkIndex: t.deviceIndex, Dst: dr, MTU: t.DefaultMTU, AdvMSS: t.advMSS(Route{}), @@ -274,19 +359,24 @@ func (t *tun) Activate() error { Table: unix.RT_TABLE_MAIN, Type: unix.RTN_UNICAST, } - err = netlink.RouteReplace(&nr) + err := netlink.RouteReplace(&nr) if err != nil { return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err) } + return nil +} + +func (t *tun) addRoutes(logErrors bool) error { // Path routes - for _, r := range t.Routes { + routes := *t.Routes.Load() + for _, r := range routes { if !r.Install { continue } nr := netlink.Route{ - LinkIndex: link.Attrs().Index, + LinkIndex: t.deviceIndex, Dst: r.Cidr, MTU: r.MTU, AdvMSS: t.advMSS(r), @@ -297,21 +387,49 @@ func (t *tun) Activate() error { nr.Priority = r.Metric } - err = netlink.RouteAdd(&nr) + err := netlink.RouteReplace(&nr) if err != nil { - return fmt.Errorf("failed to set mtu %v on route %v; %v", r.MTU, r.Cidr, err) + retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + } else { + return retErr + } + } else { + t.l.WithField("route", r).Info("Added route") } } - // Run the interface - ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING - if err = ioctl(fd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil { - return fmt.Errorf("failed to run tun device: %s", err) - } - return nil } +func (t *tun) removeRoutes(routes []Route) { + for _, r := range routes { + if !r.Install { + continue + } + + nr := netlink.Route{ + LinkIndex: t.deviceIndex, + Dst: r.Cidr, + MTU: r.MTU, + AdvMSS: t.advMSS(r), + Scope: unix.RT_SCOPE_LINK, + } + + if r.Metric > 0 { + nr.Priority = r.Metric + } + + err := netlink.RouteDel(&nr) + if err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") + } + } +} + func (t *tun) Cidr() *net.IPNet { return t.cidr } @@ -410,5 +528,9 @@ func (t *tun) Close() error { t.ReadWriteCloser.Close() } + if t.ioctlFd > 0 { + os.NewFile(t.ioctlFd, "ioctlFd").Close() + } + return nil } diff --git a/overlay/tun_netbsd.go b/overlay/tun_netbsd.go index b1135fe..cc0216f 100644 --- a/overlay/tun_netbsd.go +++ b/overlay/tun_netbsd.go @@ -11,12 +11,15 @@ import ( "os/exec" "regexp" "strconv" + "sync/atomic" "syscall" "unsafe" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" ) type ifreqDestroy struct { @@ -28,8 +31,8 @@ type tun struct { Device string cidr *net.IPNet MTU int - Routes []Route - routeTree *cidr.Tree4[iputil.VpnIp] + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] l *logrus.Logger io.ReadWriteCloser @@ -56,43 +59,50 @@ func (t *tun) Close() error { return nil } -func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*tun, error) { +func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*tun, error) { return nil, fmt.Errorf("newTunFromFd not supported in NetBSD") } var deviceNameRE = regexp.MustCompile(`^tun[0-9]+$`) -func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (*tun, error) { +func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*tun, error) { // Try to open tun device var file *os.File var err error + deviceName := c.GetString("tun.dev", "") if deviceName == "" { return nil, fmt.Errorf("a device name in the format of /dev/tunN must be specified") } if !deviceNameRE.MatchString(deviceName) { return nil, fmt.Errorf("a device name in the format of /dev/tunN must be specified") } + file, err = os.OpenFile("/dev/"+deviceName, os.O_RDWR, 0) - if err != nil { return nil, err } - routeTree, err := makeRouteTree(l, routes, false) - - if err != nil { - return nil, err - } - - return &tun{ + t := &tun{ ReadWriteCloser: file, Device: deviceName, cidr: cidr, - MTU: defaultMTU, - Routes: routes, - routeTree: routeTree, + MTU: c.GetInt("tun.mtu", DefaultMTU), l: l, - }, nil + } + + err = t.reload(c, true) + if err != nil { + return nil, err + } + + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil } func (t *tun) Activate() error { @@ -116,17 +126,42 @@ func (t *tun) Activate() error { if err = cmd.Run(); err != nil { return fmt.Errorf("failed to run 'ifconfig': %s", err) } + // Unsafe path routes - for _, r := range t.Routes { - if r.Via == nil || !r.Install { - // We don't allow route MTUs so only install routes with a via - continue + return t.addRoutes(false) +} + +func (t *tun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) + if err != nil { + return err + } + + if !initial && !change { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, false) + if err != nil { + return err + } + + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) + + if !initial { + // Remove first, if the system removes a wanted route hopefully it will be re-added next + err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) + if err != nil { + util.LogWithContextIfNeeded("Failed to remove routes", err, t.l) } - cmd = exec.Command("/sbin/route", "-n", "add", "-net", r.Cidr.String(), t.cidr.IP.String()) - t.l.Debug("command: ", cmd.String()) - if err = cmd.Run(); err != nil { - return fmt.Errorf("failed to run 'route add' for unsafe_route %s: %s", r.Cidr.String(), err) + // Ensure any routes we actually want are installed + err = t.addRoutes(true) + if err != nil { + // Catch any stray logs + util.LogWithContextIfNeeded("Failed to add routes", err, t.l) } } @@ -134,7 +169,7 @@ func (t *tun) Activate() error { } func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - _, r := t.routeTree.MostSpecificContains(ip) + _, r := t.routeTree.Load().MostSpecificContains(ip) return r } @@ -150,6 +185,46 @@ func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) { return nil, fmt.Errorf("TODO: multiqueue not implemented for netbsd") } +func (t *tun) addRoutes(logErrors bool) error { + routes := *t.Routes.Load() + for _, r := range routes { + if r.Via == nil || !r.Install { + // We don't allow route MTUs so only install routes with a via + continue + } + + cmd := exec.Command("/sbin/route", "-n", "add", "-net", r.Cidr.String(), t.cidr.IP.String()) + t.l.Debug("command: ", cmd.String()) + if err := cmd.Run(); err != nil { + retErr := util.NewContextualError("failed to run 'route add' for unsafe_route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + } else { + return retErr + } + } + } + + return nil +} + +func (t *tun) removeRoutes(routes []Route) error { + for _, r := range routes { + if !r.Install { + continue + } + + cmd := exec.Command("/sbin/route", "-n", "delete", "-net", r.Cidr.String(), t.cidr.IP.String()) + t.l.Debug("command: ", cmd.String()) + if err := cmd.Run(); err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") + } + } + return nil +} + func (t *tun) deviceBytes() (o [16]byte) { for i, c := range t.Device { o[i] = byte(c) diff --git a/overlay/tun_openbsd.go b/overlay/tun_openbsd.go index 45c06dc..53f57b1 100644 --- a/overlay/tun_openbsd.go +++ b/overlay/tun_openbsd.go @@ -11,19 +11,22 @@ import ( "os/exec" "regexp" "strconv" + "sync/atomic" "syscall" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" ) type tun struct { Device string cidr *net.IPNet MTU int - Routes []Route - routeTree *cidr.Tree4[iputil.VpnIp] + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] l *logrus.Logger io.ReadWriteCloser @@ -40,13 +43,14 @@ func (t *tun) Close() error { return nil } -func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*tun, error) { +func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*tun, error) { return nil, fmt.Errorf("newTunFromFd not supported in OpenBSD") } var deviceNameRE = regexp.MustCompile(`^tun[0-9]+$`) -func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (*tun, error) { +func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*tun, error) { + deviceName := c.GetString("tun.dev", "") if deviceName == "" { return nil, fmt.Errorf("a device name in the format of tunN must be specified") } @@ -60,20 +64,64 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int return nil, err } - routeTree, err := makeRouteTree(l, routes, false) + t := &tun{ + ReadWriteCloser: file, + Device: deviceName, + cidr: cidr, + MTU: c.GetInt("tun.mtu", DefaultMTU), + l: l, + } + + err = t.reload(c, true) if err != nil { return nil, err } - return &tun{ - ReadWriteCloser: file, - Device: deviceName, - cidr: cidr, - MTU: defaultMTU, - Routes: routes, - routeTree: routeTree, - l: l, - }, nil + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil +} + +func (t *tun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) + if err != nil { + return err + } + + if !initial && !change { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, false) + if err != nil { + return err + } + + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) + + if !initial { + // Remove first, if the system removes a wanted route hopefully it will be re-added next + err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) + if err != nil { + util.LogWithContextIfNeeded("Failed to remove routes", err, t.l) + } + + // Ensure any routes we actually want are installed + err = t.addRoutes(true) + if err != nil { + // Catch any stray logs + util.LogWithContextIfNeeded("Failed to add routes", err, t.l) + } + } + + return nil } func (t *tun) Activate() error { @@ -98,25 +146,52 @@ func (t *tun) Activate() error { } // Unsafe path routes - for _, r := range t.Routes { + return t.addRoutes(false) +} + +func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { + _, r := t.routeTree.Load().MostSpecificContains(ip) + return r +} + +func (t *tun) addRoutes(logErrors bool) error { + routes := *t.Routes.Load() + for _, r := range routes { if r.Via == nil || !r.Install { // We don't allow route MTUs so only install routes with a via continue } - cmd = exec.Command("/sbin/route", "-n", "add", "-inet", r.Cidr.String(), t.cidr.IP.String()) + cmd := exec.Command("/sbin/route", "-n", "add", "-inet", r.Cidr.String(), t.cidr.IP.String()) t.l.Debug("command: ", cmd.String()) - if err = cmd.Run(); err != nil { - return fmt.Errorf("failed to run 'route add' for unsafe_route %s: %s", r.Cidr.String(), err) + if err := cmd.Run(); err != nil { + retErr := util.NewContextualError("failed to run 'route add' for unsafe_route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + } else { + return retErr + } } } return nil } -func (t *tun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - _, r := t.routeTree.MostSpecificContains(ip) - return r +func (t *tun) removeRoutes(routes []Route) error { + for _, r := range routes { + if !r.Install { + continue + } + + cmd := exec.Command("/sbin/route", "-n", "delete", "-inet", r.Cidr.String(), t.cidr.IP.String()) + t.l.Debug("command: ", cmd.String()) + if err := cmd.Run(); err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") + } + } + return nil } func (t *tun) Cidr() *net.IPNet { diff --git a/overlay/tun_tester.go b/overlay/tun_tester.go index 964315a..3833983 100644 --- a/overlay/tun_tester.go +++ b/overlay/tun_tester.go @@ -12,6 +12,7 @@ import ( "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" ) @@ -27,14 +28,18 @@ type TestTun struct { TxPackets chan []byte // Packets transmitted outside by nebula } -func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, _ int, routes []Route, _ int, _ bool, _ bool) (*TestTun, error) { +func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*TestTun, error) { + _, routes, err := getAllRoutesFromConfig(c, cidr, true) + if err != nil { + return nil, err + } routeTree, err := makeRouteTree(l, routes, false) if err != nil { return nil, err } return &TestTun{ - Device: deviceName, + Device: c.GetString("tun.dev", ""), cidr: cidr, Routes: routes, routeTree: routeTree, @@ -44,7 +49,7 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, _ int, routes }, nil } -func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (*TestTun, error) { +func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (*TestTun, error) { return nil, fmt.Errorf("newTunFromFd not supported") } diff --git a/overlay/tun_water_windows.go b/overlay/tun_water_windows.go index e27cff2..a1acd2b 100644 --- a/overlay/tun_water_windows.go +++ b/overlay/tun_water_windows.go @@ -6,10 +6,13 @@ import ( "net" "os/exec" "strconv" + "sync/atomic" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" "github.com/songgao/water" ) @@ -17,25 +20,34 @@ type waterTun struct { Device string cidr *net.IPNet MTU int - Routes []Route - routeTree *cidr.Tree4[iputil.VpnIp] - + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] + l *logrus.Logger + f *net.Interface *water.Interface } -func newWaterTun(l *logrus.Logger, cidr *net.IPNet, defaultMTU int, routes []Route) (*waterTun, error) { - routeTree, err := makeRouteTree(l, routes, false) +func newWaterTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*waterTun, error) { + // NOTE: You cannot set the deviceName under Windows, so you must check tun.Device after calling .Activate() + t := &waterTun{ + cidr: cidr, + MTU: c.GetInt("tun.mtu", DefaultMTU), + l: l, + } + + err := t.reload(c, true) if err != nil { return nil, err } - // NOTE: You cannot set the deviceName under Windows, so you must check tun.Device after calling .Activate() - return &waterTun{ - cidr: cidr, - MTU: defaultMTU, - Routes: routes, - routeTree: routeTree, - }, nil + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil } func (t *waterTun) Activate() error { @@ -74,30 +86,104 @@ func (t *waterTun) Activate() error { return fmt.Errorf("failed to run 'netsh' to set MTU: %s", err) } - iface, err := net.InterfaceByName(t.Device) + t.f, err = net.InterfaceByName(t.Device) if err != nil { return fmt.Errorf("failed to find interface named %s: %v", t.Device, err) } - for _, r := range t.Routes { - if r.Via == nil || !r.Install { - // We don't allow route MTUs so only install routes with a via - continue - } + err = t.addRoutes(false) + if err != nil { + return err + } - err = exec.Command( - "C:\\Windows\\System32\\route.exe", "add", r.Cidr.String(), r.Via.String(), "IF", strconv.Itoa(iface.Index), "METRIC", strconv.Itoa(r.Metric), - ).Run() + return nil +} + +func (t *waterTun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) + if err != nil { + return err + } + + if !initial && !change { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, false) + if err != nil { + return err + } + + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) + + if !initial { + // Remove first, if the system removes a wanted route hopefully it will be re-added next + t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) + + // Ensure any routes we actually want are installed + err = t.addRoutes(true) if err != nil { - return fmt.Errorf("failed to add the unsafe_route %s: %v", r.Cidr.String(), err) + // Catch any stray logs + util.LogWithContextIfNeeded("Failed to set routes", err, t.l) + } else { + for _, r := range findRemovedRoutes(routes, *oldRoutes) { + t.l.WithField("route", r).Info("Removed route") + } } } return nil } +func (t *waterTun) addRoutes(logErrors bool) error { + // Path routes + routes := *t.Routes.Load() + for _, r := range routes { + if r.Via == nil || !r.Install { + // We don't allow route MTUs so only install routes with a via + continue + } + + err := exec.Command( + "C:\\Windows\\System32\\route.exe", "add", r.Cidr.String(), r.Via.String(), "IF", strconv.Itoa(t.f.Index), "METRIC", strconv.Itoa(r.Metric), + ).Run() + + if err != nil { + retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + } else { + return retErr + } + } else { + t.l.WithField("route", r).Info("Added route") + } + } + + return nil +} + +func (t *waterTun) removeRoutes(routes []Route) { + for _, r := range routes { + if !r.Install { + continue + } + + err := exec.Command( + "C:\\Windows\\System32\\route.exe", "delete", r.Cidr.String(), r.Via.String(), "IF", strconv.Itoa(t.f.Index), "METRIC", strconv.Itoa(r.Metric), + ).Run() + if err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") + } + } +} + func (t *waterTun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - _, r := t.routeTree.MostSpecificContains(ip) + _, r := t.routeTree.Load().MostSpecificContains(ip) return r } diff --git a/overlay/tun_windows.go b/overlay/tun_windows.go index 57d90cb..f85ee9c 100644 --- a/overlay/tun_windows.go +++ b/overlay/tun_windows.go @@ -12,13 +12,14 @@ import ( "syscall" "github.com/sirupsen/logrus" + "github.com/slackhq/nebula/config" ) -func newTunFromFd(_ *logrus.Logger, _ int, _ *net.IPNet, _ int, _ []Route, _ int, _ bool) (Device, error) { +func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ *net.IPNet) (Device, error) { return nil, fmt.Errorf("newTunFromFd not supported in Windows") } -func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route, _ int, _ bool, _ bool) (Device, error) { +func newTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, multiqueue bool) (Device, error) { useWintun := true if err := checkWinTunExists(); err != nil { l.WithError(err).Warn("Check Wintun driver failed, fallback to wintap driver") @@ -26,14 +27,14 @@ func newTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int } if useWintun { - device, err := newWinTun(l, deviceName, cidr, defaultMTU, routes) + device, err := newWinTun(c, l, cidr, multiqueue) if err != nil { return nil, fmt.Errorf("create Wintun interface failed, %w", err) } return device, nil } - device, err := newWaterTun(l, cidr, defaultMTU, routes) + device, err := newWaterTun(c, l, cidr, multiqueue) if err != nil { return nil, fmt.Errorf("create wintap driver failed, %w", err) } diff --git a/overlay/tun_wintun_windows.go b/overlay/tun_wintun_windows.go index 9647024..197e3a7 100644 --- a/overlay/tun_wintun_windows.go +++ b/overlay/tun_wintun_windows.go @@ -6,11 +6,14 @@ import ( "io" "net" "net/netip" + "sync/atomic" "unsafe" "github.com/sirupsen/logrus" "github.com/slackhq/nebula/cidr" + "github.com/slackhq/nebula/config" "github.com/slackhq/nebula/iputil" + "github.com/slackhq/nebula/util" "github.com/slackhq/nebula/wintun" "golang.org/x/sys/windows" "golang.zx2c4.com/wireguard/windows/tunnel/winipcfg" @@ -23,8 +26,9 @@ type winTun struct { cidr *net.IPNet prefix netip.Prefix MTU int - Routes []Route - routeTree *cidr.Tree4[iputil.VpnIp] + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[cidr.Tree4[iputil.VpnIp]] + l *logrus.Logger tun *wintun.NativeTun } @@ -48,83 +52,148 @@ func generateGUIDByDeviceName(name string) (*windows.GUID, error) { return (*windows.GUID)(unsafe.Pointer(&sum[0])), nil } -func newWinTun(l *logrus.Logger, deviceName string, cidr *net.IPNet, defaultMTU int, routes []Route) (*winTun, error) { +func newWinTun(c *config.C, l *logrus.Logger, cidr *net.IPNet, _ bool) (*winTun, error) { + deviceName := c.GetString("tun.dev", "") guid, err := generateGUIDByDeviceName(deviceName) if err != nil { return nil, fmt.Errorf("generate GUID failed: %w", err) } - var tunDevice wintun.Device - tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, defaultMTU) - if err != nil { - // Windows 10 has an issue with unclean shutdowns not fully cleaning up the wintun device. - // Trying a second time resolves the issue. - l.WithError(err).Debug("Failed to create wintun device, retrying") - tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, defaultMTU) - if err != nil { - return nil, fmt.Errorf("create TUN device failed: %w", err) - } - } - - routeTree, err := makeRouteTree(l, routes, false) - if err != nil { - return nil, err - } - prefix, err := iputil.ToNetIpPrefix(*cidr) if err != nil { return nil, err } - return &winTun{ - Device: deviceName, - cidr: cidr, - prefix: prefix, - MTU: defaultMTU, - Routes: routes, - routeTree: routeTree, + t := &winTun{ + Device: deviceName, + cidr: cidr, + prefix: prefix, + MTU: c.GetInt("tun.mtu", DefaultMTU), + l: l, + } - tun: tunDevice.(*wintun.NativeTun), - }, nil + err = t.reload(c, true) + if err != nil { + return nil, err + } + + var tunDevice wintun.Device + tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, t.MTU) + if err != nil { + // Windows 10 has an issue with unclean shutdowns not fully cleaning up the wintun device. + // Trying a second time resolves the issue. + l.WithError(err).Debug("Failed to create wintun device, retrying") + tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, t.MTU) + if err != nil { + return nil, fmt.Errorf("create TUN device failed: %w", err) + } + } + t.tun = tunDevice.(*wintun.NativeTun) + + c.RegisterReloadCallback(func(c *config.C) { + err := t.reload(c, false) + if err != nil { + util.LogWithContextIfNeeded("failed to reload tun device", err, t.l) + } + }) + + return t, nil +} + +func (t *winTun) reload(c *config.C, initial bool) error { + change, routes, err := getAllRoutesFromConfig(c, t.cidr, initial) + if err != nil { + return err + } + + if !initial && !change { + return nil + } + + routeTree, err := makeRouteTree(t.l, routes, false) + if err != nil { + return err + } + + // Teach nebula how to handle the routes before establishing them in the system table + oldRoutes := t.Routes.Swap(&routes) + t.routeTree.Store(routeTree) + + if !initial { + // Remove first, if the system removes a wanted route hopefully it will be re-added next + err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes)) + if err != nil { + util.LogWithContextIfNeeded("Failed to remove routes", err, t.l) + } + + // Ensure any routes we actually want are installed + err = t.addRoutes(true) + if err != nil { + // Catch any stray logs + util.LogWithContextIfNeeded("Failed to add routes", err, t.l) + } + } + + return nil } func (t *winTun) Activate() error { luid := winipcfg.LUID(t.tun.LUID()) - if err := luid.SetIPAddresses([]netip.Prefix{t.prefix}); err != nil { + err := luid.SetIPAddresses([]netip.Prefix{t.prefix}) + if err != nil { return fmt.Errorf("failed to set address: %w", err) } - foundDefault4 := false - routes := make([]*winipcfg.RouteData, 0, len(t.Routes)+1) + err = t.addRoutes(false) + if err != nil { + return err + } - for _, r := range t.Routes { + return nil +} + +func (t *winTun) addRoutes(logErrors bool) error { + luid := winipcfg.LUID(t.tun.LUID()) + routes := *t.Routes.Load() + foundDefault4 := false + + for _, r := range routes { if r.Via == nil || !r.Install { // We don't allow route MTUs so only install routes with a via continue } + prefix, err := iputil.ToNetIpPrefix(*r.Cidr) + if err != nil { + retErr := util.NewContextualError("Failed to parse cidr to netip prefix, ignoring route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + continue + } else { + return retErr + } + } + + // Add our unsafe route + err = luid.AddRoute(prefix, r.Via.ToNetIpAddr(), uint32(r.Metric)) + if err != nil { + retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err) + if logErrors { + retErr.Log(t.l) + continue + } else { + return retErr + } + } else { + t.l.WithField("route", r).Info("Added route") + } + if !foundDefault4 { if ones, bits := r.Cidr.Mask.Size(); ones == 0 && bits != 0 { foundDefault4 = true } } - - prefix, err := iputil.ToNetIpPrefix(*r.Cidr) - if err != nil { - return err - } - - // Add our unsafe route - routes = append(routes, &winipcfg.RouteData{ - Destination: prefix, - NextHop: r.Via.ToNetIpAddr(), - Metric: uint32(r.Metric), - }) - } - - if err := luid.AddRoutes(routes); err != nil { - return fmt.Errorf("failed to add routes: %w", err) } ipif, err := luid.IPInterface(windows.AF_INET) @@ -141,12 +210,35 @@ func (t *winTun) Activate() error { if err := ipif.Set(); err != nil { return fmt.Errorf("failed to set ip interface: %w", err) } + return nil +} +func (t *winTun) removeRoutes(routes []Route) error { + luid := winipcfg.LUID(t.tun.LUID()) + + for _, r := range routes { + if !r.Install { + continue + } + + prefix, err := iputil.ToNetIpPrefix(*r.Cidr) + if err != nil { + t.l.WithError(err).WithField("route", r).Info("Failed to convert cidr to netip prefix") + continue + } + + err = luid.DeleteRoute(prefix, r.Via.ToNetIpAddr()) + if err != nil { + t.l.WithError(err).WithField("route", r).Error("Failed to remove route") + } else { + t.l.WithField("route", r).Info("Removed route") + } + } return nil } func (t *winTun) RouteFor(ip iputil.VpnIp) iputil.VpnIp { - _, r := t.routeTree.MostSpecificContains(ip) + _, r := t.routeTree.Load().MostSpecificContains(ip) return r }