mirror of https://github.com/slackhq/nebula.git
575 lines
12 KiB
Go
575 lines
12 KiB
Go
//go:build !android && !e2e_testing
|
|
// +build !android,!e2e_testing
|
|
|
|
package overlay
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"net"
|
|
"net/netip"
|
|
"os"
|
|
"strings"
|
|
"sync/atomic"
|
|
"unsafe"
|
|
|
|
"github.com/gaissmai/bart"
|
|
"github.com/sirupsen/logrus"
|
|
"github.com/slackhq/nebula/config"
|
|
"github.com/slackhq/nebula/util"
|
|
"github.com/vishvananda/netlink"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
type tun struct {
|
|
io.ReadWriteCloser
|
|
fd int
|
|
Device string
|
|
cidr netip.Prefix
|
|
MaxMTU int
|
|
DefaultMTU int
|
|
TXQueueLen int
|
|
deviceIndex int
|
|
ioctlFd uintptr
|
|
|
|
Routes atomic.Pointer[[]Route]
|
|
routeTree atomic.Pointer[bart.Table[netip.Addr]]
|
|
routeChan chan struct{}
|
|
useSystemRoutes bool
|
|
|
|
l *logrus.Logger
|
|
}
|
|
|
|
// ifReq is the request passed by pointer to the TUNSETIFF, SIOCGIFFLAGS and
// SIOCSIFFLAGS ioctls in this file.
type ifReq struct {
	// Name is the interface name, zero padded.
	Name [16]byte
	// Flags carries the interface flags being requested or returned.
	Flags uint16
	// pad reserves trailing space after Flags.
	pad [8]byte
}
|
|
|
|
type ifreqAddr struct {
|
|
Name [16]byte
|
|
Addr unix.RawSockaddrInet4
|
|
pad [8]byte
|
|
}
|
|
|
|
// ifreqMTU is the request passed by pointer to the SIOCSIFMTU ioctl.
type ifreqMTU struct {
	// Name is the interface name, zero padded.
	Name [16]byte
	// MTU is the MTU to apply to the interface.
	MTU int32
	// pad reserves trailing space after MTU.
	pad [8]byte
}
|
|
|
|
// ifreqQLEN is the request passed by pointer to the SIOCSIFTXQLEN ioctl.
type ifreqQLEN struct {
	// Name is the interface name, zero padded.
	Name [16]byte
	// Value is the transmit queue length to apply.
	Value int32
	// pad reserves trailing space after Value.
	pad [8]byte
}
|
|
|
|
func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, cidr netip.Prefix) (*tun, error) {
|
|
file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
|
|
|
|
t, err := newTunGeneric(c, l, file, cidr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
t.Device = "tun0"
|
|
|
|
return t, nil
|
|
}
|
|
|
|
func newTun(c *config.C, l *logrus.Logger, cidr netip.Prefix, multiqueue bool) (*tun, error) {
|
|
fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
|
|
if err != nil {
|
|
// If /dev/net/tun doesn't exist, try to create it (will happen in docker)
|
|
if os.IsNotExist(err) {
|
|
err = os.MkdirAll("/dev/net", 0755)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("/dev/net/tun doesn't exist, failed to mkdir -p /dev/net: %w", err)
|
|
}
|
|
err = unix.Mknod("/dev/net/tun", unix.S_IFCHR|0600, int(unix.Mkdev(10, 200)))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to create /dev/net/tun: %w", err)
|
|
}
|
|
|
|
fd, err = unix.Open("/dev/net/tun", os.O_RDWR, 0)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("created /dev/net/tun, but still failed: %w", err)
|
|
}
|
|
} else {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
var req ifReq
|
|
req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI)
|
|
if multiqueue {
|
|
req.Flags |= unix.IFF_MULTI_QUEUE
|
|
}
|
|
copy(req.Name[:], c.GetString("tun.dev", ""))
|
|
if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
|
|
return nil, err
|
|
}
|
|
name := strings.Trim(string(req.Name[:]), "\x00")
|
|
|
|
file := os.NewFile(uintptr(fd), "/dev/net/tun")
|
|
t, err := newTunGeneric(c, l, file, cidr)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
t.Device = name
|
|
|
|
return t, nil
|
|
}
|
|
|
|
func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, cidr netip.Prefix) (*tun, error) {
|
|
t := &tun{
|
|
ReadWriteCloser: file,
|
|
fd: int(file.Fd()),
|
|
cidr: cidr,
|
|
TXQueueLen: c.GetInt("tun.tx_queue", 500),
|
|
useSystemRoutes: c.GetBool("tun.use_system_route_table", false),
|
|
l: l,
|
|
}
|
|
|
|
err := t.reload(c, true)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
c.RegisterReloadCallback(func(c *config.C) {
|
|
err := t.reload(c, false)
|
|
if err != nil {
|
|
util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
|
|
}
|
|
})
|
|
|
|
return t, nil
|
|
}
|
|
|
|
func (t *tun) reload(c *config.C, initial bool) error {
|
|
routeChange, routes, err := getAllRoutesFromConfig(c, t.cidr, initial)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if !initial && !routeChange && !c.HasChanged("tun.mtu") {
|
|
return nil
|
|
}
|
|
|
|
routeTree, err := makeRouteTree(t.l, routes, true)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
oldDefaultMTU := t.DefaultMTU
|
|
oldMaxMTU := t.MaxMTU
|
|
newDefaultMTU := c.GetInt("tun.mtu", DefaultMTU)
|
|
newMaxMTU := newDefaultMTU
|
|
for i, r := range routes {
|
|
if r.MTU == 0 {
|
|
routes[i].MTU = newDefaultMTU
|
|
}
|
|
|
|
if r.MTU > t.MaxMTU {
|
|
newMaxMTU = r.MTU
|
|
}
|
|
}
|
|
|
|
t.MaxMTU = newMaxMTU
|
|
t.DefaultMTU = newDefaultMTU
|
|
|
|
// Teach nebula how to handle the routes before establishing them in the system table
|
|
oldRoutes := t.Routes.Swap(&routes)
|
|
t.routeTree.Store(routeTree)
|
|
|
|
if !initial {
|
|
if oldMaxMTU != newMaxMTU {
|
|
t.setMTU()
|
|
t.l.Infof("Set max MTU to %v was %v", t.MaxMTU, oldMaxMTU)
|
|
}
|
|
|
|
if oldDefaultMTU != newDefaultMTU {
|
|
err := t.setDefaultRoute()
|
|
if err != nil {
|
|
t.l.Warn(err)
|
|
} else {
|
|
t.l.Infof("Set default MTU to %v was %v", t.DefaultMTU, oldDefaultMTU)
|
|
}
|
|
}
|
|
|
|
// Remove first, if the system removes a wanted route hopefully it will be re-added next
|
|
t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
|
|
|
|
// Ensure any routes we actually want are installed
|
|
err = t.addRoutes(true)
|
|
if err != nil {
|
|
// This should never be called since addRoutes should log its own errors in a reload condition
|
|
util.LogWithContextIfNeeded("Failed to refresh routes", err, t.l)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
|
|
fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var req ifReq
|
|
req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI | unix.IFF_MULTI_QUEUE)
|
|
copy(req.Name[:], t.Device)
|
|
if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
file := os.NewFile(uintptr(fd), "/dev/net/tun")
|
|
|
|
return file, nil
|
|
}
|
|
|
|
func (t *tun) RouteFor(ip netip.Addr) netip.Addr {
|
|
r, _ := t.routeTree.Load().Lookup(ip)
|
|
return r
|
|
}
|
|
|
|
func (t *tun) Write(b []byte) (int, error) {
|
|
var nn int
|
|
max := len(b)
|
|
|
|
for {
|
|
n, err := unix.Write(t.fd, b[nn:max])
|
|
if n > 0 {
|
|
nn += n
|
|
}
|
|
if nn == len(b) {
|
|
return nn, err
|
|
}
|
|
|
|
if err != nil {
|
|
return nn, err
|
|
}
|
|
|
|
if n == 0 {
|
|
return nn, io.ErrUnexpectedEOF
|
|
}
|
|
}
|
|
}
|
|
|
|
func (t *tun) deviceBytes() (o [16]byte) {
|
|
for i, c := range t.Device {
|
|
o[i] = byte(c)
|
|
}
|
|
return
|
|
}
|
|
|
|
func (t *tun) Activate() error {
|
|
devName := t.deviceBytes()
|
|
|
|
if t.useSystemRoutes {
|
|
t.watchRoutes()
|
|
}
|
|
|
|
var addr, mask [4]byte
|
|
|
|
//TODO: IPV6-WORK
|
|
addr = t.cidr.Addr().As4()
|
|
tmask := net.CIDRMask(t.cidr.Bits(), 32)
|
|
copy(mask[:], tmask)
|
|
|
|
s, err := unix.Socket(
|
|
unix.AF_INET,
|
|
unix.SOCK_DGRAM,
|
|
unix.IPPROTO_IP,
|
|
)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
t.ioctlFd = uintptr(s)
|
|
|
|
ifra := ifreqAddr{
|
|
Name: devName,
|
|
Addr: unix.RawSockaddrInet4{
|
|
Family: unix.AF_INET,
|
|
Addr: addr,
|
|
},
|
|
}
|
|
|
|
// Set the device ip address
|
|
if err = ioctl(t.ioctlFd, unix.SIOCSIFADDR, uintptr(unsafe.Pointer(&ifra))); err != nil {
|
|
return fmt.Errorf("failed to set tun address: %s", err)
|
|
}
|
|
|
|
// Set the device network
|
|
ifra.Addr.Addr = mask
|
|
if err = ioctl(t.ioctlFd, unix.SIOCSIFNETMASK, uintptr(unsafe.Pointer(&ifra))); err != nil {
|
|
return fmt.Errorf("failed to set tun netmask: %s", err)
|
|
}
|
|
|
|
// Set the device name
|
|
ifrf := ifReq{Name: devName}
|
|
if err = ioctl(t.ioctlFd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
|
|
return fmt.Errorf("failed to set tun device name: %s", err)
|
|
}
|
|
|
|
// Setup our default MTU
|
|
t.setMTU()
|
|
|
|
// Set the transmit queue length
|
|
ifrq := ifreqQLEN{Name: devName, Value: int32(t.TXQueueLen)}
|
|
if err = ioctl(t.ioctlFd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil {
|
|
// If we can't set the queue length nebula will still work but it may lead to packet loss
|
|
t.l.WithError(err).Error("Failed to set tun tx queue length")
|
|
}
|
|
|
|
// Bring up the interface
|
|
ifrf.Flags = ifrf.Flags | unix.IFF_UP
|
|
if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
|
|
return fmt.Errorf("failed to bring the tun device up: %s", err)
|
|
}
|
|
|
|
link, err := netlink.LinkByName(t.Device)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get tun device link: %s", err)
|
|
}
|
|
t.deviceIndex = link.Attrs().Index
|
|
|
|
if err = t.setDefaultRoute(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Set the routes
|
|
if err = t.addRoutes(false); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Run the interface
|
|
ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
|
|
if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
|
|
return fmt.Errorf("failed to run tun device: %s", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (t *tun) setMTU() {
|
|
// Set the MTU on the device
|
|
ifm := ifreqMTU{Name: t.deviceBytes(), MTU: int32(t.MaxMTU)}
|
|
if err := ioctl(t.ioctlFd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil {
|
|
// This is currently a non fatal condition because the route table must have the MTU set appropriately as well
|
|
t.l.WithError(err).Error("Failed to set tun mtu")
|
|
}
|
|
}
|
|
|
|
func (t *tun) setDefaultRoute() error {
|
|
// Default route
|
|
|
|
dr := &net.IPNet{
|
|
IP: t.cidr.Masked().Addr().AsSlice(),
|
|
Mask: net.CIDRMask(t.cidr.Bits(), t.cidr.Addr().BitLen()),
|
|
}
|
|
|
|
nr := netlink.Route{
|
|
LinkIndex: t.deviceIndex,
|
|
Dst: dr,
|
|
MTU: t.DefaultMTU,
|
|
AdvMSS: t.advMSS(Route{}),
|
|
Scope: unix.RT_SCOPE_LINK,
|
|
Src: net.IP(t.cidr.Addr().AsSlice()),
|
|
Protocol: unix.RTPROT_KERNEL,
|
|
Table: unix.RT_TABLE_MAIN,
|
|
Type: unix.RTN_UNICAST,
|
|
}
|
|
err := netlink.RouteReplace(&nr)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to set mtu %v on the default route %v; %v", t.DefaultMTU, dr, err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (t *tun) addRoutes(logErrors bool) error {
|
|
// Path routes
|
|
routes := *t.Routes.Load()
|
|
for _, r := range routes {
|
|
if !r.Install {
|
|
continue
|
|
}
|
|
|
|
dr := &net.IPNet{
|
|
IP: r.Cidr.Masked().Addr().AsSlice(),
|
|
Mask: net.CIDRMask(r.Cidr.Bits(), r.Cidr.Addr().BitLen()),
|
|
}
|
|
|
|
nr := netlink.Route{
|
|
LinkIndex: t.deviceIndex,
|
|
Dst: dr,
|
|
MTU: r.MTU,
|
|
AdvMSS: t.advMSS(r),
|
|
Scope: unix.RT_SCOPE_LINK,
|
|
}
|
|
|
|
if r.Metric > 0 {
|
|
nr.Priority = r.Metric
|
|
}
|
|
|
|
err := netlink.RouteReplace(&nr)
|
|
if err != nil {
|
|
retErr := util.NewContextualError("Failed to add route", map[string]interface{}{"route": r}, err)
|
|
if logErrors {
|
|
retErr.Log(t.l)
|
|
} else {
|
|
return retErr
|
|
}
|
|
} else {
|
|
t.l.WithField("route", r).Info("Added route")
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (t *tun) removeRoutes(routes []Route) {
|
|
for _, r := range routes {
|
|
if !r.Install {
|
|
continue
|
|
}
|
|
|
|
dr := &net.IPNet{
|
|
IP: r.Cidr.Masked().Addr().AsSlice(),
|
|
Mask: net.CIDRMask(r.Cidr.Bits(), r.Cidr.Addr().BitLen()),
|
|
}
|
|
|
|
nr := netlink.Route{
|
|
LinkIndex: t.deviceIndex,
|
|
Dst: dr,
|
|
MTU: r.MTU,
|
|
AdvMSS: t.advMSS(r),
|
|
Scope: unix.RT_SCOPE_LINK,
|
|
}
|
|
|
|
if r.Metric > 0 {
|
|
nr.Priority = r.Metric
|
|
}
|
|
|
|
err := netlink.RouteDel(&nr)
|
|
if err != nil {
|
|
t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
|
|
} else {
|
|
t.l.WithField("route", r).Info("Removed route")
|
|
}
|
|
}
|
|
}
|
|
|
|
func (t *tun) Cidr() netip.Prefix {
|
|
return t.cidr
|
|
}
|
|
|
|
func (t *tun) Name() string {
|
|
return t.Device
|
|
}
|
|
|
|
func (t *tun) advMSS(r Route) int {
|
|
mtu := r.MTU
|
|
if r.MTU == 0 {
|
|
mtu = t.DefaultMTU
|
|
}
|
|
|
|
// We only need to set advmss if the route MTU does not match the device MTU
|
|
if mtu != t.MaxMTU {
|
|
return mtu - 40
|
|
}
|
|
return 0
|
|
}
|
|
|
|
func (t *tun) watchRoutes() {
|
|
rch := make(chan netlink.RouteUpdate)
|
|
doneChan := make(chan struct{})
|
|
|
|
if err := netlink.RouteSubscribe(rch, doneChan); err != nil {
|
|
t.l.WithError(err).Errorf("failed to subscribe to system route changes")
|
|
return
|
|
}
|
|
|
|
t.routeChan = doneChan
|
|
|
|
go func() {
|
|
for {
|
|
select {
|
|
case r := <-rch:
|
|
t.updateRoutes(r)
|
|
case <-doneChan:
|
|
// netlink.RouteSubscriber will close the rch for us
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
func (t *tun) updateRoutes(r netlink.RouteUpdate) {
|
|
if r.Gw == nil {
|
|
// Not a gateway route, ignore
|
|
t.l.WithField("route", r).Debug("Ignoring route update, not a gateway route")
|
|
return
|
|
}
|
|
|
|
//TODO: IPV6-WORK what if not ok?
|
|
gwAddr, ok := netip.AddrFromSlice(r.Gw)
|
|
if !ok {
|
|
t.l.WithField("route", r).Debug("Ignoring route update, invalid gateway address")
|
|
return
|
|
}
|
|
|
|
gwAddr = gwAddr.Unmap()
|
|
if !t.cidr.Contains(gwAddr) {
|
|
// Gateway isn't in our overlay network, ignore
|
|
t.l.WithField("route", r).Debug("Ignoring route update, not in our network")
|
|
return
|
|
}
|
|
|
|
if x := r.Dst.IP.To4(); x == nil {
|
|
// Nebula only handles ipv4 on the overlay currently
|
|
t.l.WithField("route", r).Debug("Ignoring route update, destination is not ipv4")
|
|
return
|
|
}
|
|
|
|
dstAddr, ok := netip.AddrFromSlice(r.Dst.IP)
|
|
if !ok {
|
|
t.l.WithField("route", r).Debug("Ignoring route update, invalid destination address")
|
|
return
|
|
}
|
|
|
|
ones, _ := r.Dst.Mask.Size()
|
|
dst := netip.PrefixFrom(dstAddr, ones)
|
|
|
|
newTree := t.routeTree.Load().Clone()
|
|
|
|
if r.Type == unix.RTM_NEWROUTE {
|
|
t.l.WithField("destination", r.Dst).WithField("via", r.Gw).Info("Adding route")
|
|
newTree.Insert(dst, gwAddr)
|
|
|
|
} else {
|
|
newTree.Delete(dst)
|
|
t.l.WithField("destination", r.Dst).WithField("via", r.Gw).Info("Removing route")
|
|
}
|
|
t.routeTree.Store(newTree)
|
|
}
|
|
|
|
func (t *tun) Close() error {
|
|
if t.routeChan != nil {
|
|
close(t.routeChan)
|
|
}
|
|
|
|
if t.ReadWriteCloser != nil {
|
|
t.ReadWriteCloser.Close()
|
|
}
|
|
|
|
if t.ioctlFd > 0 {
|
|
os.NewFile(t.ioctlFd, "ioctlFd").Close()
|
|
}
|
|
|
|
return nil
|
|
}
|