diff --git a/.github/workflows/smoke/Dockerfile b/.github/workflows/smoke/Dockerfile index 18460b3..f8a89ef 100644 --- a/.github/workflows/smoke/Dockerfile +++ b/.github/workflows/smoke/Dockerfile @@ -1,4 +1,6 @@ -FROM debian:buster +FROM ubuntu:jammy + +RUN apt-get update && apt-get install -y iputils-ping ncat tcpdump ADD ./build /nebula diff --git a/.github/workflows/smoke/genconfig.sh b/.github/workflows/smoke/genconfig.sh index 005734c..373ea5f 100755 --- a/.github/workflows/smoke/genconfig.sh +++ b/.github/workflows/smoke/genconfig.sh @@ -50,6 +50,8 @@ tun: dev: ${TUN_DEV:-nebula1} firewall: + inbound_action: reject + outbound_action: reject outbound: ${OUTBOUND:-$FIREWALL_ALL} inbound: ${INBOUND:-$FIREWALL_ALL} diff --git a/.github/workflows/smoke/smoke.sh b/.github/workflows/smoke/smoke.sh index 213add3..836e61a 100755 --- a/.github/workflows/smoke/smoke.sh +++ b/.github/workflows/smoke/smoke.sh @@ -34,6 +34,21 @@ sleep 1 sudo docker run --name host4 --device /dev/net/tun:/dev/net/tun --cap-add NET_ADMIN --rm nebula:smoke -config host4.yml 2>&1 | tee logs/host4 | sed -u 's/^/ [host4] /' & sleep 1 +# grab tcpdump pcaps for debugging +sudo docker exec lighthouse1 tcpdump -i nebula1 -q -w - -U 2>logs/lighthouse1.inside.log >logs/lighthouse1.inside.pcap & +sudo docker exec lighthouse1 tcpdump -i eth0 -q -w - -U 2>logs/lighthouse1.outside.log >logs/lighthouse1.outside.pcap & +sudo docker exec host2 tcpdump -i nebula1 -q -w - -U 2>logs/host2.inside.log >logs/host2.inside.pcap & +sudo docker exec host2 tcpdump -i eth0 -q -w - -U 2>logs/host2.outside.log >logs/host2.outside.pcap & +sudo docker exec host3 tcpdump -i nebula1 -q -w - -U 2>logs/host3.inside.log >logs/host3.inside.pcap & +sudo docker exec host3 tcpdump -i eth0 -q -w - -U 2>logs/host3.outside.log >logs/host3.outside.pcap & +sudo docker exec host4 tcpdump -i nebula1 -q -w - -U 2>logs/host4.inside.log >logs/host4.inside.pcap & +sudo docker exec host4 tcpdump -i eth0 -q -w - -U 2>logs/host4.outside.log 
>logs/host4.outside.pcap & + +sudo docker exec host2 ncat -nklv 0.0.0.0 2000 & +sudo docker exec host3 ncat -nklv 0.0.0.0 2000 & +sudo docker exec host2 ncat -e '/usr/bin/echo host2' -nkluv 0.0.0.0 3000 & +sudo docker exec host3 ncat -e '/usr/bin/echo host3' -nkluv 0.0.0.0 3000 & + set +x echo echo " *** Testing ping from lighthouse1" @@ -51,6 +66,15 @@ sudo docker exec host2 ping -c1 192.168.100.1 # Should fail because not allowed by host3 inbound firewall ! sudo docker exec host2 ping -c1 192.168.100.3 -w5 || exit 1 +set +x +echo +echo " *** Testing ncat from host2" +echo +set -x +# Should fail because not allowed by host3 inbound firewall +! sudo docker exec host2 ncat -nzv -w5 192.168.100.3 2000 || exit 1 +! sudo docker exec host2 ncat -nzuv -w5 192.168.100.3 3000 | grep -q host3 || exit 1 + set +x echo echo " *** Testing ping from host3" @@ -59,6 +83,14 @@ set -x sudo docker exec host3 ping -c1 192.168.100.1 sudo docker exec host3 ping -c1 192.168.100.2 +set +x +echo +echo " *** Testing ncat from host3" +echo +set -x +sudo docker exec host3 ncat -nzv -w5 192.168.100.2 2000 +sudo docker exec host3 ncat -nzuv -w5 192.168.100.2 3000 | grep -q host2 + set +x echo echo " *** Testing ping from host4" @@ -69,6 +101,17 @@ sudo docker exec host4 ping -c1 192.168.100.1 ! sudo docker exec host4 ping -c1 192.168.100.2 -w5 || exit 1 ! sudo docker exec host4 ping -c1 192.168.100.3 -w5 || exit 1 +set +x +echo +echo " *** Testing ncat from host4" +echo +set -x +# Should fail because not allowed by host4 outbound firewall +! sudo docker exec host4 ncat -nzv -w5 192.168.100.2 2000 || exit 1 +! sudo docker exec host4 ncat -nzv -w5 192.168.100.3 2000 || exit 1 +! sudo docker exec host4 ncat -nzuv -w5 192.168.100.2 3000 | grep -q host2 || exit 1 +! 
sudo docker exec host4 ncat -nzuv -w5 192.168.100.3 3000 | grep -q host3 || exit 1 + set +x echo echo " *** Testing conntrack" diff --git a/examples/config.yml b/examples/config.yml index f214bf7..9fe95ce 100644 --- a/examples/config.yml +++ b/examples/config.yml @@ -259,6 +259,15 @@ logging: # Nebula security group configuration firewall: + # Action to take when a packet is not allowed by the firewall rules. + # Can be one of: + # `drop` (default): silently drop the packet. + # `reject`: send a reject reply. + # - For TCP, this will be a RST "Connection Reset" packet. + # - For other protocols, this will be an ICMP port unreachable packet. + outbound_action: drop + inbound_action: drop + conntrack: tcp_timeout: 12m udp_timeout: 3m diff --git a/firewall.go b/firewall.go index 9fd75fc..061d9e6 100644 --- a/firewall.go +++ b/firewall.go @@ -47,6 +47,9 @@ type Firewall struct { InRules *FirewallTable OutRules *FirewallTable + InSendReject bool + OutSendReject bool + //TODO: we should have many more options for TCP, an option for ICMP, and mimic the kernel a bit better // https://www.kernel.org/doc/Documentation/networking/nf_conntrack-sysctl.txt TCPTimeout time.Duration //linux: 5 days max @@ -179,6 +182,28 @@ func NewFirewallFromConfig(l *logrus.Logger, nc *cert.NebulaCertificate, c *conf //TODO: max_connections ) + inboundAction := c.GetString("firewall.inbound_action", "drop") + switch inboundAction { + case "reject": + fw.InSendReject = true + case "drop": + fw.InSendReject = false + default: + l.WithField("action", inboundAction).Warn("invalid firewall.inbound_action, defaulting to `drop`") + fw.InSendReject = false + } + + outboundAction := c.GetString("firewall.outbound_action", "drop") + switch outboundAction { + case "reject": + fw.OutSendReject = true + case "drop": + fw.OutSendReject = false + default: + l.WithField("action", outboundAction).Warn("invalid firewall.outbound_action, defaulting to `drop`") + fw.OutSendReject = false + } + + err := 
AddFirewallRulesFromConfig(l, false, c, fw) if err != nil { return nil, err diff --git a/inside.go b/inside.go index 38d9332..0734883 100644 --- a/inside.go +++ b/inside.go @@ -46,6 +46,7 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet hostinfo := f.getOrHandshake(fwPacket.RemoteIP) if hostinfo == nil { + f.rejectInside(packet, out, q) if f.l.Level >= logrus.DebugLevel { f.l.WithField("vpnIp", fwPacket.RemoteIP). WithField("fwPacket", fwPacket). @@ -71,14 +72,42 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet if dropReason == nil { f.sendNoMetrics(header.Message, 0, ci, hostinfo, nil, packet, nb, out, q) - } else if f.l.Level >= logrus.DebugLevel { - hostinfo.logger(f.l). - WithField("fwPacket", fwPacket). - WithField("reason", dropReason). - Debugln("dropping outbound packet") + } else { + f.rejectInside(packet, out, q) + if f.l.Level >= logrus.DebugLevel { + hostinfo.logger(f.l). + WithField("fwPacket", fwPacket). + WithField("reason", dropReason). 
+ Debugln("dropping outbound packet") + } } } +func (f *Interface) rejectInside(packet []byte, out []byte, q int) { + if !f.firewall.InSendReject { + return + } + + out = iputil.CreateRejectPacket(packet, out) + _, err := f.readers[q].Write(out) + if err != nil { + f.l.WithError(err).Error("Failed to write to tun") + } +} + +func (f *Interface) rejectOutside(packet []byte, ci *ConnectionState, hostinfo *HostInfo, nb, out []byte, q int) { + if !f.firewall.OutSendReject { + return + } + + // Use some out buffer space to build the packet before encryption + // Need 40 bytes for the reject packet (20 byte ipv4 header, 20 byte tcp rst packet) + // Leave 100 bytes for the encrypted packet (60 byte Nebula header, 40 byte reject packet) + out = out[:140] + outPacket := iputil.CreateRejectPacket(packet, out[100:]) + f.sendNoMetrics(header.Message, 0, ci, hostinfo, nil, outPacket, nb, out, q) +} + func (f *Interface) Handshake(vpnIp iputil.VpnIp) { f.getOrHandshake(vpnIp) } diff --git a/iputil/packet.go b/iputil/packet.go new file mode 100644 index 0000000..74ae37f --- /dev/null +++ b/iputil/packet.go @@ -0,0 +1,211 @@ +package iputil + +import ( + "encoding/binary" + + "golang.org/x/net/ipv4" +) + +func CreateRejectPacket(packet []byte, out []byte) []byte { + // TODO ipv4 only, need to fix when inside supports ipv6 + switch packet[9] { + case 6: // tcp + return ipv4CreateRejectTCPPacket(packet, out) + default: + return ipv4CreateRejectICMPPacket(packet, out) + } +} + +func ipv4CreateRejectICMPPacket(packet []byte, out []byte) []byte { + ihl := int(packet[0]&0x0f) << 2 + + // ICMP reply includes header and first 8 bytes of the packet + packetLen := len(packet) + if packetLen > ihl+8 { + packetLen = ihl + 8 + } + + outLen := ipv4.HeaderLen + 8 + packetLen + + out = out[:(outLen)] + + ipHdr := out[0:ipv4.HeaderLen] + ipHdr[0] = ipv4.Version<<4 | (ipv4.HeaderLen >> 2) // version, ihl + ipHdr[1] = 0 // DSCP, ECN + binary.BigEndian.PutUint16(ipHdr[2:], 
uint16(ipv4.HeaderLen+8+packetLen)) // Total Length + + ipHdr[4] = 0 // id + ipHdr[5] = 0 // . + ipHdr[6] = 0 // flags, fragment offset + ipHdr[7] = 0 // . + ipHdr[8] = 64 // TTL + ipHdr[9] = 1 // protocol (icmp) + ipHdr[10] = 0 // checksum + ipHdr[11] = 0 // . + + // Swap dest / src IPs + copy(ipHdr[12:16], packet[16:20]) + copy(ipHdr[16:20], packet[12:16]) + + // Calculate checksum + binary.BigEndian.PutUint16(ipHdr[10:], tcpipChecksum(ipHdr, 0)) + + // ICMP Destination Unreachable + icmpOut := out[ipv4.HeaderLen:] + icmpOut[0] = 3 // type (Destination unreachable) + icmpOut[1] = 3 // code (Port unreachable error) + icmpOut[2] = 0 // checksum + icmpOut[3] = 0 // . + icmpOut[4] = 0 // unused + icmpOut[5] = 0 // . + icmpOut[6] = 0 // . + icmpOut[7] = 0 // . + + // Copy original IP header and first 8 bytes as body + copy(icmpOut[8:], packet[:packetLen]) + + // Calculate checksum + binary.BigEndian.PutUint16(icmpOut[2:], tcpipChecksum(icmpOut, 0)) + + return out +} + +func ipv4CreateRejectTCPPacket(packet []byte, out []byte) []byte { + const tcpLen = 20 + + ihl := int(packet[0]&0x0f) << 2 + outLen := ipv4.HeaderLen + tcpLen + + out = out[:(outLen)] + + ipHdr := out[0:ipv4.HeaderLen] + ipHdr[0] = ipv4.Version<<4 | (ipv4.HeaderLen >> 2) // version, ihl + ipHdr[1] = 0 // DSCP, ECN + binary.BigEndian.PutUint16(ipHdr[2:], uint16(outLen)) // Total Length + ipHdr[4] = 0 // id + ipHdr[5] = 0 // . + ipHdr[6] = 0 // flags, fragment offset + ipHdr[7] = 0 // . + ipHdr[8] = 64 // TTL + ipHdr[9] = 6 // protocol (tcp) + ipHdr[10] = 0 // checksum + ipHdr[11] = 0 // . 
+ + // Swap dest / src IPs + copy(ipHdr[12:16], packet[16:20]) + copy(ipHdr[16:20], packet[12:16]) + + // Calculate checksum + binary.BigEndian.PutUint16(ipHdr[10:], tcpipChecksum(ipHdr, 0)) + + // TCP RST + tcpIn := packet[ihl:] + var ackSeq, seq uint32 + outFlags := byte(0b00000100) // RST + + // Set seq and ackSeq based on how iptables/netfilter does it in Linux: + // - https://github.com/torvalds/linux/blob/v5.19/net/ipv4/netfilter/nf_reject_ipv4.c#L193-L221 + inAck := tcpIn[13]&0b00010000 != 0 + if inAck { + seq = binary.BigEndian.Uint32(tcpIn[8:]) + } else { + inSyn := uint32((tcpIn[13] & 0b00000010) >> 1) + inFin := uint32(tcpIn[13] & 0b00000001) + // seq from the packet + syn + fin + tcp segment length + ackSeq = binary.BigEndian.Uint32(tcpIn[4:]) + inSyn + inFin + uint32(len(tcpIn)) - uint32(tcpIn[12]>>4)<<2 + outFlags |= 0b00010000 // ACK + } + + tcpOut := out[ipv4.HeaderLen:] + // Swap dest / src ports + copy(tcpOut[0:2], tcpIn[2:4]) + copy(tcpOut[2:4], tcpIn[0:2]) + binary.BigEndian.PutUint32(tcpOut[4:], seq) + binary.BigEndian.PutUint32(tcpOut[8:], ackSeq) + tcpOut[12] = (tcpLen >> 2) << 4 // data offset, reserved, NS + tcpOut[13] = outFlags // CWR, ECE, URG, ACK, PSH, RST, SYN, FIN + tcpOut[14] = 0 // window size + tcpOut[15] = 0 // . + tcpOut[16] = 0 // checksum + tcpOut[17] = 0 // . + tcpOut[18] = 0 // URG Pointer + tcpOut[19] = 0 // . 
+ + // Calculate checksum + csum := ipv4PseudoheaderChecksum(ipHdr[12:16], ipHdr[16:20], 6, tcpLen) + binary.BigEndian.PutUint16(tcpOut[16:], tcpipChecksum(tcpOut, csum)) + + return out +} + +func CreateICMPEchoResponse(packet, out []byte) []byte { + // Return early if this is not a simple ICMP Echo Request + //TODO: make constants out of these + if !(len(packet) >= 28 && len(packet) <= 9001 && packet[0] == 0x45 && packet[9] == 0x01 && packet[20] == 0x08) { + return nil + } + + // We don't support fragmented packets + if packet[7] != 0 || (packet[6]&0x2F != 0) { + return nil + } + + out = out[:len(packet)] + + copy(out, packet) + + // Swap dest / src IPs and recalculate checksum + ipv4 := out[0:20] + copy(ipv4[12:16], packet[16:20]) + copy(ipv4[16:20], packet[12:16]) + ipv4[10] = 0 + ipv4[11] = 0 + binary.BigEndian.PutUint16(ipv4[10:], tcpipChecksum(ipv4, 0)) + + // Change type to ICMP Echo Reply and recalculate checksum + icmp := out[20:] + icmp[0] = 0 + icmp[2] = 0 + icmp[3] = 0 + binary.BigEndian.PutUint16(icmp[2:], tcpipChecksum(icmp, 0)) + + return out +} + +// calculates the TCP/IP checksum defined in rfc1071. The passed-in +// csum is any initial checksum data that's already been computed. +// +// based on: +// - https://github.com/google/gopacket/blob/v1.1.19/layers/tcpip.go#L50-L70 +func tcpipChecksum(data []byte, csum uint32) uint16 { + // to handle odd lengths, we loop to length - 1, incrementing by 2, then + // handle the last byte specifically by checking against the original + // length. + length := len(data) - 1 + for i := 0; i < length; i += 2 { + // For our test packet, doing this manually is about 25% faster + // (740 ns vs. 1000ns) than doing it by calling binary.BigEndian.Uint16. 
+ csum += uint32(data[i]) << 8 + csum += uint32(data[i+1]) + } + if len(data)%2 == 1 { + csum += uint32(data[length]) << 8 + } + for csum > 0xffff { + csum = (csum >> 16) + (csum & 0xffff) + } + return ^uint16(csum) +} + +// based on: +// - https://github.com/google/gopacket/blob/v1.1.19/layers/tcpip.go#L26-L35 +func ipv4PseudoheaderChecksum(src, dst []byte, proto, length uint32) (csum uint32) { + csum += (uint32(src[0]) + uint32(src[2])) << 8 + csum += uint32(src[1]) + uint32(src[3]) + csum += (uint32(dst[0]) + uint32(dst[2])) << 8 + csum += uint32(dst[1]) + uint32(dst[3]) + csum += proto + csum += length & 0xffff + csum += length >> 16 + return csum +} diff --git a/outside.go b/outside.go index 605325d..8fa90be 100644 --- a/outside.go +++ b/outside.go @@ -399,6 +399,7 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out dropReason := f.firewall.Drop(out, *fwPacket, true, hostinfo, f.caPool, localCache) if dropReason != nil { + f.rejectOutside(out, hostinfo.ConnectionState, hostinfo, nb, out, q) if f.l.Level >= logrus.DebugLevel { hostinfo.logger(f.l).WithField("fwPacket", fwPacket). WithField("reason", dropReason). 
diff --git a/overlay/tun_disabled.go b/overlay/tun_disabled.go index b7f7273..e1e4ede 100644 --- a/overlay/tun_disabled.go +++ b/overlay/tun_disabled.go @@ -1,7 +1,6 @@ package overlay import ( - "encoding/binary" "fmt" "io" "net" @@ -75,38 +74,15 @@ func (t *disabledTun) Read(b []byte) (int, error) { } func (t *disabledTun) handleICMPEchoRequest(b []byte) bool { - // Return early if this is not a simple ICMP Echo Request - //TODO: make constants out of these - if !(len(b) >= 28 && len(b) <= 9001 && b[0] == 0x45 && b[9] == 0x01 && b[20] == 0x08) { + out := make([]byte, len(b)) + out = iputil.CreateICMPEchoResponse(b, out) + if out == nil { return false } - // We don't support fragmented packets - if b[7] != 0 || (b[6]&0x2F != 0) { - return false - } - - buf := make([]byte, len(b)) - copy(buf, b) - - // Swap dest / src IPs and recalculate checksum - ipv4 := buf[0:20] - copy(ipv4[12:16], b[16:20]) - copy(ipv4[16:20], b[12:16]) - ipv4[10] = 0 - ipv4[11] = 0 - binary.BigEndian.PutUint16(ipv4[10:], ipChecksum(ipv4)) - - // Change type to ICMP Echo Reply and recalculate checksum - icmp := buf[20:] - icmp[0] = 0 - icmp[2] = 0 - icmp[3] = 0 - binary.BigEndian.PutUint16(icmp[2:], ipChecksum(icmp)) - // attempt to write it, but don't block select { - case t.read <- buf: + case t.read <- out: default: t.l.Debugf("tun_disabled: dropped ICMP Echo Reply response") } @@ -154,22 +130,3 @@ func (p prettyPacket) String() string { return s.String() } - -func ipChecksum(b []byte) uint16 { - var c uint32 - sz := len(b) - 1 - - for i := 0; i < sz; i += 2 { - c += uint32(b[i]) << 8 - c += uint32(b[i+1]) - } - if sz%2 == 0 { - c += uint32(b[sz]) << 8 - } - - for (c >> 16) > 0 { - c = (c & 0xffff) + (c >> 16) - } - - return ^uint16(c) -}