mirror of https://github.com/slackhq/nebula.git
fix race in getOrHandshake (#400)
We missed this race with #396 (and I think this is also the crash in issue #226). We need to lock a little higher in the getOrHandshake method, before we reset hostinfo.ConnectionInfo. Previously, two routines could enter this section and confuse the handshake process. This could result in the other side sending a recv_error that also has a race with setting hostinfo.ConnectionInfo back to nil. So we make sure to grab the lock in handleRecvError as well. Neither of these code paths are in the hot path (handling packets between two hosts over an active tunnel) so there should be no performance concerns.
This commit is contained in:
parent
73a5ed90b2
commit
64d8035d09
34
inside.go
34
inside.go
|
@ -91,6 +91,17 @@ func (f *Interface) getOrHandshake(vpnIp uint32) *HostInfo {
|
|||
return hostinfo
|
||||
}
|
||||
|
||||
// Handshake is not ready, we need to grab the lock now before we start
|
||||
// the handshake process
|
||||
hostinfo.Lock()
|
||||
defer hostinfo.Unlock()
|
||||
|
||||
// Double check, now that we have the lock
|
||||
ci = hostinfo.ConnectionState
|
||||
if ci != nil && ci.eKey != nil && ci.ready {
|
||||
return hostinfo
|
||||
}
|
||||
|
||||
if ci == nil {
|
||||
// if we don't have a connection state, then send a handshake initiation
|
||||
ci = f.newConnectionState(true, noise.HandshakeIX, []byte{}, 0)
|
||||
|
@ -103,21 +114,16 @@ func (f *Interface) getOrHandshake(vpnIp uint32) *HostInfo {
|
|||
|
||||
// If we have already created the handshake packet, we don't want to call the function at all.
|
||||
if !hostinfo.HandshakeReady {
|
||||
hostinfo.Lock()
|
||||
defer hostinfo.Unlock()
|
||||
ixHandshakeStage0(f, vpnIp, hostinfo)
|
||||
// FIXME: Maybe make XX selectable, but probably not since psk makes it nearly pointless for us.
|
||||
//xx_handshakeStage0(f, ip, hostinfo)
|
||||
|
||||
if !hostinfo.HandshakeReady {
|
||||
ixHandshakeStage0(f, vpnIp, hostinfo)
|
||||
// FIXME: Maybe make XX selectable, but probably not since psk makes it nearly pointless for us.
|
||||
//xx_handshakeStage0(f, ip, hostinfo)
|
||||
|
||||
// If this is a static host, we don't need to wait for the HostQueryReply
|
||||
// We can trigger the handshake right now
|
||||
if _, ok := f.lightHouse.staticList[vpnIp]; ok {
|
||||
select {
|
||||
case f.handshakeManager.trigger <- vpnIp:
|
||||
default:
|
||||
}
|
||||
// If this is a static host, we don't need to wait for the HostQueryReply
|
||||
// We can trigger the handshake right now
|
||||
if _, ok := f.lightHouse.staticList[vpnIp]; ok {
|
||||
select {
|
||||
case f.handshakeManager.trigger <- vpnIp:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -329,6 +329,9 @@ func (f *Interface) handleRecvError(addr *udpAddr, h *Header) {
|
|||
return
|
||||
}
|
||||
|
||||
hostinfo.Lock()
|
||||
defer hostinfo.Unlock()
|
||||
|
||||
if !hostinfo.RecvErrorExceeded() {
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue