arikawa/voice/udp/connection.go

338 lines
10 KiB
Go

package udp
import (
"bytes"
"context"
"encoding/binary"
"io"
"net"
"sync"
"time"
"github.com/pkg/errors"
"golang.org/x/crypto/nacl/secretbox"
)
// ErrDecryptionFailed is returned from ReadPacket if the received packet fails
// to decrypt.
var ErrDecryptionFailed = errors.New("decryption failed")
// Dialer is the default dialer that this package uses for all its dialing.
var Dialer = net.Dialer{
Timeout: 10 * time.Second,
}
// Connection represents a voice connection. It is not thread-safe.
type Connection struct {
GatewayIP string
GatewayPort uint16
conn net.Conn
ssrc uint32
// frequency rate.Limiter
frequency *time.Ticker
timeIncr uint32
stopFreq chan struct{}
packet [12]byte
secret [32]byte
sequence uint16
timestamp uint32
nonce [24]byte
// recv fields
recvNonce [24]byte
recvBuf []byte // len 1400
recvOpus []byte // len 1400
recvPacket *Packet // uses recvOpus' backing array
closed sync.Once
}
// DialConnection dials the UDP connection using the given address and SSRC
// number.
func DialConnection(ctx context.Context, addr string, ssrc uint32) (*Connection, error) {
return DialConnectionCustom(ctx, &Dialer, addr, ssrc)
}
// DialConnectionCustom dials the UDP connection with a custom dialer.
func DialConnectionCustom(
ctx context.Context, dialer *net.Dialer, addr string, ssrc uint32) (*Connection, error) {
// Create a new UDP connection.
conn, err := dialer.DialContext(ctx, "udp", addr)
if err != nil {
return nil, errors.Wrap(err, "failed to dial host")
}
// https://discord.com/developers/docs/topics/voice-connections#ip-discovery
ssrcBuffer := [70]byte{
0x1, 0x2,
}
binary.BigEndian.PutUint16(ssrcBuffer[2:4], 70)
binary.BigEndian.PutUint32(ssrcBuffer[4:8], ssrc)
_, err = conn.Write(ssrcBuffer[:])
if err != nil {
return nil, errors.Wrap(err, "failed to write SSRC buffer")
}
var ipBuffer [70]byte
// ReadFull makes sure to read all 70 bytes.
_, err = io.ReadFull(conn, ipBuffer[:])
if err != nil {
return nil, errors.Wrap(err, "failed to read IP buffer")
}
ipbody := ipBuffer[4:68]
nullPos := bytes.Index(ipbody, []byte{'\x00'})
if nullPos < 0 {
return nil, errors.New("UDP IP discovery did not contain a null terminator")
}
ip := ipbody[:nullPos]
port := binary.LittleEndian.Uint16(ipBuffer[68:70])
// https://discord.com/developers/docs/topics/voice-connections#encrypting-and-sending-voice
packet := [12]byte{
0: 0x80, // Version + Flags
1: 0x78, // Payload Type
// [2:4] // Sequence
// [4:8] // Timestamp
}
// Write SSRC to the header.
binary.BigEndian.PutUint32(packet[8:12], ssrc) // SSRC
return &Connection{
GatewayIP: string(ip),
GatewayPort: port,
frequency: time.NewTicker(20 * time.Millisecond),
timeIncr: 960,
stopFreq: make(chan struct{}),
packet: packet,
ssrc: ssrc,
conn: conn,
recvBuf: make([]byte, 1400),
recvOpus: make([]byte, 1400),
recvPacket: &Packet{},
}, nil
}
// ResetFrequency resets the internal frequency ticker as well as the timestamp
// incremental number. For more information, refer to
// https://tools.ietf.org/html/rfc7587#section-4.2.
//
// frameDuration controls the Opus frame duration used by the UDP connection to
// control the frequency of packets sent over. 20ms is the default by libopus.
//
// timestampIncr is the timestamp to increment for each Opus packet. This should
// be consistent with th given frameDuration. For the right combination, refer
// to the Valid Parameters section below.
//
// Valid Parameters
//
// The following table lists the recommended parameters for these variables.
//
// +---------+-----+-----+------+------+
// | Mode | 10 | 20 | 40 | 60 |
// +---------+-----+-----+------+------+
// | ts incr | 480 | 960 | 1920 | 2880 |
// +---------+-----+-----+------+------+
//
// Note that audio mode is omitted, as it is not recommended. For the full
// table, refer to the IETF RFC7587 section 4.2 link above.
func (c *Connection) ResetFrequency(frameDuration time.Duration, timeIncr uint32) {
c.frequency.Stop()
c.frequency = time.NewTicker(frameDuration)
c.timeIncr = timeIncr
}
// UseSecret uses the given secret. This method is not thread-safe, so it should
// only be used right after initialization.
func (c *Connection) UseSecret(secret [32]byte) {
c.secret = secret
}
// SetWriteDeadline sets the UDP connection's write deadline.
func (c *Connection) SetWriteDeadline(deadline time.Time) {
c.conn.SetWriteDeadline(deadline)
}
// SetReadDeadline sets the UDP connection's read deadline.
func (c *Connection) SetReadDeadline(deadline time.Time) {
c.conn.SetReadDeadline(deadline)
}
// Close closes the connection.
func (c *Connection) Close() error {
c.closed.Do(func() {
// Be sure to only run this ONCE.
c.frequency.Stop()
close(c.stopFreq)
})
return c.conn.Close()
}
// Write sends a packet of audio into the voice UDP connection. It is made to be
// stream-compatible: the internal frequency clock will slow Write down to match
// the real playback time.
func (c *Connection) Write(b []byte) (int, error) {
// Write a new sequence.
binary.BigEndian.PutUint16(c.packet[2:4], c.sequence)
c.sequence++
binary.BigEndian.PutUint32(c.packet[4:8], c.timestamp)
c.timestamp += c.timeIncr
// Copy the first 12 bytes from the packet into the nonce.
copy(c.nonce[:12], c.packet[:])
// Seal the message, but reuse the packet buffer. We pass in the first 12
// bytes of the packet, but allow it to reuse the whole packet buffer
toSend := secretbox.Seal(c.packet[:12], b, &c.nonce, &c.secret)
select {
case <-c.frequency.C:
// ok
case <-c.stopFreq:
return 0, errors.Wrap(net.ErrClosed, "frequency ticker stopped")
}
_, err := c.conn.Write(toSend)
if err != nil {
return 0, err
}
return len(b), nil
}
// Packet represents a voice packet.
type Packet struct {
header []byte
Opus []byte
}
// VersionFlags returns the version flags of the current packet.
func (p *Packet) VersionFlags() byte { return p.header[0] }
// Type returns the packet type.
func (p *Packet) Type() byte { return p.header[1] }
// Sequence returns the packet sequence.
func (p *Packet) Sequence() uint16 { return binary.BigEndian.Uint16(p.header[2:4]) }
// Timestamp returns the packet's timestamp.
func (p *Packet) Timestamp() uint32 { return binary.BigEndian.Uint32(p.header[4:8]) }
// SSRC returns the packet's SSRC number.
func (p *Packet) SSRC() uint32 { return binary.BigEndian.Uint32(p.header[8:12]) }
// Copy copies the current packet into the given packet.
func (p *Packet) Copy(dst *Packet) {
dst.header = append(dst.header[:0], p.header...)
dst.Opus = append(dst.Opus[:0], p.Opus...)
}
const packetHeaderSize = 12
// ReadPacket reads the UDP connection and returns a packet if successful. The
// returned packet is invalidated once ReadPacket is called again. To avoid
// this, manually Copy the packet.
func (c *Connection) ReadPacket() (*Packet, error) {
if c.recvPacket.header == nil {
// Initialize the recvPacket's header.
c.recvPacket.header = c.recvBuf[:12]
}
for {
i, err := c.conn.Read(c.recvBuf)
if err != nil {
return nil, err
}
if i < packetHeaderSize || (c.recvBuf[0] != 0x80 && c.recvBuf[0] != 0x90) {
continue
}
// Copy the nonce to be read.
// TODO: once Go 1.17 is released, we can remove recvNonce and directly
// cast it as (*[packetHeaderSize]byte)(c.recvBuf).
copy(c.recvNonce[:], c.recvBuf[0:packetHeaderSize])
var ok bool
// Open (decrypt) the rest of the received bytes.
c.recvPacket.Opus, ok = secretbox.Open(
c.recvOpus[:0], c.recvBuf[packetHeaderSize:i], &c.recvNonce, &c.secret)
if !ok {
return nil, ErrDecryptionFailed
}
// Partial structure of the RTP header for reference
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |V=2|P|X| CC |M| PT | sequence number |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | timestamp |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// References
//
// https://tools.ietf.org/html/rfc3550#section-5.1
//
// We first check VersionFlags (8-bit) for whether or not the 4th bit
// (extension) is set. The value of 0x10 is 0b00010000. RFC3550 section
// 5.1 explains the extension bit as:
//
// If the extension bit is set, the fixed header MUST be followed by
// exactly one header extension, with a format defined in Section
// 5.3.1.
//
isExtension := c.recvPacket.VersionFlags()&0x10 == 0x10
// We then check for whether or not the marker bit (9th bit) is set. The
// 9th bit is carried over to the second byte (Type), so we check its
// presence with 0x80, or 0b10000000. RFC3550 section 5.1 explains the
// marker bit as:
//
// The interpretation of the marker is defined by a profile. It is
// intended to allow significant events such as frame boundaries to
// be marked in the packet stream. A profile MAY define additional
// marker bits or specify that there is no marker bit by changing
// the number of bits in the payload type field (see Section 5.3).
//
// RFC3350 section 12.1 also writes:
//
// When the RTCP packet type field is compared to the corresponding
// octet of the RTP header, this range corresponds to the marker bit
// being 1 (which it usually is not in data packets) and to the high
// bit of the standard payload type field being 1 (since the static
// payload types are typically defined in the low half).
//
// This implies that, when the marker bit is 1, the received packet is
// an RTCP packet and NOT an RTP packet; therefore, we must ignore the
// unknown sections, so we do a (NOT isMarker) check below.
isMarker := c.recvPacket.Type()&0x80 != 0x0
if isExtension && !isMarker {
extLen := binary.BigEndian.Uint16(c.recvPacket.Opus[2:4])
shift := 4 + 4*int(extLen)
if len(c.recvPacket.Opus) > shift {
c.recvPacket.Opus = c.recvPacket.Opus[shift:]
}
}
return c.recvPacket, nil
}
}