mirror of
https://github.com/diamondburned/arikawa.git
synced 2025-08-31 10:57:24 +00:00
This commit refactors a lot of voice's internals to be more stable and handle more edge cases from Discord's voice servers. It should result in an overall more stable voice connection. A few helper functions have been added into voice.Session. Some fields will have been broken and changed to accommodate the refactor, as well. Below are some commits that have been squashed in: voice: Fix Speaking() panic on closed voice: StopSpeaking should not error out The rationale is added as a comment into the Speaking() method. voice: Add TestKickedOut voice: Fix region change disconnecting
338 lines
10 KiB
Go
338 lines
10 KiB
Go
package udp
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/binary"
|
|
"io"
|
|
"net"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/pkg/errors"
|
|
"golang.org/x/crypto/nacl/secretbox"
|
|
)
|
|
|
|
// ErrDecryptionFailed is the error ReadPacket returns when a received packet
// cannot be decrypted.
var ErrDecryptionFailed = errors.New("decryption failed")
|
|
|
|
// Dialer is the package-wide default dialer; DialConnection dials with it. Its
// dial timeout is 10 seconds.
var Dialer = net.Dialer{Timeout: 10 * time.Second}
|
|
|
|
// Connection represents a voice connection. It is not thread-safe.
|
|
type Connection struct {
|
|
GatewayIP string
|
|
GatewayPort uint16
|
|
|
|
conn net.Conn
|
|
ssrc uint32
|
|
|
|
// frequency rate.Limiter
|
|
frequency *time.Ticker
|
|
timeIncr uint32
|
|
stopFreq chan struct{}
|
|
|
|
packet [12]byte
|
|
secret [32]byte
|
|
|
|
sequence uint16
|
|
timestamp uint32
|
|
nonce [24]byte
|
|
|
|
// recv fields
|
|
recvNonce [24]byte
|
|
recvBuf []byte // len 1400
|
|
recvOpus []byte // len 1400
|
|
recvPacket *Packet // uses recvOpus' backing array
|
|
|
|
closed sync.Once
|
|
}
|
|
|
|
// DialConnection dials the UDP connection using the given address and SSRC
|
|
// number.
|
|
func DialConnection(ctx context.Context, addr string, ssrc uint32) (*Connection, error) {
|
|
return DialConnectionCustom(ctx, &Dialer, addr, ssrc)
|
|
}
|
|
|
|
// DialConnectionCustom dials the UDP connection with a custom dialer.
|
|
func DialConnectionCustom(
|
|
ctx context.Context, dialer *net.Dialer, addr string, ssrc uint32) (*Connection, error) {
|
|
|
|
// Create a new UDP connection.
|
|
conn, err := dialer.DialContext(ctx, "udp", addr)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to dial host")
|
|
}
|
|
|
|
// https://discord.com/developers/docs/topics/voice-connections#ip-discovery
|
|
ssrcBuffer := [70]byte{
|
|
0x1, 0x2,
|
|
}
|
|
binary.BigEndian.PutUint16(ssrcBuffer[2:4], 70)
|
|
binary.BigEndian.PutUint32(ssrcBuffer[4:8], ssrc)
|
|
|
|
_, err = conn.Write(ssrcBuffer[:])
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to write SSRC buffer")
|
|
}
|
|
|
|
var ipBuffer [70]byte
|
|
|
|
// ReadFull makes sure to read all 70 bytes.
|
|
_, err = io.ReadFull(conn, ipBuffer[:])
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "failed to read IP buffer")
|
|
}
|
|
|
|
ipbody := ipBuffer[4:68]
|
|
|
|
nullPos := bytes.Index(ipbody, []byte{'\x00'})
|
|
if nullPos < 0 {
|
|
return nil, errors.New("UDP IP discovery did not contain a null terminator")
|
|
}
|
|
|
|
ip := ipbody[:nullPos]
|
|
port := binary.LittleEndian.Uint16(ipBuffer[68:70])
|
|
|
|
// https://discord.com/developers/docs/topics/voice-connections#encrypting-and-sending-voice
|
|
packet := [12]byte{
|
|
0: 0x80, // Version + Flags
|
|
1: 0x78, // Payload Type
|
|
// [2:4] // Sequence
|
|
// [4:8] // Timestamp
|
|
}
|
|
|
|
// Write SSRC to the header.
|
|
binary.BigEndian.PutUint32(packet[8:12], ssrc) // SSRC
|
|
|
|
return &Connection{
|
|
GatewayIP: string(ip),
|
|
GatewayPort: port,
|
|
frequency: time.NewTicker(20 * time.Millisecond),
|
|
timeIncr: 960,
|
|
stopFreq: make(chan struct{}),
|
|
packet: packet,
|
|
ssrc: ssrc,
|
|
conn: conn,
|
|
recvBuf: make([]byte, 1400),
|
|
recvOpus: make([]byte, 1400),
|
|
recvPacket: &Packet{},
|
|
}, nil
|
|
}
|
|
|
|
// ResetFrequency resets the internal frequency ticker as well as the timestamp
|
|
// incremental number. For more information, refer to
|
|
// https://tools.ietf.org/html/rfc7587#section-4.2.
|
|
//
|
|
// frameDuration controls the Opus frame duration used by the UDP connection to
|
|
// control the frequency of packets sent over. 20ms is the default by libopus.
|
|
//
|
|
// timestampIncr is the timestamp to increment for each Opus packet. This should
|
|
// be consistent with th given frameDuration. For the right combination, refer
|
|
// to the Valid Parameters section below.
|
|
//
|
|
// Valid Parameters
|
|
//
|
|
// The following table lists the recommended parameters for these variables.
|
|
//
|
|
// +---------+-----+-----+------+------+
|
|
// | Mode | 10 | 20 | 40 | 60 |
|
|
// +---------+-----+-----+------+------+
|
|
// | ts incr | 480 | 960 | 1920 | 2880 |
|
|
// +---------+-----+-----+------+------+
|
|
//
|
|
// Note that audio mode is omitted, as it is not recommended. For the full
|
|
// table, refer to the IETF RFC7587 section 4.2 link above.
|
|
func (c *Connection) ResetFrequency(frameDuration time.Duration, timeIncr uint32) {
|
|
c.frequency.Stop()
|
|
c.frequency = time.NewTicker(frameDuration)
|
|
c.timeIncr = timeIncr
|
|
}
|
|
|
|
// UseSecret uses the given secret. This method is not thread-safe, so it should
|
|
// only be used right after initialization.
|
|
func (c *Connection) UseSecret(secret [32]byte) {
|
|
c.secret = secret
|
|
}
|
|
|
|
// SetWriteDeadline sets the UDP connection's write deadline.
|
|
func (c *Connection) SetWriteDeadline(deadline time.Time) {
|
|
c.conn.SetWriteDeadline(deadline)
|
|
}
|
|
|
|
// SetReadDeadline sets the UDP connection's read deadline.
|
|
func (c *Connection) SetReadDeadline(deadline time.Time) {
|
|
c.conn.SetReadDeadline(deadline)
|
|
}
|
|
|
|
// Close closes the connection.
|
|
func (c *Connection) Close() error {
|
|
c.closed.Do(func() {
|
|
// Be sure to only run this ONCE.
|
|
c.frequency.Stop()
|
|
close(c.stopFreq)
|
|
})
|
|
|
|
return c.conn.Close()
|
|
}
|
|
|
|
// Write sends a packet of audio into the voice UDP connection. It is made to be
|
|
// stream-compatible: the internal frequency clock will slow Write down to match
|
|
// the real playback time.
|
|
func (c *Connection) Write(b []byte) (int, error) {
|
|
// Write a new sequence.
|
|
binary.BigEndian.PutUint16(c.packet[2:4], c.sequence)
|
|
c.sequence++
|
|
|
|
binary.BigEndian.PutUint32(c.packet[4:8], c.timestamp)
|
|
c.timestamp += c.timeIncr
|
|
|
|
// Copy the first 12 bytes from the packet into the nonce.
|
|
copy(c.nonce[:12], c.packet[:])
|
|
|
|
// Seal the message, but reuse the packet buffer. We pass in the first 12
|
|
// bytes of the packet, but allow it to reuse the whole packet buffer
|
|
toSend := secretbox.Seal(c.packet[:12], b, &c.nonce, &c.secret)
|
|
|
|
select {
|
|
case <-c.frequency.C:
|
|
// ok
|
|
case <-c.stopFreq:
|
|
return 0, errors.Wrap(net.ErrClosed, "frequency ticker stopped")
|
|
}
|
|
|
|
_, err := c.conn.Write(toSend)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
return len(b), nil
|
|
}
|
|
|
|
// Packet is a single voice packet: its header bytes plus the Opus payload.
// The accessors below index into the header directly and panic if it is
// shorter than the field they read.
type Packet struct {
	header []byte
	Opus   []byte
}

// VersionFlags returns the first header byte, which holds the version and
// flags of the packet.
func (p *Packet) VersionFlags() byte {
	return p.header[0]
}

// Type returns the second header byte, the packet type.
func (p *Packet) Type() byte {
	return p.header[1]
}

// Sequence returns the packet's sequence number, stored big-endian in header
// bytes 2..4.
func (p *Packet) Sequence() uint16 {
	field := p.header[2:4]
	return binary.BigEndian.Uint16(field)
}

// Timestamp returns the packet's timestamp, stored big-endian in header bytes
// 4..8.
func (p *Packet) Timestamp() uint32 {
	field := p.header[4:8]
	return binary.BigEndian.Uint32(field)
}

// SSRC returns the packet's SSRC number, stored big-endian in header bytes
// 8..12.
func (p *Packet) SSRC() uint32 {
	field := p.header[8:12]
	return binary.BigEndian.Uint32(field)
}

// Copy writes the contents of p into dst. The existing backing arrays of dst
// are reused when they are large enough; the data itself is copied, so dst
// does not alias p afterwards.
func (p *Packet) Copy(dst *Packet) {
	header := append(dst.header[:0], p.header...)
	opus := append(dst.Opus[:0], p.Opus...)

	dst.header, dst.Opus = header, opus
}
|
|
|
|
// packetHeaderSize is the length in bytes of the header at the front of every
// received voice packet. ReadPacket skips datagrams shorter than this and uses
// these leading bytes as the start of the decryption nonce.
const packetHeaderSize = 12
|
|
|
|
// ReadPacket reads the UDP connection and returns a packet if successful. The
|
|
// returned packet is invalidated once ReadPacket is called again. To avoid
|
|
// this, manually Copy the packet.
|
|
func (c *Connection) ReadPacket() (*Packet, error) {
|
|
if c.recvPacket.header == nil {
|
|
// Initialize the recvPacket's header.
|
|
c.recvPacket.header = c.recvBuf[:12]
|
|
}
|
|
|
|
for {
|
|
i, err := c.conn.Read(c.recvBuf)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if i < packetHeaderSize || (c.recvBuf[0] != 0x80 && c.recvBuf[0] != 0x90) {
|
|
continue
|
|
}
|
|
|
|
// Copy the nonce to be read.
|
|
// TODO: once Go 1.17 is released, we can remove recvNonce and directly
|
|
// cast it as (*[packetHeaderSize]byte)(c.recvBuf).
|
|
copy(c.recvNonce[:], c.recvBuf[0:packetHeaderSize])
|
|
|
|
var ok bool
|
|
|
|
// Open (decrypt) the rest of the received bytes.
|
|
c.recvPacket.Opus, ok = secretbox.Open(
|
|
c.recvOpus[:0], c.recvBuf[packetHeaderSize:i], &c.recvNonce, &c.secret)
|
|
if !ok {
|
|
return nil, ErrDecryptionFailed
|
|
}
|
|
|
|
// Partial structure of the RTP header for reference
|
|
//
|
|
// 0 1 2 3
|
|
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
|
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
// |V=2|P|X| CC |M| PT | sequence number |
|
|
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
// | timestamp |
|
|
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
//
|
|
// References
|
|
//
|
|
// https://tools.ietf.org/html/rfc3550#section-5.1
|
|
//
|
|
|
|
// We first check VersionFlags (8-bit) for whether or not the 4th bit
|
|
// (extension) is set. The value of 0x10 is 0b00010000. RFC3550 section
|
|
// 5.1 explains the extension bit as:
|
|
//
|
|
// If the extension bit is set, the fixed header MUST be followed by
|
|
// exactly one header extension, with a format defined in Section
|
|
// 5.3.1.
|
|
//
|
|
isExtension := c.recvPacket.VersionFlags()&0x10 == 0x10
|
|
|
|
// We then check for whether or not the marker bit (9th bit) is set. The
|
|
// 9th bit is carried over to the second byte (Type), so we check its
|
|
// presence with 0x80, or 0b10000000. RFC3550 section 5.1 explains the
|
|
// marker bit as:
|
|
//
|
|
// The interpretation of the marker is defined by a profile. It is
|
|
// intended to allow significant events such as frame boundaries to
|
|
// be marked in the packet stream. A profile MAY define additional
|
|
// marker bits or specify that there is no marker bit by changing
|
|
// the number of bits in the payload type field (see Section 5.3).
|
|
//
|
|
// RFC3350 section 12.1 also writes:
|
|
//
|
|
// When the RTCP packet type field is compared to the corresponding
|
|
// octet of the RTP header, this range corresponds to the marker bit
|
|
// being 1 (which it usually is not in data packets) and to the high
|
|
// bit of the standard payload type field being 1 (since the static
|
|
// payload types are typically defined in the low half).
|
|
//
|
|
// This implies that, when the marker bit is 1, the received packet is
|
|
// an RTCP packet and NOT an RTP packet; therefore, we must ignore the
|
|
// unknown sections, so we do a (NOT isMarker) check below.
|
|
isMarker := c.recvPacket.Type()&0x80 != 0x0
|
|
|
|
if isExtension && !isMarker {
|
|
extLen := binary.BigEndian.Uint16(c.recvPacket.Opus[2:4])
|
|
shift := 4 + 4*int(extLen)
|
|
|
|
if len(c.recvPacket.Opus) > shift {
|
|
c.recvPacket.Opus = c.recvPacket.Opus[shift:]
|
|
}
|
|
}
|
|
|
|
return c.recvPacket, nil
|
|
}
|
|
}
|