1
0
Fork 0
mirror of https://github.com/diamondburned/arikawa.git synced 2024-11-09 00:14:57 +00:00
arikawa/voice/udp/udp.go

324 lines
9.3 KiB
Go

package udp
import (
"bytes"
"context"
"encoding/binary"
"io"
"net"
"time"
"github.com/pkg/errors"
"golang.org/x/crypto/nacl/secretbox"
)
const (
// packetHeaderSize is the length in bytes of the unencrypted header at the
// start of every voice packet. ReadPacket skips anything shorter than this,
// uses these bytes as the leading part of the decryption nonce and treats
// everything after them as the encrypted payload.
packetHeaderSize = 12
)
var (
// ErrDecryptionFailed is returned by ReadPacket when a received packet
// cannot be opened with the connection's secret.
ErrDecryptionFailed = errors.New("decryption failed")
// Dialer is the default dialer that this package uses for all its dialing.
// It is configured with a 10 second timeout.
Dialer = net.Dialer{
Timeout: 10 * time.Second,
}
)
// Packet represents a voice packet. It is not thread-safe.
//
// ReadPacket fills the header fields from the first 12 bytes of a received
// datagram and Opus from the decrypted remainder.
type Packet struct {
// VersionFlags is the first header byte (version and flags).
VersionFlags byte
// Type is the second header byte (payload type); ReadPacket treats its
// highest bit as the marker bit.
Type byte
// SSRC is read big-endian from header bytes 8 to 12.
SSRC uint32
// Sequence is read big-endian from header bytes 2 to 4.
Sequence uint16
// Timestamp is read big-endian from header bytes 4 to 8.
Timestamp uint32
// Opus is the decrypted payload. When returned by ReadPacket it points into
// the connection's reusable receive buffer, so it is only valid until the
// next ReadPacket call.
Opus []byte
}
// Connection represents a voice connection. It is not thread-safe.
type Connection struct {
// GatewayIP is the IP string parsed from the IP discovery response.
GatewayIP string
// GatewayPort is the port parsed from the IP discovery response.
GatewayPort uint16
// context is the context that write selects on while waiting for the next
// frequency tick; its deadline is mirrored onto conn as the write deadline.
context context.Context
// conn is the underlying UDP connection.
conn net.Conn
// ssrc is the SSRC given when dialing.
ssrc uint32
// frequency rate.Limiter
// frequency paces outgoing packets: write blocks until it ticks.
frequency *time.Ticker
// timeIncr is added to timestamp after each written packet.
timeIncr uint32
// packet is the 12-byte header reused for every outgoing packet; the
// sequence and timestamp bytes are rewritten on each write.
packet [12]byte
// secret is the secretbox key used for both sealing and opening packets.
secret [32]byte
// sequence is the sequence number of the next outgoing packet.
sequence uint16
// timestamp is the timestamp of the next outgoing packet.
timestamp uint32
// nonce is the sealing nonce; its leading bytes are overwritten with the
// packet header before each write.
nonce [24]byte
// recv fields
// recvNonce is the opening nonce; its leading bytes are overwritten with the
// received packet's header.
recvNonce [24]byte
recvBuf []byte // len 1400
recvOpus []byte // len 1400
recvPacket *Packet // uses recvOpus' backing array
}
// DialConnectionCtx dials the voice UDP server at addr using the package-level
// Dialer and performs the IP discovery exchange for the given SSRC. On success
// it returns a Connection whose GatewayIP and GatewayPort hold the address
// reported in the discovery response, with a 20ms write ticker and a timestamp
// increment of 960.
//
// ctx governs the dial. If ctx carries a deadline, the deadline also bounds the
// discovery write and read so that a lost UDP reply cannot block the caller
// forever; the deadline is cleared again before the Connection is returned.
//
// The UDP socket is closed on every error path.
func DialConnectionCtx(ctx context.Context, addr string, ssrc uint32) (*Connection, error) {
	// Create a new UDP connection.
	conn, err := Dialer.DialContext(ctx, "udp", addr)
	if err != nil {
		return nil, errors.Wrap(err, "failed to dial host")
	}

	// Any failure past this point must release the socket, or it would leak.
	established := false
	defer func() {
		if !established {
			conn.Close()
		}
	}()

	deadline, hasDeadline := ctx.Deadline()
	if hasDeadline {
		if err := conn.SetDeadline(deadline); err != nil {
			return nil, errors.Wrap(err, "failed to set discovery deadline")
		}
	}

	// https://discord.com/developers/docs/topics/voice-connections#ip-discovery
	ssrcBuffer := [70]byte{
		0x1, 0x2,
	}
	binary.BigEndian.PutUint16(ssrcBuffer[2:4], 70)
	binary.BigEndian.PutUint32(ssrcBuffer[4:8], ssrc)

	_, err = conn.Write(ssrcBuffer[:])
	if err != nil {
		return nil, errors.Wrap(err, "failed to write SSRC buffer")
	}

	var ipBuffer [70]byte

	// ReadFull makes sure to read all 70 bytes.
	_, err = io.ReadFull(conn, ipBuffer[:])
	if err != nil {
		return nil, errors.Wrap(err, "failed to read IP buffer")
	}

	if hasDeadline {
		// The deadline only applies to the handshake; clear it so it does not
		// affect reads and writes made through the returned Connection.
		if err := conn.SetDeadline(time.Time{}); err != nil {
			return nil, errors.Wrap(err, "failed to clear discovery deadline")
		}
	}

	// The address is a null-terminated string inside bytes 4 to 68.
	ipbody := ipBuffer[4:68]

	nullPos := bytes.Index(ipbody, []byte{'\x00'})
	if nullPos < 0 {
		return nil, errors.New("UDP IP discovery did not contain a null terminator")
	}

	ip := ipbody[:nullPos]
	port := binary.LittleEndian.Uint16(ipBuffer[68:70])

	// https://discord.com/developers/docs/topics/voice-connections#encrypting-and-sending-voice
	packet := [12]byte{
		0: 0x80, // Version + Flags
		1: 0x78, // Payload Type
		// [2:4] // Sequence
		// [4:8] // Timestamp
	}

	// Write SSRC to the header.
	binary.BigEndian.PutUint32(packet[8:12], ssrc) // SSRC

	established = true

	return &Connection{
		GatewayIP:   string(ip),
		GatewayPort: port,

		frequency: time.NewTicker(20 * time.Millisecond),
		timeIncr:  960,

		context: context.Background(),
		packet:  packet,
		ssrc:    ssrc,
		conn:    conn,

		recvBuf:    make([]byte, 1400),
		recvOpus:   make([]byte, 1400),
		recvPacket: &Packet{},
	}, nil
}
// ResetFrequency replaces the internal frequency ticker and the timestamp
// increment in one go. For background, see
// https://tools.ietf.org/html/rfc7587#section-4.2.
//
// frameDuration is the Opus frame duration; it determines how often the UDP
// connection lets a packet through. 20ms is the default by libopus.
//
// timeIncr is the amount the timestamp advances for each Opus packet. It should
// be consistent with the given frameDuration. For the right combination, refer
// to the Valid Parameters section below.
//
// Valid Parameters
//
// The following table lists the recommended parameters for these variables.
//
//    +---------+-----+-----+------+------+
//    |   Mode  |  10 |  20 |  40  |  60  |
//    +---------+-----+-----+------+------+
//    | ts incr | 480 | 960 | 1920 | 2880 |
//    +---------+-----+-----+------+------+
//
// Note that audio mode is omitted, as it is not recommended. For the full
// table, refer to the IETF RFC7587 section 4.2 link above.
func (c *Connection) ResetFrequency(frameDuration time.Duration, timeIncr uint32) {
	// Stop the previous ticker before swapping in its replacement.
	c.frequency.Stop()
	c.frequency, c.timeIncr = time.NewTicker(frameDuration), timeIncr
}
// UseSecret installs the given key as the connection's secret, which is used
// to seal outgoing and open incoming packets. This method is not thread-safe,
// so it should only be called right after initialization.
func (c *Connection) UseSecret(secret [32]byte) {
	copy(c.secret[:], secret[:])
}
// UseContext makes the connection use ctx for its Write method. A context
// passed to WriteCtx takes precedence over it for the duration of that call.
// The returned error comes from updating the connection's write deadline.
func (c *Connection) UseContext(ctx context.Context) error {
	err := c.useContext(ctx)
	return err
}
// useContext stores ctx as the connection's context and mirrors its deadline
// onto the socket's write deadline. It is a no-op when ctx is already the
// connection's context.
func (c *Connection) useContext(ctx context.Context) error {
	if ctx == c.context {
		// Nothing changes, so the write deadline is already in place.
		return nil
	}
	c.context = ctx

	// The zero time clears any previously set write deadline.
	var writeDeadline time.Time
	if d, ok := ctx.Deadline(); ok {
		writeDeadline = d
	}

	return c.conn.SetWriteDeadline(writeDeadline)
}
// Close stops the frequency ticker and then closes the underlying connection,
// returning the error from that close.
func (c *Connection) Close() error {
	ticker, sock := c.frequency, c.conn

	ticker.Stop()
	return sock.Close()
}
// Write seals b and sends it over the voice UDP connection, using the context
// that was previously set on the connection.
func (c *Connection) Write(b []byte) (int, error) {
	n, err := c.write(b)
	return n, err
}
// WriteCtx sends bytes into the voice UDP connection with a timeout using the
// given context. It ignores the context inside the connection, but will restore
// that context and its deadline after this call is done.
//
// If the given context's deadline cannot be applied to the connection, nothing
// is written and that error is returned.
func (c *Connection) WriteCtx(ctx context.Context, b []byte) (int, error) {
	oldCtx := c.context

	// Registered before switching contexts so that the previous context is put
	// back even when switching fails halfway. Restoring is best-effort: there
	// is no sensible way to report its error alongside the write's result.
	defer func() { _ = c.useContext(oldCtx) }()

	if err := c.useContext(ctx); err != nil {
		return 0, err
	}

	return c.write(b)
}
// write stamps the reusable header with the current sequence and timestamp,
// seals b behind it, waits for the next frequency tick (or for the connection's
// context to finish) and then sends the sealed packet. On success it reports
// len(b) rather than the number of bytes that went over the wire.
func (c *Connection) write(b []byte) (int, error) {
	header := c.packet[:]

	// Stamp the header, then advance both counters for the next packet.
	binary.BigEndian.PutUint16(header[2:4], c.sequence)
	binary.BigEndian.PutUint32(header[4:8], c.timestamp)
	c.sequence++
	c.timestamp += c.timeIncr

	// The header doubles as the leading bytes of the nonce.
	copy(c.nonce[:], header)
	sealed := secretbox.Seal(header, b, &c.nonce, &c.secret)

	// Pace the packet; give up if the context ends first.
	select {
	case <-c.context.Done():
		return 0, c.context.Err()
	case <-c.frequency.C:
	}

	if n, err := c.conn.Write(sealed); err != nil {
		return n, errors.Wrap(err, "failed to write to UDP connection")
	}

	// We're not really returning everything, since we're "sealing" the bytes.
	return len(b), nil
}
// ReadPacket reads the UDP connection and returns a packet if successful.
// Datagrams that are shorter than the packet header or whose first byte is
// neither 0x80 nor 0x90 are skipped, and the next datagram is read instead.
//
// The returned packet is not thread-safe to use: it is reused across calls and
// its Opus slice shares recvOpus' backing array. Byte slices inside it must be
// copied or used before the next call to ReadPacket happens.
//
// ErrDecryptionFailed is returned if the payload cannot be opened with the
// connection's secret. Read errors are returned as-is.
func (c *Connection) ReadPacket() (*Packet, error) {
	for {
		rlen, err := c.conn.Read(c.recvBuf)
		if err != nil {
			return nil, err
		}

		if rlen < packetHeaderSize || (c.recvBuf[0] != 0x80 && c.recvBuf[0] != 0x90) {
			continue
		}

		c.recvPacket.VersionFlags = c.recvBuf[0]
		c.recvPacket.Type = c.recvBuf[1]
		c.recvPacket.Sequence = binary.BigEndian.Uint16(c.recvBuf[2:4])
		c.recvPacket.Timestamp = binary.BigEndian.Uint32(c.recvBuf[4:8])
		c.recvPacket.SSRC = binary.BigEndian.Uint32(c.recvBuf[8:12])

		// The header doubles as the leading bytes of the nonce.
		copy(c.recvNonce[:], c.recvBuf[0:packetHeaderSize])

		var ok bool

		// Decrypt into recvOpus so that no allocation is needed per packet.
		c.recvPacket.Opus, ok = secretbox.Open(
			c.recvOpus[:0], c.recvBuf[packetHeaderSize:rlen], &c.recvNonce, &c.secret)
		if !ok {
			return nil, ErrDecryptionFailed
		}

		// Partial structure of the RTP header for reference
		//
		//     0                   1                   2                   3
		//     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
		//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		//    |V=2|P|X|  CC   |M|     PT      |       sequence number         |
		//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		//    |                           timestamp                           |
		//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
		//
		// References
		//
		//    https://tools.ietf.org/html/rfc3550#section-5.1
		//

		// We first check VersionFlags (8-bit) for whether or not the 4th bit
		// (extension) is set. The value of 0x10 is 0b00010000. RFC3550 section
		// 5.1 explains the extension bit as:
		//
		//    If the extension bit is set, the fixed header MUST be followed by
		//    exactly one header extension, with a format defined in Section
		//    5.3.1.
		//
		isExtension := c.recvPacket.VersionFlags&0x10 == 0x10

		// We then check for whether or not the marker bit (9th bit) is set. The
		// 9th bit is carried over to the second byte (Type), so we check its
		// presence with 0x80, or 0b10000000. RFC3550 section 5.1 explains the
		// marker bit as:
		//
		//    The interpretation of the marker is defined by a profile. It is
		//    intended to allow significant events such as frame boundaries to
		//    be marked in the packet stream. A profile MAY define additional
		//    marker bits or specify that there is no marker bit by changing
		//    the number of bits in the payload type field (see Section 5.3).
		//
		// RFC3550 section 12.1 also writes:
		//
		//    When the RTCP packet type field is compared to the corresponding
		//    octet of the RTP header, this range corresponds to the marker bit
		//    being 1 (which it usually is not in data packets) and to the high
		//    bit of the standard payload type field being 1 (since the static
		//    payload types are typically defined in the low half).
		//
		// This implies that, when the marker bit is 1, the received packet is
		// an RTCP packet and NOT an RTP packet; therefore, we must ignore the
		// unknown sections, so we do a (NOT isMarker) check below.
		isMarker := c.recvPacket.Type&0x80 != 0x0

		// The extension header is 4 bytes, with its length (counted in 32-bit
		// words) in bytes 2 to 4. The payload comes from the network, so make
		// sure those bytes exist before reading them; a payload too short to
		// hold an extension header is returned untouched rather than panicking.
		if payload := c.recvPacket.Opus; isExtension && !isMarker && len(payload) >= 4 {
			extLen := binary.BigEndian.Uint16(payload[2:4])
			shift := 4 + 4*int(extLen)

			if len(payload) > shift {
				c.recvPacket.Opus = payload[shift:]
			}
		}

		return c.recvPacket, nil
	}
}