Added split package to help with completions

2025-07-14 16:06:18 +00:00 · 2020-06-10 01:12:41 -07:00 · 2020-06-10 01:12:41 -07:00 · 66dd2b1b11
parent b6eca8eafa
commit 66dd2b1b11
2 changed files with 210 additions and 0 deletions
--- a/text/split/split.go
+++ b/text/split/split.go
@ -0,0 +1,118 @@
+// Package split provides a simple string splitting utility for use with
+// CompleteMessage.
+package split
+
+import (
+	"unicode"
+	"unicode/utf8"
+)
+
+// asciiSpace is a lookup table indexed by byte value. The entry is 1 for the
+// ASCII whitespace bytes '\t', '\n', '\v', '\f', '\r' and ' ', and 0 for every
+// other byte.
+var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
+
+// SpaceIndexed splits text into whitespace-separated words and reports which
+// of them the cursor is on, in the form CompleteMessage wants. text is the
+// entire input string and offset is where the cursor currently is.
+//
+// A word is selected when offset lies anywhere from its first character up to
+// and including the position right after its last character. If no word
+// contains offset, the index of the last word is returned, which is -1 when
+// text holds no words at all.
+//
+// Input containing any non-ASCII byte is converted to runes and handled by
+// spaceIndexedRunes with the same offset.
+func SpaceIndexed(text string, offset int) ([]string, int) {
+	// Count the words up front so the result can be sized exactly, and OR
+	// every byte together so that one comparison afterwards tells whether any
+	// byte has its high bit set.
+	var (
+		count   int
+		seen    uint8
+		inSpace = true
+	)
+	for k := 0; k < len(text); k++ {
+		c := text[k]
+		seen |= c
+		space := asciiSpace[c] != 0
+		if inSpace && !space {
+			count++ // a word begins wherever a non-space follows a space
+		}
+		inSpace = space
+	}
+
+	if seen >= utf8.RuneSelf {
+		// Not pure ASCII: hand over to the rune-based splitter.
+		return spaceIndexedRunes([]rune(text), offset)
+	}
+
+	// ASCII fast path.
+	words := make([]string, 0, count)
+	current := count - 1 // fall back to the last word
+
+	pos := 0
+	for pos < len(text) {
+		// Skip whitespace before and between words.
+		if asciiSpace[text[pos]] != 0 {
+			pos++
+			continue
+		}
+
+		// Consume one word.
+		start := pos
+		for pos < len(text) && asciiSpace[text[pos]] == 0 {
+			pos++
+		}
+
+		// Only a word followed by whitespace is tested against the offset; a
+		// word running to the end of the input is already covered by the
+		// last-word fallback above.
+		if pos < len(text) && start <= offset && offset <= pos {
+			current = len(words)
+		}
+		words = append(words, text[start:pos])
+	}
+
+	return words, current
+}
+
+// spaceIndexedRunes is the Unicode-aware splitter used when the input is not
+// pure ASCII. It splits runes into fields separated by runs of runes for which
+// unicode.IsSpace is true, and returns the fields as strings together with the
+// index of the field the cursor is on.
+//
+// offset is compared against rune indices. A field is selected when offset
+// lies anywhere from its first rune up to and including the position right
+// after its last rune. If no field qualifies, the index of the last field is
+// returned, which is -1 when there are no fields at all.
+func spaceIndexedRunes(runes []rune, offset int) ([]string, int) {
+	fields := make([]string, 0, 16)
+
+	current := -1 // index of the field containing offset; -1 until one is found
+	start := -1   // first rune of the field being scanned; -1 while in whitespace
+
+	for i, r := range runes {
+		switch space := unicode.IsSpace(r); {
+		case !space && start < 0:
+			// First rune of a new field.
+			start = i
+		case space && start >= 0:
+			// The field [start, i) just ended.
+			if start <= offset && offset <= i {
+				current = len(fields)
+			}
+			fields = append(fields, string(runes[start:i]))
+			start = -1
+		}
+	}
+
+	// Last field might end at EOF.
+	if start >= 0 {
+		if start <= offset && offset <= len(runes) {
+			current = len(fields)
+		}
+		fields = append(fields, string(runes[start:]))
+	}
+
+	// No field contains the offset: assume the last one.
+	if current < 0 {
+		current = len(fields) - 1
+	}
+
+	return fields, current
+}
--- a/text/split/split_test.go
+++ b/text/split/split_test.go
@ -0,0 +1,92 @@
+package split
+
+import "testing"
+
+// TestSpaceIndexed checks both the words and the selected word index that
+// SpaceIndexed returns for a table of inputs and cursor offsets.
+func TestSpaceIndexed(t *testing.T) {
+	type testCase struct {
+		input  string
+		offset int
+		output []string
+		index  int
+	}
+
+	const question = "sorry, what were you typing?"
+
+	cases := []testCase{
+		{
+			input:  "bruhemus momentus lorem ipsum",
+			offset: 13, // inside "momentus"
+			output: []string{"bruhemus", "momentus", "lorem", "ipsum"},
+			index:  1,
+		},
+		{
+			input: "Yoohoo! My name's Astolfo! I belong to the Rider-class! And, and... uhm, nice " +
+				"to meet you!",
+			offset: 37, // on the first "to"
+			output: []string{
+				"Yoohoo!", "My", "name's", "Astolfo!", "I", "belong", "to", "the", "Rider-class!",
+				"And,", "and...", "uhm,", "nice", "to", "meet", "you!",
+			},
+			index: 6,
+		},
+		{
+			input:  question,
+			offset: len(question) - 1, // on the final character
+			output: []string{"sorry,", "what", "were", "you", "typing?"},
+			index:  4,
+		},
+		{
+			input:  "zeroed out input",
+			offset: 0,
+			output: []string{"zeroed", "out", "input"},
+			index:  0,
+		},
+		{
+			// Non-ASCII input separated by ideographic spaces.
+			input:  "に　ほ　ん　ご",
+			offset: 3,
+			output: []string{"に", "ほ", "ん", "ご"},
+			index:  1,
+		},
+	}
+
+	for _, tc := range cases {
+		words, current := SpaceIndexed(tc.input, tc.offset)
+
+		if !strsleq(words, tc.output) {
+			t.Error("Mismatch output (input/got/expected)", tc.input, words, tc.output)
+		}
+		if current != tc.index {
+			t.Error("Mismatch index (input/got/expected)", tc.input, current, tc.index)
+		}
+	}
+}
+
+// Inputs shared by the benchmarks below.
+const (
+	// benchstr is an ASCII-only sentence used as benchmark input.
+	benchstr = "Alright, Master! I'm your blade, your edge and your arrow! You've placed " +
+		"so much trust in me, despite how weak I am - I'll do everything in my power to not " +
+		"disappoint you!"
+
+	// benchcursor is the cursor offset handed to SpaceIndexed by the
+	// benchmarks; the value is arbitrary.
+	benchcursor = 32
+)
+
+// BenchmarkSpaceIndexed measures SpaceIndexed on the ASCII benchstr input.
+func BenchmarkSpaceIndexed(b *testing.B) {
+	for remaining := b.N; remaining > 0; remaining-- {
+		SpaceIndexed(benchstr, benchcursor)
+	}
+}
+
+// BenchmarkSpaceIndexedLong measures SpaceIndexed on an input made of six
+// back-to-back copies of benchstr.
+func BenchmarkSpaceIndexedLong(b *testing.B) {
+	const sixfold = benchstr + benchstr + benchstr +
+		benchstr + benchstr + benchstr
+
+	for remaining := b.N; remaining > 0; remaining-- {
+		SpaceIndexed(sixfold, benchcursor)
+	}
+}
+
+// benchstr8 is the same text as benchstr, except that the hyphen is replaced
+// with a horizontal line character that lies outside ASCII.
+const benchstr8 = "Alright, Master! I'm your blade, your edge and your arrow! You've placed " +
+	"so much trust in me, despite how weak I am ― I'll do everything in my power to not " +
+	"disappoint you!"
+
+// BenchmarkSpaceIndexedUTF8 measures SpaceIndexed on benchstr8, an input that
+// contains a non-ASCII character.
+func BenchmarkSpaceIndexedUTF8(b *testing.B) {
+	for remaining := b.N; remaining > 0; remaining-- {
+		SpaceIndexed(benchstr8, benchcursor)
+	}
+}
+
+// strsleq reports whether s1 and s2 have the same length and hold equal
+// strings at every index.
+func strsleq(s1, s2 []string) bool {
+	if len(s1) != len(s2) {
+		return false
+	}
+
+	for idx, want := range s2 {
+		if s1[idx] != want {
+			return false
+		}
+	}
+
+	return true
+}