feat: add OmitBy, OmitByKeys, OmitByValues

feat: add WordCount
feat: add SplitWords
2026-03-01 00:35:28 +08:00 · 2023-02-21 14:52:10 +08:00 · 2023-02-21 14:27:00 +08:00 · 2023-02-21 14:16:36 +08:00
6 changed files with 292 additions and 0 deletions
@@ -123,6 +123,45 @@ func FilterByValues[K comparable, V comparable](m map[K]V, values []V) map[K]V {
 	return result
 }

// OmitBy is the opposite of Filter: it returns a new map holding only the
// entries of m for which predicate reports false. The input map is left
// untouched, and the result is never nil.
// Play: todo
func OmitBy[K comparable, V any](m map[K]V, predicate func(key K, value V) bool) map[K]V {
	kept := make(map[K]V)

	for key, value := range m {
		if predicate(key, value) {
			// Entry matches the predicate, so it is omitted.
			continue
		}
		kept[key] = value
	}

	return kept
}
+
+// OmitByKeys the opposite of FilterByKeys, extracts all the map elements which keys are not omitted.
+// Play: todo
+func OmitByKeys[K comparable, V any](m map[K]V, keys []K) map[K]V {
+	result := make(map[K]V)
+
+	for k, v := range m {
+		if !slice.Contain(keys, k) {
+			result[k] = v
+		}
+	}
+	return result
+}
+
+// OmitByValues the opposite of FilterByValues. remov all elements whose value are in the give slice.
+// Play: todo
+func OmitByValues[K comparable, V comparable](m map[K]V, values []V) map[K]V {
+	result := make(map[K]V)
+
+	for k, v := range m {
+		if !slice.Contain(values, v) {
+			result[k] = v
+		}
+	}
+	return result
+}
+
 // Intersect iterates over maps, return a new map of key and value pairs in all given maps.
 // Play: https://go.dev/play/p/Zld0oj3sjcC
 func Intersect[K comparable, V any](maps ...map[K]V) map[K]V {
@@ -188,6 +188,69 @@ func TestFilterByValues(t *testing.T) {
 	}, acturl)
 }

+func TestOmitBy(t *testing.T) {
+	assert := internal.NewAssert(t, "TestOmitBy")
+
+	m := map[string]int{
+		"a": 1,
+		"b": 2,
+		"c": 3,
+		"d": 4,
+		"e": 5,
+	}
+	isEven := func(_ string, value int) bool {
+		return value%2 == 0
+	}
+
+	acturl := OmitBy(m, isEven)
+
+	assert.Equal(map[string]int{
+		"a": 1,
+		"c": 3,
+		"e": 5,
+	}, acturl)
+}
+
+func TestOmitByKeys(t *testing.T) {
+	assert := internal.NewAssert(t, "TestOmitByKeys")
+
+	m := map[string]int{
+		"a": 1,
+		"b": 2,
+		"c": 3,
+		"d": 4,
+		"e": 5,
+	}
+
+	acturl := OmitByKeys(m, []string{"a", "b"})
+
+	assert.Equal(map[string]int{
+		"c": 3,
+		"d": 4,
+		"e": 5,
+	}, acturl)
+}
+
+func TestOmitByValues(t *testing.T) {
+	assert := internal.NewAssert(t, "TestOmitByValues")
+
+	m := map[string]int{
+		"a": 1,
+		"b": 2,
+		"c": 3,
+		"d": 4,
+		"e": 5,
+	}
+
+	acturl := OmitByValues(m, []int{4, 5})
+
+	assert.Equal(map[string]int{
+		"a": 1,
+		"b": 2,
+		"c": 3,
+	}, acturl)
+}
+
 func TestIntersect(t *testing.T) {
 	assert := internal.NewAssert(t, "TestIntersect")

@@ -287,3 +287,76 @@ func Substring(s string, offset int, length uint) string {

 	return strings.Replace(str, "\x00", "", -1)
 }
+
+// SplitWords splits a string into words, word only contains alphabetic characters.
+// Play: todo
+func SplitWords(s string) []string {
+	var word string
+	var words []string
+	var r rune
+	var size, pos int
+
+	isWord := false
+
+	for len(s) > 0 {
+		r, size = utf8.DecodeRuneInString(s)
+
+		switch {
+		case isLetter(r):
+			if !isWord {
+				isWord = true
+				word = s
+				pos = 0
+			}
+
+		case isWord && (r == '\'' || r == '-'):
+			// is word
+
+		default:
+			if isWord {
+				isWord = false
+				words = append(words, word[:pos])
+			}
+		}
+
+		pos += size
+		s = s[size:]
+	}
+
+	if isWord {
+		words = append(words, word[:pos])
+	}
+
+	return words
+}
+
+// WordCount return the number of meaningful word, word only contains alphabetic characters.
+// Play: todo
+func WordCount(s string) int {
+	var r rune
+	var size, count int
+
+	isWord := false
+
+	for len(s) > 0 {
+		r, size = utf8.DecodeRuneInString(s)
+
+		switch {
+		case isLetter(r):
+			if !isWord {
+				isWord = true
+				count++
+			}
+
+		case isWord && (r == '\'' || r == '-'):
+			// is word
+
+		default:
+			isWord = false
+		}
+
+		s = s[size:]
+	}
+
+	return count
+}
@@ -388,3 +388,53 @@ func ExampleSubstring() {
 	// de
 	// 你好
 }
+
+func ExampleSplitWords() {
+
+	result1 := SplitWords("a word")
+	result2 := SplitWords("I'am a programmer")
+	result3 := SplitWords("Bonjour, je suis programmeur")
+	result4 := SplitWords("a -b-c' 'd'e")
+	result5 := SplitWords("你好，我是一名码农")
+	result6 := SplitWords("こんにちは，私はプログラマーです")
+
+	fmt.Println(result1)
+	fmt.Println(result2)
+	fmt.Println(result3)
+	fmt.Println(result4)
+	fmt.Println(result5)
+	fmt.Println(result6)
+
+	// Output:
+	// [a word]
+	// [I'am a programmer]
+	// [Bonjour je suis programmeur]
+	// [a b-c' d'e]
+	// []
+	// []
+}
+
+func ExampleWordCount() {
+
+	result1 := WordCount("a word")
+	result2 := WordCount("I'am a programmer")
+	result3 := WordCount("Bonjour, je suis programmeur")
+	result4 := WordCount("a -b-c' 'd'e")
+	result5 := WordCount("你好，我是一名码农")
+	result6 := WordCount("こんにちは，私はプログラマーです")
+
+	fmt.Println(result1)
+	fmt.Println(result2)
+	fmt.Println(result3)
+	fmt.Println(result4)
+	fmt.Println(result5)
+	fmt.Println(result6)
+
+	// Output:
+	// 2
+	// 3
+	// 4
+	// 3
+	// 0
+	// 0
+}
@@ -135,3 +135,36 @@ func padAtPosition(str string, length int, padStr string, position int) string {

 	return leftPad + str + rightPad
 }
+
// isLetter reports whether r is a letter (per unicode.IsLetter) that is not
// a CJK character.
//
// The excluded CJK ranges follow the pattern
// /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff66-\uff9f]/
// and both ends of every range are inclusive, so the last code point of each
// range (for example U+30FF or U+FF9F) is rejected as well.
func isLetter(r rune) bool {
	if !unicode.IsLetter(r) {
		return false
	}

	switch {
	// hiragana and katakana (Japanese only)
	// NOTE(review): the lower bound U+3034 is below the U+3040 quoted in the
	// pattern above; it is kept as-is to preserve existing behavior.
	case r >= '\u3034' && r <= '\u30ff':
		return false

	// CJK unified ideographs extension A (Chinese, Japanese, and Korean)
	case r >= '\u3400' && r <= '\u4dbf':
		return false

	// CJK unified ideographs (Chinese, Japanese, and Korean)
	case r >= '\u4e00' && r <= '\u9fff':
		return false

	// CJK compatibility ideographs (Chinese, Japanese, and Korean)
	case r >= '\uf900' && r <= '\ufaff':
		return false

	// half-width katakana (Japanese only)
	case r >= '\uff66' && r <= '\uff9f':
		return false
	}

	return true
}
@@ -308,3 +308,37 @@ func TestSubstring(t *testing.T) {
 	assert.Equal("de", Substring("abcde", -2, 3))
 	assert.Equal("你好", Substring("你好，欢迎你", 0, 2))
 }
+
+func TestSplitWords(t *testing.T) {
+	assert := internal.NewAssert(t, "TestSplitWords")
+
+	cases := map[string][]string{
+		"a word":                       {"a", "word"},
+		"I'am a programmer":            {"I'am", "a", "programmer"},
+		"Bonjour, je suis programmeur": {"Bonjour", "je", "suis", "programmeur"},
+		"a -b-c' 'd'e":                 {"a", "b-c'", "d'e"},
+		"你好，我是一名码农":                    nil,
+		"こんにちは，私はプログラマーです": nil,
+	}
+
+	for k, v := range cases {
+		assert.Equal(v, SplitWords(k))
+	}
+}
+
+func TestWordCount(t *testing.T) {
+	assert := internal.NewAssert(t, "TestSplitWords")
+
+	cases := map[string]int{
+		"a word":                       2, //   {"a", "word"},
+		"I'am a programmer":            3, //   {"I'am", "a", "programmer"},
+		"Bonjour, je suis programmeur": 4, // {"Bonjour", "je", "suis", "programmeur"},
+		"a -b-c' 'd'e":                 3, // {"a", "b-c'", "d'e"},
+		"你好，我是一名码农":                    0, // nil,
+		"こんにちは，私はプログラマーです": 0, // nil,
+	}
+
+	for k, v := range cases {
+		assert.Equal(v, WordCount(k))
+	}
+}
Author	SHA1	Message	Date
dudaodong	32ca975204	feat: add OmitBy, OmitByKeys, OmitByValues	2023-02-21 14:52:10 +08:00
dudaodong	ec740e442c	feat: add WordCount	2023-02-21 14:27:00 +08:00
dudaodong	ed98ad53ec	feat: add SplitWords	2023-02-21 14:16:36 +08:00