From ec740e442c752406b75201ab013a0904bcb40d57 Mon Sep 17 00:00:00 2001
From: dudaodong
Date: Tue, 21 Feb 2023 14:27:00 +0800
Subject: [PATCH] feat: add WordCount

---
 strutil/string.go              | 29 +++++++++++++++++++++++++++++
 strutil/string_example_test.go | 25 +++++++++++++++++++++++++
 strutil/string_test.go         | 17 +++++++++++++++++
 3 files changed, 71 insertions(+)

diff --git a/strutil/string.go b/strutil/string.go
index 6d1728c..ebeacd6 100644
--- a/strutil/string.go
+++ b/strutil/string.go
@@ -329,3 +329,32 @@ func SplitWords(s string) []string {
 
 	return words
 }
+
+// WordCount returns the number of words in s. A word is a run of letters, as
+// classified by isLetter, that may also contain apostrophes and hyphens after
+// its first letter; any other character ends the current word.
+// Play: todo
+func WordCount(s string) int {
+	count := 0
+	inWord := false
+
+	// Ranging over a string yields runes, so multi-byte characters are
+	// handled without manual UTF-8 decoding.
+	for _, r := range s {
+		switch {
+		case isLetter(r):
+			if !inWord {
+				inWord = true
+				count++
+			}
+
+		case inWord && (r == '\'' || r == '-'):
+			// An apostrophe or hyphen inside a word keeps the word going.
+
+		default:
+			inWord = false
+		}
+	}
+
+	return count
+}
diff --git a/strutil/string_example_test.go b/strutil/string_example_test.go
index c7850ed..d679e13 100644
--- a/strutil/string_example_test.go
+++ b/strutil/string_example_test.go
@@ -413,3 +413,28 @@ func ExampleSplitWords() {
 	// []
 	// []
 }
+
+func ExampleWordCount() {
+
+	result1 := WordCount("a word")
+	result2 := WordCount("I'am a programmer")
+	result3 := WordCount("Bonjour, je suis programmeur")
+	result4 := WordCount("a -b-c' 'd'e")
+	result5 := WordCount("你好,我是一名码农")
+	result6 := WordCount("こんにちは,私はプログラマーです")
+
+	fmt.Println(result1)
+	fmt.Println(result2)
+	fmt.Println(result3)
+	fmt.Println(result4)
+	fmt.Println(result5)
+	fmt.Println(result6)
+
+	// Output:
+	// 2
+	// 3
+	// 4
+	// 3
+	// 0
+	// 0
+}
diff --git a/strutil/string_test.go b/strutil/string_test.go
index 4fd95d6..9867d17 100644
--- a/strutil/string_test.go
+++ b/strutil/string_test.go
@@ -325,3 +325,20 @@ func TestSplitWords(t *testing.T) {
 		assert.Equal(v, SplitWords(k))
 	}
 }
+
+// TestWordCount checks WordCount against the expected word totals below.
+func TestWordCount(t *testing.T) {
+	assert := internal.NewAssert(t, "TestWordCount")
+
+	cases := map[string]int{
+		"a word":                       2, // {"a", "word"},
+		"I'am a programmer":            3, // {"I'am", "a", "programmer"},
+		"Bonjour, je suis programmeur": 4, // {"Bonjour", "je", "suis", "programmeur"},
+		"a -b-c' 'd'e":                 3, // {"a", "b-c'", "d'e"},
+		"你好,我是一名码农":                    0, // nil,
+		"こんにちは,私はプログラマーです":             0, // nil,
+	}
+	for k, v := range cases {
+		assert.Equal(v, WordCount(k))
+	}
+}