1
0
mirror of https://github.com/duke-git/lancet.git synced 2026-02-04 12:52:28 +08:00

feat: add WordCount

This commit is contained in:
dudaodong
2023-02-21 14:27:00 +08:00
parent ed98ad53ec
commit ec740e442c
3 changed files with 73 additions and 0 deletions

View File

@@ -329,3 +329,34 @@ func SplitWords(s string) []string {
return words
}
// WordCount return the number of meaningful word, word only contains alphabetic characters.
// Play: todo
func WordCount(s string) int {
var r rune
var size, count int
isWord := false
for len(s) > 0 {
r, size = utf8.DecodeRuneInString(s)
switch {
case isLetter(r):
if !isWord {
isWord = true
count++
}
case isWord && (r == '\'' || r == '-'):
// is word
default:
isWord = false
}
s = s[size:]
}
return count
}

View File

@@ -413,3 +413,28 @@ func ExampleSplitWords() {
// []
// []
}
func ExampleWordCount() {
result1 := WordCount("a word")
result2 := WordCount("I'am a programmer")
result3 := WordCount("Bonjour, je suis programmeur")
result4 := WordCount("a -b-c' 'd'e")
result5 := WordCount("你好,我是一名码农")
result6 := WordCount("こんにちは,私はプログラマーです")
fmt.Println(result1)
fmt.Println(result2)
fmt.Println(result3)
fmt.Println(result4)
fmt.Println(result5)
fmt.Println(result6)
// Output:
// 2
// 3
// 4
// 3
// 0
// 0
}

View File

@@ -325,3 +325,20 @@ func TestSplitWords(t *testing.T) {
assert.Equal(v, SplitWords(k))
}
}
func TestWordCount(t *testing.T) {
assert := internal.NewAssert(t, "TestSplitWords")
cases := map[string]int{
"a word": 2, // {"a", "word"},
"I'am a programmer": 3, // {"I'am", "a", "programmer"},
"Bonjour, je suis programmeur": 4, // {"Bonjour", "je", "suis", "programmeur"},
"a -b-c' 'd'e": 3, // {"a", "b-c'", "d'e"},
"你好,我是一名码农": 0, // nil,
"こんにちは,私はプログラマーです": 0, // nil,
}
for k, v := range cases {
assert.Equal(v, WordCount(k))
}
}