doc: add doc for SplitWords and WordCount

2026-03-01 00:35:28 +08:00 · 2023-02-23 10:23:38 +08:00
parent c02654559a
commit 15c1537bf0
5 changed files with 647 additions and 333 deletions
@@ -42,6 +42,8 @@ import (
 -   [Wrap](#Wrap)
 -   [Unwrap](#Unwrap)
 -   [SplitEx](#SplitEx)
 -   [SplitWords](#SplitWords)
 -   [WordCount](#WordCount)
 <div STYLE="page-break-after: always;"></div>
@@ -735,3 +737,89 @@ func main() {
    fmt.Println(arr5) //[]string{" a", "b", "c"}
 }
 ```
 ### <span id="SplitWords">SplitWords</span>
 <p>Splits a string into words. A word contains only alphabetic characters.</p>
 <b>Signature:</b>
 ```go
 func SplitWords(s string) []string
 ```
 <b>Example:</b>
 ```go
 import (
    "fmt"
    "github.com/duke-git/lancet/strutil"
 )
 func main() {
    result1 := strutil.SplitWords("a word")
    result2 := strutil.SplitWords("I'am a programmer")
    result3 := strutil.SplitWords("Bonjour, je suis programmeur")
    result4 := strutil.SplitWords("a -b-c' 'd'e")
    result5 := strutil.SplitWords("你好，我是一名码农")
    result6 := strutil.SplitWords("こんにちは，私はプログラマーです")
    fmt.Println(result1)
    fmt.Println(result2)
    fmt.Println(result3)
    fmt.Println(result4)
    fmt.Println(result5)
    fmt.Println(result6)
    // Output:
    // [a word]
    // [I'am a programmer]
    // [Bonjour je suis programmeur]
    // [a b-c' d'e]
    // []
    // []
 }
 ```
 ### <span id="WordCount">WordCount</span>
 <p>Returns the number of meaningful words. A word contains only alphabetic characters.</p>
 <b>Signature:</b>
 ```go
 func WordCount(s string) int
 ```
 <b>Example:</b>
 ```go
 import (
    "fmt"
    "github.com/duke-git/lancet/strutil"
 )
 func main() {
    result1 := strutil.WordCount("a word")
    result2 := strutil.WordCount("I'am a programmer")
    result3 := strutil.WordCount("Bonjour, je suis programmeur")
    result4 := strutil.WordCount("a -b-c' 'd'e")
    result5 := strutil.WordCount("你好，我是一名码农")
    result6 := strutil.WordCount("こんにちは，私はプログラマーです")
    fmt.Println(result1)
    fmt.Println(result2)
    fmt.Println(result3)
    fmt.Println(result4)
    fmt.Println(result5)
    fmt.Println(result6)
    // Output:
    // 2
    // 3
    // 4
    // 3
    // 0
    // 0
 }
 ```
@@ -42,6 +42,8 @@ import (
 -   [Wrap](#Wrap)
 -   [Unwrap](#Unwrap)
 -   [SplitEx](#SplitEx)
 -   [SplitWords](#SplitWords)
 -   [WordCount](#WordCount)
 <div STYLE="page-break-after: always;"></div>
@@ -408,7 +410,7 @@ func Pad(source string, size int, padStr string) string
 ```go
 import (
    "fmt"
-    "github.com/duke-git/lancet/v2/strutil"
+    "github.com/duke-git/lancet/strutil"
 )
 func main() {
@@ -453,7 +455,7 @@ func PadEnd(source string, size int, padStr string) string
 ```go
 import (
    "fmt"
-    "github.com/duke-git/lancet/v2/strutil"
+    "github.com/duke-git/lancet/strutil"
 )
 func main() {
@@ -499,7 +501,7 @@ func PadStart(source string, size int, padStr string) string
 ```go
 import (
    "fmt"
-    "github.com/duke-git/lancet/v2/strutil"
+    "github.com/duke-git/lancet/strutil"
 )
 func main() {
@@ -736,3 +738,89 @@ func main() {
    fmt.Println(arr5) //[]string{" a", "b", "c"}
 }
 ```
 ### <span id="SplitWords">SplitWords</span>
 <p>将字符串拆分为单词，只支持字母字符单词。</p>
 <b>函数签名:</b>
 ```go
 func SplitWords(s string) []string
 ```
 <b>示例:</b>
 ```go
 import (
    "fmt"
    "github.com/duke-git/lancet/strutil"
 )
 func main() {
    result1 := strutil.SplitWords("a word")
    result2 := strutil.SplitWords("I'am a programmer")
    result3 := strutil.SplitWords("Bonjour, je suis programmeur")
    result4 := strutil.SplitWords("a -b-c' 'd'e")
    result5 := strutil.SplitWords("你好，我是一名码农")
    result6 := strutil.SplitWords("こんにちは，私はプログラマーです")
    fmt.Println(result1)
    fmt.Println(result2)
    fmt.Println(result3)
    fmt.Println(result4)
    fmt.Println(result5)
    fmt.Println(result6)
    // Output:
    // [a word]
    // [I'am a programmer]
    // [Bonjour je suis programmeur]
    // [a b-c' d'e]
    // []
    // []
 }
 ```
 ### <span id="WordCount">WordCount</span>
 <p>返回有意义单词的数量，只支持字母字符单词。</p>
 <b>函数签名:</b>
 ```go
 func WordCount(s string) int
 ```
 <b>示例:</b>
 ```go
 import (
    "fmt"
    "github.com/duke-git/lancet/strutil"
 )
 func main() {
    result1 := strutil.WordCount("a word")
    result2 := strutil.WordCount("I'am a programmer")
    result3 := strutil.WordCount("Bonjour, je suis programmeur")
    result4 := strutil.WordCount("a -b-c' 'd'e")
    result5 := strutil.WordCount("你好，我是一名码农")
    result6 := strutil.WordCount("こんにちは，私はプログラマーです")
    fmt.Println(result1)
    fmt.Println(result2)
    fmt.Println(result3)
    fmt.Println(result4)
    fmt.Println(result5)
    fmt.Println(result6)
    // Output:
    // 2
    // 3
    // 4
    // 3
    // 0
    // 0
 }
 ```
@@ -253,3 +253,74 @@ func SplitEx(s, sep string, removeEmptyString bool) []string {
 	return ret
 }
 // SplitWords splits s into words and returns them in order of appearance.
 // A word begins at a rune accepted by isLetter and extends over further such
 // runes as well as apostrophes (') and hyphens (-); any other rune ends it.
 // Apostrophes and hyphens found outside a word are skipped, while those that
 // trail a word stay attached to it. The result is nil when s holds no word.
 func SplitWords(s string) []string {
 	var words []string
 	// start is the byte offset at which the current word begins, or -1 while
 	// scanning between words.
 	start := -1
 	for i, width := 0, 0; i < len(s); i += width {
 		var r rune
 		r, width = utf8.DecodeRuneInString(s[i:])
 		if isLetter(r) {
 			if start < 0 {
 				start = i
 			}
 			continue
 		}
 		if start < 0 {
 			// Not inside a word: separators and stray joiners are ignored.
 			continue
 		}
 		if r == '\'' || r == '-' {
 			// Joiner inside a word keeps the word open.
 			continue
 		}
 		words = append(words, s[start:i])
 		start = -1
 	}
 	// Flush a word that runs up to the end of the input.
 	if start >= 0 {
 		words = append(words, s[start:])
 	}
 	return words
 }
 // WordCount returns the number of words in s. A word begins at a rune
 // accepted by isLetter and extends over further such runes as well as
 // apostrophes (') and hyphens (-); any other rune ends it. Apostrophes and
 // hyphens found outside a word do not start one.
 func WordCount(s string) int {
 	total := 0
 	inWord := false
 	for i, width := 0, 0; i < len(s); i += width {
 		var r rune
 		r, width = utf8.DecodeRuneInString(s[i:])
 		if isLetter(r) {
 			// Only the first letter of a word bumps the counter.
 			if !inWord {
 				total++
 				inWord = true
 			}
 			continue
 		}
 		// A joiner leaves the state untouched; anything else closes the word.
 		if r != '\'' && r != '-' {
 			inWord = false
 		}
 	}
 	return total
 }
@@ -132,3 +132,36 @@ func padAtPosition(str string, length int, padStr string, position int) string {
 	return leftPad + str + rightPad
 }
 // isLetter reports whether r is a Unicode letter (per unicode.IsLetter) that
 // lies outside the CJK ranges listed below. It returns false for non-letters
 // and for letters inside any of those ranges, true otherwise.
 //
 // The ranges follow the pattern
 // /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff66-\uff9f]/
 // and are inclusive at both ends, so the last code point of each range
 // (e.g. U+30FF, U+FF9F) is rejected as well.
 func isLetter(r rune) bool {
 	if !unicode.IsLetter(r) {
 		return false
 	}
 	switch {
 	// hiragana and katakana (Japanese only); the lower bound starts at U+3034,
 	// slightly below the pattern's U+3040, so a few preceding marks are
 	// rejected too.
 	case r >= '\u3034' && r <= '\u30ff':
 		return false
 	// CJK unified ideographs extension A (Chinese, Japanese, and Korean)
 	case r >= '\u3400' && r <= '\u4dbf':
 		return false
 	// CJK unified ideographs (Chinese, Japanese, and Korean)
 	case r >= '\u4e00' && r <= '\u9fff':
 		return false
 	// CJK compatibility ideographs (Chinese, Japanese, and Korean)
 	case r >= '\uf900' && r <= '\ufaff':
 		return false
 	// half-width katakana (Japanese only)
 	case r >= '\uff66' && r <= '\uff9f':
 		return false
 	}
 	return true
 }
@@ -295,3 +295,37 @@ func TestSplitEx(t *testing.T) {
 	assert.Equal([]string{" a", "b", "c", ""}, SplitEx(" a = b = c = ", " = ", false))
 	assert.Equal([]string{" a", "b", "c"}, SplitEx(" a = b = c = ", " = ", true))
 }
 // TestSplitWords checks SplitWords against Latin-script sentences, inputs
 // with apostrophes and hyphens, and CJK-only inputs that yield no words.
 func TestSplitWords(t *testing.T) {
 	assert := internal.NewAssert(t, "TestSplitWords")
 	tests := []struct {
 		input string
 		want  []string
 	}{
 		{"a word", []string{"a", "word"}},
 		{"I'am a programmer", []string{"I'am", "a", "programmer"}},
 		{"Bonjour, je suis programmeur", []string{"Bonjour", "je", "suis", "programmeur"}},
 		{"a -b-c' 'd'e", []string{"a", "b-c'", "d'e"}},
 		{"你好，我是一名码农", nil},
 		{"こんにちは，私はプログラマーです", nil},
 	}
 	for _, tc := range tests {
 		assert.Equal(tc.want, SplitWords(tc.input))
 	}
 }
 // TestWordCount checks WordCount against the same inputs used for
 // SplitWords; each expected count equals the number of words listed in the
 // trailing comment.
 func TestWordCount(t *testing.T) {
 	// Label the assertions with this test's own name so failures are
 	// attributed to TestWordCount rather than TestSplitWords.
 	assert := internal.NewAssert(t, "TestWordCount")
 	cases := map[string]int{
 		"a word":                       2, // {"a", "word"}
 		"I'am a programmer":            3, // {"I'am", "a", "programmer"}
 		"Bonjour, je suis programmeur": 4, // {"Bonjour", "je", "suis", "programmeur"}
 		"a -b-c' 'd'e":                 3, // {"a", "b-c'", "d'e"}
 		"你好，我是一名码农":                    0, // no words
 		"こんにちは，私はプログラマーです": 0, // no words
 	}
 	for k, v := range cases {
 		assert.Equal(v, WordCount(k))
 	}
 }