feat: add ExtractContent

:
2026-03-01 00:35:28 +08:00 · 2024-11-08 14:11:25 +08:00
parent 0ed2b11ba1
commit 08f14d2b08
5 changed files with 181 additions and 1 deletions
@@ -68,6 +68,7 @@ import (
 -   [Rotate](#Rotate)
 -   [TemplateReplace](#TemplateReplace)
 -   [RegexMatchAllGroups](#RegexMatchAllGroups)
 -   [ExtractContent](#ExtractContent)
 <div STYLE="page-break-after: always;"></div>
@@ -1728,4 +1729,34 @@ func main() {
    // [john.doe@example.com john.doe example com]
    // [jane.doe@example.com jane.doe example com]
 }
 ```
 ### <span id="ExtractContent">ExtractContent</span>
 <p>提取两个标记之间的内容。</p>
 <b>函数签名:</b>
 ```go
 func ExtractContent(s, start, end string) []string
 ```
 <b>示例:<span style="float:right;display:inline-block;">[Run](todo)</span></b>
 ```go
 import (
    "fmt"
    "github.com/duke-git/lancet/v2/strutil"
 )
 func main() {
    html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
    result := strutil.ExtractContent(html, "<span>", "</span>")
    fmt.Println(result)
    // Output:
    // [content1 content2 content1]
 }
 ```
@@ -68,6 +68,8 @@ import (
 -   [Rotate](#Rotate)
 -   [TemplateReplace](#TemplateReplace)
 -   [RegexMatchAllGroups](#RegexMatchAllGroups)
 -   [ExtractContent](#ExtractContent)
 <div STYLE="page-break-after: always;"></div>
@@ -1708,7 +1710,7 @@ func main() {
 func RegexMatchAllGroups(pattern, str string) [][]string
 ```
-<b>example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>
+<b>Example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>
 ```go
 import (
@@ -1729,4 +1731,34 @@ func main() {
    // [john.doe@example.com john.doe example com]
    // [jane.doe@example.com jane.doe example com]
 }
 ```
 ### <span id="ExtractContent">ExtractContent</span>
 <p>Extracts the content between the start and end strings in the source string.</p>
 <b>Signature:</b>
 ```go
 func ExtractContent(s, start, end string) []string
 ```
 <b>Example:<span style="float:right;display:inline-block;">[Run](todo)</span></b>
 ```go
 import (
    "fmt"
    "github.com/duke-git/lancet/v2/strutil"
 )
 func main() {
    html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
    result := strutil.ExtractContent(html, "<span>", "</span>")
    fmt.Println(result)
    // Output:
    // [content1 content2 content1]
 }
 ```
@@ -735,3 +735,24 @@ func RegexMatchAllGroups(pattern, str string) [][]string {
 	matches := re.FindAllStringSubmatch(str, -1)
 	return matches
 }
 // ExtractContent extracts every substring of s that lies between an occurrence
 // of start and the next occurrence of end.
 //
 // The search resumes immediately after each matched start marker (not after the
 // matching end marker), so a start marker that appears inside an extracted
 // segment yields a further, overlapping match. Scanning stops at the first
 // start marker that has no end marker after it. The result is always non-nil;
 // it is empty when nothing matches.
 //
 // An empty start marker returns an empty result: strings.Cut matches "" at
 // offset 0 without consuming any input, so the scan could never advance and
 // would otherwise loop forever.
 // Play: todo
 func ExtractContent(s, start, end string) []string {
 	result := []string{}
 	// Guard against the non-terminating case described above.
 	if start == "" {
 		return result
 	}
 	for {
 		_, after, found := strings.Cut(s, start)
 		if !found {
 			break
 		}
 		before, _, closed := strings.Cut(after, end)
 		if !closed {
 			break
 		}
 		result = append(result, before)
 		// Continue right after the start marker that was just consumed.
 		s = after
 	}
 	return result
 }
@@ -753,3 +753,15 @@ func ExampleRegexMatchAllGroups() {
 	// [john.doe@example.com john.doe example com]
 	// [jane.doe@example.com jane.doe example com]
 }
 // ExampleExtractContent prints the text found between each <span> and </span>
 // pair of a small HTML fragment.
 func ExampleExtractContent() {
 	const fragment = `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
 	fmt.Println(ExtractContent(fragment, "<span>", "</span>"))
 	// Output:
 	// [content1 content2 content1]
 }
@@ -853,3 +853,87 @@ func TestRegexMatchAllGroups(t *testing.T) {
 		assert.Equal(tt.expected, result)
 	}
 }
 // TestExtractContent runs ExtractContent over a table of inputs, all of which
 // use "<tag>" as the start marker and "</tag>" as the end marker.
 func TestExtractContent(t *testing.T) {
 	t.Parallel()
 	assert := internal.NewAssert(t, "TestExtractContent")
 	// Every case shares the same pair of markers.
 	const (
 		openTag  = "<tag>"
 		closeTag = "</tag>"
 	)
 	cases := []struct {
 		name  string
 		input string
 		want  []string
 	}{
 		{"Extract content between <tag> and </tag>", "This is <tag>content1</tag> and <tag>content2</tag> and <tag>content3</tag>", []string{"content1", "content2", "content3"}},
 		{"No tags in the string", "This string has no tags", []string{}},
 		{"Single tag pair", "<tag>onlyContent</tag>", []string{"onlyContent"}},
 		{"Tags without end tag", "This <tag>content without end tag", []string{}},
 		{"Tags with nested content", "<tag>content <nested>inner</nested> end</tag>", []string{"content <nested>inner</nested> end"}},
 		{"Edge case with empty string", "", []string{}},
 		{"Edge case with no start tag", "content without start tag", []string{}},
 		{"Edge case with no end tag", "<tag>content without end tag", []string{}},
 		{"Multiple consecutive tags", "<tag>content1</tag><tag>content2</tag><tag>content3</tag>", []string{"content1", "content2", "content3"}},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			got := ExtractContent(tc.input, openTag, closeTag)
 			assert.Equal(tc.want, got)
 		})
 	}
 }