1
0
mirror of https://github.com/duke-git/lancet.git synced 2026-02-04 12:52:28 +08:00

feat: add ExtractContent

:
This commit is contained in:
dudaodong
2024-11-08 14:11:25 +08:00
parent 0ed2b11ba1
commit 08f14d2b08
5 changed files with 181 additions and 1 deletions

View File

@@ -68,6 +68,7 @@ import (
- [Rotate](#Rotate) - [Rotate](#Rotate)
- [TemplateReplace](#TemplateReplace) - [TemplateReplace](#TemplateReplace)
- [RegexMatchAllGroups](#RegexMatchAllGroups) - [RegexMatchAllGroups](#RegexMatchAllGroups)
- [ExtractContent](#ExtractContent)
<div STYLE="page-break-after: always;"></div> <div STYLE="page-break-after: always;"></div>
@@ -1728,4 +1729,34 @@ func main() {
// [john.doe@example.com john.doe example com] // [john.doe@example.com john.doe example com]
// [jane.doe@example.com jane.doe example com] // [jane.doe@example.com jane.doe example com]
} }
```
### <span id="ExtractContent">ExtractContent</span>
<p>提取两个标记之间的内容。</p>
<b>函数签名:</b>
```go
func ExtractContent(s, start, end string) []string
```
<b>示例:<span style="float:right;display:inline-block;">[Run](todo)</span></b>
```go
import (
"fmt"
"github.com/duke-git/lancet/v2/strutil"
)
func main() {
// Collect the text found between each "<span>" and "</span>" marker pair.
html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
result := strutil.ExtractContent(html, "<span>", "</span>")
fmt.Println(result)
// Output:
// [content1 content2 content1]
}
``` ```

View File

@@ -68,6 +68,8 @@ import (
- [Rotate](#Rotate) - [Rotate](#Rotate)
- [TemplateReplace](#TemplateReplace) - [TemplateReplace](#TemplateReplace)
- [RegexMatchAllGroups](#RegexMatchAllGroups) - [RegexMatchAllGroups](#RegexMatchAllGroups)
- [ExtractContent](#ExtractContent)
<div STYLE="page-break-after: always;"></div> <div STYLE="page-break-after: always;"></div>
@@ -1708,7 +1710,7 @@ func main() {
func RegexMatchAllGroups(pattern, str string) [][]string func RegexMatchAllGroups(pattern, str string) [][]string
``` ```
<b>example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b> <b>Example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>
```go ```go
import ( import (
@@ -1729,4 +1731,34 @@ func main() {
// [john.doe@example.com john.doe example com] // [john.doe@example.com john.doe example com]
// [jane.doe@example.com jane.doe example com] // [jane.doe@example.com jane.doe example com]
} }
```
### <span id="ExtractContent">ExtractContent</span>
<p>Extracts the content between the start and end strings in the source string.</p>
<b>Signature:</b>
```go
func ExtractContent(s, start, end string) []string
```
<b>Example:<span style="float:right;display:inline-block;">[Run](todo)</span></b>
```go
import (
"fmt"
"github.com/duke-git/lancet/v2/strutil"
)
func main() {
// Collect the text found between each "<span>" and "</span>" marker pair.
html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
result := strutil.ExtractContent(html, "<span>", "</span>")
fmt.Println(result)
// Output:
// [content1 content2 content1]
}
``` ```

View File

@@ -735,3 +735,24 @@ func RegexMatchAllGroups(pattern, str string) [][]string {
matches := re.FindAllStringSubmatch(str, -1) matches := re.FindAllStringSubmatch(str, -1)
return matches return matches
} }
// ExtractContent extracts the content between the start and end strings in the source string.
//
// The search resumes just after each matched start marker, so every occurrence
// of start that is followed somewhere by end contributes one entry: the text
// from that start marker up to the nearest following end marker. Scanning stops
// at the first start marker that has no end marker after it.
//
// The returned slice is never nil; it is empty when nothing matches. An empty
// start marker matches without consuming any input, so it is rejected up front
// and yields an empty result instead of looping forever.
// Play: todo
func ExtractContent(s, start, end string) []string {
	result := []string{}

	// strings.Cut with an empty separator succeeds without advancing, which
	// would keep the loop below from ever terminating.
	if start == "" {
		return result
	}

	for {
		_, after, found := strings.Cut(s, start)
		if !found {
			break
		}

		before, _, found := strings.Cut(after, end)
		if !found {
			break
		}

		result = append(result, before)
		// Resume right after the start marker (not after the end marker) so the
		// established scanning semantics are preserved.
		s = after
	}

	return result
}

View File

@@ -753,3 +753,15 @@ func ExampleRegexMatchAllGroups() {
// [john.doe@example.com john.doe example com] // [john.doe@example.com john.doe example com]
// [jane.doe@example.com jane.doe example com] // [jane.doe@example.com jane.doe example com]
} }
// ExampleExtractContent shows ExtractContent collecting the text enclosed by
// every <span>...</span> pair in an HTML fragment.
func ExampleExtractContent() {
	const markup = `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`

	fmt.Println(ExtractContent(markup, "<span>", "</span>"))

	// Output:
	// [content1 content2 content1]
}

View File

@@ -853,3 +853,87 @@ func TestRegexMatchAllGroups(t *testing.T) {
assert.Equal(tt.expected, result) assert.Equal(tt.expected, result)
} }
} }
// TestExtractContent runs ExtractContent over a table of inputs that all use
// the "<tag>" / "</tag>" marker pair and compares each result with the
// expected slice.
func TestExtractContent(t *testing.T) {
	t.Parallel()

	assert := internal.NewAssert(t, "TestExtractContent")

	// Every case uses the same pair of markers.
	const (
		openTag  = "<tag>"
		closeTag = "</tag>"
	)

	type extractCase struct {
		name     string
		input    string
		start    string
		end      string
		expected []string
	}

	cases := []extractCase{
		{
			name:     "Extract content between <tag> and </tag>",
			input:    "This is <tag>content1</tag> and <tag>content2</tag> and <tag>content3</tag>",
			start:    openTag,
			end:      closeTag,
			expected: []string{"content1", "content2", "content3"},
		},
		{
			name:     "No tags in the string",
			input:    "This string has no tags",
			start:    openTag,
			end:      closeTag,
			expected: []string{},
		},
		{
			name:     "Single tag pair",
			input:    "<tag>onlyContent</tag>",
			start:    openTag,
			end:      closeTag,
			expected: []string{"onlyContent"},
		},
		{
			name:     "Tags without end tag",
			input:    "This <tag>content without end tag",
			start:    openTag,
			end:      closeTag,
			expected: []string{},
		},
		{
			name:     "Tags with nested content",
			input:    "<tag>content <nested>inner</nested> end</tag>",
			start:    openTag,
			end:      closeTag,
			expected: []string{"content <nested>inner</nested> end"},
		},
		{
			name:     "Edge case with empty string",
			input:    "",
			start:    openTag,
			end:      closeTag,
			expected: []string{},
		},
		{
			name:     "Edge case with no start tag",
			input:    "content without start tag",
			start:    openTag,
			end:      closeTag,
			expected: []string{},
		},
		{
			name:     "Edge case with no end tag",
			input:    "<tag>content without end tag",
			start:    openTag,
			end:      closeTag,
			expected: []string{},
		},
		{
			name:     "Multiple consecutive tags",
			input:    "<tag>content1</tag><tag>content2</tag><tag>content3</tag>",
			start:    openTag,
			end:      closeTag,
			expected: []string{"content1", "content2", "content3"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := ExtractContent(tc.input, tc.start, tc.end)
			assert.Equal(tc.expected, got)
		})
	}
}