mirror of
https://github.com/duke-git/lancet.git
synced 2026-02-04 12:52:28 +08:00
feat: add ExtractContent
:
This commit is contained in:
@@ -68,6 +68,7 @@ import (
|
||||
- [Rotate](#Rotate)
|
||||
- [TemplateReplace](#TemplateReplace)
|
||||
- [RegexMatchAllGroups](#RegexMatchAllGroups)
|
||||
- [ExtractContent](#ExtractContent)
|
||||
|
||||
|
||||
<div STYLE="page-break-after: always;"></div>
|
||||
@@ -1729,3 +1730,33 @@ func main() {
|
||||
// [jane.doe@example.com jane.doe example com]
|
||||
}
|
||||
```
|
||||
|
||||
### <span id="ExtractContent">ExtractContent</span>
|
||||
|
||||
<p>提取两个标记之间的内容。</p>
|
||||
|
||||
<b>函数签名:</b>
|
||||
|
||||
```go
|
||||
func ExtractContent(s, start, end string) []string
|
||||
```
|
||||
|
||||
<b>示例:<span style="float:right;display:inline-block;">[Run](todo)</span></b>
|
||||
|
||||
```go
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/duke-git/lancet/v2/strutil"
|
||||
)
|
||||
|
||||
func main() {
|
||||
html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
|
||||
|
||||
result := strutil.ExtractContent(html, "<span>", "</span>")
|
||||
|
||||
fmt.Println(result)
|
||||
|
||||
// Output:
|
||||
// [content1 content2 content1]
|
||||
}
|
||||
```
|
||||
@@ -68,6 +68,8 @@ import (
|
||||
- [Rotate](#Rotate)
|
||||
- [TemplateReplace](#TemplateReplace)
|
||||
- [RegexMatchAllGroups](#RegexMatchAllGroups)
|
||||
- [ExtractContent](#ExtractContent)
|
||||
|
||||
|
||||
<div STYLE="page-break-after: always;"></div>
|
||||
|
||||
@@ -1708,7 +1710,7 @@ func main() {
|
||||
func RegexMatchAllGroups(pattern, str string) [][]string
|
||||
```
|
||||
|
||||
<b>example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>
|
||||
<b>Example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>
|
||||
|
||||
```go
|
||||
import (
|
||||
@@ -1730,3 +1732,33 @@ func main() {
|
||||
// [jane.doe@example.com jane.doe example com]
|
||||
}
|
||||
```
|
||||
|
||||
### <span id="ExtractContent">ExtractContent</span>
|
||||
|
||||
<p>Extracts the content between the start and end strings in the source string.</p>
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```go
|
||||
func ExtractContent(s, start, end string) []string
|
||||
```
|
||||
|
||||
<b>Example:<span style="float:right;display:inline-block;">[Run](todo)</span></b>
|
||||
|
||||
```go
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/duke-git/lancet/v2/strutil"
|
||||
)
|
||||
|
||||
func main() {
|
||||
html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
|
||||
|
||||
result := strutil.ExtractContent(html, "<span>", "</span>")
|
||||
|
||||
fmt.Println(result)
|
||||
|
||||
// Output:
|
||||
// [content1 content2 content1]
|
||||
}
|
||||
```
|
||||
@@ -735,3 +735,24 @@ func RegexMatchAllGroups(pattern, str string) [][]string {
|
||||
matches := re.FindAllStringSubmatch(str, -1)
|
||||
return matches
|
||||
}
|
||||
|
||||
// ExtractContent returns every substring of s that is enclosed by a start
// marker and the nearest following end marker, in order of appearance.
//
// Scanning resumes behind each consumed end marker, so matches never overlap
// and identical start/end markers (for example a pair of quotes) are paired
// up correctly. A start marker with no end marker after it contributes
// nothing. An empty start marker matches nothing and yields an empty result.
// The returned slice is never nil.
// Play: todo
func ExtractContent(s, start, end string) []string {
	result := []string{}

	// strings.Cut with an empty separator succeeds without consuming any
	// input, so an empty start marker could never make progress in the loop.
	if start == "" {
		return result
	}

	for {
		_, after, ok := strings.Cut(s, start)
		if !ok {
			break
		}

		content, rest, ok := strings.Cut(after, end)
		if !ok {
			break
		}

		result = append(result, content)
		// Continue behind the end marker that was just consumed.
		s = rest
	}

	return result
}
|
||||
|
||||
@@ -753,3 +753,15 @@ func ExampleRegexMatchAllGroups() {
|
||||
// [john.doe@example.com john.doe example com]
|
||||
// [jane.doe@example.com jane.doe example com]
|
||||
}
|
||||
|
||||
func ExampleExtractContent() {
|
||||
html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
|
||||
|
||||
result := ExtractContent(html, "<span>", "</span>")
|
||||
|
||||
fmt.Println(result)
|
||||
|
||||
// Output:
|
||||
// [content1 content2 content1]
|
||||
|
||||
}
|
||||
|
||||
@@ -853,3 +853,87 @@ func TestRegexMatchAllGroups(t *testing.T) {
|
||||
assert.Equal(tt.expected, result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContent(t *testing.T) {
|
||||
t.Parallel()
|
||||
assert := internal.NewAssert(t, "TestExtractContent")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
start string
|
||||
end string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "Extract content between <tag> and </tag>",
|
||||
input: "This is <tag>content1</tag> and <tag>content2</tag> and <tag>content3</tag>",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{"content1", "content2", "content3"},
|
||||
},
|
||||
{
|
||||
name: "No tags in the string",
|
||||
input: "This string has no tags",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Single tag pair",
|
||||
input: "<tag>onlyContent</tag>",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{"onlyContent"},
|
||||
},
|
||||
{
|
||||
name: "Tags without end tag",
|
||||
input: "This <tag>content without end tag",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Tags with nested content",
|
||||
input: "<tag>content <nested>inner</nested> end</tag>",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{"content <nested>inner</nested> end"},
|
||||
},
|
||||
{
|
||||
name: "Edge case with empty string",
|
||||
input: "",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Edge case with no start tag",
|
||||
input: "content without start tag",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Edge case with no end tag",
|
||||
input: "<tag>content without end tag",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Multiple consecutive tags",
|
||||
input: "<tag>content1</tag><tag>content2</tag><tag>content3</tag>",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{"content1", "content2", "content3"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := ExtractContent(tt.input, tt.start, tt.end)
|
||||
assert.Equal(tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user