mirror of
https://github.com/duke-git/lancet.git
synced 2026-02-04 12:52:28 +08:00
feat: add ExtractContent
:
This commit is contained in:
@@ -68,6 +68,7 @@ import (
|
|||||||
- [Rotate](#Rotate)
|
- [Rotate](#Rotate)
|
||||||
- [TemplateReplace](#TemplateReplace)
|
- [TemplateReplace](#TemplateReplace)
|
||||||
- [RegexMatchAllGroups](#RegexMatchAllGroups)
|
- [RegexMatchAllGroups](#RegexMatchAllGroups)
|
||||||
|
- [ExtractContent](#ExtractContent)
|
||||||
|
|
||||||
|
|
||||||
<div STYLE="page-break-after: always;"></div>
|
<div STYLE="page-break-after: always;"></div>
|
||||||
@@ -1728,4 +1729,34 @@ func main() {
|
|||||||
// [john.doe@example.com john.doe example com]
|
// [john.doe@example.com john.doe example com]
|
||||||
// [jane.doe@example.com jane.doe example com]
|
// [jane.doe@example.com jane.doe example com]
|
||||||
}
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### <span id="ExtractContent">ExtractContent</span>
|
||||||
|
|
||||||
|
<p>提取两个标记之间的内容。</p>
|
||||||
|
|
||||||
|
<b>函数签名:</b>
|
||||||
|
|
||||||
|
```go
|
||||||
|
func ExtractContent(s, start, end string) []string
|
||||||
|
```
|
||||||
|
|
||||||
|
<b>示例:<span style="float:right;display:inline-block;">[Run](todo)</span></b>
|
||||||
|
|
||||||
|
```go
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/duke-git/lancet/v2/strutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
|
||||||
|
|
||||||
|
result := strutil.ExtractContent(html, "<span>", "</span>")
|
||||||
|
|
||||||
|
fmt.Println(result)
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// [content1 content2 content1]
|
||||||
|
}
|
||||||
```
|
```
|
||||||
@@ -68,6 +68,8 @@ import (
|
|||||||
- [Rotate](#Rotate)
|
- [Rotate](#Rotate)
|
||||||
- [TemplateReplace](#TemplateReplace)
|
- [TemplateReplace](#TemplateReplace)
|
||||||
- [RegexMatchAllGroups](#RegexMatchAllGroups)
|
- [RegexMatchAllGroups](#RegexMatchAllGroups)
|
||||||
|
- [ExtractContent](#ExtractContent)
|
||||||
|
|
||||||
|
|
||||||
<div STYLE="page-break-after: always;"></div>
|
<div STYLE="page-break-after: always;"></div>
|
||||||
|
|
||||||
@@ -1708,7 +1710,7 @@ func main() {
|
|||||||
func RegexMatchAllGroups(pattern, str string) [][]string
|
func RegexMatchAllGroups(pattern, str string) [][]string
|
||||||
```
|
```
|
||||||
|
|
||||||
<b>example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>
|
<b>Example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>
|
||||||
|
|
||||||
```go
|
```go
|
||||||
import (
|
import (
|
||||||
@@ -1729,4 +1731,34 @@ func main() {
|
|||||||
// [john.doe@example.com john.doe example com]
|
// [john.doe@example.com john.doe example com]
|
||||||
// [jane.doe@example.com jane.doe example com]
|
// [jane.doe@example.com jane.doe example com]
|
||||||
}
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### <span id="ExtractContent">ExtractContent</span>
|
||||||
|
|
||||||
|
<p>Extracts the content between the start and end strings in the source string.</p>
|
||||||
|
|
||||||
|
<b>Signature:</b>
|
||||||
|
|
||||||
|
```go
|
||||||
|
func ExtractContent(s, start, end string) []string
|
||||||
|
```
|
||||||
|
|
||||||
|
<b>Example:<span style="float:right;display:inline-block;">[Run](todo)</span></b>
|
||||||
|
|
||||||
|
```go
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"github.com/duke-git/lancet/v2/strutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
|
||||||
|
|
||||||
|
result := strutil.ExtractContent(html, "<span>", "</span>")
|
||||||
|
|
||||||
|
fmt.Println(result)
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// [content1 content2 content1]
|
||||||
|
}
|
||||||
```
|
```
|
||||||
@@ -735,3 +735,24 @@ func RegexMatchAllGroups(pattern, str string) [][]string {
|
|||||||
matches := re.FindAllStringSubmatch(str, -1)
|
matches := re.FindAllStringSubmatch(str, -1)
|
||||||
return matches
|
return matches
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ExtractContent extracts every piece of content in s that lies between a
// start marker and the nearest end marker that follows it.
//
// Scanning resumes right after each matched start marker rather than after
// the end marker, so a start marker located inside an already-extracted span
// begins a new match of its own.
//
// The result is always a non-nil slice; it is empty when s holds no complete
// start/end pair. An empty start marker matches without consuming any input,
// so it yields an empty result instead of scanning forever.
// Play: todo
func ExtractContent(s, start, end string) []string {
	result := []string{}

	// strings.Cut with an empty separator reports success but leaves the
	// input untouched, so the loop below would never make progress.
	if start == "" {
		return result
	}

	for {
		_, after, ok := strings.Cut(s, start)
		if !ok {
			break
		}

		before, _, ok := strings.Cut(after, end)
		if !ok {
			break
		}

		result = append(result, before)
		// Continue from just past the start marker that was consumed.
		s = after
	}

	return result
}
|
||||||
|
|||||||
@@ -753,3 +753,15 @@ func ExampleRegexMatchAllGroups() {
|
|||||||
// [john.doe@example.com john.doe example com]
|
// [john.doe@example.com john.doe example com]
|
||||||
// [jane.doe@example.com jane.doe example com]
|
// [jane.doe@example.com jane.doe example com]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ExampleExtractContent() {
|
||||||
|
html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
|
||||||
|
|
||||||
|
result := ExtractContent(html, "<span>", "</span>")
|
||||||
|
|
||||||
|
fmt.Println(result)
|
||||||
|
|
||||||
|
// Output:
|
||||||
|
// [content1 content2 content1]
|
||||||
|
|
||||||
|
}
|
||||||
|
|||||||
@@ -853,3 +853,87 @@ func TestRegexMatchAllGroups(t *testing.T) {
|
|||||||
assert.Equal(tt.expected, result)
|
assert.Equal(tt.expected, result)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExtractContent(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
assert := internal.NewAssert(t, "TestExtractContent")
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input string
|
||||||
|
start string
|
||||||
|
end string
|
||||||
|
expected []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Extract content between <tag> and </tag>",
|
||||||
|
input: "This is <tag>content1</tag> and <tag>content2</tag> and <tag>content3</tag>",
|
||||||
|
start: "<tag>",
|
||||||
|
end: "</tag>",
|
||||||
|
expected: []string{"content1", "content2", "content3"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "No tags in the string",
|
||||||
|
input: "This string has no tags",
|
||||||
|
start: "<tag>",
|
||||||
|
end: "</tag>",
|
||||||
|
expected: []string{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Single tag pair",
|
||||||
|
input: "<tag>onlyContent</tag>",
|
||||||
|
start: "<tag>",
|
||||||
|
end: "</tag>",
|
||||||
|
expected: []string{"onlyContent"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Tags without end tag",
|
||||||
|
input: "This <tag>content without end tag",
|
||||||
|
start: "<tag>",
|
||||||
|
end: "</tag>",
|
||||||
|
expected: []string{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Tags with nested content",
|
||||||
|
input: "<tag>content <nested>inner</nested> end</tag>",
|
||||||
|
start: "<tag>",
|
||||||
|
end: "</tag>",
|
||||||
|
expected: []string{"content <nested>inner</nested> end"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Edge case with empty string",
|
||||||
|
input: "",
|
||||||
|
start: "<tag>",
|
||||||
|
end: "</tag>",
|
||||||
|
expected: []string{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Edge case with no start tag",
|
||||||
|
input: "content without start tag",
|
||||||
|
start: "<tag>",
|
||||||
|
end: "</tag>",
|
||||||
|
expected: []string{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Edge case with no end tag",
|
||||||
|
input: "<tag>content without end tag",
|
||||||
|
start: "<tag>",
|
||||||
|
end: "</tag>",
|
||||||
|
expected: []string{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Multiple consecutive tags",
|
||||||
|
input: "<tag>content1</tag><tag>content2</tag><tag>content3</tag>",
|
||||||
|
start: "<tag>",
|
||||||
|
end: "</tag>",
|
||||||
|
expected: []string{"content1", "content2", "content3"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := ExtractContent(tt.input, tt.start, tt.end)
|
||||||
|
assert.Equal(tt.expected, result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user