mirror of
https://github.com/duke-git/lancet.git
synced 2026-02-04 12:52:28 +08:00
feat: add ExtractContent
:
This commit is contained in:
@@ -735,3 +735,24 @@ func RegexMatchAllGroups(pattern, str string) [][]string {
|
||||
matches := re.FindAllStringSubmatch(str, -1)
|
||||
return matches
|
||||
}
|
||||
|
||||
// ExtractContent extracts the content between the start and end strings in the source string.
//
// The source is scanned from left to right. For every occurrence of start,
// the text up to the next occurrence of end is collected. Scanning then
// resumes right after that start marker (not after the end marker), so a
// start marker that sits inside an already collected segment yields its own,
// overlapping entry.
//
// The returned slice is never nil. It is empty when start is empty, when s
// contains no start marker, or when no end marker follows a start marker.
// Play: todo
func ExtractContent(s, start, end string) []string {
	result := []string{}

	// An empty start marker matches at offset 0 without consuming any input,
	// so the scan would never advance and the loop would never terminate.
	if start == "" {
		return result
	}

	for {
		_, after, found := strings.Cut(s, start)
		if !found {
			break
		}

		before, _, found := strings.Cut(after, end)
		if !found {
			// No end marker is left, so no later start marker can be closed either.
			break
		}

		result = append(result, before)
		// Continue behind the start marker that was just consumed.
		s = after
	}

	return result
}
|
||||
|
||||
@@ -753,3 +753,15 @@ func ExampleRegexMatchAllGroups() {
|
||||
// [john.doe@example.com john.doe example com]
|
||||
// [jane.doe@example.com jane.doe example com]
|
||||
}
|
||||
|
||||
func ExampleExtractContent() {
|
||||
html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
|
||||
|
||||
result := ExtractContent(html, "<span>", "</span>")
|
||||
|
||||
fmt.Println(result)
|
||||
|
||||
// Output:
|
||||
// [content1 content2 content1]
|
||||
|
||||
}
|
||||
|
||||
@@ -853,3 +853,87 @@ func TestRegexMatchAllGroups(t *testing.T) {
|
||||
assert.Equal(tt.expected, result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractContent(t *testing.T) {
|
||||
t.Parallel()
|
||||
assert := internal.NewAssert(t, "TestExtractContent")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
start string
|
||||
end string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "Extract content between <tag> and </tag>",
|
||||
input: "This is <tag>content1</tag> and <tag>content2</tag> and <tag>content3</tag>",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{"content1", "content2", "content3"},
|
||||
},
|
||||
{
|
||||
name: "No tags in the string",
|
||||
input: "This string has no tags",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Single tag pair",
|
||||
input: "<tag>onlyContent</tag>",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{"onlyContent"},
|
||||
},
|
||||
{
|
||||
name: "Tags without end tag",
|
||||
input: "This <tag>content without end tag",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Tags with nested content",
|
||||
input: "<tag>content <nested>inner</nested> end</tag>",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{"content <nested>inner</nested> end"},
|
||||
},
|
||||
{
|
||||
name: "Edge case with empty string",
|
||||
input: "",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Edge case with no start tag",
|
||||
input: "content without start tag",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Edge case with no end tag",
|
||||
input: "<tag>content without end tag",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Multiple consecutive tags",
|
||||
input: "<tag>content1</tag><tag>content2</tag><tag>content3</tag>",
|
||||
start: "<tag>",
|
||||
end: "</tag>",
|
||||
expected: []string{"content1", "content2", "content3"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := ExtractContent(tt.input, tt.start, tt.end)
|
||||
assert.Equal(tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user