1
0
mirror of https://github.com/duke-git/lancet.git synced 2026-02-04 12:52:28 +08:00

feat: add ExtractContent

:
This commit is contained in:
dudaodong
2024-11-08 14:11:25 +08:00
parent 0ed2b11ba1
commit 08f14d2b08
5 changed files with 181 additions and 1 deletions

View File

@@ -735,3 +735,24 @@ func RegexMatchAllGroups(pattern, str string) [][]string {
matches := re.FindAllStringSubmatch(str, -1)
return matches
}
// ExtractContent extracts the content between the start and end strings in the source string.
//
// The string is scanned from left to right. For each occurrence of start that
// is followed by an occurrence of end, the text between that start and the
// nearest following end is collected. Scanning resumes immediately after the
// matched start (not after the end), so a start that appears inside an
// already collected span produces its own entry as well.
//
// A non-nil, empty slice is returned when nothing matches. An empty start
// delimiter is treated as "no match" and also yields an empty slice.
// Play: todo
func ExtractContent(s, start, end string) []string {
	result := []string{}

	// strings.Cut with an empty separator succeeds without consuming any
	// input, so the loop below could never advance and would spin forever.
	if start == "" {
		return result
	}

	for {
		_, after, found := strings.Cut(s, start)
		if !found {
			break
		}

		before, _, found := strings.Cut(after, end)
		if !found {
			// No end delimiter in the remainder, so no later start can be closed either.
			break
		}

		result = append(result, before)

		// Continue right after the start delimiter that was just consumed.
		s = after
	}

	return result
}

View File

@@ -753,3 +753,15 @@ func ExampleRegexMatchAllGroups() {
// [john.doe@example.com john.doe example com]
// [jane.doe@example.com jane.doe example com]
}
// ExampleExtractContent shows ExtractContent collecting the text of every
// <span> element found in a small HTML fragment.
func ExampleExtractContent() {
	const source = `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`

	contents := ExtractContent(source, "<span>", "</span>")
	fmt.Println(contents)

	// Output:
	// [content1 content2 content1]
}

View File

@@ -853,3 +853,87 @@ func TestRegexMatchAllGroups(t *testing.T) {
assert.Equal(tt.expected, result)
}
}
// TestExtractContent runs ExtractContent against a table of inputs that all
// share the same "<tag>" / "</tag>" delimiters and compares the extracted
// segments with the expected slice.
func TestExtractContent(t *testing.T) {
	t.Parallel()

	assert := internal.NewAssert(t, "TestExtractContent")

	// Every case below is delimited by the same pair of markers.
	const (
		openTag  = "<tag>"
		closeTag = "</tag>"
	)

	type extractCase struct {
		name     string
		input    string
		expected []string
	}

	cases := []extractCase{
		{
			name:     "Extract content between <tag> and </tag>",
			input:    "This is <tag>content1</tag> and <tag>content2</tag> and <tag>content3</tag>",
			expected: []string{"content1", "content2", "content3"},
		},
		{
			name:     "No tags in the string",
			input:    "This string has no tags",
			expected: []string{},
		},
		{
			name:     "Single tag pair",
			input:    "<tag>onlyContent</tag>",
			expected: []string{"onlyContent"},
		},
		{
			name:     "Tags without end tag",
			input:    "This <tag>content without end tag",
			expected: []string{},
		},
		{
			name:     "Tags with nested content",
			input:    "<tag>content <nested>inner</nested> end</tag>",
			expected: []string{"content <nested>inner</nested> end"},
		},
		{
			name:     "Edge case with empty string",
			input:    "",
			expected: []string{},
		},
		{
			name:     "Edge case with no start tag",
			input:    "content without start tag",
			expected: []string{},
		},
		{
			name:     "Edge case with no end tag",
			input:    "<tag>content without end tag",
			expected: []string{},
		},
		{
			name:     "Multiple consecutive tags",
			input:    "<tag>content1</tag><tag>content2</tag><tag>content3</tag>",
			expected: []string{"content1", "content2", "content3"},
		},
	}

	for i := range cases {
		tc := cases[i]

		t.Run(tc.name, func(t *testing.T) {
			got := ExtractContent(tc.input, openTag, closeTag)
			assert.Equal(tc.expected, got)
		})
	}
}