feat: add ExtractContent

:
2026-03-01 00:35:28 +08:00 · 2024-11-08 14:11:25 +08:00
parent 0ed2b11ba1
commit 08f14d2b08
5 changed files with 181 additions and 1 deletions
@@ -735,3 +735,24 @@ func RegexMatchAllGroups(pattern, str string) [][]string {
 	matches := re.FindAllStringSubmatch(str, -1)
 	return matches
 }
+
+// ExtractContent returns every substring of s enclosed by a start marker and
+// the nearest following end marker, in order of appearance.
+//
+// Matches never overlap: after a match the scan resumes just past its end
+// marker, so text sitting between one end marker and the next start marker is
+// not reported (this matters when start and end are the same string, such as a
+// quote character). A start marker with no end marker after it stops the scan.
+//
+// The result is always a non-nil slice. It is empty when nothing matches, and
+// also when start is empty, because an empty start matches at every position
+// and the scan could never advance. An empty end matches immediately, giving
+// one empty string per start marker found.
+// Play: todo
+func ExtractContent(s, start, end string) []string {
+	result := []string{}
+
+	// Guard against the non-terminating case before entering the loop.
+	if start == "" {
+		return result
+	}
+
+	for {
+		_, after, ok := strings.Cut(s, start)
+		if !ok {
+			break
+		}
+
+		before, rest, ok := strings.Cut(after, end)
+		if !ok {
+			break
+		}
+
+		result = append(result, before)
+		// Continue past the consumed end marker so matches cannot overlap.
+		s = rest
+	}
+
+	return result
+}
@@ -753,3 +753,15 @@ func ExampleRegexMatchAllGroups() {
 	// [john.doe@example.com john.doe example com]
 	// [jane.doe@example.com jane.doe example com]
 }
+
+func ExampleExtractContent() {
+	html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
+
+	result := ExtractContent(html, "<span>", "</span>")
+
+	fmt.Println(result)
+
+	// Output:
+	// [content1 content2 content1]
+
+}
@@ -853,3 +853,87 @@ func TestRegexMatchAllGroups(t *testing.T) {
 		assert.Equal(tt.expected, result)
 	}
 }
+
+func TestExtractContent(t *testing.T) {
+	t.Parallel()
+	assert := internal.NewAssert(t, "TestExtractContent")
+
+	tests := []struct {
+		name     string
+		input    string
+		start    string
+		end      string
+		expected []string
+	}{
+		{
+			name:     "Extract content between <tag> and </tag>",
+			input:    "This is <tag>content1</tag> and <tag>content2</tag> and <tag>content3</tag>",
+			start:    "<tag>",
+			end:      "</tag>",
+			expected: []string{"content1", "content2", "content3"},
+		},
+		{
+			name:     "No tags in the string",
+			input:    "This string has no tags",
+			start:    "<tag>",
+			end:      "</tag>",
+			expected: []string{},
+		},
+		{
+			name:     "Single tag pair",
+			input:    "<tag>onlyContent</tag>",
+			start:    "<tag>",
+			end:      "</tag>",
+			expected: []string{"onlyContent"},
+		},
+		{
+			name:     "Tags without end tag",
+			input:    "This <tag>content without end tag",
+			start:    "<tag>",
+			end:      "</tag>",
+			expected: []string{},
+		},
+		{
+			name:     "Tags with nested content",
+			input:    "<tag>content <nested>inner</nested> end</tag>",
+			start:    "<tag>",
+			end:      "</tag>",
+			expected: []string{"content <nested>inner</nested> end"},
+		},
+		{
+			name:     "Edge case with empty string",
+			input:    "",
+			start:    "<tag>",
+			end:      "</tag>",
+			expected: []string{},
+		},
+		{
+			name:     "Edge case with no start tag",
+			input:    "content without start tag",
+			start:    "<tag>",
+			end:      "</tag>",
+			expected: []string{},
+		},
+		{
+			name:     "Edge case with no end tag",
+			input:    "<tag>content without end tag",
+			start:    "<tag>",
+			end:      "</tag>",
+			expected: []string{},
+		},
+		{
+			name:     "Multiple consecutive tags",
+			input:    "<tag>content1</tag><tag>content2</tag><tag>content3</tag>",
+			start:    "<tag>",
+			end:      "</tag>",
+			expected: []string{"content1", "content2", "content3"},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ExtractContent(tt.input, tt.start, tt.end)
+			assert.Equal(tt.expected, result)
+		})
+	}
+}