mirror of
https://github.com/duke-git/lancet.git
synced 2026-02-04 12:52:28 +08:00
* feat: add address.Smart and Decompose for parsing CN addresses * feat: add Xinjiang directly-administered county-level cities support - Add '自治区直辖县级市' as a city-level unit (ID: 4043) in A2Data for Xinjiang - Add 12 directly-administered county-level cities in A3Data (IDs: 4044-4055): * 石河子市 (Shihezi, 1976, 8th Division) - ID: 4044 * 阿拉尔市 (Aral, 2002, 1st Division) - ID: 4045 * 图木舒克市 (Tumxuk, 2002, 3rd Division) - ID: 4046 * 五家渠市 (Wujiaqu, 2002, 6th Division) - ID: 4047 * 北屯市 (Beitun, 2011, 10th Division) - ID: 4048 * 铁门关市 (Tiemenguan, 2012, 2nd Division) - ID: 4049 * 双河市 (Shuanghe, 2014, 5th Division) - ID: 4050 * 可克达拉市 (Kokdala, 2015, 4th Division) - ID: 4051 * 昆玉市 (Kunyu, 2016, 14th Division) - ID: 4052 * 胡杨河市 (Huyanghe, 2019, 7th Division) - ID: 4053 * 新星市 (Xinxing, 2021, 13th Division) - ID: 4054 * 白杨市 (Baiyang, 2023, 9th Division) - ID: 4055 - All county-level cities are under PID 4043 (自治区直辖县级市) - Add test case for Xinjiang Shihezi city address parsing - Now supports parsing addresses like: 新疆石河子市北三路25小区 * docs: formatted address data * fix: repeated-address parsing error * feat: update readme file --------- Co-authored-by: Jiawen <im@linjiawen.com>
76 lines
1.5 KiB
Go
76 lines
1.5 KiB
Go
package formatter
|
||
|
||
import (
|
||
"strings"
|
||
"unicode/utf8"
|
||
)
|
||
|
||
// mbStrpos returns the position of the first occurrence of needle in
// haystack, measured in UTF-8 characters (runes) rather than bytes.
// It returns 0 when needle is empty and -1 when needle does not occur.
// The comparison is byte-exact, so it is case-sensitive.
func mbStrpos(haystack, needle string) int {
	byteOffset := strings.Index(haystack, needle)
	switch {
	case len(needle) == 0:
		return 0
	case byteOffset < 0:
		return -1
	default:
		// Convert the byte offset into a rune offset by counting the
		// runes that precede the match.
		return utf8.RuneCountInString(haystack[:byteOffset])
	}
}
|
||
|
||
// mbStrripos returns the position of the last occurrence of needle in
// haystack, measured in UTF-8 characters (runes) rather than bytes.
// An empty needle yields the rune length of haystack; a needle that does
// not occur yields -1. The comparison is byte-exact (case-sensitive).
func mbStrripos(haystack, needle string) int {
	// Byte length of the prefix that precedes the match. For an empty
	// needle the whole haystack counts as the prefix.
	prefixLen := len(haystack)
	if needle != "" {
		prefixLen = strings.LastIndex(haystack, needle)
		if prefixLen < 0 {
			return -1
		}
	}
	return utf8.RuneCountInString(haystack[:prefixLen])
}
|
||
|
||
// mbStrstr reports whether needle occurs anywhere in haystack.
// An empty needle is considered to be contained in every string.
func mbStrstr(haystack, needle string) bool {
	found := strings.Index(haystack, needle) >= 0
	return found
}
|
||
|
||
// mbSubstr returns the part of str that begins at rune index start and
// contains at most length runes. Positions are counted in UTF-8 characters
// (runes), not bytes.
//
// start:  zero-based starting position. A negative value counts back from
//         the end of the string and is clamped to the beginning when it
//         reaches past it.
// length: maximum number of runes to return. Zero or negative values yield
//         an empty string; values larger than the remaining text return
//         everything up to the end of the string.
func mbSubstr(str string, start, length int) string {
	runes := []rune(str)
	strLen := len(runes)

	// Resolve a negative start relative to the end of the string.
	if start < 0 {
		start += strLen
		if start < 0 {
			start = 0
		}
	}

	// Nothing to return when the start lies beyond the text or no runes
	// were requested.
	if start >= strLen || length <= 0 {
		return ""
	}

	// Clamp length against the runes that remain instead of computing
	// start+length first: that sum wraps around to a negative number for
	// very large lengths (e.g. the maximum int used as "to the end"), which
	// would wrongly produce an empty result.
	if remaining := strLen - start; length > remaining {
		length = remaining
	}

	return string(runes[start : start+length])
}
|
||
|
||
// mbSubstrCount returns how many non-overlapping times needle occurs in
// haystack. An empty needle is treated as occurring zero times.
func mbSubstrCount(haystack, needle string) int {
	if len(needle) > 0 {
		return strings.Count(haystack, needle)
	}
	return 0
}
|