mirror of
https://github.com/duke-git/lancet.git
synced 2026-02-19 04:02:27 +08:00
feat: add address.Smart and Decompose for parse CN address (#346)
* feat: add address.Smart and Decompose for parse CN address * feat: add Xinjiang directly-administered county-level cities support - Add '自治区直辖县级市' as a city-level unit (ID: 4043) in A2Data for Xinjiang - Add 12 directly-administered county-level cities in A3Data (IDs: 4044-4055): * 石河子市 (Shihezi, 1976, 8th Division) - ID: 4044 * 阿拉尔市 (Aral, 2002, 1st Division) - ID: 4045 * 图木舒克市 (Tumxuk, 2002, 3rd Division) - ID: 4046 * 五家渠市 (Wujiaqu, 2002, 6th Division) - ID: 4047 * 北屯市 (Beitun, 2011, 10th Division) - ID: 4048 * 铁门关市 (Tiemenguan, 2012, 2nd Division) - ID: 4049 * 双河市 (Shuanghe, 2014, 5th Division) - ID: 4050 * 可克达拉市 (Kokdala, 2015, 4th Division) - ID: 4051 * 昆玉市 (Kunyu, 2016, 14th Division) - ID: 4052 * 胡杨河市 (Huyanghe, 2019, 7th Division) - ID: 4053 * 新星市 (Xinxing, 2021, 13th Division) - ID: 4054 * 白杨市 (Baiyang, 2023, 9th Division) - ID: 4055 - All county-level cities are under PID 4043 (自治区直辖县级市) - Add test case for Xinjiang Shihezi city address parsing - Now supports parsing addresses like: 新疆石河子市北三路25小区 * docs: formated address data * fix: parse repeat address error * feat: update readme file --------- Co-authored-by: Jiawen <im@linjiawen.com>
This commit is contained in:
75
formatter/address_helper.go
Normal file
75
formatter/address_helper.go
Normal file
@@ -0,0 +1,75 @@
|
||||
package formatter
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// mbStrpos returns the position of the first occurrence of needle in
// haystack, measured in UTF-8 characters (runes) rather than bytes.
//
// It returns 0 when needle is empty and -1 when needle does not occur in
// haystack.
func mbStrpos(haystack, needle string) int {
	if len(needle) == 0 {
		return 0
	}
	if byteOff := strings.Index(haystack, needle); byteOff >= 0 {
		// strings.Index yields a byte offset; counting the runes in the
		// prefix before the match converts it to a character offset.
		return utf8.RuneCountInString(haystack[:byteOff])
	}
	return -1
}
|
||||
|
||||
// mbStrripos returns the position of the last occurrence of needle in
// haystack, measured in UTF-8 characters (runes) rather than bytes.
//
// It returns the rune length of haystack when needle is empty and -1 when
// needle does not occur in haystack. Matching is case-sensitive, because the
// search is delegated to strings.LastIndex.
//
// NOTE(review): the name resembles PHP's mb_strripos, which is
// case-insensitive; confirm that case-sensitive matching is intended.
func mbStrripos(haystack, needle string) int {
	byteOff := strings.LastIndex(haystack, needle)
	switch {
	case len(needle) == 0:
		return utf8.RuneCountInString(haystack)
	case byteOff < 0:
		return -1
	default:
		// Convert the byte offset of the match into a character offset.
		return utf8.RuneCountInString(haystack[:byteOff])
	}
}
|
||||
|
||||
// mbStrstr reports whether haystack contains needle as a substring.
//
// An empty needle is contained in every string, including the empty string.
func mbStrstr(haystack, needle string) bool {
	return strings.Index(haystack, needle) >= 0
}
|
||||
|
||||
// mbSubstr returns a portion of str, with positions and lengths counted in
// UTF-8 characters (runes) rather than bytes.
//
// start is the zero-based index of the first character to return. A negative
// start counts back from the end of the string and is clamped to 0 when it
// reaches past the beginning.
//
// length is the maximum number of characters to return. A zero or negative
// length yields "". A length that extends past the end of the string is
// clamped, so any sufficiently large value (including the maximum int)
// returns everything from start to the end.
//
// It returns "" when start is at or beyond the end of the string.
func mbSubstr(str string, start, length int) string {
	runes := []rune(str)
	total := len(runes)

	// Resolve a negative start relative to the end of the string.
	if start < 0 {
		start += total
		if start < 0 {
			start = 0
		}
	}

	if start >= total || length <= 0 {
		return ""
	}

	// Clamp length against the remaining characters instead of computing
	// start+length first: that sum wraps around for very large lengths and
	// would wrongly produce an empty result.
	if remaining := total - start; length > remaining {
		length = remaining
	}

	return string(runes[start : start+length])
}
|
||||
|
||||
// mbSubstrCount returns the number of non-overlapping occurrences of needle
// in haystack.
//
// An empty needle yields 0 rather than the rune-count-plus-one result that
// strings.Count would report.
func mbSubstrCount(haystack, needle string) int {
	if len(needle) > 0 {
		return strings.Count(haystack, needle)
	}
	return 0
}
|
||||
Reference in New Issue
Block a user