1
0
mirror of https://github.com/duke-git/lancet.git synced 2026-02-04 12:52:28 +08:00
Files
lancet/formatter/address_helper.go
Javen b3fd282b50 feat: add address.Smart and Decompose for parse CN address (#346)
* feat: add address.Smart and Decompose for parse CN address

* feat: add Xinjiang directly-administered county-level cities support

- Add '自治区直辖县级市' as a city-level unit (ID: 4043) in A2Data for Xinjiang
- Add 12 directly-administered county-level cities in A3Data (IDs: 4044-4055):
  * 石河子市 (Shihezi, 1976, 8th Division) - ID: 4044
  * 阿拉尔市 (Aral, 2002, 1st Division) - ID: 4045
  * 图木舒克市 (Tumxuk, 2002, 3rd Division) - ID: 4046
  * 五家渠市 (Wujiaqu, 2002, 6th Division) - ID: 4047
  * 北屯市 (Beitun, 2011, 10th Division) - ID: 4048
  * 铁门关市 (Tiemenguan, 2012, 2nd Division) - ID: 4049
  * 双河市 (Shuanghe, 2014, 5th Division) - ID: 4050
  * 可克达拉市 (Kokdala, 2015, 4th Division) - ID: 4051
  * 昆玉市 (Kunyu, 2016, 14th Division) - ID: 4052
  * 胡杨河市 (Huyanghe, 2019, 7th Division) - ID: 4053
  * 新星市 (Xinxing, 2021, 13th Division) - ID: 4054
  * 白杨市 (Baiyang, 2023, 9th Division) - ID: 4055
- All county-level cities are under PID 4043 (自治区直辖县级市)
- Add test case for Xinjiang Shihezi city address parsing
- Now supports parsing addresses like: 新疆石河子市北三路25小区

* docs: formatted address data

* fix: parse repeat address error

* feat: update readme file

---------

Co-authored-by: Jiawen <im@linjiawen.com>
2026-01-13 14:00:44 +08:00

76 lines
1.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package formatter
import (
"strings"
"unicode/utf8"
)
// mbStrpos reports the position of the first occurrence of needle in
// haystack, measured in UTF-8 characters (runes) rather than bytes.
//
// It returns 0 when needle is empty and -1 when needle does not occur in
// haystack.
func mbStrpos(haystack, needle string) int {
	if len(needle) == 0 {
		return 0
	}
	if byteOff := strings.Index(haystack, needle); byteOff >= 0 {
		// strings.Index yields a byte offset; counting the runes in front of
		// the match converts it into a character offset.
		return utf8.RuneCountInString(haystack[:byteOff])
	}
	return -1
}
// mbStrripos reports the position of the last occurrence of needle in
// haystack, measured in UTF-8 characters (runes) rather than bytes.
//
// It returns the rune length of haystack when needle is empty and -1 when
// needle does not occur in haystack.
//
// NOTE(review): the name resembles PHP's case-insensitive mb_strripos, but
// the comparison here is done by strings.LastIndex and is therefore
// case-sensitive — confirm that this is what callers expect.
func mbStrripos(haystack, needle string) int {
	if len(needle) == 0 {
		return utf8.RuneCountInString(haystack)
	}
	if byteOff := strings.LastIndex(haystack, needle); byteOff >= 0 {
		// Translate the byte offset of the match into a character offset.
		return utf8.RuneCountInString(haystack[:byteOff])
	}
	return -1
}
// mbStrstr reports whether needle occurs anywhere in haystack.
// An empty needle is considered to be contained in every string.
func mbStrstr(haystack, needle string) bool {
	return strings.Index(haystack, needle) >= 0
}
// mbSubstr returns a substring of str, where positions and lengths are
// counted in UTF-8 characters (runes) rather than bytes.
//
// start is the zero-based index of the first character to include. A negative
// start counts back from the end of the string and is clamped to 0 when it
// reaches past the beginning.
//
// length is the maximum number of characters to return. The result is
// truncated at the end of the string, so a length larger than what remains
// (including very large values such as the maximum int) yields the rest of
// the string.
//
// An empty string is returned when start lies at or beyond the end of the
// string, or when length is zero or negative.
func mbSubstr(str string, start, length int) string {
	runes := []rune(str)
	strLen := len(runes)

	// A negative start is relative to the end of the string.
	if start < 0 {
		start += strLen
		if start < 0 {
			start = 0
		}
	}

	if start >= strLen || length <= 0 {
		return ""
	}

	// Clamp length against the number of remaining runes instead of computing
	// start+length first: that sum wraps around for very large lengths and
	// previously caused an empty result.
	if remaining := strLen - start; length > remaining {
		length = remaining
	}

	return string(runes[start : start+length])
}
// mbSubstrCount returns the number of non-overlapping occurrences of needle
// in haystack. An empty needle yields 0.
func mbSubstrCount(haystack, needle string) int {
	if len(needle) > 0 {
		return strings.Count(haystack, needle)
	}
	// strings.Count reports rune count + 1 for an empty needle; this helper
	// deliberately reports zero matches instead.
	return 0
}