1
0
mirror of https://github.com/duke-git/lancet.git synced 2026-02-19 04:02:27 +08:00

feat: add address.Smart and Decompose for parse CN address (#346)

* feat: add address.Smart and Decompose for parse CN address

* feat: add Xinjiang directly-administered county-level cities support

- Add '自治区直辖县级市' as a city-level unit (ID: 4043) in A2Data for Xinjiang
- Add 12 directly-administered county-level cities in A3Data (IDs: 4044-4055):
  * 石河子市 (Shihezi, 1976, 8th Division) - ID: 4044
  * 阿拉尔市 (Aral, 2002, 1st Division) - ID: 4045
  * 图木舒克市 (Tumxuk, 2002, 3rd Division) - ID: 4046
  * 五家渠市 (Wujiaqu, 2002, 6th Division) - ID: 4047
  * 北屯市 (Beitun, 2011, 10th Division) - ID: 4048
  * 铁门关市 (Tiemenguan, 2012, 2nd Division) - ID: 4049
  * 双河市 (Shuanghe, 2014, 5th Division) - ID: 4050
  * 可克达拉市 (Kokdala, 2015, 4th Division) - ID: 4051
  * 昆玉市 (Kunyu, 2016, 14th Division) - ID: 4052
  * 胡杨河市 (Huyanghe, 2019, 7th Division) - ID: 4053
  * 新星市 (Xinxing, 2021, 13th Division) - ID: 4054
  * 白杨市 (Baiyang, 2023, 9th Division) - ID: 4055
- All county-level cities are under PID 4043 (自治区直辖县级市)
- Add test case for Xinjiang Shihezi city address parsing
- Now supports parsing addresses like: 新疆石河子市北三路25小区

* docs: formatted address data

* fix: parse repeat address error

* feat: update readme file

---------

Co-authored-by: Jiawen <im@linjiawen.com>
This commit is contained in:
Javen
2026-01-13 14:00:44 +08:00
committed by GitHub
parent a1cebec9f2
commit b3fd282b50
8 changed files with 5679 additions and 0 deletions

View File

@@ -0,0 +1,75 @@
package formatter
import (
"strings"
"unicode/utf8"
)
// mbStrpos reports where needle first occurs in haystack, measured in
// UTF-8 characters (runes) rather than bytes.
//
// It returns -1 when needle does not occur. An empty needle matches at
// position 0.
func mbStrpos(haystack, needle string) int {
	// strings.Index yields a byte offset; an empty needle already matches at
	// byte 0, so it needs no separate branch.
	bytePos := strings.Index(haystack, needle)
	if bytePos < 0 {
		return -1
	}
	// Convert the byte offset into a rune offset by counting the runes that
	// precede the match.
	prefix := haystack[:bytePos]
	return utf8.RuneCountInString(prefix)
}
// mbStrripos reports where needle last occurs in haystack, measured in
// UTF-8 characters (runes) rather than bytes. Matching is case-sensitive.
//
// It returns -1 when needle does not occur. An empty needle matches at the
// very end, so the result is the rune length of haystack.
func mbStrripos(haystack, needle string) int {
	// strings.LastIndex yields a byte offset; for an empty needle it is
	// len(haystack), which converts to the full rune count below.
	bytePos := strings.LastIndex(haystack, needle)
	if bytePos < 0 {
		return -1
	}
	// Convert the byte offset into a rune offset by counting the runes that
	// precede the match.
	prefix := haystack[:bytePos]
	return utf8.RuneCountInString(prefix)
}
// mbStrstr reports whether needle occurs anywhere in haystack.
// An empty needle is always considered present.
func mbStrstr(haystack, needle string) bool {
	found := strings.Index(haystack, needle) >= 0
	return found
}
// mbSubstr returns the part of str selected by UTF-8 character (rune)
// offsets rather than byte offsets.
//
// start is the zero-based rune index of the first rune to keep. A negative
// start counts back from the end of the string and is clamped to 0 when it
// reaches past the beginning. A start at or beyond the end yields "".
//
// length is the maximum number of runes to return. A zero or negative length
// yields "". A length that runs past the end of the string is truncated to
// the end, so a very large value (such as math.MaxInt) safely means
// "everything from start onward".
func mbSubstr(str string, start, length int) string {
	runes := []rune(str)
	strLen := len(runes)

	// A negative start is relative to the end of the string.
	if start < 0 {
		start = strLen + start
		if start < 0 {
			start = 0
		}
	}

	// Nothing to return when start is past the end or no runes are requested.
	if start >= strLen || length <= 0 {
		return ""
	}

	// Clamp length against the runes that remain instead of computing
	// start+length first: that sum wraps around to a negative number for
	// very large lengths and would wrongly produce an empty result.
	if remaining := strLen - start; length > remaining {
		length = remaining
	}

	return string(runes[start : start+length])
}
// mbSubstrCount returns how many non-overlapping times needle occurs in
// haystack. An empty needle is defined to occur zero times.
func mbSubstrCount(haystack, needle string) int {
	if len(needle) > 0 {
		return strings.Count(haystack, needle)
	}
	// strings.Count would report rune count + 1 for an empty needle; this
	// helper reports 0 instead.
	return 0
}