1
0
mirror of https://github.com/duke-git/lancet.git synced 2026-02-09 23:22:28 +08:00

feat: add address.Smart and Decompose for parse CN address (#346)

* feat: add address.Smart and Decompose for parse CN address

* feat: add Xinjiang directly-administered county-level cities support

- Add '自治区直辖县级市' as a city-level unit (ID: 4043) in A2Data for Xinjiang
- Add 12 directly-administered county-level cities in A3Data (IDs: 4044-4055):
  * 石河子市 (Shihezi, 1976, 8th Division) - ID: 4044
  * 阿拉尔市 (Aral, 2002, 1st Division) - ID: 4045
  * 图木舒克市 (Tumxuk, 2002, 3rd Division) - ID: 4046
  * 五家渠市 (Wujiaqu, 2002, 6th Division) - ID: 4047
  * 北屯市 (Beitun, 2011, 10th Division) - ID: 4048
  * 铁门关市 (Tiemenguan, 2012, 2nd Division) - ID: 4049
  * 双河市 (Shuanghe, 2014, 5th Division) - ID: 4050
  * 可克达拉市 (Kokdala, 2015, 4th Division) - ID: 4051
  * 昆玉市 (Kunyu, 2016, 14th Division) - ID: 4052
  * 胡杨河市 (Huyanghe, 2019, 7th Division) - ID: 4053
  * 新星市 (Xinxing, 2021, 13th Division) - ID: 4054
  * 白杨市 (Baiyang, 2023, 9th Division) - ID: 4055
- All county-level cities are under PID 4043 (自治区直辖县级市)
- Add test case for Xinjiang Shihezi city address parsing
- Now supports parsing addresses like: 新疆石河子市北三路25小区

* docs: formatted address data

* fix: error when parsing repeated address components

* feat: update readme file

---------

Co-authored-by: Jiawen <im@linjiawen.com>
This commit is contained in:
Javen
2026-01-13 14:00:44 +08:00
committed by GitHub
parent a1cebec9f2
commit b3fd282b50
8 changed files with 5679 additions and 0 deletions

494
formatter/address.go Normal file
View File

@@ -0,0 +1,494 @@
// Package formatter implements functions for formatting strings and structs.
package formatter
import (
"regexp"
"strings"
"unicode/utf8"
)
// AddressInfo holds the outcome of parsing an address string: the recipient's
// personal details together with the province/city/region/street breakdown.
type AddressInfo struct {
Name string `json:"name"` // Name of the recipient.
Mobile string `json:"mobile"` // Mobile phone number or landline number.
IDN string `json:"idn"` // ID card number.
Postcode string `json:"postcode"` // Postal code.
Province string `json:"province"` // Province.
City string `json:"city"` // City.
Region string `json:"region"` // District or county.
Street string `json:"street"` // Street-level detail of the address.
Addr string `json:"addr"` // Address text that was parsed: the raw input, or what is left of it once ParsePersonInfo has removed the personal details.
}
// fuzzyResult is the intermediate result of the heuristic (fuzzy) address
// split, before the fragments are resolved against the division data.
type fuzzyResult struct {
A1 string // Province-level fragment.
A2 string // City-level fragment.
A3 string // District/county-level fragment.
Street string // Street-level remainder of the address.
}
// ParseCNAddress parses a Chinese postal address into province, city, region
// (district/county) and street.
//
// When withUser is true the input is first run through ParsePersonInfo, so the
// name, phone number, ID card number and postcode are split off and only the
// remaining text is treated as the address. When withUser is false the whole
// input is treated as the address.
//
// Layouts it is meant to handle include:
//   - Standard: "Province City District Street"
//   - Compact: "Name Phone Province City District Street"
//   - Labelled: "Name: xxx Phone: xxx Address: xxx"
//   - County-level cities: "Province City CountyCity District" (e.g. "河北省石家庄市新乐市")
func ParseCNAddress(str string, withUser bool) *AddressInfo {
	var info *AddressInfo
	if withUser {
		info = ParsePersonInfo(str)
	} else {
		info = &AddressInfo{Addr: str}
	}

	guess := fuzz(info.Addr)
	resolved := parse(guess.A1, guess.A2, guess.A3)
	info.Province, info.City, info.Region = resolved.Province, resolved.City, resolved.Region

	// The street defaults to the heuristic guess. When the resolved region name
	// occurs in the address, whatever follows it is used instead (rune-based
	// slicing keeps multi-byte characters intact).
	street := guess.Street
	if info.Region != "" && info.Addr != "" {
		if at := mbStrpos(info.Addr, info.Region); at != -1 {
			street = ""
			addrRunes := []rune(info.Addr)
			if from := at + len([]rune(info.Region)); from < len(addrRunes) {
				street = string(addrRunes[from:])
			}
		}
	}

	// Strip repeated province/city/region names from the street, then the
	// leftovers of the "自治区直辖县级市" placeholder.
	for _, dup := range []string{
		info.Region,
		info.City,
		info.Province,
		"自治区直辖县级市",
		"直辖县级市",
	} {
		street = strings.ReplaceAll(street, dup, "")
	}
	info.Street = strings.TrimSpace(street)

	return info
}
// Patterns and replacers used by ParsePersonInfo, built once at package scope.
var (
	// Labelled fields; the separator may be an ASCII or a fullwidth colon.
	personNameRe  = regexp.MustCompile(`(?:姓名|收货人|收件人)[::]\s*([^\s\d\n]+)`)
	personPhoneRe = regexp.MustCompile(`(?:电话|手机号码|手机|联系电话)[::]\s*([\d\-]+)`)
	personAddrRe  = regexp.MustCompile(`(?:详细地址|收货地址|地址)[::]\s*([^\n]+)`)

	// Compact layout: 2-4 Chinese characters, 7-12 digits, then the rest.
	personCompactRe = regexp.MustCompile(`^([\x{4e00}-\x{9fa5}]{2,4})(\d{7,12})(.*)$`)

	personSpaceRe    = regexp.MustCompile(`\s+`)
	personLandlineRe = regexp.MustCompile(`(\d{3,4})-(\d{6,8})`)
	personIDNRe      = regexp.MustCompile(`\d{18}|\d{17}[Xx]`)
	personMobileRe   = regexp.MustCompile(`\d{7,12}`)
	personPostcodeRe = regexp.MustCompile(`\d{6}`)

	// personNoiseReplacer blanks out field labels and punctuation in a single
	// pass. Longer labels are listed before their own prefixes/substrings
	// (e.g. "身份证号码" before "身份证") because the replacer tries the old
	// strings in argument order, so no label fragment is left behind.
	personNoiseReplacer = strings.NewReplacer(
		"身份证号码", " ",
		"收货地址", " ",
		"详细地址", " ",
		"所在地区", " ",
		"身份证号", " ",
		"手机号码", " ",
		"联系电话", " ",
		"身份证", " ",
		"收货人", " ",
		"收件人", " ",
		"收货", " ",
		"地址", " ",
		"邮编", " ",
		"电话", " ",
		"姓名", " ",
		"手机", " ",
		":", " ", ":", " ",
		";", " ", ";", " ",
		",", " ", ",", " ",
		"。", " ",
		"\n", " ", "\r", " ",
	)
)

// ParsePersonInfo extracts the recipient's name, phone number, ID card number
// and postcode from str and returns them together with the remaining address
// text in the Addr field.
//
// Supported layouts:
//   - Labelled: "姓名:xxx 电话:xxx 地址:xxx" (ASCII or fullwidth colon)
//   - Compact: name immediately followed by the phone number, e.g. "张三13800138000北京市朝阳区"
//   - Separated: fields delimited by spaces, colons, commas, semicolons or newlines
//
// Extraction order is: labelled name, labelled phone, compact name+phone,
// ID card number (18 digits, or 17 digits plus X), phone (first run of 7-12
// digits), postcode (first run of 6 digits), and finally the name, taken as
// the shortest remaining space-separated token of at least two characters.
// A labelled phone number keeps its hyphen; an unlabelled landline such as
// "800-8585222" is joined into plain digits before extraction.
func ParsePersonInfo(str string) *AddressInfo {
	info := &AddressInfo{}

	// Labelled name and phone: record the value, then drop label and value.
	if m := personNameRe.FindStringSubmatch(str); len(m) > 1 {
		info.Name = strings.TrimSpace(m[1])
		str = personNameRe.ReplaceAllString(str, " ")
	}
	if m := personPhoneRe.FindStringSubmatch(str); len(m) > 1 {
		info.Mobile = strings.TrimSpace(m[1])
		str = personPhoneRe.ReplaceAllString(str, " ")
	}

	// Labelled address: drop only the label and keep the text. "${1}" refers
	// to each match's own capture, so the address text is never interpreted as
	// a replacement template. "所在地区" is left for the label stripper below.
	str = personAddrRe.ReplaceAllString(str, " ${1}")

	// Compact layout is only attempted when no labelled field was found.
	if info.Name == "" && info.Mobile == "" {
		if m := personCompactRe.FindStringSubmatch(str); len(m) > 3 {
			info.Name, info.Mobile, str = m[1], m[2], m[3]
		}
	}

	// Turn the remaining labels and punctuation into single spaces.
	str = personNoiseReplacer.Replace(str)
	str = personSpaceRe.ReplaceAllString(str, " ")

	// Join landline numbers written with a hyphen (e.g. 800-8585222).
	str = personLandlineRe.ReplaceAllString(str, "$1$2")

	// The ID card number must be taken before the phone number, otherwise the
	// phone pattern would consume its leading digits.
	if id := personIDNRe.FindString(str); id != "" {
		info.IDN = strings.ToUpper(id)
		str = strings.ReplaceAll(str, id, "")
	}

	if info.Mobile == "" {
		if m := personMobileRe.FindString(str); m != "" {
			info.Mobile = m
			str = strings.ReplaceAll(str, m, "")
		}
	} else {
		str = strings.ReplaceAll(str, info.Mobile, "")
	}

	if pc := personPostcodeRe.FindString(str); pc != "" {
		info.Postcode = pc
		str = strings.ReplaceAll(str, pc, "")
	}

	str = strings.TrimSpace(personSpaceRe.ReplaceAllString(str, " "))

	// Fall back to guessing the name: the first token, replaced by any later
	// token that is shorter but still at least two characters long.
	if info.Name == "" {
		for _, tok := range strings.Split(str, " ") {
			tok = strings.TrimSpace(tok)
			if tok == "" {
				continue
			}
			n := utf8.RuneCountInString(tok)
			if info.Name == "" || (n >= 2 && n < utf8.RuneCountInString(info.Name)) {
				info.Name = tok
			}
		}
	}
	// Every occurrence of the name is removed from the address text.
	if info.Name != "" {
		str = strings.TrimSpace(strings.ReplaceAll(str, info.Name, ""))
	}

	info.Addr = str
	return info
}
// fuzz heuristically extracts the second-level (city) and third-level
// (district/county) fragments, plus a street guess, from addr by looking for
// the administrative suffix characters 市, 区, 县, 旗, 盟 and 州.
//
// Positions are rune offsets into a normalized copy of addr: spaces and ASCII
// commas are removed and a few administrative terms are rewritten or dropped.
// The A1 field of the result is always empty.
//
// NOTE(review): in most branches Street is cut out of the un-normalized
// addrOrigin using an offset computed on the normalized copy, so the two can
// disagree whenever normalization changed the text before the keyword —
// confirm whether this is intended.
func fuzz(addr string) *fuzzyResult {
addrOrigin := addr
addr = strings.ReplaceAll(addr, " ", "")
addr = strings.ReplaceAll(addr, ",", "")
// Rewrite "自治区直辖县级市" to "市" first, so the "自治区" replacement below does not mangle it.
addr = strings.ReplaceAll(addr, "自治区直辖县级市", "市")
addr = strings.ReplaceAll(addr, "自治区", "省")
addr = strings.ReplaceAll(addr, "自治州", "州")
addr = strings.ReplaceAll(addr, "小区", "")
addr = strings.ReplaceAll(addr, "校区", "")
// Drop "市辖区": it is an administrative placeholder term, not a real district name.
addr = strings.ReplaceAll(addr, "市辖区", "")
a1 := ""
a2 := ""
a3 := ""
street := ""
deep3KeywordPos := -1
// Locate the county (县), district (区) and banner (旗) markers.
countyPos := mbStrpos(addr, "县")
districtPos := mbStrpos(addr, "区")
bannerPos := mbStrpos(addr, "旗")
// Any occurrence counts; despite the names, the position is not restricted.
hasEarlyCounty := countyPos != -1
hasEarlyDistrict := districtPos != -1
hasEarlyBanner := bannerPos != -1
if hasEarlyCounty || hasEarlyDistrict || hasEarlyBanner {
// Check for a county-level city first (e.g. "新乐市"): when both "XX市" and
// "XX区"/"XX县" are present, the "市" takes priority.
hasCountyLevelCity := false
if mbStrstr(addr, "市") {
// Count the occurrences of "市".
cityCount := mbSubstrCount(addr, "市")
if cityCount >= 2 {
// Locate the second "市" (possibly a county-level city).
firstCityPos := mbStrpos(addr, "市")
// Search again starting just after the first "市".
addrAfterFirstCity := mbSubstr(addr, firstCityPos+1, utf8.RuneCountInString(addr)-firstCityPos-1)
secondCityPos := mbStrpos(addrAfterFirstCity, "市")
if secondCityPos != -1 {
secondCityAbsPos := firstCityPos + 1 + secondCityPos
// Check whether "区" or "县" appears after the second "市".
addrAfterSecondCity := mbSubstr(addr, secondCityAbsPos+1, utf8.RuneCountInString(addr)-secondCityAbsPos-1)
if mbStrstr(addrAfterSecondCity, "区") || mbStrstr(addrAfterSecondCity, "县") {
// Text after the first "市" up to and including the second "市".
betweenCities := mbSubstr(addr, firstCityPos+1, secondCityAbsPos-firstCityPos)
// Guard against repeated place names (e.g. "北京市北京市" or "杭州市西湖区杭州市"):
// a "区" or "县" between the two "市" means repetition, not a county-level city.
if !mbStrstr(betweenCities, "区") && !mbStrstr(betweenCities, "县") {
// Everything up to and including the first "市".
firstCityFull := mbSubstr(addr, 0, firstCityPos+1)
if betweenCities != firstCityFull {
// Not a repeated name, so treat it as a county-level city.
a3 = betweenCities
deep3KeywordPos = secondCityAbsPos
hasCountyLevelCity = true
}
}
}
}
}
}
if !hasCountyLevelCity {
// The three checks below run in sequence, so a later match overwrites a3
// and deep3KeywordPos (县 wins over 区, which wins over 旗).
// Banner (旗): take the two runes ending at "旗".
if mbStrstr(addr, "旗") {
deep3KeywordPos = mbStrpos(addr, "旗")
a3 = mbSubstr(addr, deep3KeywordPos-1, 2)
}
// District (区).
if mbStrstr(addr, "区") {
// Use the first "区" (avoids interference from repeated names such as "西湖区杭州市西湖区").
deep3KeywordPos = mbStrpos(addr, "区")
if mbStrstr(addr, "市") {
// Strategy: use the last "市" before the "区", so that a "市" inside the
// street detail (e.g. "农贸市场") is ignored.
zonePos := deep3KeywordPos
// Look for the last "市" in the text preceding the "区".
addrBeforeZone := mbSubstr(addr, 0, zonePos)
cityPos := mbStrripos(addrBeforeZone, "市")
if cityPos != -1 {
a3 = mbSubstr(addr, cityPos+1, zonePos-cityPos)
} else {
// No "市" before the "区": fall back to the three runes ending at "区".
a3 = mbSubstr(addr, deep3KeywordPos-2, 3)
}
} else {
a3 = mbSubstr(addr, deep3KeywordPos-2, 3)
}
}
// County (县).
if mbStrstr(addr, "县") {
// Use the first "县" (avoids interference from repeated names).
deep3KeywordPos = mbStrpos(addr, "县")
if mbStrstr(addr, "市") {
// Look for the last "市" in the text preceding the "县".
addrBeforeCounty := mbSubstr(addr, 0, deep3KeywordPos)
cityPos := mbStrripos(addrBeforeCounty, "市")
if cityPos != -1 {
a3 = mbSubstr(addr, cityPos+1, deep3KeywordPos-cityPos)
} else {
if mbStrstr(addr, "自治县") {
// Autonomous county: take the seven runes ending at "县", then drop a
// leading 省/市/州 that belongs to the parent division.
a3 = mbSubstr(addr, deep3KeywordPos-6, 7)
firstChar := mbSubstr(a3, 0, 1)
if firstChar == "省" || firstChar == "市" || firstChar == "州" {
a3 = mbSubstr(a3, 1, utf8.RuneCountInString(a3)-1)
}
} else {
a3 = mbSubstr(addr, deep3KeywordPos-2, 3)
}
}
} else {
if mbStrstr(addr, "自治县") {
// Same autonomous-county handling as above.
a3 = mbSubstr(addr, deep3KeywordPos-6, 7)
firstChar := mbSubstr(a3, 0, 1)
if firstChar == "省" || firstChar == "市" || firstChar == "州" {
a3 = mbSubstr(a3, 1, utf8.RuneCountInString(a3)-1)
}
} else {
a3 = mbSubstr(addr, deep3KeywordPos-2, 3)
}
}
}
}
if deep3KeywordPos != -1 {
street = mbSubstr(addrOrigin, deep3KeywordPos+1, utf8.RuneCountInString(addrOrigin)-deep3KeywordPos-1)
}
} else {
// No 县/区/旗 at all: use the last "市" as the third-level marker.
// NOTE(review): the cityCount == 1 and cityCount >= 2 branches are identical.
if mbStrripos(addr, "市") != -1 {
cityCount := mbSubstrCount(addr, "市")
if cityCount == 1 {
deep3KeywordPos = mbStrripos(addr, "市")
a3 = mbSubstr(addr, deep3KeywordPos-2, 3)
street = mbSubstr(addrOrigin, deep3KeywordPos+1, utf8.RuneCountInString(addrOrigin)-deep3KeywordPos-1)
} else if cityCount >= 2 {
deep3KeywordPos = mbStrripos(addr, "市")
a3 = mbSubstr(addr, deep3KeywordPos-2, 3)
street = mbSubstr(addrOrigin, deep3KeywordPos+1, utf8.RuneCountInString(addrOrigin)-deep3KeywordPos-1)
}
} else {
// No marker of any kind: the whole normalized address is the street.
a3 = ""
street = addr
}
}
// Extract the city-level fragment.
if mbStrpos(addr, "市") != -1 || mbStrstr(addr, "盟") || mbStrstr(addr, "州") {
tmpPos := -1
if tmpPos = mbStrpos(addr, "市"); tmpPos != -1 {
// Use the first "市" (avoids repeated names such as "杭州市西湖区杭州市").
// Start right after the nearest preceding "省" if there is one, otherwise at the beginning.
addrBeforeCity := mbSubstr(addr, 0, tmpPos)
provincePos := mbStrripos(addrBeforeCity, "省")
startPos := 0
if provincePos != -1 {
startPos = provincePos + 1
}
a2 = mbSubstr(addr, startPos, tmpPos-startPos+1)
} else if tmpPos = mbStrpos(addr, "盟"); tmpPos != -1 {
a2 = mbSubstr(addr, tmpPos-2, 3)
} else if mbStrpos(addr, "州") != -1 {
if tmpPos = mbStrpos(addr, "自治州"); tmpPos != -1 {
a2 = mbSubstr(addr, tmpPos-4, 5)
} else {
tmpPos = mbStrpos(addr, "州")
a2 = mbSubstr(addr, tmpPos-2, 3)
}
}
}
return &fuzzyResult{
A1: a1,
A2: a2,
A3: a3,
Street: street,
}
}
// parse resolves the fuzzy city fragment a2 and region fragment a3 against the
// package's division tables (A1Data, A2Data, A3Data) and returns the matching
// Province, City and Region names. a1 is accepted for symmetry with
// fuzzyResult but is not consulted.
//
// Matching is by substring: a table entry matches when its Name contains the
// fragment. The outcome depends on how many third-level entries match a3:
//   - exactly one: that region and its parent city/province are returned;
//   - several, a2 given: the match whose parent city's name contains a2 wins;
//   - several, a2 empty: only Region is set, to the raw a3 fragment;
//   - none, and a2 == a3: a2 is looked up as a city and Region stays empty.
//
// Whenever more than one entry qualifies, the one with the lowest ID is
// chosen, so the result does not depend on map iteration order.
func parse(a1, a2, a3 string) *AddressInfo {
	r := &AddressInfo{}
	if a3 == "" {
		return r
	}

	// Collect every third-level region whose name contains a3.
	candidates := make(map[int]*Region)
	for id, region := range A3Data {
		if mbStrstr(region.Name, a3) {
			candidates[id] = region
		}
	}

	switch {
	case len(candidates) > 1:
		if a2 == "" {
			// Nothing to disambiguate with: report the raw fragment only.
			r.Region = a3
			return r
		}
		// Keep the lowest-ID candidate whose parent city's name contains a2.
		bestID, found := 0, false
		for id, region := range candidates {
			if found && id > bestID {
				continue
			}
			city, ok := A2Data[region.PID]
			if !ok || !mbStrstr(city.Name, a2) {
				continue
			}
			bestID, found = id, true
			r.City = city.Name
			r.Region = region.Name
			r.Province = ""
			if province, ok := A1Data[city.PID]; ok {
				r.Province = province.Name
			}
		}

	case len(candidates) == 1:
		for _, region := range candidates {
			r.Region = region.Name
			if city, ok := A2Data[region.PID]; ok {
				r.City = city.Name
				if province, ok := A1Data[city.PID]; ok {
					r.Province = province.Name
				}
			}
		}

	case a2 == a3:
		// No third-level match, but the city and region fragments coincide:
		// treat the fragment as a city (e.g. a municipality) and leave Region empty.
		bestID, provinceID, found := 0, 0, false
		for id, city := range A2Data {
			if !mbStrstr(city.Name, a2) || (found && id > bestID) {
				continue
			}
			bestID, provinceID, found = id, city.PID, true
			r.City = city.Name
		}
		if found {
			if province, ok := A1Data[provinceID]; ok {
				r.Province = province.Name
			}
		}
	}

	return r
}

4470
formatter/address_data.go Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,75 @@
package formatter
import (
"strings"
"unicode/utf8"
)
// mbStrpos returns the position of the first occurrence of needle in haystack,
// counted in UTF-8 characters (runes) rather than bytes. It returns -1 when
// needle does not occur and 0 when needle is empty.
func mbStrpos(haystack, needle string) int {
	// strings.Index already reports 0 for an empty needle, so no special case is needed.
	byteIdx := strings.Index(haystack, needle)
	if byteIdx < 0 {
		return -1
	}
	return utf8.RuneCountInString(haystack[:byteIdx])
}
// mbStrripos returns the position of the last occurrence of needle in
// haystack, counted in UTF-8 characters (runes). It returns -1 when needle does
// not occur; for an empty needle it returns the rune length of haystack.
func mbStrripos(haystack, needle string) int {
	// strings.LastIndex reports len(haystack) for an empty needle, which the
	// rune count below turns into the rune length of haystack.
	byteIdx := strings.LastIndex(haystack, needle)
	if byteIdx < 0 {
		return -1
	}
	return utf8.RuneCountInString(haystack[:byteIdx])
}
// mbStrstr reports whether haystack contains needle.
func mbStrstr(haystack, needle string) bool {
	found := strings.Index(haystack, needle) >= 0
	return found
}
// mbSubstr returns the part of str that starts at rune offset start and is at
// most length runes long.
//
// start is zero-based; a negative start counts back from the end of str and is
// clamped to the beginning. An empty string is returned when start lies at or
// beyond the end of str, or when length is negative.
func mbSubstr(str string, start, length int) string {
	chars := []rune(str)
	total := len(chars)

	// Negative offsets are relative to the end, never before the beginning.
	if start < 0 {
		if start += total; start < 0 {
			start = 0
		}
	}
	if start >= total {
		return ""
	}

	stop := start + length
	switch {
	case stop < start:
		return ""
	case stop > total:
		stop = total
	}
	return string(chars[start:stop])
}
// mbSubstrCount returns the number of non-overlapping occurrences of needle in
// haystack. An empty needle counts as zero occurrences.
func mbSubstrCount(haystack, needle string) int {
	if len(needle) > 0 {
		return strings.Count(haystack, needle)
	}
	return 0
}

360
formatter/address_test.go Normal file
View File

@@ -0,0 +1,360 @@
package formatter
import (
"encoding/json"
"testing"
)
// TestParseCNAddress runs ParseCNAddress over a table of representative
// addresses. A field is only compared when the expected value is non-empty, so
// a case may leave fields it does not care about unset.
func TestParseCNAddress(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		withUser bool
		want     *AddressInfo
	}{
		{
			name:     "完整地址信息",
			input:    "张三 13800138000 北京市朝阳区建国路1号",
			withUser: true,
			want: &AddressInfo{
				Name:     "张三",
				Mobile:   "13800138000",
				Province: "北京",
				City:     "北京市",
				Region:   "朝阳区",
				Street:   "建国路1号",
			},
		},
		{
			name:     "带身份证和邮编",
			input:    "李四 18612345678 110101199001011234 100000 上海市浦东新区世纪大道100号",
			withUser: true,
			want: &AddressInfo{
				Name:     "李四",
				Mobile:   "18612345678",
				IDN:      "110101199001011234",
				Postcode: "100000",
			},
		},
		{
			name:     "仅地址不含用户信息",
			input:    "北京市海淀区中关村大街1号",
			withUser: false,
			want: &AddressInfo{
				Province: "北京",
				City:     "北京市",
				Region:   "海淀区",
				Street:   "中关村大街1号",
			},
		},
		{
			name: "带收货关键词",
			// Every label is followed by a colon so the labelled-field path is exercised.
			input:    "收货人:王五 电话:13900139000 收货地址:天津市河西区友谊路20号",
			withUser: true,
			want: &AddressInfo{
				Name:     "王五",
				Mobile:   "13900139000",
				Province: "天津",
				City:     "天津市",
				Region:   "河西区",
			},
		},
		{
			name:     "紧凑格式地址",
			input:    "马云13593464918陕西省西安市雁塔区丈八沟街道高新四路南江国际",
			withUser: true,
			want: &AddressInfo{
				Name:     "马云",
				Mobile:   "13593464918",
				Province: "陕西省",
				City:     "西安市",
				Region:   "雁塔区",
				Street:   "丈八沟街道高新四路南江国际",
			},
		},
		{
			name: "带座机号格式",
			// The hyphenated landline is only preserved when it follows a
			// "label:" prefix, so the colon after 联系电话 is required.
			input:    "姓名:马云\n联系电话:800-8585222\n所在地区:河北省石家庄市新华区\n详细地址:中华北大街68号鹿城商务中心6号楼1413室",
			withUser: true,
			want: &AddressInfo{
				Name:     "马云",
				Mobile:   "800-8585222",
				Province: "河北省",
				City:     "石家庄市",
				Region:   "新华区",
				Street:   "中华北大街68号鹿城商务中心6号楼1413室",
			},
		},
		{
			name:     "北京市重复格式",
			input:    "北京市北京市市辖区东城区",
			withUser: false,
			want: &AddressInfo{
				Province: "北京",
				City:     "北京市",
				Region:   "东城区",
				Street:   "",
			},
		},
		{
			name:     "河北省新乐市地址",
			input:    "河北省石家庄市新乐市经济开发区兴工街10号来优品仓库",
			withUser: false,
			want: &AddressInfo{
				Province: "河北省",
				City:     "石家庄市",
				Region:   "新乐市",
				Street:   "经济开发区兴工街10号来优品仓库",
			},
		},
		{
			name:     "江苏仪征市地址",
			input:    "江苏省扬州市仪征市真州镇解放东路99号",
			withUser: false,
			want: &AddressInfo{
				Province: "江苏省",
				City:     "扬州市",
				Region:   "仪征市",
				Street:   "真州镇解放东路99号",
			},
		},
		{
			name:     "新疆石河子市地址",
			input:    "新疆石河子市北三路25小区",
			withUser: false,
			want: &AddressInfo{
				Province: "新疆维吾尔自治区",
				City:     "自治区直辖县级市",
				Region:   "石河子市",
			},
		},
		{
			name:     "新疆石河子市-简化格式省+县级市",
			input:    "新疆维吾尔自治区石河子市",
			withUser: false,
			want: &AddressInfo{
				Province: "新疆维吾尔自治区",
				City:     "自治区直辖县级市",
				Region:   "石河子市",
				Street:   "",
			},
		},
		{
			name:     "新疆石河子市-完整行政区划表述",
			input:    "新疆维吾尔自治区自治区直辖县级市石河子市",
			withUser: false,
			want: &AddressInfo{
				Province: "新疆维吾尔自治区",
				City:     "自治区直辖县级市",
				Region:   "石河子市",
				Street:   "",
			},
		},
		{
			name:     "浙江杭州西湖区重复地址",
			input:    "浙江省杭州市西湖区杭州市西湖区人民政府109号",
			withUser: false,
			want: &AddressInfo{
				Province: "浙江省",
				City:     "杭州市",
				Region:   "西湖区",
				Street:   "人民政府109号",
			},
		},
		{
			name:     "湖南长沙市重复地址",
			input:    "湖南省长沙市岳麓区银盆岭街道长沙市人民政府长沙市政府大楼",
			withUser: false,
			want: &AddressInfo{
				Province: "湖南省",
				City:     "长沙市",
				Region:   "岳麓区",
				Street:   "银盆岭街道人民政府政府大楼",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := ParseCNAddress(tt.input, tt.withUser)

			// Log the full result to ease debugging.
			jsonData, err := json.MarshalIndent(got, "", " ")
			if err != nil {
				t.Fatalf("marshal result: %v", err)
			}
			t.Logf("Result: %s", jsonData)

			// Compare every field the case specifies; empty expectations are skipped.
			fields := []struct {
				label, got, want string
			}{
				{"Name", got.Name, tt.want.Name},
				{"Mobile", got.Mobile, tt.want.Mobile},
				{"IDN", got.IDN, tt.want.IDN},
				{"Postcode", got.Postcode, tt.want.Postcode},
				{"Province", got.Province, tt.want.Province},
				{"City", got.City, tt.want.City},
				{"Region", got.Region, tt.want.Region},
				{"Street", got.Street, tt.want.Street},
			}
			for _, f := range fields {
				if f.want != "" && f.got != f.want {
					t.Errorf("%s = %v, want %v", f.label, f.got, f.want)
				}
			}
		})
	}
}
// TestParsePersonInfo checks that ParsePersonInfo pulls the name, phone number,
// ID card number and postcode out of space-separated input. Each case supplies
// its own verification callback.
func TestParsePersonInfo(t *testing.T) {
	cases := []struct {
		name   string
		input  string
		verify func(*testing.T, *AddressInfo)
	}{
		{
			name:  "提取姓名和手机号",
			input: "张三 13800138000 北京市朝阳区",
			verify: func(t *testing.T, info *AddressInfo) {
				if name := info.Name; name != "张三" {
					t.Errorf("Name = %v, want 张三", name)
				}
				if mobile := info.Mobile; mobile != "13800138000" {
					t.Errorf("Mobile = %v, want 13800138000", mobile)
				}
			},
		},
		{
			name:  "提取身份证号",
			input: "李四 110101199001011234 上海市",
			verify: func(t *testing.T, info *AddressInfo) {
				if name := info.Name; name != "李四" {
					t.Errorf("Name = %v, want 李四", name)
				}
				if idn := info.IDN; idn != "110101199001011234" {
					t.Errorf("IDN = %v, want 110101199001011234", idn)
				}
			},
		},
		{
			name:  "提取邮编",
			input: "王五 100000 天津市",
			verify: func(t *testing.T, info *AddressInfo) {
				if name := info.Name; name != "王五" {
					t.Errorf("Name = %v, want 王五", name)
				}
				if postcode := info.Postcode; postcode != "100000" {
					t.Errorf("Postcode = %v, want 100000", postcode)
				}
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			info := ParsePersonInfo(tc.input)
			// The marshal error is ignored: the output is only logged for debugging.
			dump, _ := json.MarshalIndent(info, "", " ")
			t.Logf("Result: %s", dump)
			tc.verify(t, info)
		})
	}
}
// TestFuzz checks the city (A2) and district (A3) fragments that fuzz extracts.
// Street is only compared when the case specifies a non-empty expectation.
func TestFuzz(t *testing.T) {
	cases := []struct {
		name  string
		input string
		want  *fuzzyResult
	}{
		{
			name:  "包含区",
			input: "北京市朝阳区建国路1号",
			want: &fuzzyResult{
				A2:     "北京市",
				A3:     "朝阳区",
				Street: "建国路1号",
			},
		},
		{
			name:  "包含县",
			input: "河北省石家庄市正定县",
			want: &fuzzyResult{
				A2: "石家庄市",
				A3: "正定县",
			},
		},
		{
			name:  "复杂街道地址",
			input: "浙江省杭州市拱墅区武林街道杭州锦麟宾馆中河片区",
			want: &fuzzyResult{
				A2:     "杭州市",
				A3:     "拱墅区",
				Street: "武林街道杭州锦麟宾馆中河片区",
			},
		},
		{
			name:  "北京市重复格式",
			input: "北京市北京市市辖区东城区",
			want: &fuzzyResult{
				A2:     "北京市",
				A3:     "东城区",
				Street: "",
			},
		},
		{
			name:  "详细地址包含市字",
			input: "北京市朝阳区建外大街1号国贸商城",
			want: &fuzzyResult{
				A2:     "北京市",
				A3:     "朝阳区",
				Street: "建外大街1号国贸商城",
			},
		},
		{
			name:  "详细地址真的包含市字",
			input: "北京市朝阳区农贸市场路1号",
			want: &fuzzyResult{
				A2:     "北京市",
				A3:     "朝阳区",
				Street: "农贸市场路1号",
			},
		},
		{
			name:  "河北省新乐市地址",
			input: "河北省石家庄市新乐市经济开发区兴工街10号来优品仓库",
			want: &fuzzyResult{
				A2:     "石家庄市",
				A3:     "新乐市",
				Street: "经济开发区兴工街10号来优品仓库",
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			res := fuzz(tc.input)
			// The marshal error is ignored: the output is only logged for debugging.
			dump, _ := json.MarshalIndent(res, "", " ")
			t.Logf("Result: %s", dump)

			if res.A2 != tc.want.A2 {
				t.Errorf("A2 = %v, want %v", res.A2, tc.want.A2)
			}
			if res.A3 != tc.want.A3 {
				t.Errorf("A3 = %v, want %v", res.A3, tc.want.A3)
			}
			if tc.want.Street == "" {
				return
			}
			if res.Street != tc.want.Street {
				t.Errorf("Street = %v, want %v", res.Street, tc.want.Street)
			}
		})
	}
}
// ExampleParseCNAddress parses a full address that also carries the
// recipient's name and phone number, and prints the result as indented JSON.
func ExampleParseCNAddress() {
	info := ParseCNAddress("张三 13800138000 北京市朝阳区建国路1号", true)
	// The marshal error is ignored: the example only prints the result.
	out, _ := json.MarshalIndent(info, "", " ")
	println(string(out))
}
// ExampleParsePersonInfo separates the recipient's details from the address
// text and prints the result as indented JSON.
func ExampleParsePersonInfo() {
	info := ParsePersonInfo("收货人:李四 电话18612345678 地址上海市浦东新区世纪大道100号")
	// The marshal error is ignored: the example only prints the result.
	out, _ := json.MarshalIndent(info, "", " ")
	println(string(out))
}