1
0
mirror of https://github.com/duke-git/lancet.git synced 2026-02-13 17:22:27 +08:00

feat: add address.Smart and Decompose for parsing CN addresses (#346)

* feat: add address.Smart and Decompose for parsing CN addresses

* feat: add Xinjiang directly-administered county-level cities support

- Add '自治区直辖县级市' as a city-level unit (ID: 4043) in A2Data for Xinjiang
- Add 12 directly-administered county-level cities in A3Data (IDs: 4044-4055):
  * 石河子市 (Shihezi, 1976, 8th Division) - ID: 4044
  * 阿拉尔市 (Aral, 2002, 1st Division) - ID: 4045
  * 图木舒克市 (Tumxuk, 2002, 3rd Division) - ID: 4046
  * 五家渠市 (Wujiaqu, 2002, 6th Division) - ID: 4047
  * 北屯市 (Beitun, 2011, 10th Division) - ID: 4048
  * 铁门关市 (Tiemenguan, 2012, 2nd Division) - ID: 4049
  * 双河市 (Shuanghe, 2014, 5th Division) - ID: 4050
  * 可克达拉市 (Kokdala, 2015, 4th Division) - ID: 4051
  * 昆玉市 (Kunyu, 2016, 14th Division) - ID: 4052
  * 胡杨河市 (Huyanghe, 2019, 7th Division) - ID: 4053
  * 新星市 (Xinxing, 2021, 13th Division) - ID: 4054
  * 白杨市 (Baiyang, 2023, 9th Division) - ID: 4055
- All county-level cities are under PID 4043 (自治区直辖县级市)
- Add test case for Xinjiang Shihezi city address parsing
- Now supports parsing addresses like: 新疆石河子市北三路25小区

* docs: formatted address data

* fix: error when parsing repeated addresses

* feat: update readme file

---------

Co-authored-by: Jiawen <im@linjiawen.com>
This commit is contained in:
Javen
2026-01-13 14:00:44 +08:00
committed by GitHub
parent a1cebec9f2
commit b3fd282b50
8 changed files with 5679 additions and 0 deletions

360
formatter/address_test.go Normal file
View File

@@ -0,0 +1,360 @@
package formatter
import (
"encoding/json"
"testing"
)
// TestParseCNAddress runs ParseCNAddress over a table of Chinese address
// strings and compares the parsed fields with the expected ones.
//
// A field left empty in a case's want value is treated as "not asserted", so
// each case pins only the fields it cares about. Every populated field is
// compared: Name, Mobile, IDN, Postcode, Province, City, Region and Street.
func TestParseCNAddress(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		withUser bool
		want     *AddressInfo
	}{
		{
			name:     "完整地址信息",
			input:    "张三 13800138000 北京市朝阳区建国路1号",
			withUser: true,
			want: &AddressInfo{
				Name:     "张三",
				Mobile:   "13800138000",
				Province: "北京",
				City:     "北京市",
				Region:   "朝阳区",
				Street:   "建国路1号",
			},
		},
		{
			name:     "带身份证和邮编",
			input:    "李四 18612345678 110101199001011234 100000 上海市浦东新区世纪大道100号",
			withUser: true,
			want: &AddressInfo{
				Name:     "李四",
				Mobile:   "18612345678",
				IDN:      "110101199001011234",
				Postcode: "100000",
			},
		},
		{
			name:     "仅地址不含用户信息",
			input:    "北京市海淀区中关村大街1号",
			withUser: false,
			want: &AddressInfo{
				Province: "北京",
				City:     "北京市",
				Region:   "海淀区",
				Street:   "中关村大街1号",
			},
		},
		{
			name:     "带收货关键词",
			input:    "收货人:王五 电话13900139000 收货地址天津市河西区友谊路20号",
			withUser: true,
			want: &AddressInfo{
				Name:     "王五",
				Mobile:   "13900139000",
				Province: "天津",
				City:     "天津市",
				Region:   "河西区",
			},
		},
		{
			name:     "紧凑格式地址",
			input:    "马云13593464918陕西省西安市雁塔区丈八沟街道高新四路南江国际",
			withUser: true,
			want: &AddressInfo{
				Name:     "马云",
				Mobile:   "13593464918",
				Province: "陕西省",
				City:     "西安市",
				Region:   "雁塔区",
				Street:   "丈八沟街道高新四路南江国际",
			},
		},
		{
			name:     "带座机号格式",
			input:    "姓名:马云\n联系电话800-8585222\n所在地区河北省石家庄市新华区\n详细地址:中华北大街68号鹿城商务中心6号楼1413室",
			withUser: true,
			want: &AddressInfo{
				Name:     "马云",
				Mobile:   "800-8585222",
				Province: "河北省",
				City:     "石家庄市",
				Region:   "新华区",
				Street:   "中华北大街68号鹿城商务中心6号楼1413室",
			},
		},
		{
			name:     "北京市重复格式",
			input:    "北京市北京市市辖区东城区",
			withUser: false,
			want: &AddressInfo{
				Province: "北京",
				City:     "北京市",
				Region:   "东城区",
				Street:   "",
			},
		},
		{
			name:     "河北省新乐市地址",
			input:    "河北省石家庄市新乐市经济开发区兴工街10号来优品仓库",
			withUser: false,
			want: &AddressInfo{
				Province: "河北省",
				City:     "石家庄市",
				Region:   "新乐市",
				Street:   "经济开发区兴工街10号来优品仓库",
			},
		},
		{
			name:     "江苏仪征市地址",
			input:    "江苏省扬州市仪征市真州镇解放东路99号",
			withUser: false,
			want: &AddressInfo{
				Province: "江苏省",
				City:     "扬州市",
				Region:   "仪征市",
				Street:   "真州镇解放东路99号",
			},
		},
		{
			name:     "新疆石河子市地址",
			input:    "新疆石河子市北三路25小区",
			withUser: false,
			want: &AddressInfo{
				Province: "新疆维吾尔自治区",
				City:     "自治区直辖县级市",
				Region:   "石河子市",
			},
		},
		{
			name:     "新疆石河子市-简化格式省+县级市",
			input:    "新疆维吾尔自治区石河子市",
			withUser: false,
			want: &AddressInfo{
				Province: "新疆维吾尔自治区",
				City:     "自治区直辖县级市",
				Region:   "石河子市",
				Street:   "",
			},
		},
		{
			name:     "新疆石河子市-完整行政区划表述",
			input:    "新疆维吾尔自治区自治区直辖县级市石河子市",
			withUser: false,
			want: &AddressInfo{
				Province: "新疆维吾尔自治区",
				City:     "自治区直辖县级市",
				Region:   "石河子市",
				Street:   "",
			},
		},
		{
			name:     "浙江杭州西湖区重复地址",
			input:    "浙江省杭州市西湖区杭州市西湖区人民政府109号",
			withUser: false,
			want: &AddressInfo{
				Province: "浙江省",
				City:     "杭州市",
				Region:   "西湖区",
				Street:   "人民政府109号",
			},
		},
		{
			name:     "湖南长沙市重复地址",
			input:    "湖南省长沙市岳麓区银盆岭街道长沙市人民政府长沙市政府大楼",
			withUser: false,
			want: &AddressInfo{
				Province: "湖南省",
				City:     "长沙市",
				Region:   "岳麓区",
				Street:   "银盆岭街道人民政府政府大楼",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := ParseCNAddress(tt.input, tt.withUser)

			// Log the parsed result to make failures easier to debug. A marshal
			// failure is reported but does not fail the case, because the JSON
			// is diagnostic output only.
			if jsonData, err := json.MarshalIndent(got, "", " "); err != nil {
				t.Logf("marshal result: %v", err)
			} else {
				t.Logf("Result: %s", jsonData)
			}

			// Person-related fields.
			if tt.want.Name != "" && got.Name != tt.want.Name {
				t.Errorf("Name = %v, want %v", got.Name, tt.want.Name)
			}
			if tt.want.Mobile != "" && got.Mobile != tt.want.Mobile {
				t.Errorf("Mobile = %v, want %v", got.Mobile, tt.want.Mobile)
			}
			if tt.want.IDN != "" && got.IDN != tt.want.IDN {
				t.Errorf("IDN = %v, want %v", got.IDN, tt.want.IDN)
			}
			if tt.want.Postcode != "" && got.Postcode != tt.want.Postcode {
				t.Errorf("Postcode = %v, want %v", got.Postcode, tt.want.Postcode)
			}

			// Administrative divisions and the remaining street part.
			if tt.want.Province != "" && got.Province != tt.want.Province {
				t.Errorf("Province = %v, want %v", got.Province, tt.want.Province)
			}
			if tt.want.City != "" && got.City != tt.want.City {
				t.Errorf("City = %v, want %v", got.City, tt.want.City)
			}
			if tt.want.Region != "" && got.Region != tt.want.Region {
				t.Errorf("Region = %v, want %v", got.Region, tt.want.Region)
			}
			if tt.want.Street != "" && got.Street != tt.want.Street {
				t.Errorf("Street = %v, want %v", got.Street, tt.want.Street)
			}
		})
	}
}
// TestParsePersonInfo feeds ParsePersonInfo strings that mix a person's
// details with an address and checks the extracted name, mobile number,
// ID number and postcode. Each case carries its own verify callback.
func TestParsePersonInfo(t *testing.T) {
	type personCase struct {
		name   string
		input  string
		verify func(*testing.T, *AddressInfo)
	}

	cases := []personCase{
		{
			name:  "提取姓名和手机号",
			input: "张三 13800138000 北京市朝阳区",
			verify: func(t *testing.T, info *AddressInfo) {
				if info.Name != "张三" {
					t.Errorf("Name = %v, want 张三", info.Name)
				}
				if info.Mobile != "13800138000" {
					t.Errorf("Mobile = %v, want 13800138000", info.Mobile)
				}
			},
		},
		{
			name:  "提取身份证号",
			input: "李四 110101199001011234 上海市",
			verify: func(t *testing.T, info *AddressInfo) {
				if info.Name != "李四" {
					t.Errorf("Name = %v, want 李四", info.Name)
				}
				if info.IDN != "110101199001011234" {
					t.Errorf("IDN = %v, want 110101199001011234", info.IDN)
				}
			},
		},
		{
			name:  "提取邮编",
			input: "王五 100000 天津市",
			verify: func(t *testing.T, info *AddressInfo) {
				if info.Name != "王五" {
					t.Errorf("Name = %v, want 王五", info.Name)
				}
				if info.Postcode != "100000" {
					t.Errorf("Postcode = %v, want 100000", info.Postcode)
				}
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			parsed := ParsePersonInfo(tc.input)

			// The JSON dump is only logged for debugging, so a marshal error
			// is ignored here.
			dump, _ := json.MarshalIndent(parsed, "", " ")
			t.Logf("Result: %s", dump)

			tc.verify(t, parsed)
		})
	}
}
// TestFuzz is a table-driven test of the unexported fuzz helper. For each
// input it always compares the A2 and A3 fields of the result, and compares
// Street only when the case specifies a non-empty expected street.
func TestFuzz(t *testing.T) {
	cases := []struct {
		name  string
		input string
		want  *fuzzyResult
	}{
		{
			name:  "包含区",
			input: "北京市朝阳区建国路1号",
			want: &fuzzyResult{
				A2:     "北京市",
				A3:     "朝阳区",
				Street: "建国路1号",
			},
		},
		{
			name:  "包含县",
			input: "河北省石家庄市正定县",
			want: &fuzzyResult{
				A2: "石家庄市",
				A3: "正定县",
			},
		},
		{
			name:  "复杂街道地址",
			input: "浙江省杭州市拱墅区武林街道杭州锦麟宾馆中河片区",
			want: &fuzzyResult{
				A2:     "杭州市",
				A3:     "拱墅区",
				Street: "武林街道杭州锦麟宾馆中河片区",
			},
		},
		{
			name:  "北京市重复格式",
			input: "北京市北京市市辖区东城区",
			want: &fuzzyResult{
				A2:     "北京市",
				A3:     "东城区",
				Street: "",
			},
		},
		{
			name:  "详细地址包含市字",
			input: "北京市朝阳区建外大街1号国贸商城",
			want: &fuzzyResult{
				A2:     "北京市",
				A3:     "朝阳区",
				Street: "建外大街1号国贸商城",
			},
		},
		{
			name:  "详细地址真的包含市字",
			input: "北京市朝阳区农贸市场路1号",
			want: &fuzzyResult{
				A2:     "北京市",
				A3:     "朝阳区",
				Street: "农贸市场路1号",
			},
		},
		{
			name:  "河北省新乐市地址",
			input: "河北省石家庄市新乐市经济开发区兴工街10号来优品仓库",
			want: &fuzzyResult{
				A2:     "石家庄市",
				A3:     "新乐市",
				Street: "经济开发区兴工街10号来优品仓库",
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			res := fuzz(tc.input)

			// The JSON dump is only logged for debugging, so a marshal error
			// is ignored here.
			dump, _ := json.MarshalIndent(res, "", " ")
			t.Logf("Result: %s", dump)

			if want := tc.want.A2; res.A2 != want {
				t.Errorf("A2 = %v, want %v", res.A2, want)
			}
			if want := tc.want.A3; res.A3 != want {
				t.Errorf("A3 = %v, want %v", res.A3, want)
			}
			// An empty expected street means the case does not assert it.
			if want := tc.want.Street; want != "" && res.Street != want {
				t.Errorf("Street = %v, want %v", res.Street, want)
			}
		})
	}
}
// ExampleParseCNAddress parses a complete address string that also carries
// user information (name and phone number) and prints the result as
// indented JSON using the builtin println.
//
// NOTE(review): the function declares no expected-output comment, so the go
// tool compiles this example but does not run it.
func ExampleParseCNAddress() {
	const raw = "张三 13800138000 北京市朝阳区建国路1号"

	// The second argument is true because the input includes user details.
	parsed := ParseCNAddress(raw, true)
	pretty, _ := json.MarshalIndent(parsed, "", " ")
	println(string(pretty))
}
// ExampleParsePersonInfo passes a labelled recipient/phone/address string to
// ParsePersonInfo to separate out the user information, then prints the
// result as indented JSON using the builtin println.
//
// NOTE(review): the function declares no expected-output comment, so the go
// tool compiles this example but does not run it.
func ExampleParsePersonInfo() {
	const raw = "收货人:李四 电话18612345678 地址上海市浦东新区世纪大道100号"

	person := ParsePersonInfo(raw)
	pretty, _ := json.MarshalIndent(person, "", " ")
	println(string(pretty))
}