1
0
mirror of https://github.com/duke-git/lancet.git synced 2026-02-08 06:32:28 +08:00

feat: add address.Smart and Decompose for parse CN address (#346)

* feat: add address.Smart and Decompose for parse CN address

* feat: add Xinjiang directly-administered county-level cities support

- Add '自治区直辖县级市' as a city-level unit (ID: 4043) in A2Data for Xinjiang
- Add 12 directly-administered county-level cities in A3Data (IDs: 4044-4055):
  * 石河子市 (Shihezi, 1976, 8th Division) - ID: 4044
  * 阿拉尔市 (Aral, 2002, 1st Division) - ID: 4045
  * 图木舒克市 (Tumxuk, 2002, 3rd Division) - ID: 4046
  * 五家渠市 (Wujiaqu, 2002, 6th Division) - ID: 4047
  * 北屯市 (Beitun, 2011, 10th Division) - ID: 4048
  * 铁门关市 (Tiemenguan, 2012, 2nd Division) - ID: 4049
  * 双河市 (Shuanghe, 2014, 5th Division) - ID: 4050
  * 可克达拉市 (Kokdala, 2015, 4th Division) - ID: 4051
  * 昆玉市 (Kunyu, 2016, 14th Division) - ID: 4052
  * 胡杨河市 (Huyanghe, 2019, 7th Division) - ID: 4053
  * 新星市 (Xinxing, 2021, 13th Division) - ID: 4054
  * 白杨市 (Baiyang, 2023, 9th Division) - ID: 4055
- All county-level cities are under PID 4043 (自治区直辖县级市)
- Add test case for Xinjiang Shihezi city address parsing
- Now supports parsing addresses like: 新疆石河子市北三路25小区

* docs: formated address data

* fix: parse repeat address error

* feat: update readme file

---------

Co-authored-by: Jiawen <im@linjiawen.com>
This commit is contained in:
Javen
2026-01-13 14:00:44 +08:00
committed by GitHub
parent a1cebec9f2
commit b3fd282b50
8 changed files with 5679 additions and 0 deletions

View File

@@ -8,6 +8,7 @@ formatter 格式化器包含一些数据格式化处理方法。
- [https://github.com/duke-git/lancet/blob/main/formatter/formatter.go](https://github.com/duke-git/lancet/blob/main/formatter/formatter.go)
- [https://github.com/duke-git/lancet/blob/main/formatter/byte.go](https://github.com/duke-git/lancet/blob/main/formatter/byte.go)
- [https://github.com/duke-git/lancet/blob/main/formatter/address.go](https://github.com/duke-git/lancet/blob/main/formatter/address.go)
<div STYLE="page-break-after: always;"></div>
@@ -30,6 +31,8 @@ import (
- [BinaryBytes](#BinaryBytes)
- [ParseDecimalBytes](#ParseDecimalBytes)
- [ParseBinaryBytes](#ParseBinaryBytes)
- [ParseCNAddress](#ParseCNAddress)
- [ParsePersonInfo](#ParsePersonInfo)
<div STYLE="page-break-after: always;"></div>
@@ -308,3 +311,134 @@ func main() {
// 12492
}
```
### <span id="ParseCNAddress">ParseCNAddress</span>
<p>智能解析中国地址字符串并提取结构化信息。可以解析带或不带用户信息(姓名、电话、身份证等)的地址。当 withUser 为 true 时,从地址字符串中提取用户信息。当 withUser 为 false 时,仅解析位置信息。支持多种地址格式:标准格式、紧凑格式、带关键词格式、县级市格式等。</p>
<b>函数签名:</b>
```go
func ParseCNAddress(str string, withUser bool) *AddressInfo
```
<b>示例:<span style="float:right;display:inline-block;">[运行](https://go.dev/play/p/o5l09hQopEV)</span></b>
```go
package main
import (
"encoding/json"
"fmt"
"github.com/duke-git/lancet/v2/formatter"
)
func main() {
// 解析包含用户信息的完整地址
result1 := formatter.ParseCNAddress("张三 13800138000 北京市朝阳区建国路1号", true)
jsonData1, _ := json.MarshalIndent(result1, "", " ")
fmt.Println("示例 1 - 带用户信息:")
fmt.Println(string(jsonData1))
// 仅解析地址,不提取用户信息
result2 := formatter.ParseCNAddress("北京市海淀区中关村大街1号", false)
fmt.Printf("\n示例 2 - 仅地址:\n")
fmt.Printf("省: %s, 市: %s, 区: %s, 街道: %s\n",
result2.Province, result2.City, result2.Region, result2.Street)
// 解析县级市地址
result3 := formatter.ParseCNAddress("河北省石家庄市新乐市经济开发区兴工街10号", false)
fmt.Printf("\n示例 3 - 县级市:\n")
fmt.Printf("省: %s, 市: %s, 区/县: %s, 街道: %s\n",
result3.Province, result3.City, result3.Region, result3.Street)
// 紧凑格式
result4 := formatter.ParseCNAddress("马云13593464918陕西省西安市雁塔区丈八沟街道", true)
fmt.Printf("\n示例 4 - 紧凑格式:\n")
fmt.Printf("姓名: %s, 电话: %s, 地址: %s%s%s%s\n",
result4.Name, result4.Mobile, result4.Province, result4.City, result4.Region, result4.Street)
// Output:
// 示例 1 - 带用户信息:
// {
// "name": "张三",
// "mobile": "13800138000",
// "idn": "",
// "postcode": "",
// "province": "北京",
// "city": "北京市",
// "region": "朝阳区",
// "street": "建国路1号",
// "addr": "北京市朝阳区建国路1号"
// }
//
// 示例 2 - 仅地址:
// 省: 北京, 市: 北京市, 区: 海淀区, 街道: 中关村大街1号
//
// 示例 3 - 县级市:
// 省: 河北省, 市: 石家庄市, 区/县: 新乐市, 街道: 经济开发区兴工街10号
//
// 示例 4 - 紧凑格式:
// 姓名: 马云, 电话: 13593464918, 地址: 陕西省西安市雁塔区丈八沟街道
}
```
### <span id="ParsePersonInfo">ParsePersonInfo</span>
<p>从地址字符串中提取用户信息(姓名、电话、身份证、邮编)。将个人信息与地址分离,支持带标签格式、紧凑格式、带分隔符格式。返回包含提取的用户信息和清理后地址字符串的 AddressInfo。</p>
<b>函数签名:</b>
```go
func ParsePersonInfo(str string) *AddressInfo
```
<b>示例:<span style="float:right;display:inline-block;">[运行](https://go.dev/play/p/JO-uTlJlTy7)</span></b>
```go
package main
import (
"encoding/json"
"fmt"
"github.com/duke-git/lancet/v2/formatter"
)
func main() {
// 提取姓名和手机号
result1 := formatter.ParsePersonInfo("张三 13800138000 北京市朝阳区")
fmt.Println("示例 1 - 姓名和手机号:")
fmt.Printf("姓名: %s, 手机: %s, 地址: %s\n", result1.Name, result1.Mobile, result1.Addr)
// 提取身份证号
result2 := formatter.ParsePersonInfo("李四 110101199001011234 上海市")
fmt.Println("\n示例 2 - 身份证号:")
fmt.Printf("姓名: %s, 身份证: %s, 地址: %s\n", result2.Name, result2.IDN, result2.Addr)
// 带标签格式
result3 := formatter.ParsePersonInfo("收货人:王五 电话13900139000 收货地址天津市河西区友谊路20号")
jsonData3, _ := json.MarshalIndent(result3, "", " ")
fmt.Println("\n示例 3 - 带标签格式:")
fmt.Println(string(jsonData3))
// Output:
// 示例 1 - 姓名和手机号:
// 姓名: 张三, 手机: 13800138000, 地址: 北京市朝阳区
//
// 示例 2 - 身份证号:
// 姓名: 李四, 身份证: 110101199001011234, 地址: 上海市
//
// 示例 3 - 带标签格式:
// {
// "name": "王五",
// "mobile": "13900139000",
// "idn": "",
// "postcode": "",
// "province": "",
// "city": "",
// "region": "",
// "street": "",
// "addr": "天津市河西区友谊路20号"
// }
}
```

View File

@@ -8,6 +8,7 @@ formatter contains some functions for data formatting.
- [https://github.com/duke-git/lancet/blob/main/formatter/formatter.go](https://github.com/duke-git/lancet/blob/main/formatter/formatter.go)
- [https://github.com/duke-git/lancet/blob/main/formatter/byte.go](https://github.com/duke-git/lancet/blob/main/formatter/byte.go)
- [https://github.com/duke-git/lancet/blob/main/formatter/address.go](https://github.com/duke-git/lancet/blob/main/formatter/address.go)
<div STYLE="page-break-after: always;"></div>
@@ -30,6 +31,8 @@ import (
- [BinaryBytes](#BinaryBytes)
- [ParseDecimalBytes](#ParseDecimalBytes)
- [ParseBinaryBytes](#ParseBinaryBytes)
- [ParseCNAddress](#ParseCNAddress)
- [ParsePersonInfo](#ParsePersonInfo)
<div STYLE="page-break-after: always;"></div>
@@ -308,3 +311,134 @@ func main() {
// 12492
}
```
### <span id="ParseCNAddress">ParseCNAddress</span>
<p>Parses a Chinese address string intelligently and extracts structured information. It can parse addresses with or without user information (name, phone, ID card, etc.). When withUser is true, it extracts user information from the address string. When withUser is false, it only parses the location information. Supports various address formats: standard format, compact format, labeled format, county-level cities format, etc.</p>
<b>Signature:</b>
```go
func ParseCNAddress(str string, withUser bool) *AddressInfo
```
<b>Example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/o5l09hQopEV)</span></b>
```go
package main
import (
"encoding/json"
"fmt"
"github.com/duke-git/lancet/v2/formatter"
)
func main() {
// Parse complete address with user information
result1 := formatter.ParseCNAddress("张三 13800138000 北京市朝阳区建国路1号", true)
jsonData1, _ := json.MarshalIndent(result1, "", " ")
fmt.Println("Example 1 - With user info:")
fmt.Println(string(jsonData1))
// Parse address only, without extracting user information
result2 := formatter.ParseCNAddress("北京市海淀区中关村大街1号", false)
fmt.Printf("\nExample 2 - Address only:\n")
fmt.Printf("Province: %s, City: %s, Region: %s, Street: %s\n",
result2.Province, result2.City, result2.Region, result2.Street)
// Parse county-level city address
result3 := formatter.ParseCNAddress("河北省石家庄市新乐市经济开发区兴工街10号", false)
fmt.Printf("\nExample 3 - County-level city:\n")
fmt.Printf("Province: %s, City: %s, Region: %s, Street: %s\n",
result3.Province, result3.City, result3.Region, result3.Street)
// Compact format
result4 := formatter.ParseCNAddress("马云13593464918陕西省西安市雁塔区丈八沟街道", true)
fmt.Printf("\nExample 4 - Compact format:\n")
fmt.Printf("Name: %s, Phone: %s, Address: %s%s%s%s\n",
result4.Name, result4.Mobile, result4.Province, result4.City, result4.Region, result4.Street)
// Output:
// Example 1 - With user info:
// {
// "name": "张三",
// "mobile": "13800138000",
// "idn": "",
// "postcode": "",
// "province": "北京",
// "city": "北京市",
// "region": "朝阳区",
// "street": "建国路1号",
// "addr": "北京市朝阳区建国路1号"
// }
//
// Example 2 - Address only:
// Province: 北京, City: 北京市, Region: 海淀区, Street: 中关村大街1号
//
// Example 3 - County-level city:
// Province: 河北省, City: 石家庄市, Region: 新乐市, Street: 经济开发区兴工街10号
//
// Example 4 - Compact format:
// Name: 马云, Phone: 13593464918, Address: 陕西省西安市雁塔区丈八沟街道
}
```
### <span id="ParsePersonInfo">ParsePersonInfo</span>
<p>Extracts user information (name, phone, ID card, postal code) from an address string. It separates personal information from the address, supporting labeled format, compact format, and formats with separators. Returns an AddressInfo with extracted user information and cleaned address string.</p>
<b>Signature:</b>
```go
func ParsePersonInfo(str string) *AddressInfo
```
<b>Example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JO-uTlJlTy7)</span></b>
```go
package main
import (
"encoding/json"
"fmt"
"github.com/duke-git/lancet/v2/formatter"
)
func main() {
// Extract name and phone
result1 := formatter.ParsePersonInfo("张三 13800138000 北京市朝阳区")
fmt.Println("Example 1 - Name and phone:")
fmt.Printf("Name: %s, Phone: %s, Address: %s\n", result1.Name, result1.Mobile, result1.Addr)
// Extract ID card number
result2 := formatter.ParsePersonInfo("李四 110101199001011234 上海市")
fmt.Println("\nExample 2 - ID card number:")
fmt.Printf("Name: %s, ID Card: %s, Address: %s\n", result2.Name, result2.IDN, result2.Addr)
// Labeled format
result3 := formatter.ParsePersonInfo("收货人:王五 电话13900139000 收货地址天津市河西区友谊路20号")
jsonData3, _ := json.MarshalIndent(result3, "", " ")
fmt.Println("\nExample 3 - Labeled format:")
fmt.Println(string(jsonData3))
// Output:
// Example 1 - Name and phone:
// Name: 张三, Phone: 13800138000, Address: 北京市朝阳区
//
// Example 2 - ID card number:
// Name: 李四, ID Card: 110101199001011234, Address: 上海市
//
// Example 3 - Labeled format:
// {
// "name": "王五",
// "mobile": "13900139000",
// "idn": "",
// "postcode": "",
// "province": "",
// "city": "",
// "region": "",
// "street": "",
// "addr": "天津市河西区友谊路20号"
// }
}
```