diff --git a/docs/validator.md b/docs/validator.md index a0f1f0c..f6127bd 100644 --- a/docs/validator.md +++ b/docs/validator.md @@ -47,7 +47,7 @@ import ( - [IsUrl](#IsUrl) - [IsWeakPassword](#IsWeakPassword) - [IsZeroValue](#IsZeroValue) - +- [IsGBK](#IsGBK)
@@ -819,7 +819,34 @@ func main() { - +### IsGBK +Checks whether data is GBK encoded (GBK is the Chinese Internal Code Extension Specification). The function works by checking whether each double-byte sequence falls within the GBK encoding range. Because the bytes of UTF-8 encoded text can also fall within that range, call utf8.Valid() first to rule out UTF-8, and only then call IsGBK() to check for GBK, as shown in the example.
+ +Signature: + +```go +func IsGBK(data []byte) bool +``` +Example: + +```go +import ( + "fmt" + "github.com/duke-git/lancet/validator" +) + +func main() { + data := []byte("你好") + + // check utf8 first + if utf8.Valid(data) { + fmt.Println("data encoding is utf-8") + }else if(validator.IsGBK(data)) { + fmt.Println("data encoding is GBK") + } + fmt.Println("data encoding is unknown") +} +``` diff --git a/docs/validator_zh-CN.md b/docs/validator_zh-CN.md index 0012f65..a283f64 100644 --- a/docs/validator_zh-CN.md +++ b/docs/validator_zh-CN.md @@ -47,7 +47,7 @@ import ( - [IsUrl](#IsUrl) - [IsWeakPassword](#IsWeakPassword) - [IsZeroValue](#IsZeroValue) - +- [IsGBK](#IsGBK) @@ -821,6 +821,33 @@ func main() { - +### IsGBK +检查数据编码是否为gbk(汉字内部代码扩展规范)。该函数的实现取决于双字节是否在gbk的编码范围内,而utf-8编码格式的每个字节都在gbk编码范围内。因此,应该首先调用utf8.valid检查它是否是utf-8编码,然后调用IsGBK检查gbk编码。如示例所示。
+ +函数签名: + +```go +func IsGBK(data []byte) bool +``` +例子: + +```go +import ( + "fmt" + "github.com/duke-git/lancet/validator" +) + +func main() { + data := []byte("你好") + + // 先检查utf8编码 + if utf8.Valid(data) { + fmt.Println("data encoding is utf-8") + }else if(validator.IsGBK(data)) { + fmt.Println("data encoding is GBK") + } + fmt.Println("data encoding is unknown") +} +``` diff --git a/go.mod b/go.mod index 02610a3..051ca21 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module github.com/duke-git/lancet go 1.16 + +require golang.org/x/text v0.5.0 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..bbfdb85 --- /dev/null +++ b/go.sum @@ -0,0 +1,25 @@ +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys 
v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.5.0 h1:OLmvp0KP+FVG99Ct/qFiL/Fhk4zp4QQnZ7b2U+5piUM= +golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/validator/validator.go b/validator/validator.go index 467bc63..474390c 100644 --- a/validator/validator.go +++ b/validator/validator.go @@ -288,3 +288,40 @@ func IsZeroValue(value interface{}) bool { return reflect.DeepEqual(rv.Interface(), reflect.Zero(rv.Type()).Interface()) } + +// IsGBK check if data encoding is gbk +// Note: this function is implemented by whether double bytes fall within the encoding range of gbk, +// while each byte of utf-8 encoding format falls within the encoding range of gbk. +// Therefore, utf8.valid() should be called first to check whether it is not utf-8 encoding, +// and then call IsGBK() to check gbk encoding. 
// IsGBK reports whether data consists solely of byte sequences that fall
// within the GBK (Chinese Internal Code Extension Specification) encoding
// ranges.
//
// A byte in 0x00-0x7f is accepted as a single-byte (ASCII) character. Any
// other byte must be a lead byte in 0x81-0xfe that is followed by a trail
// byte in 0x40-0xfe other than 0x7f. A lead byte with no trail byte, or any
// byte outside these ranges, makes the function return false. Empty input
// yields true.
//
// The check is purely range based, and the bytes of valid UTF-8 text can
// satisfy the same ranges, so rule out UTF-8 before calling IsGBK:
//
//	data := []byte("你好")
//	if utf8.Valid(data) {
//		fmt.Println("data encoding is utf-8")
//	} else if IsGBK(data) {
//		fmt.Println("data encoding is GBK")
//	} else {
//		fmt.Println("data encoding is unknown")
//	}
func IsGBK(data []byte) bool {
	for i := 0; i < len(data); {
		lead := data[i]

		// Single-byte (ASCII) character. The bound must be 0x7f: comparing a
		// byte against 0xff is always true and would accept any input.
		if lead <= 0x7f {
			i++
			continue
		}

		// A double-byte character needs a valid lead byte and a following
		// byte to inspect; a truncated sequence is not valid GBK.
		if lead < 0x81 || lead > 0xfe || i+1 >= len(data) {
			return false
		}

		// Trail bytes span 0x40-0xfe, with 0x7f excluded.
		trail := data[i+1]
		if trail < 0x40 || trail > 0xfe || trail == 0x7f {
			return false
		}

		i += 2
	}

	return true
}