// Mirror of https://github.com/eiblog/eiblog.git
// Synced 2026-02-14 02:12:26 +08:00 (209 lines, 4.3 KiB, Go).

package url
|
|
|
|
import (
|
|
"strconv"
|
|
)
|
|
|
|
// Encoding selects which section of a URL a string belongs to when it is
// escaped or unescaped; the set of bytes that need percent-encoding differs
// from section to section.
type Encoding int

// Escaping modes. The values start at 1, so the zero Encoding is not a
// valid mode.
const (
	// EncodePath selects the rules for the path section.
	EncodePath Encoding = iota + 1
	// EncodeUserPassword selects the rules for the userinfo section.
	EncodeUserPassword
	// EncodeQueryComponent selects the rules for a query component; in this
	// mode a space is written as '+' and '+' is read back as a space.
	EncodeQueryComponent
	// EncodeFragment selects the rules for the fragment section.
	EncodeFragment
)
|
|
|
|
// EscapeError is the error reported for a malformed percent-escape.
// Its string value is the offending piece of input.
type EscapeError string

// Error implements the error interface. The message is the fixed prefix
// "invalid URL escape " followed by the offending text as a quoted Go string.
func (e EscapeError) Error() string {
	offending := strconv.Quote(string(e))
	return "invalid URL escape " + offending
}
|
|
|
|
// ishex reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func ishex(c byte) bool {
	return ('0' <= c && c <= '9') ||
		('a' <= c && c <= 'f') ||
		('A' <= c && c <= 'F')
}
|
|
|
|
// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
// Upper- and lower-case letters are both accepted. Any byte that is not a
// hexadecimal digit yields 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return c - 'a' + 10
	}
	if 'A' <= c && c <= 'F' {
		return c - 'A' + 10
	}
	return 0
}
|
|
|
|
// Return true if the specified character should be escaped when
|
|
// appearing in a URL string, according to RFC 3986.
|
|
// When 'all' is true the full range of reserved characters are matched.
|
|
func shouldEscape(c byte, mode Encoding) bool {
|
|
|
|
// §2.3 Unreserved characters (alphanum)
|
|
if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
|
|
return false
|
|
}
|
|
|
|
switch c {
|
|
case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
|
|
return false
|
|
|
|
case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
|
|
// Different sections of the URL allow a few of
|
|
// the reserved characters to appear unescaped.
|
|
switch mode {
|
|
case EncodePath: // §3.3
|
|
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
|
// meaning to individual path segments. This package
|
|
// only manipulates the path as a whole, so we allow those
|
|
// last two as well. That leaves only ? to escape.
|
|
return c == '?'
|
|
|
|
case EncodeUserPassword: // §3.2.2
|
|
// The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
|
|
// The parsing of userinfo treats : as special so we must escape that too.
|
|
return c == '@' || c == '/' || c == ':'
|
|
|
|
case EncodeQueryComponent: // §3.4
|
|
// The RFC reserves (so we must escape) everything.
|
|
return true
|
|
|
|
case EncodeFragment: // §4.1
|
|
// The RFC text is silent but the grammar allows
|
|
// everything, so escape nothing.
|
|
return false
|
|
}
|
|
}
|
|
|
|
// Everything else must be escaped.
|
|
return true
|
|
}
|
|
|
|
// QueryUnescape does the inverse transformation of QueryEscape, converting
|
|
// %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
|
|
// any % is not followed by two hexadecimal digits.
|
|
func QueryUnescape(s string) (string, error) {
|
|
|
|
return UnescapeEx(s, EncodeQueryComponent)
|
|
}
|
|
|
|
func Unescape(s string) (string, error) {
|
|
|
|
return UnescapeEx(s, EncodePath)
|
|
}
|
|
|
|
// UnescapeEx unescapes a string; the mode specifies
|
|
// which section of the URL string is being unescaped.
|
|
func UnescapeEx(s string, mode Encoding) (string, error) {
|
|
|
|
// Count %, check that they're well-formed.
|
|
n := 0
|
|
hasPlus := false
|
|
for i := 0; i < len(s); {
|
|
switch s[i] {
|
|
case '%':
|
|
n++
|
|
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
|
|
s = s[i:]
|
|
if len(s) > 3 {
|
|
s = s[0:3]
|
|
}
|
|
return "", EscapeError(s)
|
|
}
|
|
i += 3
|
|
case '+':
|
|
hasPlus = mode == EncodeQueryComponent
|
|
i++
|
|
default:
|
|
i++
|
|
}
|
|
}
|
|
|
|
if n == 0 && !hasPlus {
|
|
return s, nil
|
|
}
|
|
|
|
t := make([]byte, len(s)-2*n)
|
|
j := 0
|
|
for i := 0; i < len(s); {
|
|
switch s[i] {
|
|
case '%':
|
|
t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
|
|
j++
|
|
i += 3
|
|
case '+':
|
|
if mode == EncodeQueryComponent {
|
|
t[j] = ' '
|
|
} else {
|
|
t[j] = '+'
|
|
}
|
|
j++
|
|
i++
|
|
default:
|
|
t[j] = s[i]
|
|
j++
|
|
i++
|
|
}
|
|
}
|
|
return string(t), nil
|
|
}
|
|
|
|
// QueryEscape escapes the string so it can be safely placed
|
|
// inside a URL query.
|
|
func QueryEscape(s string) string {
|
|
|
|
return EscapeEx(s, EncodeQueryComponent)
|
|
}
|
|
|
|
func Escape(s string) string {
|
|
|
|
return EscapeEx(s, EncodePath)
|
|
}
|
|
|
|
func EscapeEx(s string, mode Encoding) string {
|
|
|
|
spaceCount, hexCount := 0, 0
|
|
for i := 0; i < len(s); i++ {
|
|
c := s[i]
|
|
if shouldEscape(c, mode) {
|
|
if c == ' ' && mode == EncodeQueryComponent {
|
|
spaceCount++
|
|
} else {
|
|
hexCount++
|
|
}
|
|
}
|
|
}
|
|
|
|
if spaceCount == 0 && hexCount == 0 {
|
|
return s
|
|
}
|
|
|
|
t := make([]byte, len(s)+2*hexCount)
|
|
j := 0
|
|
for i := 0; i < len(s); i++ {
|
|
switch c := s[i]; {
|
|
case c == ' ' && mode == EncodeQueryComponent:
|
|
t[j] = '+'
|
|
j++
|
|
case shouldEscape(c, mode):
|
|
t[j] = '%'
|
|
t[j+1] = "0123456789ABCDEF"[c>>4]
|
|
t[j+2] = "0123456789ABCDEF"[c&15]
|
|
j += 3
|
|
default:
|
|
t[j] = s[i]
|
|
j++
|
|
}
|
|
}
|
|
return string(t)
|
|
}
|