1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
12 "golang.org/x/net/idna"
15 var isTokenTable = [127]bool{
95 func IsTokenRune(r rune) bool {
97 return i < len(isTokenTable) && isTokenTable[i]
100 func isNotToken(r rune) bool {
101 return !IsTokenRune(r)
104 // HeaderValuesContainsToken reports whether any string in values
105 // contains the provided token, ASCII case-insensitively.
106 func HeaderValuesContainsToken(values []string, token string) bool {
107 for _, v := range values {
108 if headerValueContainsToken(v, token) {
115 // isOWS reports whether b is an optional whitespace byte (SP or HTAB), as defined
116 // by RFC 7230 section 3.2.3.
117 func isOWS(b byte) bool { return b == ' ' || b == '\t' }
119 // trimOWS returns x with all optional whitespace removed from the
120 // beginning and end.
121 func trimOWS(x string) string {
122 // TODO: consider using strings.Trim(x, " \t") instead,
123 // if and when it's fast enough. See issue 10292.
124 // But this ASCII-only code will probably always beat UTF-8
126 for len(x) > 0 && isOWS(x[0]) {
129 for len(x) > 0 && isOWS(x[len(x)-1]) {
135 // headerValueContainsToken reports whether v (assumed to be a
136 // 0#element, in the ABNF extension described in RFC 7230 section 7)
137 // contains token amongst its comma-separated tokens, ASCII
138 // case-insensitively.
139 func headerValueContainsToken(v string, token string) bool {
141 if comma := strings.IndexByte(v, ','); comma != -1 {
142 return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token)
144 return tokenEqual(v, token)
147 // lowerASCII returns the ASCII lowercase version of b.
148 func lowerASCII(b byte) byte {
149 if 'A' <= b && b <= 'Z' {
150 return b + ('a' - 'A')
155 // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
156 func tokenEqual(t1, t2 string) bool {
157 if len(t1) != len(t2) {
160 for i, b := range t1 {
161 if b >= utf8.RuneSelf {
162 // No UTF-8 or non-ASCII allowed in tokens.
165 if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
172 // isLWS reports whether b is a linear white space byte (SP or HT), according
173 // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
174 // LWS = [CRLF] 1*( SP | HT ); only the SP / HT bytes are matched here, not the optional CRLF.
175 func isLWS(b byte) bool { return b == ' ' || b == '\t' }
177 // isCTL reports whether b is a control byte, according
178 // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
179 // CTL = <any US-ASCII control character
180 // (octets 0 - 31) and DEL (127)>
181 func isCTL(b byte) bool {
182 const del = 0x7f // a CTL
183 return b < ' ' || b == del
186 // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
187 // HTTP/2 imposes the additional restriction that uppercase ASCII
188 // letters are not allowed.
191 // header-field = field-name ":" OWS field-value OWS
192 // field-name = token
194 // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
195 // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
196 func ValidHeaderFieldName(v string) bool {
200 for _, r := range v {
208 // ValidHostHeader reports whether h is a valid host header.
209 func ValidHostHeader(h string) bool {
210 // The latest spec is actually this:
212 // http://tools.ietf.org/html/rfc7230#section-5.4
213 // Host = uri-host [ ":" port ]
215 // Where uri-host is:
216 // http://tools.ietf.org/html/rfc3986#section-3.2.2
218 // But we're going to be much more lenient for now and just
219 // search for any byte that's not a valid byte in any of those
221 for i := 0; i < len(h); i++ {
222 if !validHostByte[h[i]] {
229 // See the ValidHostHeader comment.
230 var validHostByte = [256]bool{
231 '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
232 '8': true, '9': true,
234 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
235 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
236 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
237 'y': true, 'z': true,
239 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
240 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
241 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
242 'Y': true, 'Z': true,
244 '!': true, // sub-delims
245 '$': true, // sub-delims
246 '%': true, // pct-encoded (and used in IPv6 zones)
247 '&': true, // sub-delims
248 '(': true, // sub-delims
249 ')': true, // sub-delims
250 '*': true, // sub-delims
251 '+': true, // sub-delims
252 ',': true, // sub-delims
253 '-': true, // unreserved
254 '.': true, // unreserved
255 ':': true, // IPv6address + Host expression's optional port
256 ';': true, // sub-delims
257 '=': true, // sub-delims
259 '\'': true, // sub-delims
261 '_': true, // unreserved
262 '~': true, // unreserved
265 // ValidHeaderFieldValue reports whether v is a valid "field-value" according to
266 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
268 // message-header = field-name ":" [ field-value ]
269 // field-value = *( field-content | LWS )
270 // field-content = <the OCTETs making up the field-value
271 // and consisting of either *TEXT or combinations
272 // of token, separators, and quoted-string>
274 // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
276 // TEXT = <any OCTET except CTLs,
277 // but including LWS>
278 // LWS = [CRLF] 1*( SP | HT )
279 // CTL = <any US-ASCII control character
280 // (octets 0 - 31) and DEL (127)>
283 // field-value = *( field-content / obs-fold )
284 // obs-fold = N/A to http2, and deprecated
285 // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
286 // field-vchar = VCHAR / obs-text
287 // obs-text = %x80-FF
288 // VCHAR = "any visible [USASCII] character"
290 // http2 further says: "Similarly, HTTP/2 allows header field values
291 // that are not valid. While most of the values that can be encoded
292 // will not alter header field parsing, carriage return (CR, ASCII
293 // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
294 // 0x0) might be exploited by an attacker if they are translated
295 // verbatim. Any request or response that contains a character not
296 // permitted in a header field value MUST be treated as malformed
297 // (Section 8.1.2.6). Valid characters are defined by the
298 // field-content ABNF rule in Section 3.2 of [RFC7230]."
300 // This function does not (yet?) properly handle the rejection of
301 // strings that begin or end with SP or HTAB.
302 func ValidHeaderFieldValue(v string) bool {
303 for i := 0; i < len(v); i++ {
305 if isCTL(b) && !isLWS(b) {
312 func isASCII(s string) bool {
313 for i := 0; i < len(s); i++ {
314 if s[i] >= utf8.RuneSelf {
321 // PunycodeHostPort returns the IDNA Punycode version
322 // of the provided "host" or "host:port" string.
323 func PunycodeHostPort(v string) (string, error) {
328 host, port, err := net.SplitHostPort(v)
330 // The input 'v' argument was just a "host" argument,
331 // without a port. This error should not be returned
336 host, err = idna.ToASCII(host)
338 // Non-UTF-8? Not representable in Punycode, in any
345 return net.JoinHostPort(host, port), nil