]> Cypherpunks.ru repositories - gostls13.git/commitdiff
net/http: extended routing patterns
authorJonathan Amsterdam <jba@google.com>
Fri, 8 Sep 2023 00:14:47 +0000 (20:14 -0400)
committerJonathan Amsterdam <jba@google.com>
Mon, 11 Sep 2023 17:11:17 +0000 (17:11 +0000)
This is the first of several CLs implementing the proposal
for enhanced ServeMux routing, https://go.dev/issue/61410.

Define a type to represent extended routing patterns and a function to
parse a string into one.

Updates #61410.

Change-Id: I779689acf1f14b20d12c9264251f7dc002b68c49
Reviewed-on: https://go-review.googlesource.com/c/go/+/526815
Run-TryBot: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Eli Bendersky <eliben@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>

src/net/http/pattern.go [new file with mode: 0644]
src/net/http/pattern_test.go [new file with mode: 0644]

diff --git a/src/net/http/pattern.go b/src/net/http/pattern.go
new file mode 100644 (file)
index 0000000..a04fd90
--- /dev/null
@@ -0,0 +1,187 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Patterns for ServeMux routing.
+
+package http
+
+import (
+       "errors"
+       "fmt"
+       "strings"
+       "unicode"
+)
+
+// A pattern is something that can be matched against an HTTP request.
+// It has an optional method, an optional host, and a path.
+type pattern struct {
+       str    string // original string
+       method string
+       host   string
+       // The representation of a path differs from the surface syntax, which
+       // simplifies most algorithms.
+       //
+       // Paths ending in '/' are represented with an anonymous "..." wildcard.
+       // For example, the path "a/" is represented as a literal segment "a" followed
+       // by a segment with multi==true.
+       //
+       // Paths ending in "{$}" are represented with the literal segment "/".
+       // For example, the path "a/{$}" is represented as a literal segment "a" followed
+       // by a literal segment "/".
+       segments []segment
+       loc      string // source location of registering call, for helpful messages
+}
+
+// A segment is a pattern piece that matches one or more path segments, or
+// a trailing slash.
+//
+// If wild is false, it matches a literal segment, or, if s == "/", a trailing slash.
+// Examples:
+//
+//     "a" => segment{s: "a"}
+//     "/{$}" => segment{s: "/"}
+//
+// If wild is true and multi is false, it matches a single path segment.
+// Example:
+//
+//     "{x}" => segment{s: "x", wild: true}
+//
+// If both wild and multi are true, it matches all remaining path segments.
+// Example:
+//
+//     "{rest...}" => segment{s: "rest", wild: true, multi: true}
+type segment struct {
+       s     string // literal or wildcard name or "/" for "/{$}".
+       wild  bool
+       multi bool // "..." wildcard
+}
+
+// parsePattern parses a string into a Pattern.
+// The string's syntax is
+//
+//     [METHOD] [HOST]/[PATH]
+//
+// where:
+//   - METHOD is an HTTP method
+//   - HOST is a hostname
+//   - PATH consists of slash-separated segments, where each segment is either
+//     a literal or a wildcard of the form "{name}", "{name...}", or "{$}".
+//
+// METHOD, HOST and PATH are all optional; that is, the string can be "/".
+// If METHOD is present, it must be followed by a single space.
+// Wildcard names must be valid Go identifiers.
+// The "{$}" and "{name...}" wildcard must occur at the end of PATH.
+// PATH may end with a '/'.
+// Wildcard names in a path must be distinct.
+func parsePattern(s string) (*pattern, error) {
+       if len(s) == 0 {
+               return nil, errors.New("empty pattern")
+       }
+       // TODO(jba): record the rune offset in s to provide more information in errors.
+       method, rest, found := strings.Cut(s, " ")
+       if !found {
+               rest = method
+               method = ""
+       }
+       if method != "" && !validMethod(method) {
+               return nil, fmt.Errorf("net/http: invalid method %q", method)
+       }
+       p := &pattern{str: s, method: method}
+
+       i := strings.IndexByte(rest, '/')
+       if i < 0 {
+               return nil, errors.New("host/path missing /")
+       }
+       p.host = rest[:i]
+       rest = rest[i:]
+       if strings.IndexByte(p.host, '{') >= 0 {
+               return nil, errors.New("host contains '{' (missing initial '/'?)")
+       }
+       // At this point, rest is the path.
+
+       // An unclean path with a method that is not CONNECT can never match,
+       // because paths are cleaned before matching.
+       if method != "" && method != "CONNECT" && rest != cleanPath(rest) {
+               return nil, errors.New("non-CONNECT pattern with unclean path can never match")
+       }
+
+       seenNames := map[string]bool{} // remember wildcard names to catch dups
+       for len(rest) > 0 {
+               // Invariant: rest[0] == '/'.
+               rest = rest[1:]
+               if len(rest) == 0 {
+                       // Trailing slash.
+                       p.segments = append(p.segments, segment{wild: true, multi: true})
+                       break
+               }
+               i := strings.IndexByte(rest, '/')
+               if i < 0 {
+                       i = len(rest)
+               }
+               var seg string
+               seg, rest = rest[:i], rest[i:]
+               if i := strings.IndexByte(seg, '{'); i < 0 {
+                       // Literal.
+                       p.segments = append(p.segments, segment{s: seg})
+               } else {
+                       // Wildcard.
+                       if i != 0 {
+                               return nil, errors.New("bad wildcard segment (must start with '{')")
+                       }
+                       if seg[len(seg)-1] != '}' {
+                               return nil, errors.New("bad wildcard segment (must end with '}')")
+                       }
+                       name := seg[1 : len(seg)-1]
+                       if name == "$" {
+                               if len(rest) != 0 {
+                                       return nil, errors.New("{$} not at end")
+                               }
+                               p.segments = append(p.segments, segment{s: "/"})
+                               break
+                       }
+                       name, multi := strings.CutSuffix(name, "...")
+                       if multi && len(rest) != 0 {
+                               return nil, errors.New("{...} wildcard not at end")
+                       }
+                       if name == "" {
+                               return nil, errors.New("empty wildcard")
+                       }
+                       if !isValidWildcardName(name) {
+                               return nil, fmt.Errorf("bad wildcard name %q", name)
+                       }
+                       if seenNames[name] {
+                               return nil, fmt.Errorf("duplicate wildcard name %q", name)
+                       }
+                       seenNames[name] = true
+                       p.segments = append(p.segments, segment{s: name, wild: true, multi: multi})
+               }
+       }
+       return p, nil
+}
+
+func isValidHTTPToken(s string) bool {
+       if s == "" {
+               return false
+       }
+       // See https://www.rfc-editor.org/rfc/rfc9110#section-5.6.2.
+       for _, r := range s {
+               if !unicode.IsLetter(r) && !unicode.IsDigit(r) && !strings.ContainsRune("!#$%&'*+.^_`|~-", r) {
+                       return false
+               }
+       }
+       return true
+}
+
+func isValidWildcardName(s string) bool {
+       if s == "" {
+               return false
+       }
+       // Valid Go identifier.
+       for i, c := range s {
+               if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
+                       return false
+               }
+       }
+       return true
+}
diff --git a/src/net/http/pattern_test.go b/src/net/http/pattern_test.go
new file mode 100644 (file)
index 0000000..759e126
--- /dev/null
@@ -0,0 +1,167 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http
+
+import (
+       "slices"
+       "strings"
+       "testing"
+)
+
+func TestParsePattern(t *testing.T) {
+       lit := func(name string) segment {
+               return segment{s: name}
+       }
+
+       wild := func(name string) segment {
+               return segment{s: name, wild: true}
+       }
+
+       multi := func(name string) segment {
+               s := wild(name)
+               s.multi = true
+               return s
+       }
+
+       for _, test := range []struct {
+               in   string
+               want pattern
+       }{
+               {"/", pattern{segments: []segment{multi("")}}},
+               {"/a", pattern{segments: []segment{lit("a")}}},
+               {
+                       "/a/",
+                       pattern{segments: []segment{lit("a"), multi("")}},
+               },
+               {"/path/to/something", pattern{segments: []segment{
+                       lit("path"), lit("to"), lit("something"),
+               }}},
+               {
+                       "/{w1}/lit/{w2}",
+                       pattern{
+                               segments: []segment{wild("w1"), lit("lit"), wild("w2")},
+                       },
+               },
+               {
+                       "/{w1}/lit/{w2}/",
+                       pattern{
+                               segments: []segment{wild("w1"), lit("lit"), wild("w2"), multi("")},
+                       },
+               },
+               {
+                       "example.com/",
+                       pattern{host: "example.com", segments: []segment{multi("")}},
+               },
+               {
+                       "GET /",
+                       pattern{method: "GET", segments: []segment{multi("")}},
+               },
+               {
+                       "POST example.com/foo/{w}",
+                       pattern{
+                               method:   "POST",
+                               host:     "example.com",
+                               segments: []segment{lit("foo"), wild("w")},
+                       },
+               },
+               {
+                       "/{$}",
+                       pattern{segments: []segment{lit("/")}},
+               },
+               {
+                       "DELETE example.com/a/{foo12}/{$}",
+                       pattern{method: "DELETE", host: "example.com", segments: []segment{lit("a"), wild("foo12"), lit("/")}},
+               },
+               {
+                       "/foo/{$}",
+                       pattern{segments: []segment{lit("foo"), lit("/")}},
+               },
+               {
+                       "/{a}/foo/{rest...}",
+                       pattern{segments: []segment{wild("a"), lit("foo"), multi("rest")}},
+               },
+               {
+                       "//",
+                       pattern{segments: []segment{lit(""), multi("")}},
+               },
+               {
+                       "/foo///./../bar",
+                       pattern{segments: []segment{lit("foo"), lit(""), lit(""), lit("."), lit(".."), lit("bar")}},
+               },
+               {
+                       "a.com/foo//",
+                       pattern{host: "a.com", segments: []segment{lit("foo"), lit(""), multi("")}},
+               },
+       } {
+               got := mustParsePattern(t, test.in)
+               if !got.equal(&test.want) {
+                       t.Errorf("%q:\ngot  %#v\nwant %#v", test.in, got, &test.want)
+               }
+       }
+}
+
+func TestParsePatternError(t *testing.T) {
+       for _, test := range []struct {
+               in       string
+               contains string
+       }{
+               {"", "empty pattern"},
+               {"A=B /", "invalid method"},
+               {" ", "missing /"},
+               {"/{w}x", "bad wildcard segment"},
+               {"/x{w}", "bad wildcard segment"},
+               {"/{wx", "bad wildcard segment"},
+               {"/{a$}", "bad wildcard name"},
+               {"/{}", "empty wildcard"},
+               {"/{...}", "empty wildcard"},
+               {"/{$...}", "bad wildcard"},
+               {"/{$}/", "{$} not at end"},
+               {"/{$}/x", "{$} not at end"},
+               {"/{a...}/", "not at end"},
+               {"/{a...}/x", "not at end"},
+               {"{a}/b", "missing initial '/'"},
+               {"/a/{x}/b/{x...}", "duplicate wildcard name"},
+               {"GET //", "unclean path"},
+       } {
+               _, err := parsePattern(test.in)
+               if err == nil || !strings.Contains(err.Error(), test.contains) {
+                       t.Errorf("%q:\ngot %v, want error containing %q", test.in, err, test.contains)
+               }
+       }
+}
+
+func (p1 *pattern) equal(p2 *pattern) bool {
+       return p1.method == p2.method && p1.host == p2.host &&
+               slices.Equal(p1.segments, p2.segments)
+}
+
+func TestIsValidHTTPToken(t *testing.T) {
+       for _, test := range []struct {
+               in   string
+               want bool
+       }{
+               {"", false},
+               {"GET", true},
+               {"get", true},
+               {"white space", false},
+               {"#!~", true},
+               {"a-b1_2", true},
+               {"notok)", false},
+       } {
+               got := isValidHTTPToken(test.in)
+               if g, w := got, test.want; g != w {
+                       t.Errorf("%q: got %t, want %t", test.in, g, w)
+               }
+       }
+}
+
+func mustParsePattern(t *testing.T, s string) *pattern {
+       t.Helper()
+       p, err := parsePattern(s)
+       if err != nil {
+               t.Fatal(err)
+       }
+       return p
+}