--- /dev/null
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Patterns for ServeMux routing.
+
+package http
+
+import (
+ "errors"
+ "fmt"
+ "strings"
+ "unicode"
+)
+
+// A pattern is something that can be matched against an HTTP request.
+// It has an optional method, an optional host, and a path.
+type pattern struct {
+ str string // original string
+ method string
+ host string
+ // The representation of a path differs from the surface syntax, which
+ // simplifies most algorithms.
+ //
+ // Paths ending in '/' are represented with an anonymous "..." wildcard.
+ // For example, the path "a/" is represented as a literal segment "a" followed
+ // by a segment with multi==true.
+ //
+ // Paths ending in "{$}" are represented with the literal segment "/".
+ // For example, the path "a/{$}" is represented as a literal segment "a" followed
+ // by a literal segment "/".
+ segments []segment
+ loc string // source location of registering call, for helpful messages
+}
+
+// A segment is a pattern piece that matches one or more path segments, or
+// a trailing slash.
+//
+// If wild is false, it matches a literal segment, or, if s == "/", a trailing slash.
+// Examples:
+//
+// "a" => segment{s: "a"}
+// "/{$}" => segment{s: "/"}
+//
+// If wild is true and multi is false, it matches a single path segment.
+// Example:
+//
+// "{x}" => segment{s: "x", wild: true}
+//
+// If both wild and multi are true, it matches all remaining path segments.
+// Example:
+//
+// "{rest...}" => segment{s: "rest", wild: true, multi: true}
+type segment struct {
+ s string // literal or wildcard name or "/" for "/{$}".
+ wild bool
+ multi bool // "..." wildcard
+}
+
+// parsePattern parses a string into a Pattern.
+// The string's syntax is
+//
+// [METHOD] [HOST]/[PATH]
+//
+// where:
+// - METHOD is an HTTP method
+// - HOST is a hostname
+// - PATH consists of slash-separated segments, where each segment is either
+// a literal or a wildcard of the form "{name}", "{name...}", or "{$}".
+//
+// METHOD, HOST and PATH are all optional; that is, the string can be "/".
+// If METHOD is present, it must be followed by a single space.
+// Wildcard names must be valid Go identifiers.
+// The "{$}" and "{name...}" wildcard must occur at the end of PATH.
+// PATH may end with a '/'.
+// Wildcard names in a path must be distinct.
+func parsePattern(s string) (*pattern, error) {
+ if len(s) == 0 {
+ return nil, errors.New("empty pattern")
+ }
+ // TODO(jba): record the rune offset in s to provide more information in errors.
+ method, rest, found := strings.Cut(s, " ")
+ if !found {
+ rest = method
+ method = ""
+ }
+ if method != "" && !validMethod(method) {
+ return nil, fmt.Errorf("net/http: invalid method %q", method)
+ }
+ p := &pattern{str: s, method: method}
+
+ i := strings.IndexByte(rest, '/')
+ if i < 0 {
+ return nil, errors.New("host/path missing /")
+ }
+ p.host = rest[:i]
+ rest = rest[i:]
+ if strings.IndexByte(p.host, '{') >= 0 {
+ return nil, errors.New("host contains '{' (missing initial '/'?)")
+ }
+ // At this point, rest is the path.
+
+ // An unclean path with a method that is not CONNECT can never match,
+ // because paths are cleaned before matching.
+ if method != "" && method != "CONNECT" && rest != cleanPath(rest) {
+ return nil, errors.New("non-CONNECT pattern with unclean path can never match")
+ }
+
+ seenNames := map[string]bool{} // remember wildcard names to catch dups
+ for len(rest) > 0 {
+ // Invariant: rest[0] == '/'.
+ rest = rest[1:]
+ if len(rest) == 0 {
+ // Trailing slash.
+ p.segments = append(p.segments, segment{wild: true, multi: true})
+ break
+ }
+ i := strings.IndexByte(rest, '/')
+ if i < 0 {
+ i = len(rest)
+ }
+ var seg string
+ seg, rest = rest[:i], rest[i:]
+ if i := strings.IndexByte(seg, '{'); i < 0 {
+ // Literal.
+ p.segments = append(p.segments, segment{s: seg})
+ } else {
+ // Wildcard.
+ if i != 0 {
+ return nil, errors.New("bad wildcard segment (must start with '{')")
+ }
+ if seg[len(seg)-1] != '}' {
+ return nil, errors.New("bad wildcard segment (must end with '}')")
+ }
+ name := seg[1 : len(seg)-1]
+ if name == "$" {
+ if len(rest) != 0 {
+ return nil, errors.New("{$} not at end")
+ }
+ p.segments = append(p.segments, segment{s: "/"})
+ break
+ }
+ name, multi := strings.CutSuffix(name, "...")
+ if multi && len(rest) != 0 {
+ return nil, errors.New("{...} wildcard not at end")
+ }
+ if name == "" {
+ return nil, errors.New("empty wildcard")
+ }
+ if !isValidWildcardName(name) {
+ return nil, fmt.Errorf("bad wildcard name %q", name)
+ }
+ if seenNames[name] {
+ return nil, fmt.Errorf("duplicate wildcard name %q", name)
+ }
+ seenNames[name] = true
+ p.segments = append(p.segments, segment{s: name, wild: true, multi: multi})
+ }
+ }
+ return p, nil
+}
+
+func isValidHTTPToken(s string) bool {
+ if s == "" {
+ return false
+ }
+ // See https://www.rfc-editor.org/rfc/rfc9110#section-5.6.2.
+ for _, r := range s {
+ if !unicode.IsLetter(r) && !unicode.IsDigit(r) && !strings.ContainsRune("!#$%&'*+.^_`|~-", r) {
+ return false
+ }
+ }
+ return true
+}
+
+func isValidWildcardName(s string) bool {
+ if s == "" {
+ return false
+ }
+ // Valid Go identifier.
+ for i, c := range s {
+ if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
+ return false
+ }
+ }
+ return true
+}
--- /dev/null
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http
+
+import (
+ "slices"
+ "strings"
+ "testing"
+)
+
+func TestParsePattern(t *testing.T) {
+ lit := func(name string) segment {
+ return segment{s: name}
+ }
+
+ wild := func(name string) segment {
+ return segment{s: name, wild: true}
+ }
+
+ multi := func(name string) segment {
+ s := wild(name)
+ s.multi = true
+ return s
+ }
+
+ for _, test := range []struct {
+ in string
+ want pattern
+ }{
+ {"/", pattern{segments: []segment{multi("")}}},
+ {"/a", pattern{segments: []segment{lit("a")}}},
+ {
+ "/a/",
+ pattern{segments: []segment{lit("a"), multi("")}},
+ },
+ {"/path/to/something", pattern{segments: []segment{
+ lit("path"), lit("to"), lit("something"),
+ }}},
+ {
+ "/{w1}/lit/{w2}",
+ pattern{
+ segments: []segment{wild("w1"), lit("lit"), wild("w2")},
+ },
+ },
+ {
+ "/{w1}/lit/{w2}/",
+ pattern{
+ segments: []segment{wild("w1"), lit("lit"), wild("w2"), multi("")},
+ },
+ },
+ {
+ "example.com/",
+ pattern{host: "example.com", segments: []segment{multi("")}},
+ },
+ {
+ "GET /",
+ pattern{method: "GET", segments: []segment{multi("")}},
+ },
+ {
+ "POST example.com/foo/{w}",
+ pattern{
+ method: "POST",
+ host: "example.com",
+ segments: []segment{lit("foo"), wild("w")},
+ },
+ },
+ {
+ "/{$}",
+ pattern{segments: []segment{lit("/")}},
+ },
+ {
+ "DELETE example.com/a/{foo12}/{$}",
+ pattern{method: "DELETE", host: "example.com", segments: []segment{lit("a"), wild("foo12"), lit("/")}},
+ },
+ {
+ "/foo/{$}",
+ pattern{segments: []segment{lit("foo"), lit("/")}},
+ },
+ {
+ "/{a}/foo/{rest...}",
+ pattern{segments: []segment{wild("a"), lit("foo"), multi("rest")}},
+ },
+ {
+ "//",
+ pattern{segments: []segment{lit(""), multi("")}},
+ },
+ {
+ "/foo///./../bar",
+ pattern{segments: []segment{lit("foo"), lit(""), lit(""), lit("."), lit(".."), lit("bar")}},
+ },
+ {
+ "a.com/foo//",
+ pattern{host: "a.com", segments: []segment{lit("foo"), lit(""), multi("")}},
+ },
+ } {
+ got := mustParsePattern(t, test.in)
+ if !got.equal(&test.want) {
+ t.Errorf("%q:\ngot %#v\nwant %#v", test.in, got, &test.want)
+ }
+ }
+}
+
+func TestParsePatternError(t *testing.T) {
+ for _, test := range []struct {
+ in string
+ contains string
+ }{
+ {"", "empty pattern"},
+ {"A=B /", "invalid method"},
+ {" ", "missing /"},
+ {"/{w}x", "bad wildcard segment"},
+ {"/x{w}", "bad wildcard segment"},
+ {"/{wx", "bad wildcard segment"},
+ {"/{a$}", "bad wildcard name"},
+ {"/{}", "empty wildcard"},
+ {"/{...}", "empty wildcard"},
+ {"/{$...}", "bad wildcard"},
+ {"/{$}/", "{$} not at end"},
+ {"/{$}/x", "{$} not at end"},
+ {"/{a...}/", "not at end"},
+ {"/{a...}/x", "not at end"},
+ {"{a}/b", "missing initial '/'"},
+ {"/a/{x}/b/{x...}", "duplicate wildcard name"},
+ {"GET //", "unclean path"},
+ } {
+ _, err := parsePattern(test.in)
+ if err == nil || !strings.Contains(err.Error(), test.contains) {
+ t.Errorf("%q:\ngot %v, want error containing %q", test.in, err, test.contains)
+ }
+ }
+}
+
+func (p1 *pattern) equal(p2 *pattern) bool {
+ return p1.method == p2.method && p1.host == p2.host &&
+ slices.Equal(p1.segments, p2.segments)
+}
+
+func TestIsValidHTTPToken(t *testing.T) {
+ for _, test := range []struct {
+ in string
+ want bool
+ }{
+ {"", false},
+ {"GET", true},
+ {"get", true},
+ {"white space", false},
+ {"#!~", true},
+ {"a-b1_2", true},
+ {"notok)", false},
+ } {
+ got := isValidHTTPToken(test.in)
+ if g, w := got, test.want; g != w {
+ t.Errorf("%q: got %t, want %t", test.in, g, w)
+ }
+ }
+}
+
+func mustParsePattern(t *testing.T, s string) *pattern {
+ t.Helper()
+ p, err := parsePattern(s)
+ if err != nil {
+ t.Fatal(err)
+ }
+ return p
+}