]> Cypherpunks.ru repositories - gostls13.git/commitdiff
[release-branch.go1.21] html/template: properly handle special tags within the script...
authorRoland Shoemaker <bracewell@google.com>
Thu, 3 Aug 2023 19:28:28 +0000 (12:28 -0700)
committerCherry Mui <cherryyz@google.com>
Wed, 6 Sep 2023 14:20:20 +0000 (14:20 +0000)
The HTML specification has incredibly complex rules for how to handle
"<!--", "<script", and "</script" when they appear within literals in
the script context. Rather than attempting to apply these restrictions
(which require a significantly more complex state machine) we apply
the workaround suggested in section 4.12.1.3 of the HTML specification [1].

More precisely, when "<!--", "<script", and "</script" appear within
literals (strings and regular expressions, ignoring comments since we
already elide their content) we replace the "<" with "\x3C". This avoids
the unintuitive behavior that using these tags within literals can cause,
by simply preventing the rendered content from triggering it. This may
break some correct usages of these tags, but on balance is more likely
to prevent XSS attacks where users are unknowingly either closing or not
closing the script blocks where they think they are.

Thanks to Takeshi Kaneko (GMO Cybersecurity by Ierae, Inc.) for
reporting this issue.

Fixes #62197
Fixes #62398
Fixes CVE-2023-39319

[1] https://html.spec.whatwg.org/#restrictions-for-contents-of-script-elements

Change-Id: Iab57b0532694827e3eddf57a7497ba1fab1746dc
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1976594
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
Run-TryBot: Roland Shoemaker <bracewell@google.com>
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/2014619
Reviewed-on: https://go-review.googlesource.com/c/go/+/526097
Run-TryBot: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>

src/go/build/deps_test.go
src/html/template/context.go
src/html/template/escape.go
src/html/template/escape_test.go
src/html/template/transition.go

index 2f335068b8a62fea40cc699681d333c983ab243b..bea398fa9566e8928b58463bf2c1ffff6b34a696 100644 (file)
@@ -245,15 +245,15 @@ var depsRules = `
        < text/template
        < internal/lazytemplate;
 
-       encoding/json, html, text/template
-       < html/template;
-
        # regexp
        FMT
        < regexp/syntax
        < regexp
        < internal/lazyregexp;
 
+       encoding/json, html, text/template, regexp
+       < html/template;
+
        # suffix array
        encoding/binary, regexp
        < index/suffixarray;
index e07a0c4a027d06fd1ae74241fc50bd70a287dddc..7987713c65b614f209b1a0e36dcd65236c576676 100644 (file)
@@ -174,6 +174,20 @@ func isInTag(s state) bool {
        return false
 }
 
+// isInScriptLiteral returns true if s is one of the literal states within a
+// <script> tag, and as such occurances of "<!--", "<script", and "</script"
+// need to be treated specially.
+func isInScriptLiteral(s state) bool {
+       // Ignore the comment states (stateJSBlockCmt, stateJSLineCmt,
+       // stateJSHTMLOpenCmt, stateJSHTMLCloseCmt) because their content is already
+       // omitted from the output.
+       switch s {
+       case stateJSDqStr, stateJSSqStr, stateJSBqStr, stateJSRegexp:
+               return true
+       }
+       return false
+}
+
 // delim is the delimiter that will end the current HTML attribute.
 type delim uint8
 
index 473564c37ad5c939f8a2e15bf0783571cb20240f..01f6303a448a9abbcc5555677b0ae8e0140c8308 100644 (file)
@@ -10,6 +10,7 @@ import (
        "html"
        "internal/godebug"
        "io"
+       "regexp"
        "text/template"
        "text/template/parse"
 )
@@ -729,6 +730,26 @@ var delimEnds = [...]string{
        delimSpaceOrTagEnd: " \t\n\f\r>",
 }
 
+var (
+       // Per WHATWG HTML specification, section 4.12.1.3, there are extremely
+       // complicated rules for how to handle the set of opening tags <!--,
+       // <script, and </script when they appear in JS literals (i.e. strings,
+       // regexs, and comments). The specification suggests a simple solution,
+       // rather than implementing the arcane ABNF, which involves simply escaping
+       // the opening bracket with \x3C. We use the below regex for this, since it
+       // makes doing the case-insensitive find-replace much simpler.
+       specialScriptTagRE          = regexp.MustCompile("(?i)<(script|/script|!--)")
+       specialScriptTagReplacement = []byte("\\x3C$1")
+)
+
+func containsSpecialScriptTag(s []byte) bool {
+       return specialScriptTagRE.Match(s)
+}
+
+func escapeSpecialScriptTags(s []byte) []byte {
+       return specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement)
+}
+
 var doctypeBytes = []byte("<!DOCTYPE")
 
 // escapeText escapes a text template node.
@@ -787,6 +808,11 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context {
                        b.Write(s[written:cs])
                        written = i1
                }
+               if isInScriptLiteral(c.state) && containsSpecialScriptTag(s[i:i1]) {
+                       b.Write(s[written:i])
+                       b.Write(escapeSpecialScriptTags(s[i:i1]))
+                       written = i1
+               }
                if i == i1 && c.state == c1.state {
                        panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
                }
index f60c875927011df9e94874565e360759c79c39f4..8a4f62e92fae73ddc753ea4b5e7c14d3af3748e1 100644 (file)
@@ -513,6 +513,21 @@ func TestEscape(t *testing.T) {
                        "<script>#! beep\n</script>",
                        "<script>\n</script>",
                },
+               {
+                       "Special tags in <script> string literals",
+                       `<script>var a = "asd < 123 <!-- 456 < fgh <script jkl < 789 </script"</script>`,
+                       `<script>var a = "asd < 123 \x3C!-- 456 < fgh \x3Cscript jkl < 789 \x3C/script"</script>`,
+               },
+               {
+                       "Special tags in <script> string literals (mixed case)",
+                       `<script>var a = "<!-- <ScripT </ScripT"</script>`,
+                       `<script>var a = "\x3C!-- \x3CScripT \x3C/ScripT"</script>`,
+               },
+               {
+                       "Special tags in <script> regex literals (mixed case)",
+                       `<script>var a = /<!-- <ScripT </ScripT/</script>`,
+                       `<script>var a = /\x3C!-- \x3CScripT \x3C/ScripT/</script>`,
+               },
                {
                        "CSS comments",
                        "<style>p// paragraph\n" +
@@ -1533,8 +1548,38 @@ func TestEscapeText(t *testing.T) {
                        context{state: stateJS, element: elementScript},
                },
                {
+                       // <script and </script tags are escaped, so </script> should not
+                       // cause us to exit the JS state.
                        `<script>document.write("<script>alert(1)</script>");`,
-                       context{state: stateText},
+                       context{state: stateJS, element: elementScript},
+               },
+               {
+                       `<script>document.write("<script>`,
+                       context{state: stateJSDqStr, element: elementScript},
+               },
+               {
+                       `<script>document.write("<script>alert(1)</script>`,
+                       context{state: stateJSDqStr, element: elementScript},
+               },
+               {
+                       `<script>document.write("<script>alert(1)<!--`,
+                       context{state: stateJSDqStr, element: elementScript},
+               },
+               {
+                       `<script>document.write("<script>alert(1)</Script>");`,
+                       context{state: stateJS, element: elementScript},
+               },
+               {
+                       `<script>document.write("<!--");`,
+                       context{state: stateJS, element: elementScript},
+               },
+               {
+                       `<script>let a = /</script`,
+                       context{state: stateJSRegexp, element: elementScript},
+               },
+               {
+                       `<script>let a = /</script/`,
+                       context{state: stateJS, element: elementScript, jsCtx: jsCtxDivOp},
                },
                {
                        `<script type="text/template">`,
index d8ff18abb08200a769f56b83af1311c7181fa13d..432c365d3c381904a8fa1221bdeeb5dd80a04ea1 100644 (file)
@@ -214,6 +214,11 @@ var (
 // element states.
 func tSpecialTagEnd(c context, s []byte) (context, int) {
        if c.element != elementNone {
+               // script end tags ("</script") within script literals are ignored, so that
+               // we can properly escape them.
+               if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
+                       return c, len(s)
+               }
                if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
                        return context{}, i
                }
@@ -353,6 +358,16 @@ func tJSDelimited(c context, s []byte) (context, int) {
                        inCharset = true
                case ']':
                        inCharset = false
+               case '/':
+                       // If "</script" appears in a regex literal, the '/' should not
+                       // close the regex literal, and it will later be escaped to
+                       // "\x3C/script" in escapeText.
+                       if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 {
+                               i++
+                       } else if !inCharset {
+                               c.state, c.jsCtx = stateJS, jsCtxDivOp
+                               return c, i + 1
+                       }
                default:
                        // end delimiter
                        if !inCharset {