From 5b21d9046aaf86e5929676c24b5673109f4b4796 Mon Sep 17 00:00:00 2001
From: Sergey Matveev
Date: Sun, 28 Aug 2022 17:01:09 +0300
Subject: [PATCH] Faster key-value parsing

---
 r.go | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/r.go b/r.go
index 483fc2a..dc6084c 100644
--- a/r.go
+++ b/r.go
@@ -21,12 +21,9 @@ import (
 	"bufio"
 	"errors"
 	"io"
-	"regexp"
 	"strings"
 )
 
-var KeyValRe = regexp.MustCompile(`([a-zA-Z%][a-zA-Z0-9_]*):\s*(.*)$`)
-
 type Reader struct {
 	scanner *bufio.Scanner
 }
@@ -37,6 +34,37 @@ func NewReader(r io.Reader) *Reader {
 	return &Reader{bufio.NewScanner(r)}
 }
 
+// getKeyValue splits a "name: value" line at its first colon. The name
+// must begin with a letter or '%' and continue with letters, digits or
+// underscores; at most one space after the colon is stripped from the
+// value. A pair of empty strings is returned when the line has no
+// colon or the name is malformed.
+func getKeyValue(text string) (string, string) {
+	colon := strings.IndexByte(text, ':')
+	if colon <= 0 {
+		// No colon at all, or an empty name in front of it.
+		return "", ""
+	}
+	k := text[:colon]
+	if !((k[0] == '%') ||
+		('a' <= k[0] && k[0] <= 'z') ||
+		('A' <= k[0] && k[0] <= 'Z')) {
+		return "", ""
+	}
+	// The first byte has been validated above and may be '%', which
+	// is not allowed anywhere else, so start from the second one.
+	for i := 1; i < len(k); i++ {
+		c := k[i]
+		if !((c == '_') ||
+			('a' <= c && c <= 'z') ||
+			('A' <= c && c <= 'Z') ||
+			('0' <= c && c <= '9')) {
+			return "", ""
+		}
+	}
+	return k, strings.TrimPrefix(text[colon+1:], " ")
+}
+
 // Get next record. Each record is just a collection of fields. io.EOF
 // is returned if there is nothing to read more.
 func (r *Reader) Next() ([]Field, error) {
@@ -94,12 +122,10 @@ func (r *Reader) Next() ([]Field, error) {
 			break
 		}
 
-		matches := KeyValRe.FindStringSubmatch(text)
-		if len(matches) == 0 {
+		name, line = getKeyValue(text)
+		if name == "" {
 			return fields, errors.New("invalid field format")
 		}
-		name = matches[1]
-		line = matches[2]
 
 		if len(line) > 0 && line[len(line)-1] == '\\' {
 			continuation = true
-- 
2.44.0