Parsing speed increase of factor 2/3

Implemented my own little tokenizer in scanner.go. I only use the
stuff I need.
Miek Gieben 2012-02-21 22:41:00 +01:00
parent f8fb563af9
commit f9d3068eb9
4 changed files with 38 additions and 47 deletions
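
Both parsers in this commit share the same shape: a lexer goroutine reads the source and sends tokens over a channel, and the parser simply ranges over that channel until the lexer closes it. A minimal, self-contained sketch of the pattern (the lex fields and token constants below are simplified stand-ins, not the library's actual definitions):

    package main

    import (
        "fmt"
        "strings"
    )

    // lex is a simplified stand-in for the library's token type.
    type lex struct {
        value int    // token class: _KEY or _VALUE
        token string // the token text
        line  int    // line number, for error reporting
    }

    const (
        _KEY = iota
        _VALUE
    )

    // lexer tokenizes "Key: Value" lines and sends each piece on c.
    // Closing c is what terminates the consumer's range loop.
    func lexer(src string, c chan lex) {
        defer close(c)
        for i, line := range strings.Split(src, "\n") {
            k, v, ok := strings.Cut(line, ":")
            if !ok {
                continue // not a key/value line
            }
            c <- lex{value: _KEY, token: strings.TrimSpace(k), line: i + 1}
            c <- lex{value: _VALUE, token: strings.TrimSpace(v), line: i + 1}
        }
    }

    func main() {
        c := make(chan lex)
        go lexer("Algorithm: 5\nPrivateExponent: deadbeef", c)
        m := make(map[string]string)
        k := ""
        for l := range c { // the parser's side of the channel
            switch l.value {
            case _KEY:
                k = l.token
            case _VALUE:
                m[k] = l.token
            }
        }
        fmt.Println(m) // map[Algorithm:5 PrivateExponent:deadbeef]
    }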


@@ -19,7 +19,7 @@ Features:
* RFC 1035 zone file parsing;
* Fast:
* reply speed around 35/40K qps (Faster hardware -> more qps);
- * Parsing RRs (zone files) with 35/40K RR/s, that's 5M records in about 125 seconds;
+ * Parsing RRs (zone files) with 80/90K RR/s, that's 5M records in about 58 seconds;
* This is expected to be optimized further.
* Client and server side programming (mimicking the net/http package);
* Asynchronous queries/replies for client and server;


@@ -6,7 +6,6 @@ import (
"io"
"math/big"
"strings"
"text/scanner"
)
// ReadPrivateKey reads a private key from the io.Reader q.
@@ -92,13 +91,10 @@ func readPrivateKeyECDSA(m map[string]string) (PrivateKey, error) {
// parseKey reads a private key from r. It returns a map[string]string,
// with the key-value pairs, or an error when the file is not correct.
func parseKey(r io.Reader, file string) (map[string]string, error) {
- var s scanner.Scanner
+ s := scanInit(r)
m := make(map[string]string)
c := make(chan lex)
k := ""
- s.Init(r)
- s.Mode = 0
- s.Whitespace = 0
// Start the lexer
go klexer(s, c)
for l := range c {
@@ -119,18 +115,18 @@ func parseKey(r io.Reader, file string) (map[string]string, error) {
}
// klexer scans the sourcefile and returns tokens on the channel c.
- func klexer(s scanner.Scanner, c chan lex) {
+ func klexer(s *scan, c chan lex) {
var l lex
str := "" // Hold the current read text
commt := false
key := true
- tok := s.Scan()
+ x, err := s.tokenText()
defer close(c)
- for tok != scanner.EOF {
- l.column = s.Position.Column
- l.line = s.Position.Line
- switch x := s.TokenText(); x {
- case ":":
+ for err == nil {
+ l.column = s.position.Column
+ l.line = s.position.Line
+ switch x {
+ case ':':
if commt {
break
}
@@ -139,15 +135,15 @@ func klexer(s scanner.Scanner, c chan lex) {
l.value = _KEY
c <- l
// Next token is a space, eat it
- s.Scan()
+ s.tokenText()
key = false
str = ""
} else {
l.value = _VALUE
}
case ";":
case ';':
commt = true
case "\n":
case '\n':
if commt {
// Reset a comment
commt = false
@@ -162,9 +158,9 @@ func klexer(s scanner.Scanner, c chan lex) {
if commt {
break
}
- str += x
+ str += string(x)
}
- tok = s.Scan()
+ x, err = s.tokenText()
}
if len(str) > 0 {
// Send remainder
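
Callers of parseKey are unaffected by the lexer swap: it still returns a map of the key file's attribute/value pairs. A hypothetical in-package usage sketch (the file content and name are illustrative fragments, not a real key; parseKey is unexported, so this only works inside the package):

    input := "Private-key-format: v1.2\n" +
        "Algorithm: 5 (RSASHA1)\n" +
        "; comments start with a semicolon\n"
    m, err := parseKey(strings.NewReader(input), "Kexample.net.+005+12345.private")
    if err != nil {
        // the file was malformed
    }
    // m now maps attribute names to their string values.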


@@ -9,7 +9,7 @@ import (
type scan struct {
src *bufio.Reader
- scanner.Position
+ position scanner.Position
}
func scanInit(r io.Reader) *scan {
@@ -25,9 +25,9 @@ func (s *scan) tokenText() (byte, error) {
return c, err
}
if c == '\n' {
- s.Position.Line++
- s.Position.Column = 0
+ s.position.Line++
+ s.position.Column = 0
}
- s.Position.Column++
+ s.position.Column++
return c, nil
}
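
The diff shows scanner.go only in fragments. Pieced together it plausibly looks like the sketch below: the struct, the position bookkeeping, and tokenText's signature come from the hunks above, while scanInit's body and the initial line count are assumptions. Note that text/scanner presumably stays imported here, but only for its Position type; the tokenizing itself is now done by hand, one byte at a time.

    package dns // assuming the library's package name

    import (
        "bufio"
        "io"
        "text/scanner" // assumed: kept only for the Position type
    )

    type scan struct {
        src      *bufio.Reader
        position scanner.Position
    }

    // scanInit wraps r in a buffered reader (body assumed, not in the diff).
    func scanInit(r io.Reader) *scan {
        s := new(scan)
        s.src = bufio.NewReader(r)
        s.position.Line = 1 // assumption: lines are counted from 1
        return s
    }

    // tokenText returns the next byte of input and keeps the line and
    // column counters current; this logic appears verbatim in the diff.
    func (s *scan) tokenText() (byte, error) {
        c, err := s.src.ReadByte()
        if err != nil {
            return c, err
        }
        if c == '\n' {
            s.position.Line++
            s.position.Column = 0
        }
        s.position.Column++
        return c, nil
    }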


@@ -6,7 +6,6 @@ import (
"os"
"strconv"
"strings"
"text/scanner"
)
// Only used when debugging the parser itself.
@@ -128,11 +127,8 @@ func parseZone(r io.Reader, origin, f string, t chan Token, include int) {
close(t)
}
}()
- var s scanner.Scanner
+ s := scanInit(r)
c := make(chan lex)
- s.Init(r)
- s.Mode = 0
- s.Whitespace = 0
// Start the lexer
go zlexer(s, c)
// 6 possible beginnings of a line, _ is a space
@@ -453,7 +449,7 @@ func (l lex) String() string {
}
// zlexer scans the sourcefile and returns tokens on the channel c.
- func zlexer(s scanner.Scanner, c chan lex) {
+ func zlexer(s *scan, c chan lex) {
var l lex
str := make([]byte, maxTok) // Should be enough for any token
stri := 0 // Offset in str (0 means empty)
@@ -464,23 +460,22 @@ func zlexer(s scanner.Scanner, c chan lex) {
rrtype := false
owner := true
brace := 0
- tok := s.Scan()
+ x, err := s.tokenText()
defer close(c)
- for tok != scanner.EOF {
- l.column = s.Position.Column
- l.line = s.Position.Line
+ for err == nil {
+ l.column = s.position.Column
+ l.line = s.position.Line
if stri > maxTok {
l.err = "tok length insufficient for parsing"
c <- l
return
}
- // Each token we get is one byte, so we switch on that x[0]. This
- // avoids a len(x) that Go otherwise will perform when comparing strings.
- switch x := s.TokenText(); x[0] {
+ switch x {
case ' ', '\t':
if quote {
// Inside quotes this is legal
- str[stri] = x[0]
+ str[stri] = x
stri++
break
}
@@ -541,13 +536,13 @@ func zlexer(s scanner.Scanner, c chan lex) {
case ';':
if quote {
// Inside quotes this is legal
- str[stri] = x[0]
+ str[stri] = x
stri++
break
}
if escape {
escape = false
- str[stri] = x[0]
+ str[stri] = x
stri++
break
}
@@ -564,7 +559,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
case '\n':
// Escaped newline
if quote {
- str[stri] = x[0]
+ str[stri] = x
stri++
break
}
@@ -612,12 +607,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
break
}
if escape {
- str[stri] = x[0]
+ str[stri] = x
stri++
escape = false
break
}
- str[stri] = x[0]
+ str[stri] = x
stri++
escape = true
case '"':
@@ -625,7 +620,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
break
}
if escape {
- str[stri] = x[0]
+ str[stri] = x
stri++
escape = false
break
@@ -644,7 +639,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
quote = !quote
case '(', ')':
if quote {
- str[stri] = x[0]
+ str[stri] = x
stri++
break
}
@@ -652,12 +647,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
break
}
if escape {
- str[stri] = x[0]
+ str[stri] = x
stri++
escape = false
break
}
- switch x[0] {
+ switch x {
case ')':
brace--
if brace < 0 {
@@ -673,11 +668,11 @@ func zlexer(s scanner.Scanner, c chan lex) {
break
}
escape = false
- str[stri] = x[0]
+ str[stri] = x
stri++
space = false
}
- tok = s.Scan()
+ x, err = s.tokenText()
}
// Hmm.
if stri > 0 {
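
The structural change in both lexers is the loop condition: instead of pulling string tokens with s.Scan() until scanner.EOF, they now read single bytes and run until tokenText reports a read error (io.EOF in the normal case), then flush whatever partial token remains, as the if stri > 0 check above does. A stand-alone toy version of that loop shape, assuming nothing from the library:

    package main

    import (
        "bufio"
        "fmt"
        "strings"
    )

    func main() {
        src := bufio.NewReader(strings.NewReader("a bb\nccc"))
        tokens, cur := 0, 0
        c, err := src.ReadByte()
        for err == nil { // same shape as the klexer/zlexer loops
            if c == ' ' || c == '\n' {
                if cur > 0 {
                    tokens++ // whitespace closes the current token
                }
                cur = 0
            } else {
                cur++
            }
            c, err = src.ReadByte()
        }
        // err is io.EOF here; flush the remainder
        if cur > 0 {
            tokens++
        }
        fmt.Println(tokens) // 3
    }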