From f9d3068eb97f232644639c36572cea2f15212648 Mon Sep 17 00:00:00 2001
From: Miek Gieben
Date: Tue, 21 Feb 2012 22:41:00 +0100
Subject: [PATCH] Parsing speed increase by a factor of 2-3

Implemented my own little tokenizer in scanner.go; it implements only
the functionality the zone and key parsers need.
---
 README.markdown |  2 +-
 kscan.go        | 30 +++++++++++++-----------------
 scanner.go      |  8 ++++----
 zscan.go        | 45 ++++++++++++++++++++-------------------------
 4 files changed, 38 insertions(+), 47 deletions(-)

diff --git a/README.markdown b/README.markdown
index ec656446..28f08471 100644
--- a/README.markdown
+++ b/README.markdown
@@ -19,7 +19,7 @@ Features:
 * RFC 1035 zone file parsing;
 * Fast:
     * reply speed around 35/40K qps (Faster hardware -> more qps);
-    * Parsing RRs (zone files) with 35/40K RR/s, that's 5M records in about 125 seconds;
+    * Parsing RRs (zone files) with 80/90K RR/s, that's 5M records in about 58 seconds;
     * This is expected to be optimized further.
 * Client and server side programming (mimicking the net/http package);
 * Asynchronous queries/replies for client and server;
diff --git a/kscan.go b/kscan.go
index f05f7e58..47427368 100644
--- a/kscan.go
+++ b/kscan.go
@@ -6,7 +6,6 @@ import (
 	"io"
 	"math/big"
 	"strings"
-	"text/scanner"
 )
 
 // ReadPrivateKey reads a private key from the io.Reader q.
@@ -92,13 +91,10 @@ func readPrivateKeyECDSA(m map[string]string) (PrivateKey, error) {
 // parseKey reads a private key from r. It returns a map[string]string,
 // with the key-value pairs, or an error when the file is not correct.
 func parseKey(r io.Reader, file string) (map[string]string, error) {
-	var s scanner.Scanner
+	s := scanInit(r)
 	m := make(map[string]string)
 	c := make(chan lex)
 	k := ""
-	s.Init(r)
-	s.Mode = 0
-	s.Whitespace = 0
 	// Start the lexer
 	go klexer(s, c)
 	for l := range c {
@@ -119,18 +115,18 @@ func parseKey(r io.Reader, file string) (map[string]string, error) {
 }
 
 // klexer scans the sourcefile and returns tokens on the channel c.
-func klexer(s scanner.Scanner, c chan lex) {
+func klexer(s *scan, c chan lex) {
 	var l lex
 	str := "" // Hold the current read text
 	commt := false
 	key := true
-	tok := s.Scan()
+	x, err := s.tokenText()
 	defer close(c)
-	for tok != scanner.EOF {
-		l.column = s.Position.Column
-		l.line = s.Position.Line
-		switch x := s.TokenText(); x {
-		case ":":
+	for err == nil {
+		l.column = s.position.Column
+		l.line = s.position.Line
+		switch x {
+		case ':':
 			if commt {
 				break
 			}
@@ -139,15 +135,15 @@ func klexer(s scanner.Scanner, c chan lex) {
 				l.value = _KEY
 				c <- l
 				// Next token is a space, eat it
-				s.Scan()
+				s.tokenText()
 				key = false
 				str = ""
 			} else {
 				l.value = _VALUE
 			}
-		case ";":
+		case ';':
 			commt = true
-		case "\n":
+		case '\n':
 			if commt {
 				// Reset a comment
 				commt = false
@@ -162,9 +158,9 @@ func klexer(s scanner.Scanner, c chan lex) {
 			if commt {
 				break
 			}
-			str += x
+			str += string(x)
 		}
-		tok = s.Scan()
+		x, err = s.tokenText()
 	}
 	if len(str) > 0 {
 		// Send remainder
diff --git a/scanner.go b/scanner.go
index 927a1d18..ebf255e4 100644
--- a/scanner.go
+++ b/scanner.go
@@ -9,7 +9,7 @@ import (
 
 type scan struct {
 	src *bufio.Reader
-	scanner.Position
+	position scanner.Position
 }
 
 func scanInit(r io.Reader) *scan {
@@ -25,9 +25,9 @@ func (s *scan) tokenText() (byte, error) {
 		return c, err
 	}
 	if c == '\n' {
-		s.Position.Line++
-		s.Position.Column = 0
+		s.position.Line++
+		s.position.Column = 0
 	}
-	s.Position.Column++
+	s.position.Column++
 	return c, nil
 }
diff --git a/zscan.go b/zscan.go
index 5eaca6cc..016db934 100644
--- a/zscan.go
+++ b/zscan.go
@@ -6,7 +6,6 @@ import (
 	"os"
 	"strconv"
 	"strings"
-	"text/scanner"
 )
 
 // Only used when debugging the parser itself.
@@ -128,11 +127,8 @@ func parseZone(r io.Reader, origin, f string, t chan Token, include int) {
 			close(t)
 		}
 	}()
-	var s scanner.Scanner
+	s := scanInit(r)
 	c := make(chan lex)
-	s.Init(r)
-	s.Mode = 0
-	s.Whitespace = 0
 	// Start the lexer
 	go zlexer(s, c)
 	// 6 possible beginnings of a line, _ is a space
@@ -453,7 +449,7 @@ func (l lex) String() string {
 }
 
 // zlexer scans the sourcefile and returns tokens on the channel c.
-func zlexer(s scanner.Scanner, c chan lex) {
+func zlexer(s *scan, c chan lex) {
 	var l lex
 	str := make([]byte, maxTok) // Should be enough for any token
 	stri := 0                   // Offset in str (0 means empty)
@@ -464,23 +460,22 @@ func zlexer(s scanner.Scanner, c chan lex) {
 	rrtype := false
 	owner := true
 	brace := 0
-	tok := s.Scan()
+	x, err := s.tokenText()
 	defer close(c)
-	for tok != scanner.EOF {
-		l.column = s.Position.Column
-		l.line = s.Position.Line
+	for err == nil {
+		l.column = s.position.Column
+		l.line = s.position.Line
 		if stri > maxTok {
 			l.err = "tok length insufficient for parsing"
 			c <- l
 			return
 		}
-		// Each token we get is one byte, so we switch on that x[0]. This
-		// avoids a len(x) that Go otherwise will perform when comparing strings.
-		switch x := s.TokenText(); x[0] {
+
+		switch x {
 		case ' ', '\t':
 			if quote {
 				// Inside quotes this is legal
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
@@ -541,13 +536,13 @@ func zlexer(s scanner.Scanner, c chan lex) {
 		case ';':
 			if quote {
 				// Inside quotes this is legal
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
 			if escape {
 				escape = false
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
@@ -564,7 +559,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
 		case '\n':
 			// Escaped newline
 			if quote {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
@@ -612,21 +607,21 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			if escape {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				escape = false
 				break
 			}
-			str[stri] = x[0]
+			str[stri] = x
 			stri++
 			escape = true
 		case '"':
 			if commt {
 				break
 			}
 			if escape {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				escape = false
 				break
 			}
@@ -644,7 +639,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
 			quote = !quote
 		case '(', ')':
 			if quote {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
@@ -652,12 +647,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			if escape {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				escape = false
 				break
 			}
-			switch x[0] {
+			switch x {
 			case ')':
 				brace--
 				if brace < 0 {
@@ -673,11 +668,11 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			escape = false
-			str[stri] = x[0]
+			str[stri] = x
 			stri++
 			space = false
 		}
-		tok = s.Scan()
+		x, err = s.tokenText()
 	}
 	// Hmm.
 	if stri > 0 {
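
For reference, the new scanner.go as a whole should look roughly like the
sketch below, assembled from the hunks above. This is a minimal sketch, not
the authoritative file: the package clause, the import block, and the body
of scanInit do not appear in the diff and are assumptions.

package dns

import (
	"bufio"
	"io"
	"text/scanner"
)

// scan reads the source byte by byte and records the line/column
// position of the byte that was read last.
type scan struct {
	src      *bufio.Reader
	position scanner.Position
}

// scanInit returns a scanner reading from r. (Assumed body; only the
// signature shows up as diff context.)
func scanInit(r io.Reader) *scan {
	s := new(scan)
	s.src = bufio.NewReader(r)
	s.position.Line = 1
	return s
}

// tokenText returns the next byte of the source and updates the
// position. Every token is exactly one byte, which is what lets klexer
// and zlexer switch on single bytes instead of comparing strings as
// text/scanner forced them to do.
func (s *scan) tokenText() (byte, error) {
	c, err := s.src.ReadByte()
	if err != nil {
		return c, err
	}
	if c == '\n' {
		s.position.Line++
		s.position.Column = 0
	}
	s.position.Column++
	return c, nil
}

Both lexers drive it in the same pattern seen in the hunks: fetch the first
byte with x, err := s.tokenText(), loop for err == nil, switch on the byte
x, and fetch the next byte at the end of each iteration.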