From f9d3068eb97f232644639c36572cea2f15212648 Mon Sep 17 00:00:00 2001
From: Miek Gieben
Date: Tue, 21 Feb 2012 22:41:00 +0100
Subject: [PATCH] Parsing speed increase by a factor of 2-3

Implemented my own little tokenizer in scanner.go; it implements only
the functionality the zone and key parsers need.
---
 README.markdown |  2 +-
 kscan.go        | 30 +++++++++++++-----------------
 scanner.go      |  8 ++++----
 zscan.go        | 45 ++++++++++++++++++++-------------------------
 4 files changed, 38 insertions(+), 47 deletions(-)

diff --git a/README.markdown b/README.markdown
index ec656446..28f08471 100644
--- a/README.markdown
+++ b/README.markdown
@@ -19,7 +19,7 @@ Features:
 * RFC 1035 zone file parsing;
 * Fast:
     * reply speed around 35/40K qps (Faster hardware -> more qps);
-    * Parsing RRs (zone files) with 35/40K RR/s, that's 5M records in about 125 seconds;
+    * Parsing RRs (zone files) with 80/90K RR/s, that's 5M records in about 58 seconds;
     * This is expected to be optimized further.
 * Client and server side programming (mimicking the net/http package);
 * Asynchronous queries/replies for client and server;
diff --git a/kscan.go b/kscan.go
index f05f7e58..47427368 100644
--- a/kscan.go
+++ b/kscan.go
@@ -6,7 +6,6 @@ import (
 	"io"
 	"math/big"
 	"strings"
-	"text/scanner"
 )
 
 // ReadPrivateKey reads a private key from the io.Reader q.
@@ -92,13 +91,10 @@ func readPrivateKeyECDSA(m map[string]string) (PrivateKey, error) {
 // parseKey reads a private key from r. It returns a map[string]string,
 // with the key-value pairs, or an error when the file is not correct.
 func parseKey(r io.Reader, file string) (map[string]string, error) {
-	var s scanner.Scanner
+	s := scanInit(r)
 	m := make(map[string]string)
 	c := make(chan lex)
 	k := ""
-	s.Init(r)
-	s.Mode = 0
-	s.Whitespace = 0
 	// Start the lexer
 	go klexer(s, c)
 	for l := range c {
@@ -119,18 +115,18 @@ func parseKey(r io.Reader, file string) (map[string]string, error) {
 }
 
 // klexer scans the sourcefile and returns tokens on the channel c.
-func klexer(s scanner.Scanner, c chan lex) {
+func klexer(s *scan, c chan lex) {
 	var l lex
 	str := "" // Hold the current read text
 	commt := false
 	key := true
-	tok := s.Scan()
+	x, err := s.tokenText()
 	defer close(c)
-	for tok != scanner.EOF {
-		l.column = s.Position.Column
-		l.line = s.Position.Line
-		switch x := s.TokenText(); x {
-		case ":":
+	for err == nil {
+		l.column = s.position.Column
+		l.line = s.position.Line
+		switch x {
+		case ':':
 			if commt {
 				break
 			}
@@ -139,15 +135,15 @@ func klexer(s scanner.Scanner, c chan lex) {
 				l.value = _KEY
 				c <- l
 				// Next token is a space, eat it
-				s.Scan()
+				s.tokenText()
 				key = false
 				str = ""
 			} else {
 				l.value = _VALUE
 			}
-		case ";":
+		case ';':
 			commt = true
-		case "\n":
+		case '\n':
 			if commt {
 				// Reset a comment
 				commt = false
@@ -162,9 +158,9 @@ func klexer(s scanner.Scanner, c chan lex) {
 			if commt {
 				break
 			}
-			str += x
+			str += string(x)
 		}
-		tok = s.Scan()
+		x, err = s.tokenText()
 	}
 	if len(str) > 0 {
 		// Send remainder
diff --git a/scanner.go b/scanner.go
index 927a1d18..ebf255e4 100644
--- a/scanner.go
+++ b/scanner.go
@@ -9,7 +9,7 @@ import (
 
 type scan struct {
 	src *bufio.Reader
-	scanner.Position
+	position scanner.Position
 }
 
 func scanInit(r io.Reader) *scan {
@@ -25,9 +25,9 @@ func (s *scan) tokenText() (byte, error) {
 		return c, err
 	}
 	if c == '\n' {
-		s.Position.Line++
-		s.Position.Column = 0
+		s.position.Line++
+		s.position.Column = 0
 	}
-	s.Position.Column++
+	s.position.Column++
 	return c, nil
 }
diff --git a/zscan.go b/zscan.go
index 5eaca6cc..016db934 100644
--- a/zscan.go
+++ b/zscan.go
@@ -6,7 +6,6 @@ import (
 	"os"
 	"strconv"
 	"strings"
-	"text/scanner"
 )
 
 // Only used when debugging the parser itself.
@@ -128,11 +127,8 @@ func parseZone(r io.Reader, origin, f string, t chan Token, include int) {
 			close(t)
 		}
 	}()
-	var s scanner.Scanner
+	s := scanInit(r)
 	c := make(chan lex)
-	s.Init(r)
-	s.Mode = 0
-	s.Whitespace = 0
 	// Start the lexer
 	go zlexer(s, c)
 	// 6 possible beginnings of a line, _ is a space
@@ -453,7 +449,7 @@ func (l lex) String() string {
 }
 
 // zlexer scans the sourcefile and returns tokens on the channel c.
-func zlexer(s scanner.Scanner, c chan lex) {
+func zlexer(s *scan, c chan lex) {
 	var l lex
 	str := make([]byte, maxTok) // Should be enough for any token
 	stri := 0                   // Offset in str (0 means empty)
@@ -464,23 +460,22 @@ func zlexer(s scanner.Scanner, c chan lex) {
 	rrtype := false
 	owner := true
 	brace := 0
-	tok := s.Scan()
+	x, err := s.tokenText()
 	defer close(c)
-	for tok != scanner.EOF {
-		l.column = s.Position.Column
-		l.line = s.Position.Line
+	for err == nil {
+		l.column = s.position.Column
+		l.line = s.position.Line
 		if stri > maxTok {
 			l.err = "tok length insufficient for parsing"
 			c <- l
 			return
 		}
-		// Each token we get is one byte, so we switch on that x[0]. This
-		// avoids a len(x) that Go otherwise will perform when comparing strings.
-		switch x := s.TokenText(); x[0] {
+
+		switch x {
 		case ' ', '\t':
 			if quote {
 				// Inside quotes this is legal
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
@@ -541,13 +536,13 @@ func zlexer(s scanner.Scanner, c chan lex) {
 		case ';':
 			if quote {
 				// Inside quotes this is legal
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
 			if escape {
 				escape = false
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
@@ -564,7 +559,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
 		case '\n':
 			// Escaped newline
 			if quote {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
@@ -612,21 +607,21 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			if escape {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				escape = false
 				break
 			}
-			str[stri] = x[0]
+			str[stri] = x
 			stri++
 			escape = true
 		case '"':
 			if commt {
 				break
 			}
 			if escape {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				escape = false
 				break
 			}
@@ -644,7 +639,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
 			quote = !quote
 		case '(', ')':
 			if quote {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
@@ -652,12 +647,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			if escape {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				escape = false
 				break
 			}
-			switch x[0] {
+			switch x {
 			case ')':
 				brace--
 				if brace < 0 {
@@ -673,11 +668,11 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			escape = false
-			str[stri] = x[0]
+			str[stri] = x
 			stri++
 			space = false
 		}
-		tok = s.Scan()
+		x, err = s.tokenText()
 	}
 	// Hmm.
 	if stri > 0 {
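
For reference, the new scanner.go as a whole should look roughly like the
sketch below, assembled from the hunks above. This is a minimal sketch, not
the authoritative file: the package clause, the import block, and the body
of scanInit do not appear in the diff and are assumptions.

package dns

import (
	"bufio"
	"io"
	"text/scanner"
)

// scan reads the source byte by byte and records the line/column
// position of the byte that was read last.
type scan struct {
	src      *bufio.Reader
	position scanner.Position
}

// scanInit returns a scanner reading from r. (Assumed body; only the
// signature shows up as diff context.)
func scanInit(r io.Reader) *scan {
	s := new(scan)
	s.src = bufio.NewReader(r)
	s.position.Line = 1
	return s
}

// tokenText returns the next byte of the source and updates the
// position. Every token is exactly one byte, which is what lets klexer
// and zlexer switch on single bytes instead of comparing strings as
// text/scanner forced them to do.
func (s *scan) tokenText() (byte, error) {
	c, err := s.src.ReadByte()
	if err != nil {
		return c, err
	}
	if c == '\n' {
		s.position.Line++
		s.position.Column = 0
	}
	s.position.Column++
	return c, nil
}

Both lexers drive it in the same pattern seen in the hunks: fetch the first
byte with x, err := s.tokenText(), loop for err == nil, switch on the byte
x, and fetch the next byte at the end of each iteration.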