Parsing speed increase of a factor 2-3

Implemented my own little tokenizer in scanner.go. I only use the
stuff I need.
Miek Gieben 2012-02-21 22:41:00 +01:00
parent f8fb563af9
commit f9d3068eb9
4 changed files with 38 additions and 47 deletions
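The new tokenizer replaces text/scanner with a thin wrapper around a bufio.Reader that hands out one byte at a time and does its own line/column bookkeeping. Below is a minimal sketch of that idea, reconstructed from the scan/tokenText fragments visible in the scanner.go diff further down; the scanInit body and the ReadByte call are assumptions (they are not shown in this commit), only the struct fields and the position updates in tokenText are confirmed by the hunks.

    package dns

    import (
        "bufio"
        "io"
        "text/scanner" // kept only for the Position type
    )

    type scan struct {
        src      *bufio.Reader
        position scanner.Position
    }

    func scanInit(r io.Reader) *scan {
        // Assumption: default buffer size and 1-based line numbering.
        return &scan{src: bufio.NewReader(r), position: scanner.Position{Line: 1}}
    }

    // tokenText returns the next byte of the input and keeps the
    // line/column administration current.
    func (s *scan) tokenText() (byte, error) {
        c, err := s.src.ReadByte() // assumption: the read itself is not in this diff
        if err != nil {
            return c, err
        }
        if c == '\n' {
            s.position.Line++
            s.position.Column = 0
        }
        s.position.Column++
        return c, nil
    }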


@@ -19,7 +19,7 @@ Features:
 * RFC 1035 zone file parsing;
 * Fast:
   * reply speed around 35/40K qps (Faster hardware -> more qps);
-  * Parsing RRs (zone files) with 35/40K RR/s, that's 5M records in about 125 seconds;
+  * Parsing RRs (zone files) with 80/90K RR/s, that's 5M records in about 58 seconds;
   * This is expected to be optimized further.
 * Client and server side programming (mimicking the net/http package);
 * Asynchronous queries/replies for client and server;


@@ -6,7 +6,6 @@ import (
 	"io"
 	"math/big"
 	"strings"
-	"text/scanner"
 )
 
 // ReadPrivateKey reads a private key from the io.Reader q.

@@ -92,13 +91,10 @@ func readPrivateKeyECDSA(m map[string]string) (PrivateKey, error) {
 // parseKey reads a private key from r. It returns a map[string]string,
 // with the key-value pairs, or an error when the file is not correct.
 func parseKey(r io.Reader, file string) (map[string]string, error) {
-	var s scanner.Scanner
+	s := scanInit(r)
 	m := make(map[string]string)
 	c := make(chan lex)
 	k := ""
-	s.Init(r)
-	s.Mode = 0
-	s.Whitespace = 0
 	// Start the lexer
 	go klexer(s, c)
 	for l := range c {

@@ -119,18 +115,18 @@ func parseKey(r io.Reader, file string) (map[string]string, error) {
 }
 
 // klexer scans the sourcefile and returns tokens on the channel c.
-func klexer(s scanner.Scanner, c chan lex) {
+func klexer(s *scan, c chan lex) {
 	var l lex
 	str := "" // Hold the current read text
 	commt := false
 	key := true
-	tok := s.Scan()
+	x, err := s.tokenText()
 	defer close(c)
-	for tok != scanner.EOF {
-		l.column = s.Position.Column
-		l.line = s.Position.Line
-		switch x := s.TokenText(); x {
-		case ":":
+	for err == nil {
+		l.column = s.position.Column
+		l.line = s.position.Line
+		switch x {
+		case ':':
 			if commt {
 				break
 			}

@@ -139,15 +135,15 @@ func klexer(s scanner.Scanner, c chan lex) {
 				l.value = _KEY
 				c <- l
 				// Next token is a space, eat it
-				s.Scan()
+				s.tokenText()
 				key = false
 				str = ""
 			} else {
 				l.value = _VALUE
 			}
-		case ";":
+		case ';':
 			commt = true
-		case "\n":
+		case '\n':
 			if commt {
 				// Reset a comment
 				commt = false

@@ -162,9 +158,9 @@ func klexer(s scanner.Scanner, c chan lex) {
 			if commt {
 				break
 			}
-			str += x
+			str += string(x)
 		}
-		tok = s.Scan()
+		x, err = s.tokenText()
 	}
 	if len(str) > 0 {
 		// Send remainder
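For context, parseKey drains the channel that klexer fills. A hypothetical sketch (not verbatim library code) of that consuming loop, assuming the lex type carries its text in a field named token (the field name is not visible in this diff):

    // Inside parseKey: pair each _KEY token with the _VALUE that follows it.
    for l := range c {
        switch l.value {
        case _KEY:
            k = l.token // remember the key until its value arrives
        case _VALUE:
            m[k] = l.token // assumption: no key normalization shown here
        }
    }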


@@ -9,7 +9,7 @@ import (
 type scan struct {
 	src *bufio.Reader
-	scanner.Position
+	position scanner.Position
 }
 
 func scanInit(r io.Reader) *scan {

@@ -25,9 +25,9 @@ func (s *scan) tokenText() (byte, error) {
 		return c, err
 	}
 	if c == '\n' {
-		s.Postion.Line++
-		s.Postion.Column = 0
+		s.position.Line++
+		s.position.Column = 0
 	}
-	s.Position.Column++
+	s.position.Column++
 	return c, nil
 }
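Since position is an unexported field, the scanner is only usable inside the dns package itself. A small hypothetical driver (with fmt and strings imported) that exercises nothing beyond scanInit, tokenText, and position as they appear above:

    s := scanInit(strings.NewReader("Private-key-format: v1.2\n"))
    for {
        c, err := s.tokenText()
        if err != nil {
            break // io.EOF ends the stream, hence `for err == nil` in the lexers
        }
        fmt.Printf("%d:%d %q\n", s.position.Line, s.position.Column, c)
    }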


@@ -6,7 +6,6 @@ import (
 	"os"
 	"strconv"
 	"strings"
-	"text/scanner"
 )
 
 // Only used when debugging the parser itself.

@@ -128,11 +127,8 @@ func parseZone(r io.Reader, origin, f string, t chan Token, include int) {
 			close(t)
 		}
 	}()
-	var s scanner.Scanner
+	s := scanInit(r)
 	c := make(chan lex)
-	s.Init(r)
-	s.Mode = 0
-	s.Whitespace = 0
 	// Start the lexer
 	go zlexer(s, c)
 	// 6 possible beginnings of a line, _ is a space

@@ -453,7 +449,7 @@ func (l lex) String() string {
 }
 
 // zlexer scans the sourcefile and returns tokens on the channel c.
-func zlexer(s scanner.Scanner, c chan lex) {
+func zlexer(s *scan, c chan lex) {
 	var l lex
 	str := make([]byte, maxTok) // Should be enough for any token
 	stri := 0 // Offset in str (0 means empty)

@@ -464,23 +460,22 @@ func zlexer(s scanner.Scanner, c chan lex) {
 	rrtype := false
 	owner := true
 	brace := 0
-	tok := s.Scan()
+	x, err := s.tokenText()
 	defer close(c)
-	for tok != scanner.EOF {
-		l.column = s.Position.Column
-		l.line = s.Position.Line
+	for err == nil {
+		l.column = s.position.Column
+		l.line = s.position.Line
 		if stri > maxTok {
 			l.err = "tok length insufficient for parsing"
 			c <- l
 			return
 		}
-		// Each token we get is one byte, so we switch on that x[0]. This
-		// avoids a len(x) that Go otherwise will perform when comparing strings.
-		switch x := s.TokenText(); x[0] {
+		switch x {
 		case ' ', '\t':
 			if quote {
 				// Inside quotes this is legal
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}

@@ -541,13 +536,13 @@ func zlexer(s scanner.Scanner, c chan lex) {
 		case ';':
 			if quote {
 				// Inside quotes this is legal
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}
 			if escape {
 				escape = false
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}

@@ -564,7 +559,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
 		case '\n':
 			// Escaped newline
 			if quote {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}

@@ -612,12 +607,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			if escape {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				escape = false
 				break
 			}
-			str[stri] = x[0]
+			str[stri] = x
 			stri++
 			escape = true
 		case '"':

@@ -625,7 +620,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			if escape {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				escape = false
 				break

@@ -644,7 +639,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
 			quote = !quote
 		case '(', ')':
 			if quote {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				break
 			}

@@ -652,12 +647,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			if escape {
-				str[stri] = x[0]
+				str[stri] = x
 				stri++
 				escape = false
 				break
 			}
-			switch x[0] {
+			switch x {
 			case ')':
 				brace--
 				if brace < 0 {

@@ -673,11 +668,11 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			escape = false
-			str[stri] = x[0]
+			str[stri] = x
 			stri++
 			space = false
 		}
-		tok = s.Scan()
+		x, err = s.tokenText()
 	}
 	// Hmm.
 	if stri > 0 {
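A last detail: zlexer accumulates token bytes in the preallocated str array with stri as the write offset, while klexer still grows a string with str += string(x). The array variant costs no allocation per byte; the string is built once, when the token is complete. The pattern in isolation (the value of maxTok and the final materialization are assumptions):

    str := make([]byte, maxTok) // reused for every token
    stri := 0
    for _, x := range []byte("MX 10 mx.example.org.") {
        str[stri] = x // one byte per scanned character, no allocation
        stri++
    }
    token := string(str[:stri]) // a single allocation per finished token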