Parsing speed increase of factor 2/3

Implemented my own little tokenizer in scanner.go. I only use the
stuff I need.
Miek Gieben 2012-02-21 22:41:00 +01:00
parent f8fb563af9
commit f9d3068eb9
4 changed files with 38 additions and 47 deletions
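
Both parsers in this commit share the same shape: a lexer goroutine reads the source and sends tokens over a channel, and the parser simply ranges over that channel until the lexer closes it. A minimal, self-contained sketch of the pattern (the lex fields and token constants below are simplified stand-ins, not the library's actual definitions):

    package main

    import (
        "fmt"
        "strings"
    )

    // lex is a simplified stand-in for the library's token type.
    type lex struct {
        value int    // token class: _KEY or _VALUE
        token string // the token text
        line  int    // line number, for error reporting
    }

    const (
        _KEY = iota
        _VALUE
    )

    // lexer tokenizes "Key: Value" lines and sends each piece on c.
    // Closing c is what terminates the consumer's range loop.
    func lexer(src string, c chan lex) {
        defer close(c)
        for i, line := range strings.Split(src, "\n") {
            k, v, ok := strings.Cut(line, ":")
            if !ok {
                continue // not a key/value line
            }
            c <- lex{value: _KEY, token: strings.TrimSpace(k), line: i + 1}
            c <- lex{value: _VALUE, token: strings.TrimSpace(v), line: i + 1}
        }
    }

    func main() {
        c := make(chan lex)
        go lexer("Algorithm: 5\nPrivateExponent: deadbeef", c)
        m := make(map[string]string)
        k := ""
        for l := range c { // the parser's side of the channel
            switch l.value {
            case _KEY:
                k = l.token
            case _VALUE:
                m[k] = l.token
            }
        }
        fmt.Println(m) // map[Algorithm:5 PrivateExponent:deadbeef]
    }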


@@ -19,7 +19,7 @@ Features:
* RFC 1035 zone file parsing;
* Fast:
* reply speed around 35/40K qps (Faster hardware -> more qps);
- * Parsing RRs (zone files) with 35/40K RR/s, that's 5M records in about 125 seconds;
+ * Parsing RRs (zone files) with 80/90K RR/s, that's 5M records in about 58 seconds;
* This is expected to be optimized further.
* Client and server side programming (mimicking the net/http package);
* Asynchronous queries/replies for client and server;


@@ -6,7 +6,6 @@ import (
"io"
"math/big"
"strings"
"text/scanner"
)
// ReadPrivateKey reads a private key from the io.Reader q.
@@ -92,13 +91,10 @@ func readPrivateKeyECDSA(m map[string]string) (PrivateKey, error) {
// parseKey reads a private key from r. It returns a map[string]string,
// with the key-value pairs, or an error when the file is not correct.
func parseKey(r io.Reader, file string) (map[string]string, error) {
- var s scanner.Scanner
+ s := scanInit(r)
m := make(map[string]string)
c := make(chan lex)
k := ""
- s.Init(r)
- s.Mode = 0
- s.Whitespace = 0
// Start the lexer
go klexer(s, c)
for l := range c {
@@ -119,18 +115,18 @@ func parseKey(r io.Reader, file string) (map[string]string, error) {
}
// klexer scans the sourcefile and returns tokens on the channel c.
- func klexer(s scanner.Scanner, c chan lex) {
+ func klexer(s *scan, c chan lex) {
var l lex
str := "" // Hold the current read text
commt := false
key := true
- tok := s.Scan()
+ x, err := s.tokenText()
defer close(c)
- for tok != scanner.EOF {
- l.column = s.Position.Column
- l.line = s.Position.Line
- switch x := s.TokenText(); x {
- case ":":
+ for err == nil {
+ l.column = s.position.Column
+ l.line = s.position.Line
+ switch x {
+ case ':':
if commt {
break
}
@@ -139,15 +135,15 @@ func klexer(s scanner.Scanner, c chan lex) {
l.value = _KEY
c <- l
// Next token is a space, eat it
- s.Scan()
+ s.tokenText()
key = false
str = ""
} else {
l.value = _VALUE
}
case ";":
case ';':
commt = true
case "\n":
case '\n':
if commt {
// Reset a comment
commt = false
@@ -162,9 +158,9 @@ func klexer(s scanner.Scanner, c chan lex) {
if commt {
break
}
- str += x
+ str += string(x)
}
- tok = s.Scan()
+ x, err = s.tokenText()
}
if len(str) > 0 {
// Send remainder
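
Callers of parseKey are unaffected by the lexer swap: it still returns a map of the key file's attribute/value pairs. A hypothetical in-package usage sketch (the file content and name are illustrative fragments, not a real key; parseKey is unexported, so this only works inside the package):

    input := "Private-key-format: v1.2\n" +
        "Algorithm: 5 (RSASHA1)\n" +
        "; comments start with a semicolon\n"
    m, err := parseKey(strings.NewReader(input), "Kexample.net.+005+12345.private")
    if err != nil {
        // the file was malformed
    }
    // m now maps attribute names to their string values.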


@@ -9,7 +9,7 @@ import (
type scan struct {
src *bufio.Reader
- scanner.Position
+ position scanner.Position
}
func scanInit(r io.Reader) *scan {
@@ -25,9 +25,9 @@ func (s *scan) tokenText() (byte, error) {
return c, err
}
if c == '\n' {
- s.Position.Line++
- s.Position.Column = 0
+ s.position.Line++
+ s.position.Column = 0
}
- s.Position.Column++
+ s.position.Column++
return c, nil
}
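
The diff shows scanner.go only in fragments. Pieced together it plausibly looks like the sketch below: the struct, the position bookkeeping, and tokenText's signature come from the hunks above, while scanInit's body and the initial line count are assumptions. Note that text/scanner presumably stays imported here, but only for its Position type; the tokenizing itself is now done by hand, one byte at a time.

    package dns // assuming the library's package name

    import (
        "bufio"
        "io"
        "text/scanner" // assumed: kept only for the Position type
    )

    type scan struct {
        src      *bufio.Reader
        position scanner.Position
    }

    // scanInit wraps r in a buffered reader (body assumed, not in the diff).
    func scanInit(r io.Reader) *scan {
        s := new(scan)
        s.src = bufio.NewReader(r)
        s.position.Line = 1 // assumption: lines are counted from 1
        return s
    }

    // tokenText returns the next byte of input and keeps the line and
    // column counters current; this logic appears verbatim in the diff.
    func (s *scan) tokenText() (byte, error) {
        c, err := s.src.ReadByte()
        if err != nil {
            return c, err
        }
        if c == '\n' {
            s.position.Line++
            s.position.Column = 0
        }
        s.position.Column++
        return c, nil
    }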


@@ -6,7 +6,6 @@ import (
"os"
"strconv"
"strings"
"text/scanner"
)
// Only used when debugging the parser itself.
@@ -128,11 +127,8 @@ func parseZone(r io.Reader, origin, f string, t chan Token, include int) {
close(t)
}
}()
- var s scanner.Scanner
+ s := scanInit(r)
c := make(chan lex)
- s.Init(r)
- s.Mode = 0
- s.Whitespace = 0
// Start the lexer
go zlexer(s, c)
// 6 possible beginnings of a line, _ is a space
@@ -453,7 +449,7 @@ func (l lex) String() string {
}
// zlexer scans the sourcefile and returns tokens on the channel c.
- func zlexer(s scanner.Scanner, c chan lex) {
+ func zlexer(s *scan, c chan lex) {
var l lex
str := make([]byte, maxTok) // Should be enough for any token
stri := 0 // Offset in str (0 means empty)
@@ -464,23 +460,22 @@ func zlexer(s scanner.Scanner, c chan lex) {
rrtype := false
owner := true
brace := 0
- tok := s.Scan()
+ x, err := s.tokenText()
defer close(c)
- for tok != scanner.EOF {
- l.column = s.Position.Column
- l.line = s.Position.Line
+ for err == nil {
+ l.column = s.position.Column
+ l.line = s.position.Line
if stri > maxTok {
l.err = "tok length insufficient for parsing"
c <- l
return
}
- // Each token we get is one byte, so we switch on that x[0]. This
- // avoids a len(x) that Go otherwise will perform when comparing strings.
- switch x := s.TokenText(); x[0] {
+ switch x {
case ' ', '\t':
if quote {
// Inside quotes this is legal
- str[stri] = x[0]
+ str[stri] = x
stri++
break
}
@@ -541,13 +536,13 @@ func zlexer(s scanner.Scanner, c chan lex) {
case ';':
if quote {
// Inside quotes this is legal
- str[stri] = x[0]
+ str[stri] = x
stri++
break
}
if escape {
escape = false
- str[stri] = x[0]
+ str[stri] = x
stri++
break
}
@@ -564,7 +559,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
case '\n':
// Escaped newline
if quote {
- str[stri] = x[0]
+ str[stri] = x
stri++
break
}
@@ -612,12 +607,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
break
}
if escape {
- str[stri] = x[0]
+ str[stri] = x
stri++
escape = false
break
}
- str[stri] = x[0]
+ str[stri] = x
stri++
escape = true
case '"':
@@ -625,7 +620,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
break
}
if escape {
- str[stri] = x[0]
+ str[stri] = x
stri++
escape = false
break
@@ -644,7 +639,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
quote = !quote
case '(', ')':
if quote {
- str[stri] = x[0]
+ str[stri] = x
stri++
break
}
@@ -652,12 +647,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
break
}
if escape {
- str[stri] = x[0]
+ str[stri] = x
stri++
escape = false
break
}
- switch x[0] {
+ switch x {
case ')':
brace--
if brace < 0 {
@@ -673,11 +668,11 @@ func zlexer(s scanner.Scanner, c chan lex) {
break
}
escape = false
- str[stri] = x[0]
+ str[stri] = x
stri++
space = false
}
- tok = s.Scan()
+ x, err = s.tokenText()
}
// Hmm.
if stri > 0 {
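
The structural change in both lexers is the loop condition: instead of pulling string tokens with s.Scan() until scanner.EOF, they now read single bytes and run until tokenText reports a read error (io.EOF in the normal case), then flush whatever partial token remains, as the if stri > 0 check above does. A stand-alone toy version of that loop shape, assuming nothing from the library:

    package main

    import (
        "bufio"
        "fmt"
        "strings"
    )

    func main() {
        src := bufio.NewReader(strings.NewReader("a bb\nccc"))
        tokens, cur := 0, 0
        c, err := src.ReadByte()
        for err == nil { // same shape as the klexer/zlexer loops
            if c == ' ' || c == '\n' {
                if cur > 0 {
                    tokens++ // whitespace closes the current token
                }
                cur = 0
            } else {
                cur++
            }
            c, err = src.ReadByte()
        }
        // err is io.EOF here; flush the remainder
        if cur > 0 {
            tokens++
        }
        fmt.Println(tokens) // 3
    }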