Make parsing 50% faster by eliminating allocation

* We now use a [1024]byte buffer to store the token
* Return an error when this is too small
This commit is contained in:
Miek Gieben 2012-01-27 23:59:21 +01:00
parent 431b800ae0
commit 06ab0a998a
1 changed files with 47 additions and 32 deletions

View File

@ -11,6 +11,7 @@ import (
// Only used when debugging the parser itself. // Only used when debugging the parser itself.
var _DEBUG = false var _DEBUG = false
const maxTok = 1024
// Tokinize a RFC 1035 zone file. The tokenizer will normalize it: // Tokinize a RFC 1035 zone file. The tokenizer will normalize it:
// * Add ownernames if they are left blank; // * Add ownernames if they are left blank;
@ -88,9 +89,9 @@ type Token struct {
// The class defaults to IN and TTL defaults to DefaultTtl // The class defaults to IN and TTL defaults to DefaultTtl
func NewRR(s string) (RR, error) { func NewRR(s string) (RR, error) {
if s[len(s)-1] != '\n' { // We need a closing newline if s[len(s)-1] != '\n' { // We need a closing newline
return ReadRR(strings.NewReader(s+"\n"), "") return ReadRR(strings.NewReader(s+"\n"), "")
} }
return ReadRR(strings.NewReader(s), "") return ReadRR(strings.NewReader(s), "")
} }
// Ioreader here, or filename which *we* open....??? // Ioreader here, or filename which *we* open....???
@ -98,11 +99,11 @@ func NewRR(s string) (RR, error) {
// ReadRR reads the RR contained in q. Only the first RR is returned. // ReadRR reads the RR contained in q. Only the first RR is returned.
// The class defaults to IN and TTL defaults to DefaultTtl // The class defaults to IN and TTL defaults to DefaultTtl
func ReadRR(q io.Reader, filename string) (RR, error) { func ReadRR(q io.Reader, filename string) (RR, error) {
r := <-ParseZone(q, filename) r := <-ParseZone(q, filename)
if r.Error != nil { if r.Error != nil {
return nil, r.Error return nil, r.Error
} }
return r.RR, nil return r.RR, nil
} }
// ParseZone reads a RFC 1035 zone from r. It returns each parsed RR or on error // ParseZone reads a RFC 1035 zone from r. It returns each parsed RR or on error
@ -194,10 +195,10 @@ func parseZone(r io.Reader, f string, t chan Token, include int) {
t <- Token{Error: &ParseError{f, "Failed to open `" + l.token + "'", l}} t <- Token{Error: &ParseError{f, "Failed to open `" + l.token + "'", l}}
return return
} }
if include + 1 > 7 { if include+1 > 7 {
t <- Token{Error: &ParseError{f, "Too deeply nested $INCLUDE", l}} t <- Token{Error: &ParseError{f, "Too deeply nested $INCLUDE", l}}
return return
} }
parseZone(r1, l.token, t, include+1) parseZone(r1, l.token, t, include+1)
st = _EXPECT_OWNER_DIR st = _EXPECT_OWNER_DIR
case _EXPECT_DIRTTL_BL: case _EXPECT_DIRTTL_BL:
@ -357,7 +358,8 @@ func (l lex) String() string {
// zlexer scans the sourcefile and returns tokens on the channel c. // zlexer scans the sourcefile and returns tokens on the channel c.
func zlexer(s scanner.Scanner, c chan lex) { func zlexer(s scanner.Scanner, c chan lex) {
var l lex var l lex
str := "" // Hold the current read text str := make([]byte, maxTok) // Should be enough for any token
stri := 0 // Offset in str (0 means empty)
quote := false quote := false
escape := false escape := false
space := false space := false
@ -370,21 +372,26 @@ func zlexer(s scanner.Scanner, c chan lex) {
for tok != scanner.EOF { for tok != scanner.EOF {
l.column = s.Position.Column l.column = s.Position.Column
l.line = s.Position.Line l.line = s.Position.Line
if stri > maxTok {
l.err = "tok length insufficient for parsing"
c <- l
return
}
switch x := s.TokenText(); x { switch x := s.TokenText(); x {
case " ", "\t": case " ", "\t":
escape = false escape = false
if commt { if commt {
break break
} }
if str == "" { if stri == 0 {
//l.value = _BLANK //l.value = _BLANK
//l.token = " " //l.token = " "
} else if owner { } else if owner {
// If we have a string and its the first, make it an owner // If we have a string and its the first, make it an owner
l.value = _OWNER l.value = _OWNER
l.token = str l.token = string(str[:stri])
// escape $... start with a \ not a $, so this will work // escape $... start with a \ not a $, so this will work
switch str { switch string(str[:stri]) {
case "$TTL": case "$TTL":
l.value = _DIRTTL l.value = _DIRTTL
case "$ORIGIN": case "$ORIGIN":
@ -395,7 +402,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
c <- l c <- l
} else { } else {
l.value = _STRING l.value = _STRING
l.token = str l.token = string(str[:stri])
if !rrtype { if !rrtype {
if _, ok := Str_rr[strings.ToUpper(l.token)]; ok { if _, ok := Str_rr[strings.ToUpper(l.token)]; ok {
@ -408,7 +415,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
} }
c <- l c <- l
} }
str = "" stri = 0
if !space && !commt { if !space && !commt {
l.value = _BLANK l.value = _BLANK
l.token = " " l.token = " "
@ -419,12 +426,14 @@ func zlexer(s scanner.Scanner, c chan lex) {
case ";": case ";":
if escape { if escape {
escape = false escape = false
str += ";" str[stri] = ';'
stri++
break break
} }
if quote { if quote {
// Inside quoted text we allow ; // Inside quoted text we allow ;
str += ";" str[stri] = ';'
stri++
break break
} }
commt = true commt = true
@ -435,7 +444,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
// Reset a comment // Reset a comment
commt = false commt = false
rrtype = false rrtype = false
str = "" stri = 0
// If not in a brace this ends the comment AND the RR // If not in a brace this ends the comment AND the RR
if brace == 0 { if brace == 0 {
owner = true owner = true
@ -445,9 +454,9 @@ func zlexer(s scanner.Scanner, c chan lex) {
} }
break break
} }
if str != "" { if stri != 0 {
l.value = _STRING l.value = _STRING
l.token = str l.token = string(str[:stri])
if !rrtype { if !rrtype {
if _, ok := Str_rr[strings.ToUpper(l.token)]; ok { if _, ok := Str_rr[strings.ToUpper(l.token)]; ok {
l.value = _RRTYPE l.value = _RRTYPE
@ -471,7 +480,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
space = true space = true
} }
str = "" stri = 0
commt = false commt = false
rrtype = false rrtype = false
owner = true owner = true
@ -480,18 +489,21 @@ func zlexer(s scanner.Scanner, c chan lex) {
break break
} }
if escape { if escape {
str += "\\" str[stri] = '\\'
stri++
escape = false escape = false
break break
} }
str += "\\" str[stri] = '\\'
stri++
escape = true escape = true
case "\"": case "\"":
if commt { if commt {
break break
} }
if escape { if escape {
str += "\"" str[stri] = '"'
stri++
escape = false escape = false
break break
} }
@ -502,7 +514,8 @@ func zlexer(s scanner.Scanner, c chan lex) {
break break
} }
if escape { if escape {
str += "(" str[stri] = '('
stri++
escape = false escape = false
break break
} }
@ -512,13 +525,14 @@ func zlexer(s scanner.Scanner, c chan lex) {
break break
} }
if escape { if escape {
str += ")" str[stri] = ')'
stri++
escape = false escape = false
break break
} }
brace-- brace--
if brace < 0 { if brace < 0 {
l.err = "Extra closing brace" l.err = "extra closing brace"
c <- l c <- l
return return
} }
@ -527,15 +541,16 @@ func zlexer(s scanner.Scanner, c chan lex) {
break break
} }
escape = false escape = false
str += x str[stri] = byte(x[0]) // This should be ok...
stri++
space = false space = false
} }
tok = s.Scan() tok = s.Scan()
} }
// Hmm. // Hmm.
if len(str) > 0 { if stri > 0 {
// Send remainder // Send remainder
l.token = str l.token = string(str[:stri])
l.value = _STRING l.value = _STRING
c <- l c <- l
} }