dns/zscan.go

package dns

import (
	"fmt"
	"io"
	"strconv"
	"strings"
)

// Only used when debugging the parser itself.
var _DEBUG = false

// Tokenize an RFC 1035 zone file. The tokenizer normalizes the input
// (an example token stream is sketched after this list):
// * Add owner names if they are left blank;
// * Suppress sequences of spaces;
// * Make each RR fit on one line (a NEWLINE token is sent last);
// * Handle comments: ;
// * Handle braces.
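//
// A hedged illustration of this normalization (token names refer to the
// constants below; the exact stream depends on the lexer):
//
//	"miek.nl.  3600 IN A 127.0.0.1\n"
//	  -> _OWNER("miek.nl.") _BLANK _STRING("3600") _BLANK _CLASS("IN")
//	     _BLANK _RRTYPE("A") _BLANK _STRING("127.0.0.1") _NEWLINE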
const (
	// Zone file
	_EOF = iota // Don't let it start with zero
	_STRING
	_BLANK
	_NEWLINE
	_RRTYPE
	_OWNER
	_CLASS
	_DIRORIGIN  // $ORIGIN
	_DIRTTL     // $TTL
	_DIRINCLUDE // $INCLUDE

	// Private key file
	_VALUE
	_KEY

	_EXPECT_OWNER_DIR      // Owner name or $-directive
	_EXPECT_OWNER_BL       // Whitespace after the owner name
	_EXPECT_ANY            // Expect rrtype, ttl or class
	_EXPECT_ANY_NOCLASS    // Expect rrtype or ttl
	_EXPECT_ANY_NOCLASS_BL // The whitespace after _EXPECT_ANY_NOCLASS
	_EXPECT_ANY_NOTTL      // Expect rrtype or class
	_EXPECT_ANY_NOTTL_BL   // Whitespace after _EXPECT_ANY_NOTTL
	_EXPECT_RRTYPE         // Expect rrtype
	_EXPECT_RRTYPE_BL      // Whitespace BEFORE rrtype
	_EXPECT_RDATA          // The first element of the rdata
	_EXPECT_DIRTTL_BL      // Whitespace after the $TTL directive
	_EXPECT_DIRTTL         // Directive $TTL
)

// ParseError contains the parse error and the location in the io.Reader
// where the error occurred.
type ParseError struct {
	err string
	lex lex
}

func (e *ParseError) Error() string {
	va := strconv.Itoa(e.lex.value)
	s := e.err + ": `" + e.lex.token + "' (value: " + va + ") at line: " +
		strconv.Itoa(e.lex.line) + " and column: " +
		strconv.Itoa(e.lex.column)
	return s
}
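
// A sketch of the resulting message format (all values come from the
// offending lex token; line and column are currently always 0 because the
// lexer does not track them yet):
//
//	Not a TTL: `bla' (value: 1) at line: 0 and column: 0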

type lex struct {
	token  string // Text of the token
	err    string // Error text when the lexer detects it. Not used by the grammar
	value  int    // Value: _STRING, _BLANK, etc.
	line   int    // Line in the file
	column int    // Column in the file
}

type Token struct {
	Rr    RR          // the scanned resource record
	Error *ParseError // when an error occurred, this holds the specifics
}

// NewRR parses the string s and returns the RR contained in it. If the string
// contains more than one RR, only the first is returned. If an error is
// detected, that error is returned.
// If the class is not specified, the IN class is assumed. If the TTL is not
// specified, DefaultTtl is assumed.
func NewRR(s string) (RR, error) {
	var t chan Token
	if s[len(s)-1] != '\n' { // We need a closing newline
		t = ParseZone(strings.NewReader(s + "\n"))
	} else {
		t = ParseZone(strings.NewReader(s))
	}
	r := <-t
	if r.Error != nil {
		return nil, r.Error
	}
	return r.Rr, nil
}
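
// A minimal usage sketch for NewRR; the record text is illustrative only and
// assumes the A record rdata parser in this package:
//
//	rr, err := NewRR("miek.nl. 3600 IN A 127.0.0.1")
//	if err != nil {
//		// err is a *ParseError carrying the offending token
//	}
//	fmt.Printf("%v\n", rr)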

// ParseZone reads an RFC 1035 zone from r. It returns each parsed RR, or any
// error encountered, as a Token on the returned channel. The channel is
// closed by ParseZone when the end of r is reached.
func ParseZone(r io.Reader) chan Token {
	t := make(chan Token)
	go parseZone(r, t)
	return t
}
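
// A hedged usage sketch for ParseZone: range over the returned channel and
// stop on the first error (zonefile is an assumed, already opened io.Reader):
//
//	for tok := range ParseZone(zonefile) {
//		if tok.Error != nil {
//			fmt.Println(tok.Error)
//			break
//		}
//		fmt.Printf("%v\n", tok.Rr)
//	}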

func parseZone(r io.Reader, t chan Token) {
	defer close(t)
	c := make(chan lex)
	// Start the lexer
	go zlexer(r, c)
	// 5 possible beginnings of a line, _ is a space; concrete examples follow below.
	// 1. _OWNER _ _RRTYPE                    -> class/ttl omitted
	// 2. _OWNER _ _STRING _ _RRTYPE          -> class omitted
	// 3. _OWNER _ _STRING _ _CLASS _ _RRTYPE -> ttl/class
	// 4. _OWNER _ _CLASS _ _RRTYPE           -> ttl omitted
	// 5. _OWNER _ _CLASS _ _STRING _ _RRTYPE -> class/ttl (reversed)
	// After detecting these, we know the _RRTYPE so we can jump to functions
	// handling the rdata for each of these types.
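	// Hedged, concrete examples of the five shapes above (the record data is
	// illustrative only):
	//   1. miek.nl. A 127.0.0.1
	//   2. miek.nl. 3600 A 127.0.0.1
	//   3. miek.nl. 3600 IN A 127.0.0.1
	//   4. miek.nl. IN A 127.0.0.1
	//   5. miek.nl. IN 3600 A 127.0.0.1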
	st := _EXPECT_OWNER_DIR
	var h RR_Header
	var ok bool
	var defttl uint32 = DefaultTtl
	for l := range c {
		if _DEBUG {
			fmt.Printf("[%v]\n", l)
		}
		// Lexer spotted an error already
		if l.err != "" {
			t <- Token{Error: &ParseError{l.err, l}}
			return
		}
		switch st {
		case _EXPECT_OWNER_DIR:
			// We can also expect a directive, like $TTL or $ORIGIN
			h.Ttl = defttl
			h.Class = ClassINET
			switch l.value {
			case _NEWLINE: // Empty line
				st = _EXPECT_OWNER_DIR
			case _OWNER:
				h.Name = l.token
				st = _EXPECT_OWNER_BL
			case _DIRTTL:
				st = _EXPECT_DIRTTL_BL
			default:
				t <- Token{Error: &ParseError{"Error at the start", l}}
				return
			}
		case _EXPECT_DIRTTL_BL:
			if l.value != _BLANK {
				t <- Token{Error: &ParseError{"No blank after $-directive", l}}
				return
			}
			st = _EXPECT_DIRTTL
		case _EXPECT_DIRTTL:
			if l.value != _STRING {
				t <- Token{Error: &ParseError{"Expecting $TTL value, not this...", l}}
				return
			}
			if ttl, ok := stringToTtl(l, t); !ok {
				return
			} else {
				defttl = ttl
			}
			st = _EXPECT_OWNER_DIR
		case _EXPECT_OWNER_BL:
			if l.value != _BLANK {
				t <- Token{Error: &ParseError{"No blank after owner", l}}
				return
			}
			st = _EXPECT_ANY
		case _EXPECT_ANY:
			switch l.value {
			case _RRTYPE:
				h.Rrtype, _ = Str_rr[strings.ToUpper(l.token)]
				st = _EXPECT_RDATA
			case _CLASS:
				h.Class, ok = Str_class[strings.ToUpper(l.token)]
				if !ok {
					t <- Token{Error: &ParseError{"Unknown class", l}}
					return
				}
				st = _EXPECT_ANY_NOCLASS_BL
			case _STRING: // A TTL in this case
				if ttl, ok := stringToTtl(l, t); !ok {
					return
				} else {
					h.Ttl = ttl
				}
				st = _EXPECT_ANY_NOTTL_BL
			default:
				t <- Token{Error: &ParseError{"Expecting RR type, TTL or class, not this...", l}}
				return
			}
		case _EXPECT_ANY_NOCLASS_BL:
			if l.value != _BLANK {
				t <- Token{Error: &ParseError{"No blank before NOCLASS", l}}
				return
			}
			st = _EXPECT_ANY_NOCLASS
		case _EXPECT_ANY_NOTTL_BL:
			if l.value != _BLANK {
				t <- Token{Error: &ParseError{"No blank before NOTTL", l}}
				return
			}
			st = _EXPECT_ANY_NOTTL
		case _EXPECT_ANY_NOTTL:
			switch l.value {
			case _CLASS:
				h.Class, ok = Str_class[strings.ToUpper(l.token)]
				if !ok {
					t <- Token{Error: &ParseError{"Unknown class", l}}
					return
				}
				st = _EXPECT_RRTYPE_BL
			case _RRTYPE:
				h.Rrtype, _ = Str_rr[strings.ToUpper(l.token)]
				st = _EXPECT_RDATA
			}
		case _EXPECT_ANY_NOCLASS:
			switch l.value {
			case _STRING: // A TTL
				if ttl, ok := stringToTtl(l, t); !ok {
					return
				} else {
					h.Ttl = ttl
				}
				st = _EXPECT_RRTYPE_BL
			case _RRTYPE:
				h.Rrtype, _ = Str_rr[strings.ToUpper(l.token)]
				st = _EXPECT_RDATA
			default:
				t <- Token{Error: &ParseError{"Expecting RR type or TTL, not this...", l}}
				return
			}
		case _EXPECT_RRTYPE_BL:
			if l.value != _BLANK {
				t <- Token{Error: &ParseError{"No blank before RR type", l}}
				return
			}
			st = _EXPECT_RRTYPE
		case _EXPECT_RRTYPE:
			if l.value != _RRTYPE {
				t <- Token{Error: &ParseError{"Unknown RR type", l}}
				return
			}
			h.Rrtype, _ = Str_rr[strings.ToUpper(l.token)]
			st = _EXPECT_RDATA
		case _EXPECT_RDATA:
			// We could save the current token here
			r, e := setRR(h, c)
			if e != nil {
				// If e.lex is the zero value we encountered an unknown RR
				// type; in that case substitute our current lex token.
				if e.lex.token == "" && e.lex.value == 0 {
					e.lex = l // Uh, dirty
				}
				t <- Token{Error: e}
				return
			}
			t <- Token{Rr: r}
			st = _EXPECT_OWNER_DIR
		}
	}
}

func (l lex) String() string {
	switch l.value {
	case _STRING:
		return "S:" + l.token + "$"
	case _BLANK:
		return "_"
	case _NEWLINE:
		return "|\n"
	case _RRTYPE:
		return "R:" + l.token + "$"
	case _OWNER:
		return "O:" + l.token + "$"
	case _CLASS:
		return "C:" + l.token + "$"
	case _DIRTTL:
		return "T:" + l.token + "$"
	}
	return ""
}
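
// With _DEBUG enabled, parseZone prints each token through this String
// method, one per line. A line such as "miek.nl. IN A 127.0.0.1" would
// roughly print as (hedged illustration; the final _NEWLINE token renders
// as "|" followed by a literal newline):
//
//	[O:miek.nl.$]
//	[_]
//	[C:IN$]
//	[_]
//	[R:A$]
//	[_]
//	[S:127.0.0.1$]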

// zlexer scans the source and returns tokens on the channel c.
func zlexer(r io.Reader, c chan lex) {
	var l lex
	defer close(c)
	quote := false
	escape := false
	space := false
	commt := false
	rrtype := false
	owner := true
	brace := 0
	p, q := 0, 0
	buf := make([]byte, 4096)
	n, err := r.Read(buf)
	for err != io.EOF {
		l.column = 0
		l.line = 0
		switch buf[q] {
		case ' ', '\t':
			escape = false
			if commt {
				p++
				break
			}
			if p == q {
				//l.value = _BLANK
				//l.token = " "
			} else if owner {
				// If we have a string and it is the first, make it an owner
				l.value = _OWNER
				l.token = string(buf[p:q])
				// Escaped text starts with a \ and not a $, so comparing the
				// literal token works here
				if l.token == "$TTL" {
					l.value = _DIRTTL
				}
				if l.token == "$ORIGIN" {
					l.value = _DIRORIGIN
				}
				c <- l
				p = q + 1
			} else {
				l.value = _STRING
				l.token = string(buf[p:q])
				if !rrtype {
					if _, ok := Str_rr[strings.ToUpper(l.token)]; ok {
						l.value = _RRTYPE
						rrtype = true
					}
					if _, ok := Str_class[strings.ToUpper(l.token)]; ok {
						l.value = _CLASS
					}
				}
				c <- l
				p = q + 1
			}
			if !space && !commt {
				l.value = _BLANK
				l.token = " "
				c <- l
				p = q + 1
			}
			if space || commt {
				p++
			}
			owner = false
			space = true
		case ';':
			if escape {
				escape = false
				break
			}
			if quote {
				// Inside quoted text we allow ;
				break
			}
			p++
			commt = true
		case '\n':
			// An escaped newline has no special meaning; just reset the flag
			escape = false
			if commt {
				// Reset a comment
				commt = false
				rrtype = false
				p++
				// If not in a brace this ends the comment AND the RR
				if brace == 0 {
					owner = true
					l.value = _NEWLINE
					l.token = "\n"
					c <- l
					p = q + 1
				}
				break
			}
			if p != q {
				l.value = _STRING
				l.token = string(buf[p:q])
				if !rrtype {
					if _, ok := Str_rr[strings.ToUpper(l.token)]; ok {
						l.value = _RRTYPE
						rrtype = true
					}
				}
				c <- l
				p = q + 1
			}
			if brace > 0 {
				l.value = _BLANK
				p++
				if !space {
					c <- l
					p = q + 1
				}
			} else {
				l.value = _NEWLINE
				l.token = "\n"
				c <- l
				p = q + 1
			}
			if l.value == _BLANK {
				space = true
			}
			p = q + 1
			commt = false
			rrtype = false
			owner = true
		case '\\':
			if commt {
				p++
				break
			}
			if escape {
				escape = false
				break
			}
			escape = true
		case '"':
			if commt {
				p++
				break
			}
			if escape {
				escape = false
				break
			}
			quote = !quote
		case '(':
			if commt {
				p++
				break
			}
			if escape {
				escape = false
				break
			}
			p++
			brace++
		case ')':
			if commt {
				p++
				break
			}
			if escape {
				escape = false
				break
			}
			brace--
			if brace < 0 {
				l.err = "Extra closing brace"
				c <- l
				p = q + 1
				return
			}
			p++
		default:
			if commt {
				p++
				break
			}
			escape = false
			space = false
		}
		q++
		if q > n-1 { // q is an index, n is a count
			// Read in a new chunk. Everything before p can be discarded, but
			// buf[p:n] may still hold part of the current token, so keep it.
			rest := copy(buf, buf[p:n])
			// Reset the indices
			q -= p
			p = 0
			// Read the new chunk behind the copied remainder and account
			// for both when updating n
			var more int
			more, err = r.Read(buf[rest:])
			n = rest + more
		}
	}
	// Is this still needed? If the input does not end in a newline the final
	// token is currently dropped; flushing it would look like this:
	/*
		if p != q {
			// Send remainder
			l.token = string(buf[p:q])
			l.value = _STRING
			c <- l
		}
	*/
}

func stringToTtl(l lex, t chan Token) (uint32, bool) {
	ttl, err := strconv.Atoi(l.token)
	if err != nil {
		t <- Token{Error: &ParseError{"Not a TTL", l}}
		return 0, false
	}
	return uint32(ttl), true
}
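
// For example, a lex token of "3600" yields (3600, true), while a
// non-numeric token such as "3d" sends a "Not a TTL" ParseError on t and
// yields (0, false); time unit suffixes are not understood here.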