dns/zscan.go

package dns

import (
	"fmt"
	"io"
	"strconv"
	"strings"
)

// Only used when debugging the parser itself.
var _DEBUG = false

// Tokenize an RFC 1035 zone file. The tokenizer normalizes the input
// (an example token stream is sketched after this list):
// * Add owner names if they are left blank;
// * Suppress sequences of spaces;
// * Make each RR fit on one line (a NEWLINE token is sent last);
// * Handle comments: ;
// * Handle braces.
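//
// A hedged illustration of this normalization (token names refer to the
// constants below; the exact stream depends on the lexer):
//
//	"miek.nl.  3600 IN A 127.0.0.1\n"
//	  -> _OWNER("miek.nl.") _BLANK _STRING("3600") _BLANK _CLASS("IN")
//	     _BLANK _RRTYPE("A") _BLANK _STRING("127.0.0.1") _NEWLINE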
const (
	// Zone file
	_EOF = iota // Don't let it start with zero
	_STRING
	_BLANK
	_NEWLINE
	_RRTYPE
	_OWNER
	_CLASS
	_DIRORIGIN  // $ORIGIN
	_DIRTTL     // $TTL
	_DIRINCLUDE // $INCLUDE

	// Private key file
	_VALUE
	_KEY

	_EXPECT_OWNER_DIR      // Owner name or $-directive
	_EXPECT_OWNER_BL       // Whitespace after the owner name
	_EXPECT_ANY            // Expect rrtype, ttl or class
	_EXPECT_ANY_NOCLASS    // Expect rrtype or ttl
	_EXPECT_ANY_NOCLASS_BL // The whitespace after _EXPECT_ANY_NOCLASS
	_EXPECT_ANY_NOTTL      // Expect rrtype or class
	_EXPECT_ANY_NOTTL_BL   // Whitespace after _EXPECT_ANY_NOTTL
	_EXPECT_RRTYPE         // Expect rrtype
	_EXPECT_RRTYPE_BL      // Whitespace BEFORE rrtype
	_EXPECT_RDATA          // The first element of the rdata
	_EXPECT_DIRTTL_BL      // Whitespace after the $TTL directive
	_EXPECT_DIRTTL         // Directive $TTL
)

// ParseError contains the parse error and the location in the io.Reader
// where the error occurred.
type ParseError struct {
	err string
	lex lex
}

func (e *ParseError) Error() string {
	va := strconv.Itoa(e.lex.value)
	s := e.err + ": `" + e.lex.token + "' (value: " + va + ") at line: " +
		strconv.Itoa(e.lex.line) + " and column: " +
		strconv.Itoa(e.lex.column)
	return s
}
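
// A sketch of the resulting message format (all values come from the
// offending lex token; line and column are currently always 0 because the
// lexer does not track them yet):
//
//	Not a TTL: `bla' (value: 1) at line: 0 and column: 0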

type lex struct {
	token  string // Text of the token
	err    string // Error text when the lexer detects it. Not used by the grammar
	value  int    // Value: _STRING, _BLANK, etc.
	line   int    // Line in the file
	column int    // Column in the file
}

type Token struct {
	Rr    RR          // the scanned resource record
	Error *ParseError // when an error occurred, this holds the specifics
}

// NewRR parses the string s and returns the RR contained in it. If the string
// contains more than one RR, only the first is returned. If an error is
// detected, that error is returned.
// If the class is not specified, the IN class is assumed. If the TTL is not
// specified, DefaultTtl is assumed.
func NewRR(s string) (RR, error) {
	var t chan Token
	if s[len(s)-1] != '\n' { // We need a closing newline
		t = ParseZone(strings.NewReader(s + "\n"))
	} else {
		t = ParseZone(strings.NewReader(s))
	}
	r := <-t
	if r.Error != nil {
		return nil, r.Error
	}
	return r.Rr, nil
}
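
// A minimal usage sketch for NewRR; the record text is illustrative only and
// assumes the A record rdata parser in this package:
//
//	rr, err := NewRR("miek.nl. 3600 IN A 127.0.0.1")
//	if err != nil {
//		// err is a *ParseError carrying the offending token
//	}
//	fmt.Printf("%v\n", rr)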

// ParseZone reads an RFC 1035 zone from r. It returns each parsed RR, or any
// error encountered, as a Token on the returned channel. The channel is
// closed by ParseZone when the end of r is reached.
func ParseZone(r io.Reader) chan Token {
	t := make(chan Token)
	go parseZone(r, t)
	return t
}
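
// A hedged usage sketch for ParseZone: range over the returned channel and
// stop on the first error (zonefile is an assumed, already opened io.Reader):
//
//	for tok := range ParseZone(zonefile) {
//		if tok.Error != nil {
//			fmt.Println(tok.Error)
//			break
//		}
//		fmt.Printf("%v\n", tok.Rr)
//	}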

func parseZone(r io.Reader, t chan Token) {
	defer close(t)
	c := make(chan lex)
	// Start the lexer
	go zlexer(r, c)
	// 5 possible beginnings of a line, _ is a space; concrete examples follow below.
	// 1. _OWNER _ _RRTYPE                    -> class/ttl omitted
	// 2. _OWNER _ _STRING _ _RRTYPE          -> class omitted
	// 3. _OWNER _ _STRING _ _CLASS _ _RRTYPE -> ttl/class
	// 4. _OWNER _ _CLASS _ _RRTYPE           -> ttl omitted
	// 5. _OWNER _ _CLASS _ _STRING _ _RRTYPE -> class/ttl (reversed)
	// After detecting these, we know the _RRTYPE so we can jump to functions
	// handling the rdata for each of these types.
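	// Hedged, concrete examples of the five shapes above (the record data is
	// illustrative only):
	//   1. miek.nl. A 127.0.0.1
	//   2. miek.nl. 3600 A 127.0.0.1
	//   3. miek.nl. 3600 IN A 127.0.0.1
	//   4. miek.nl. IN A 127.0.0.1
	//   5. miek.nl. IN 3600 A 127.0.0.1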
	st := _EXPECT_OWNER_DIR
	var h RR_Header
	var ok bool
	var defttl uint32 = DefaultTtl
	for l := range c {
		if _DEBUG {
			fmt.Printf("[%v]\n", l)
		}
		// Lexer spotted an error already
		if l.err != "" {
			t <- Token{Error: &ParseError{l.err, l}}
			return
		}
		switch st {
		case _EXPECT_OWNER_DIR:
			// We can also expect a directive, like $TTL or $ORIGIN
			h.Ttl = defttl
			h.Class = ClassINET
			switch l.value {
			case _NEWLINE: // Empty line
				st = _EXPECT_OWNER_DIR
			case _OWNER:
				h.Name = l.token
				st = _EXPECT_OWNER_BL
			case _DIRTTL:
				st = _EXPECT_DIRTTL_BL
			default:
				t <- Token{Error: &ParseError{"Error at the start", l}}
				return
			}
		case _EXPECT_DIRTTL_BL:
			if l.value != _BLANK {
				t <- Token{Error: &ParseError{"No blank after $-directive", l}}
				return
			}
			st = _EXPECT_DIRTTL
		case _EXPECT_DIRTTL:
			if l.value != _STRING {
				t <- Token{Error: &ParseError{"Expecting $TTL value, not this...", l}}
				return
			}
			if ttl, ok := stringToTtl(l, t); !ok {
				return
			} else {
				defttl = ttl
			}
			st = _EXPECT_OWNER_DIR
		case _EXPECT_OWNER_BL:
			if l.value != _BLANK {
				t <- Token{Error: &ParseError{"No blank after owner", l}}
				return
			}
			st = _EXPECT_ANY
		case _EXPECT_ANY:
			switch l.value {
			case _RRTYPE:
				h.Rrtype, _ = Str_rr[strings.ToUpper(l.token)]
				st = _EXPECT_RDATA
			case _CLASS:
				h.Class, ok = Str_class[strings.ToUpper(l.token)]
				if !ok {
					t <- Token{Error: &ParseError{"Unknown class", l}}
					return
				}
				st = _EXPECT_ANY_NOCLASS_BL
			case _STRING: // A TTL in this case
				if ttl, ok := stringToTtl(l, t); !ok {
					return
				} else {
					h.Ttl = ttl
				}
				st = _EXPECT_ANY_NOTTL_BL
			default:
				t <- Token{Error: &ParseError{"Expecting RR type, TTL or class, not this...", l}}
				return
			}
		case _EXPECT_ANY_NOCLASS_BL:
			if l.value != _BLANK {
				t <- Token{Error: &ParseError{"No blank before NOCLASS", l}}
				return
			}
			st = _EXPECT_ANY_NOCLASS
		case _EXPECT_ANY_NOTTL_BL:
			if l.value != _BLANK {
				t <- Token{Error: &ParseError{"No blank before NOTTL", l}}
				return
			}
			st = _EXPECT_ANY_NOTTL
		case _EXPECT_ANY_NOTTL:
			switch l.value {
			case _CLASS:
				h.Class, ok = Str_class[strings.ToUpper(l.token)]
				if !ok {
					t <- Token{Error: &ParseError{"Unknown class", l}}
					return
				}
				st = _EXPECT_RRTYPE_BL
			case _RRTYPE:
				h.Rrtype, _ = Str_rr[strings.ToUpper(l.token)]
				st = _EXPECT_RDATA
			}
		case _EXPECT_ANY_NOCLASS:
			switch l.value {
			case _STRING: // A TTL
				if ttl, ok := stringToTtl(l, t); !ok {
					return
				} else {
					h.Ttl = ttl
				}
				st = _EXPECT_RRTYPE_BL
			case _RRTYPE:
				h.Rrtype, _ = Str_rr[strings.ToUpper(l.token)]
				st = _EXPECT_RDATA
			default:
				t <- Token{Error: &ParseError{"Expecting RR type or TTL, not this...", l}}
				return
			}
		case _EXPECT_RRTYPE_BL:
			if l.value != _BLANK {
				t <- Token{Error: &ParseError{"No blank before RR type", l}}
				return
			}
			st = _EXPECT_RRTYPE
		case _EXPECT_RRTYPE:
			if l.value != _RRTYPE {
				t <- Token{Error: &ParseError{"Unknown RR type", l}}
				return
			}
			h.Rrtype, _ = Str_rr[strings.ToUpper(l.token)]
			st = _EXPECT_RDATA
		case _EXPECT_RDATA:
			// We could save the current token here
			r, e := setRR(h, c)
			if e != nil {
				// If e.lex is the zero value we encountered an unknown RR
				// type; in that case substitute our current lex token.
				if e.lex.token == "" && e.lex.value == 0 {
					e.lex = l // Uh, dirty
				}
				t <- Token{Error: e}
				return
			}
			t <- Token{Rr: r}
			st = _EXPECT_OWNER_DIR
		}
	}
}

func (l lex) String() string {
	switch l.value {
	case _STRING:
		return "S:" + l.token + "$"
	case _BLANK:
		return "_"
	case _NEWLINE:
		return "|\n"
	case _RRTYPE:
		return "R:" + l.token + "$"
	case _OWNER:
		return "O:" + l.token + "$"
	case _CLASS:
		return "C:" + l.token + "$"
	case _DIRTTL:
		return "T:" + l.token + "$"
	}
	return ""
}
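
// With _DEBUG enabled, parseZone prints each token through this String
// method, one per line. A line such as "miek.nl. IN A 127.0.0.1" would
// roughly print as (hedged illustration; the final _NEWLINE token renders
// as "|" followed by a literal newline):
//
//	[O:miek.nl.$]
//	[_]
//	[C:IN$]
//	[_]
//	[R:A$]
//	[_]
//	[S:127.0.0.1$]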

// zlexer scans the source and returns tokens on the channel c.
func zlexer(r io.Reader, c chan lex) {
	var l lex
	defer close(c)
	quote := false
	escape := false
	space := false
	commt := false
	rrtype := false
	owner := true
	brace := 0
	p, q := 0, 0
	buf := make([]byte, 4096)
	n, err := r.Read(buf)
	for err != io.EOF {
		l.column = 0
		l.line = 0
		switch buf[q] {
		case ' ', '\t':
			escape = false
			if commt {
				p++
				break
			}
			if p == q {
				//l.value = _BLANK
				//l.token = " "
			} else if owner {
				// If we have a string and it is the first, make it an owner
				l.value = _OWNER
				l.token = string(buf[p:q])
				// Escaped text starts with a \ and not a $, so comparing the
				// literal token works here
				if l.token == "$TTL" {
					l.value = _DIRTTL
				}
				if l.token == "$ORIGIN" {
					l.value = _DIRORIGIN
				}
				c <- l
				p = q + 1
			} else {
				l.value = _STRING
				l.token = string(buf[p:q])
				if !rrtype {
					if _, ok := Str_rr[strings.ToUpper(l.token)]; ok {
						l.value = _RRTYPE
						rrtype = true
					}
					if _, ok := Str_class[strings.ToUpper(l.token)]; ok {
						l.value = _CLASS
					}
				}
				c <- l
				p = q + 1
			}
			if !space && !commt {
				l.value = _BLANK
				l.token = " "
				c <- l
				p = q + 1
			}
			if space || commt {
				p++
			}
			owner = false
			space = true
		case ';':
			if escape {
				escape = false
				break
			}
			if quote {
				// Inside quoted text we allow ;
				break
			}
			p++
			commt = true
		case '\n':
			// An escaped newline has no special meaning; just reset the flag
			escape = false
			if commt {
				// Reset a comment
				commt = false
				rrtype = false
				p++
				// If not in a brace this ends the comment AND the RR
				if brace == 0 {
					owner = true
					l.value = _NEWLINE
					l.token = "\n"
					c <- l
					p = q + 1
				}
				break
			}
			if p != q {
				l.value = _STRING
				l.token = string(buf[p:q])
				if !rrtype {
					if _, ok := Str_rr[strings.ToUpper(l.token)]; ok {
						l.value = _RRTYPE
						rrtype = true
					}
				}
				c <- l
				p = q + 1
			}
			if brace > 0 {
				l.value = _BLANK
				p++
				if !space {
					c <- l
					p = q + 1
				}
			} else {
				l.value = _NEWLINE
				l.token = "\n"
				c <- l
				p = q + 1
			}
			if l.value == _BLANK {
				space = true
			}
			p = q + 1
			commt = false
			rrtype = false
			owner = true
		case '\\':
			if commt {
				p++
				break
			}
			if escape {
				escape = false
				break
			}
			escape = true
		case '"':
			if commt {
				p++
				break
			}
			if escape {
				escape = false
				break
			}
			quote = !quote
		case '(':
			if commt {
				p++
				break
			}
			if escape {
				escape = false
				break
			}
			p++
			brace++
		case ')':
			if commt {
				p++
				break
			}
			if escape {
				escape = false
				break
			}
			brace--
			if brace < 0 {
				l.err = "Extra closing brace"
				c <- l
				p = q + 1
				return
			}
			p++
		default:
			if commt {
				p++
				break
			}
			escape = false
			space = false
		}
		q++
		if q > n-1 { // q is an index, n is a count
			// Read in a new chunk. Everything before p can be discarded, but
			// buf[p:n] may still hold part of the current token, so keep it.
			rest := copy(buf, buf[p:n])
			// Reset the indices
			q -= p
			p = 0
			// Read the new chunk behind the copied remainder and account
			// for both when updating n
			var more int
			more, err = r.Read(buf[rest:])
			n = rest + more
		}
	}
	// Is this still needed? If the input does not end in a newline the final
	// token is currently dropped; flushing it would look like this:
	/*
		if p != q {
			// Send remainder
			l.token = string(buf[p:q])
			l.value = _STRING
			c <- l
		}
	*/
}

func stringToTtl(l lex, t chan Token) (uint32, bool) {
	ttl, err := strconv.Atoi(l.token)
	if err != nil {
		t <- Token{Error: &ParseError{"Not a TTL", l}}
		return 0, false
	}
	return uint32(ttl), true
}
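
// For example, a lex token of "3600" yields (3600, true), while a
// non-numeric token such as "3d" sends a "Not a TTL" ParseError on t and
// yields (0, false); time unit suffixes are not understood here.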