added quotes as important chars in the parser

This commit is contained in:
Miek Gieben 2012-02-12 19:06:32 +01:00
parent 059c08af9f
commit 5a48b54327
4 changed files with 107 additions and 19 deletions

View File

@ -16,6 +16,7 @@ things that need to be fixed.
* Add tsig check in 'q'? * Add tsig check in 'q'?
* More RRs to add. Parsing of strings within the rdata * More RRs to add. Parsing of strings within the rdata
* Unknown RR parsing * Unknown RR parsing
* \DDD in zonefiles
## BUGS ## BUGS

View File

@ -197,6 +197,43 @@ func TestParseNSEC(t *testing.T) {
} }
} }
// TestQuotes verifies zone-file parsing of quoted character strings in TXT
// records: blanks and newlines inside quotes are preserved, a trailing
// comment after the closing quote is ignored, and adjacent quoted strings
// are concatenated into one Txt value.
func TestQuotes(t *testing.T) {
	tests := map[string]string{
		`t.example.com. IN TXT "a bc"`: "t.example.com.\t3600\tIN\tTXT\t\"a bc\"",
		`t.example.com. IN TXT "a
 bc"`: "t.example.com.\t3600\tIN\tTXT\t\"a\n bc\"",
		`t.example.com. IN TXT "aaa" ;`: "t.example.com.\t3600\tIN\tTXT\t\"aaa\"",
		`t.example.com. IN TXT "abc" "DEF"`: "t.example.com.\t3600\tIN\tTXT\t\"abcDEF\"",
		// `t.example.com. IN TXT "abc" ( "DEF" )`: "t.example.com.\t3600\tIN\tTXT\t",
		// `t.example.com. IN TXT;`: "t.example.com.\t3600\tIN\tTXT\t",
		// `t.example.com. IN TXT ;`: "t.example.com.\t3600\tIN\tTXT\t",
		// `t.example.com. IN TXT aaa ;`: "t.example.com.\t3600\tIN\tTXT\t \"aaaa\"",
		// `t.example.com. IN TXT aaa`: "t.example.com.\t3600\tIN\tTXT\t\"aaa\"",
		// "cid.urn.arpa. NAPTR 100 50 \"s\" \"z3950+I2L+I2C\" \"\" _z3950._tcp.gatech.edu.":
		// "cid.urn.arpa.\t3600\tIN\tNAPTR\t100 50 \"s\" \"z3950+I2L+I2C\" \"\" _z3950._tcp.gatech.edu.",
		// "cid.urn.arpa. NAPTR 100 50 \"s\" \"rcds+I2C\" \"\" _rcds._udp.gatech.edu.":
		// "cid.urn.arpa.\t3600\tIN\tNAPTR\t100 50 \"s\" \"rcds+I2C\" \"\" _rcds._udp.gatech.edu.",
		// "cid.urn.arpa. NAPTR 100 50 \"s\" \"http+I2L+I2C+I2R\" \"\" _http._tcp.gatech.edu.":
		// "cid.urn.arpa.\t3600\tN\tNAPTR\t100 50 \"s\" \"http+I2L+I2C+I2R\" \"\" _http._tcp.gatech.edu.",
		// "cid.urn.arpa. NAPTR 100 10 \"\" \"\" \"/urn:cid:.+@([^\\.]+\\.)(.*)$/\\2/i\" .":
		// "cid.urn.arpa.\t3600\tIN\tNAPTR\t100 10 \"\" \"\" \"/urn:cid:.+@([^\\.]+\\.)(.*)$/\\2/i\" .",
	}
	for in, want := range tests {
		rr, err := NewRR(in)
		if err != nil {
			// Report which input failed to parse; keep checking the rest.
			t.Errorf("failed to parse RR `%s': %s", in, err.Error())
			continue
		}
		if got := rr.String(); got != want {
			t.Errorf("`%s' should be equal to\n`%s', but is `%s'", in, want, got)
		} else {
			t.Logf("RR is OK: `%s'", got)
		}
	}
}
func TestParseBrace(t *testing.T) { func TestParseBrace(t *testing.T) {
tests := map[string]string{ tests := map[string]string{
"(miek.nl.) 3600 IN A 127.0.0.1": "miek.nl.\t3600\tIN\tA\t127.0.0.1", "(miek.nl.) 3600 IN A 127.0.0.1": "miek.nl.\t3600\tIN\tA\t127.0.0.1",

View File

@ -10,9 +10,11 @@ import (
) )
// Only used when debugging the parser itself. // Only used when debugging the parser itself.
var _DEBUG = false var _DEBUG = true
const maxTok = 1024 // Complete unsure about the correctness of this value?
// Large blobs of base64 code might get longer than this....
const maxTok = 300
// Tokinize a RFC 1035 zone file. The tokenizer will normalize it: // Tokinize a RFC 1035 zone file. The tokenizer will normalize it:
// * Add ownernames if they are left blank; // * Add ownernames if they are left blank;
@ -25,6 +27,7 @@ const (
_EOF = iota // Don't let it start with zero _EOF = iota // Don't let it start with zero
_STRING _STRING
_BLANK _BLANK
_QUOTE
_NEWLINE _NEWLINE
_RRTYPE _RRTYPE
_OWNER _OWNER
@ -384,6 +387,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
// avoids a len(x) that Go otherwise will perform when comparing strings. // avoids a len(x) that Go otherwise will perform when comparing strings.
switch x := s.TokenText(); x[0] { switch x := s.TokenText(); x[0] {
case ' ', '\t': case ' ', '\t':
if quote {
// Inside quotes this is legal
str[stri] = byte(x[0])
stri++
break
}
escape = false escape = false
if commt { if commt {
break break
@ -429,21 +438,28 @@ func zlexer(s scanner.Scanner, c chan lex) {
owner = false owner = false
space = true space = true
case ';': case ';':
if escape { if quote {
escape = false // Inside quotes this is legal
str[stri] = ';' str[stri] = byte(x[0])
stri++ stri++
break break
} }
if quote { if escape {
// Inside quoted text we allow ; escape = false
str[stri] = ';' str[stri] = byte(x[0])
stri++ stri++
break break
} }
commt = true commt = true
case '\n': case '\n':
// Hmmm, escape newline // Hmmm, escape newline
if quote {
str[stri] = byte(x[0])
stri++
break
}
// inside quotes this is legal
escape = false escape = false
if commt { if commt {
// Reset a comment // Reset a comment
@ -490,16 +506,17 @@ func zlexer(s scanner.Scanner, c chan lex) {
rrtype = false rrtype = false
owner = true owner = true
case '\\': case '\\':
// quote?
if commt { if commt {
break break
} }
if escape { if escape {
str[stri] = '\\' str[stri] = byte(x[0])
stri++ stri++
escape = false escape = false
break break
} }
str[stri] = '\\' str[stri] = byte(x[0])
stri++ stri++
escape = true escape = true
case '"': case '"':
@ -507,30 +524,49 @@ func zlexer(s scanner.Scanner, c chan lex) {
break break
} }
if escape { if escape {
str[stri] = '"' str[stri] = byte(x[0])
stri++ stri++
escape = false escape = false
break break
} }
// str += "\"" don't add quoted quotes // send previous gathered text and the quote
if stri != 0 {
l.value = _STRING
l.token = string(str[:stri])
c <-l
stri = 0
}
l.value = _QUOTE
l.token = "\""
c <- l
quote = !quote quote = !quote
case '(': case '(':
if quote {
str[stri] = byte(x[0])
stri++
break
}
if commt { if commt {
break break
} }
if escape { if escape {
str[stri] = '(' str[stri] = byte(x[0])
stri++ stri++
escape = false escape = false
break break
} }
brace++ brace++
case ')': case ')':
if quote {
str[stri] = byte(x[0])
stri++
break
}
if commt { if commt {
break break
} }
if escape { if escape {
str[stri] = ')' str[stri] = byte(x[0])
stri++ stri++
escape = false escape = false
break break
@ -546,7 +582,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
break break
} }
escape = false escape = false
str[stri] = byte(x[0]) // This should be ok... str[stri] = byte(x[0])
stri++ stri++
space = false space = false
} }

View File

@ -49,6 +49,9 @@ func setRR(h RR_Header, c chan lex, o, f string) (RR, *ParseError) {
case TypeSRV: case TypeSRV:
r, e = setSRV(h, c, o, f) r, e = setSRV(h, c, o, f)
goto Slurp goto Slurp
case TypeNAPTR:
r, e = setNAPTR(h, c, o, f)
goto Slurp
// These types have a variable ending either chunks of txt or chunks/base64 or hex. // These types have a variable ending either chunks of txt or chunks/base64 or hex.
// They need to search for the end of the RR themselves, hence they look for the ending // They need to search for the end of the RR themselves, hence they look for the ending
// newline. Thus there is no need to slurp the remainder, because there is none. // newline. Thus there is no need to slurp the remainder, because there is none.
@ -64,8 +67,6 @@ func setRR(h RR_Header, c chan lex, o, f string) (RR, *ParseError) {
return setNSEC3PARAM(h, c, f) return setNSEC3PARAM(h, c, f)
case TypeDS: case TypeDS:
return setDS(h, c, f) return setDS(h, c, f)
case TypeNAPTR:
return setNAPTR(h, c, o, f)
case TypeTXT: case TypeTXT:
return setTXT(h, c, f) return setTXT(h, c, f)
default: default:
@ -328,19 +329,23 @@ func setNAPTR(h RR_Header, c chan lex, o, f string) (RR, *ParseError) {
} }
<-c // _BLANK <-c // _BLANK
l = <-c // _STRING l = <-c // _STRING
println("Flags", l.token)
rr.Flags = l.token rr.Flags = l.token
<-c // _BLANK <-c // _BLANK
l = <-c // _STRING l = <-c // _STRING
println("Service", l.token)
rr.Service = l.token rr.Service = l.token
<-c // _BLANK <-c // _BLANK
l = <-c // _STRING l = <-c // _STRING
println("Regexp", l.token)
rr.Regexp = l.token rr.Regexp = l.token
<-c // _BLANK <-c // _BLANK
l = <-c // _STRING l = <-c // _STRING
rr.Replacement = l.token rr.Replacement = l.token
println("Replacement", l.token, "A")
_, ld, ok := IsDomainName(l.token) _, ld, ok := IsDomainName(l.token)
if !ok { if !ok {
return nil, &ParseError{f, "bad NAPTR Replacement", l} return nil, &ParseError{f, "bad NAPTR Replacement", l}
@ -703,14 +708,23 @@ func setTXT(h RR_Header, c chan lex, f string) (RR, *ParseError) {
rr.Hdr = h rr.Hdr = h
// Get the remaining data until we see a NEWLINE // Get the remaining data until we see a NEWLINE
quote := false
l := <-c l := <-c
var s string var s string
for l.value != _NEWLINE && l.value != _EOF { for l.value != _NEWLINE && l.value != _EOF {
println("SEEN", l.value, l.token)
switch l.value { switch l.value {
case _STRING: case _STRING:
s += l.token if quote {
s += l.token
}
case _BLANK: case _BLANK:
s += l.token if quote {
// _BLANK can only be seen in between txt parts.
return nil, &ParseError{f, "bad TXT Txt", l}
}
case _QUOTE:
quote = !quote
default: default:
return nil, &ParseError{f, "bad TXT", l} return nil, &ParseError{f, "bad TXT", l}
} }