From 5a48b54327392633341fe5ee9f1acdabd6c45e0a Mon Sep 17 00:00:00 2001
From: Miek Gieben
Date: Sun, 12 Feb 2012 19:06:32 +0100
Subject: [PATCH] added quotes as important chars in the parser

---
 TODO.markdown |  1 +
 parse_test.go | 37 +++++++++++++++++++++++++++++
 zscan.go      | 66 +++++++++++++++++++++++++++++++++++++++------------
 zscan_rr.go   | 22 +++++++++++++----
 4 files changed, 107 insertions(+), 19 deletions(-)

diff --git a/TODO.markdown b/TODO.markdown
index 6ba71ec1..8220d111 100644
--- a/TODO.markdown
+++ b/TODO.markdown
@@ -16,6 +16,7 @@ things that need to be fixed.
 * Add tsig check in 'q'?
 * More RRs to add. Parsing of strings within the rdata
 * Unknown RR parsing
+* \DDD in zonefiles
 
 ## BUGS
diff --git a/parse_test.go b/parse_test.go
index 2d648ae9..ce22ddf7 100644
--- a/parse_test.go
+++ b/parse_test.go
@@ -197,6 +197,43 @@ func TestParseNSEC(t *testing.T) {
 	}
 }
 
+func TestQuotes(t *testing.T) {
+	tests := map[string]string{
+		`t.example.com. IN TXT "a bc"`: "t.example.com.\t3600\tIN\tTXT\t\"a bc\"",
+		`t.example.com. IN TXT "a
+ bc"`: "t.example.com.\t3600\tIN\tTXT\t\"a\n bc\"",
+		`t.example.com. IN TXT "aaa" ;`: "t.example.com.\t3600\tIN\tTXT\t\"aaa\"",
+		`t.example.com. IN TXT "abc" "DEF"`: "t.example.com.\t3600\tIN\tTXT\t\"abcDEF\"",
+//		`t.example.com. IN TXT "abc" ( "DEF" )`: "t.example.com.\t3600\tIN\tTXT\t",
+//		`t.example.com. IN TXT;`: "t.example.com.\t3600\tIN\tTXT\t",
+//		`t.example.com. IN TXT ;`: "t.example.com.\t3600\tIN\tTXT\t",
+//		`t.example.com. IN TXT aaa ;`: "t.example.com.\t3600\tIN\tTXT\t \"aaaa\"",
+//		`t.example.com. IN TXT aaa`: "t.example.com.\t3600\tIN\tTXT\t\"aaa\"",
+//		"cid.urn.arpa. NAPTR 100 50 \"s\" \"z3950+I2L+I2C\" \"\" _z3950._tcp.gatech.edu.":
+//			"cid.urn.arpa.\t3600\tIN\tNAPTR\t100 50 \"s\" \"z3950+I2L+I2C\" \"\" _z3950._tcp.gatech.edu.",
+//		"cid.urn.arpa. NAPTR 100 50 \"s\" \"rcds+I2C\" \"\" _rcds._udp.gatech.edu.":
+//			"cid.urn.arpa.\t3600\tIN\tNAPTR\t100 50 \"s\" \"rcds+I2C\" \"\" _rcds._udp.gatech.edu.",
+//		"cid.urn.arpa. NAPTR 100 50 \"s\" \"http+I2L+I2C+I2R\" \"\" _http._tcp.gatech.edu.":
+//			"cid.urn.arpa.\t3600\tN\tNAPTR\t100 50 \"s\" \"http+I2L+I2C+I2R\" \"\" _http._tcp.gatech.edu.",
+//		"cid.urn.arpa. NAPTR 100 10 \"\" \"\" \"/urn:cid:.+@([^\\.]+\\.)(.*)$/\\2/i\" .":
+//			"cid.urn.arpa.\t3600\tIN\tNAPTR\t100 10 \"\" \"\" \"/urn:cid:.+@([^\\.]+\\.)(.*)$/\\2/i\" .",
+	}
+	for i, o := range tests {
+		rr, e := NewRR(i)
+		if e != nil {
+			t.Log("Failed to parse RR: " + e.Error())
+			t.Fail()
+			continue
+		}
+		if rr.String() != o {
+			t.Logf("`%s' should be equal to\n`%s', but is `%s'\n", i, o, rr.String())
+			t.Fail()
+		} else {
+			t.Logf("RR is OK: `%s'", rr.String())
+		}
+	}
+}
+
 func TestParseBrace(t *testing.T) {
 	tests := map[string]string{
 		"(miek.nl.) 3600 IN A 127.0.0.1": "miek.nl.\t3600\tIN\tA\t127.0.0.1",
diff --git a/zscan.go b/zscan.go
index c6dc5e1e..9ca6de0c 100644
--- a/zscan.go
+++ b/zscan.go
@@ -10,9 +10,11 @@ import (
 )
 
 // Only used when debugging the parser itself.
-var _DEBUG = false
+var _DEBUG = true
 
-const maxTok = 1024
+// Completely unsure about the correctness of this value?
+// Large blobs of base64 code might get longer than this....
+const maxTok = 300
 
 // Tokinize a RFC 1035 zone file. The tokenizer will normalize it:
 // * Add ownernames if they are left blank;
@@ -25,6 +27,7 @@ const (
 	_EOF = iota // Don't let it start with zero
 	_STRING
 	_BLANK
+	_QUOTE
 	_NEWLINE
 	_RRTYPE
 	_OWNER
@@ -384,6 +387,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
 		// avoids a len(x) that Go otherwise will perform when comparing strings.
 		switch x := s.TokenText(); x[0] {
 		case ' ', '\t':
+			if quote {
+				// Inside quotes this is legal
+				str[stri] = byte(x[0])
+				stri++
+				break
+			}
 			escape = false
 			if commt {
 				break
 			}
@@ -429,21 +438,28 @@ func zlexer(s scanner.Scanner, c chan lex) {
 			owner = false
 			space = true
 		case ';':
-			if escape {
-				escape = false
-				str[stri] = ';'
+			if quote {
+				// Inside quotes this is legal
+				str[stri] = byte(x[0])
 				stri++
 				break
 			}
-			if quote {
-				// Inside quoted text we allow ;
-				str[stri] = ';'
+			if escape {
+				escape = false
+				str[stri] = byte(x[0])
 				stri++
 				break
 			}
 			commt = true
 		case '\n':
 			// Hmmm, escape newline
+			if quote {
+				str[stri] = byte(x[0])
+				stri++
+				break
+			}
+
+			// inside quotes this is legal
 			escape = false
 			if commt {
 				// Reset a comment
@@ -490,16 +506,17 @@ func zlexer(s scanner.Scanner, c chan lex) {
 			rrtype = false
 			owner = true
 		case '\\':
+			// quote?
 			if commt {
 				break
 			}
 			if escape {
-				str[stri] = '\\'
+				str[stri] = byte(x[0])
 				stri++
 				escape = false
 				break
 			}
-			str[stri] = '\\'
+			str[stri] = byte(x[0])
 			stri++
 			escape = true
 		case '"':
@@ -507,30 +524,49 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			if escape {
-				str[stri] = '"'
+				str[stri] = byte(x[0])
 				stri++
 				escape = false
 				break
 			}
-			// str += "\"" don't add quoted quotes
+			// send previous gathered text and the quote
+			if stri != 0 {
+				l.value = _STRING
+				l.token = string(str[:stri])
+				c <-l
+				stri = 0
+			}
+			l.value = _QUOTE
+			l.token = "\""
+			c <- l
 			quote = !quote
 		case '(':
+			if quote {
+				str[stri] = byte(x[0])
+				stri++
+				break
+			}
 			if commt {
 				break
 			}
 			if escape {
-				str[stri] = '('
+				str[stri] = byte(x[0])
 				stri++
 				escape = false
 				break
 			}
 			brace++
 		case ')':
+			if quote {
+				str[stri] = byte(x[0])
+				stri++
+				break
+			}
 			if commt {
 				break
 			}
 			if escape {
-				str[stri] = ')'
+				str[stri] = byte(x[0])
 				stri++
 				escape = false
 				break
 			}
@@ -546,7 +582,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			escape = false
-			str[stri] = byte(x[0]) // This should be ok...
+			str[stri] = byte(x[0])
 			stri++
 			space = false
 		}
diff --git a/zscan_rr.go b/zscan_rr.go
index c4c6b46b..829d6fc9 100644
--- a/zscan_rr.go
+++ b/zscan_rr.go
@@ -49,6 +49,9 @@ func setRR(h RR_Header, c chan lex, o, f string) (RR, *ParseError) {
 	case TypeSRV:
 		r, e = setSRV(h, c, o, f)
 		goto Slurp
+	case TypeNAPTR:
+		r, e = setNAPTR(h, c, o, f)
+		goto Slurp
 	// These types have a variable ending either chunks of txt or chunks/base64 or hex.
 	// They need to search for the end of the RR themselves, hence they look for the ending
 	// newline. Thus there is no need to slurp the remainder, because there is none.
@@ -64,8 +67,6 @@ func setRR(h RR_Header, c chan lex, o, f string) (RR, *ParseError) {
 		return setNSEC3PARAM(h, c, f)
 	case TypeDS:
 		return setDS(h, c, f)
-	case TypeNAPTR:
-		return setNAPTR(h, c, o, f)
 	case TypeTXT:
 		return setTXT(h, c, f)
 	default:
@@ -328,19 +329,23 @@ func setNAPTR(h RR_Header, c chan lex, o, f string) (RR, *ParseError) {
 	}
 	<-c // _BLANK
 	l = <-c // _STRING
+	println("Flags", l.token)
 	rr.Flags = l.token
 
 	<-c // _BLANK
 	l = <-c // _STRING
+	println("Service", l.token)
 	rr.Service = l.token
 
 	<-c // _BLANK
 	l = <-c // _STRING
+	println("Regexp", l.token)
 	rr.Regexp = l.token
 	<-c // _BLANK
 	l = <-c // _STRING
 	rr.Replacement = l.token
+	println("Replacement", l.token, "A")
 	_, ld, ok := IsDomainName(l.token)
 	if !ok {
 		return nil, &ParseError{f, "bad NAPTR Replacement", l}
 	}
@@ -703,14 +708,23 @@ func setTXT(h RR_Header, c chan lex, f string) (RR, *ParseError) {
 	rr.Hdr = h
 
 	// Get the remaining data until we see a NEWLINE
+	quote := false
 	l := <-c
 	var s string
 	for l.value != _NEWLINE && l.value != _EOF {
+		println("SEEN", l.value, l.token)
 		switch l.value {
 		case _STRING:
-			s += l.token
+			if quote {
+				s += l.token
+			}
 		case _BLANK:
-			s += l.token
+			if quote {
+				// _BLANK can only be seen in between txt parts.
+				return nil, &ParseError{f, "bad TXT Txt", l}
+			}
+		case _QUOTE:
+			quote = !quote
 		default:
 			return nil, &ParseError{f, "bad TXT", l}
 		}
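
Note on the approach (illustrative, not part of the patch): zlexer now forwards the '"' character to the parser as its own _QUOTE token instead of silently toggling state, and setTXT only keeps _STRING tokens seen between two _QUOTE tokens, so adjacent quoted chunks such as "abc" "DEF" are concatenated. The standalone Go sketch below mirrors that idea; the names (lexTXT, assembleTXT, tokQuote) are hypothetical, and it deliberately ignores escapes, parentheses and comments that the real lexer handles.

package main

import "fmt"

type tokenKind int

const (
	tokString tokenKind = iota // bare or quoted text
	tokBlank                   // whitespace outside quotes
	tokQuote                   // a single '"'
)

type token struct {
	kind tokenKind
	text string
}

// lexTXT splits rdata into tokens. A '"' is forwarded as its own token and
// blanks inside quotes stay part of the surrounding text.
func lexTXT(s string) []token {
	var toks []token
	var cur []rune
	quote := false
	flush := func() {
		if len(cur) > 0 {
			toks = append(toks, token{tokString, string(cur)})
			cur = cur[:0]
		}
	}
	for _, r := range s {
		switch {
		case r == '"':
			flush()
			toks = append(toks, token{tokQuote, `"`})
			quote = !quote
		case (r == ' ' || r == '\t') && !quote:
			flush()
			toks = append(toks, token{tokBlank, " "})
		default:
			cur = append(cur, r)
		}
	}
	flush()
	return toks
}

// assembleTXT keeps only text seen between quotes and concatenates adjacent
// quoted chunks, so `"abc" "DEF"` becomes "abcDEF".
func assembleTXT(toks []token) (string, error) {
	var s string
	quote := false
	for _, t := range toks {
		switch t.kind {
		case tokQuote:
			quote = !quote
		case tokString:
			if quote {
				s += t.text
			}
		case tokBlank:
			// a blank between quoted chunks is a separator, not data
		}
	}
	if quote {
		return "", fmt.Errorf("unbalanced quotes in TXT rdata")
	}
	return s, nil
}

func main() {
	txt, err := assembleTXT(lexTXT(`"abc" "DEF"`))
	fmt.Println(txt, err) // abcDEF <nil>
}

Running the sketch prints abcDEF, which matches the expected output for the `"abc" "DEF"` case in TestQuotes.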