From 5a48b54327392633341fe5ee9f1acdabd6c45e0a Mon Sep 17 00:00:00 2001
From: Miek Gieben
Date: Sun, 12 Feb 2012 19:06:32 +0100
Subject: [PATCH] added quotes as important chars in the parser

---
 TODO.markdown |  1 +
 parse_test.go | 37 +++++++++++++++++++++++++++++
 zscan.go      | 66 +++++++++++++++++++++++++++++++++++++++------------
 zscan_rr.go   | 22 +++++++++++++----
 4 files changed, 107 insertions(+), 19 deletions(-)

diff --git a/TODO.markdown b/TODO.markdown
index 6ba71ec1..8220d111 100644
--- a/TODO.markdown
+++ b/TODO.markdown
@@ -16,6 +16,7 @@ things that need to be fixed.
 * Add tsig check in 'q'?
 * More RRs to add. Parsing of strings within the rdata
 * Unknown RR parsing
+* \DDD in zonefiles
 
 ## BUGS
diff --git a/parse_test.go b/parse_test.go
index 2d648ae9..ce22ddf7 100644
--- a/parse_test.go
+++ b/parse_test.go
@@ -197,6 +197,43 @@ func TestParseNSEC(t *testing.T) {
 	}
 }
 
+func TestQuotes(t *testing.T) {
+	tests := map[string]string{
+		`t.example.com. IN TXT "a bc"`: "t.example.com.\t3600\tIN\tTXT\t\"a bc\"",
+		`t.example.com. IN TXT "a
+ bc"`: "t.example.com.\t3600\tIN\tTXT\t\"a\n bc\"",
+		`t.example.com. IN TXT "aaa" ;`: "t.example.com.\t3600\tIN\tTXT\t\"aaa\"",
+		`t.example.com. IN TXT "abc" "DEF"`: "t.example.com.\t3600\tIN\tTXT\t\"abcDEF\"",
+//		`t.example.com. IN TXT "abc" ( "DEF" )`: "t.example.com.\t3600\tIN\tTXT\t",
+//		`t.example.com. IN TXT;`: "t.example.com.\t3600\tIN\tTXT\t",
+//		`t.example.com. IN TXT ;`: "t.example.com.\t3600\tIN\tTXT\t",
+//		`t.example.com. IN TXT aaa ;`: "t.example.com.\t3600\tIN\tTXT\t \"aaaa\"",
+//		`t.example.com. IN TXT aaa`: "t.example.com.\t3600\tIN\tTXT\t\"aaa\"",
+//		"cid.urn.arpa. NAPTR 100 50 \"s\" \"z3950+I2L+I2C\" \"\" _z3950._tcp.gatech.edu.":
+//			"cid.urn.arpa.\t3600\tIN\tNAPTR\t100 50 \"s\" \"z3950+I2L+I2C\" \"\" _z3950._tcp.gatech.edu.",
+//		"cid.urn.arpa. NAPTR 100 50 \"s\" \"rcds+I2C\" \"\" _rcds._udp.gatech.edu.":
+//			"cid.urn.arpa.\t3600\tIN\tNAPTR\t100 50 \"s\" \"rcds+I2C\" \"\" _rcds._udp.gatech.edu.",
+//		"cid.urn.arpa. NAPTR 100 50 \"s\" \"http+I2L+I2C+I2R\" \"\" _http._tcp.gatech.edu.":
+//			"cid.urn.arpa.\t3600\tN\tNAPTR\t100 50 \"s\" \"http+I2L+I2C+I2R\" \"\" _http._tcp.gatech.edu.",
+//		"cid.urn.arpa. NAPTR 100 10 \"\" \"\" \"/urn:cid:.+@([^\\.]+\\.)(.*)$/\\2/i\" .":
+//			"cid.urn.arpa.\t3600\tIN\tNAPTR\t100 10 \"\" \"\" \"/urn:cid:.+@([^\\.]+\\.)(.*)$/\\2/i\" .",
+	}
+	for i, o := range tests {
+		rr, e := NewRR(i)
+		if e != nil {
+			t.Log("Failed to parse RR: " + e.Error())
+			t.Fail()
+			continue
+		}
+		if rr.String() != o {
+			t.Logf("`%s' should be equal to\n`%s', but is `%s'\n", i, o, rr.String())
+			t.Fail()
+		} else {
+			t.Logf("RR is OK: `%s'", rr.String())
+		}
+	}
+}
+
 func TestParseBrace(t *testing.T) {
 	tests := map[string]string{
 		"(miek.nl.) 3600 IN A 127.0.0.1": "miek.nl.\t3600\tIN\tA\t127.0.0.1",
diff --git a/zscan.go b/zscan.go
index c6dc5e1e..9ca6de0c 100644
--- a/zscan.go
+++ b/zscan.go
@@ -10,9 +10,11 @@ import (
 )
 
 // Only used when debugging the parser itself.
-var _DEBUG = false
+var _DEBUG = true
 
-const maxTok = 1024
+// Completely unsure about the correctness of this value?
+// Large blobs of base64 code might get longer than this....
+const maxTok = 300
 
 // Tokinize a RFC 1035 zone file. The tokenizer will normalize it:
 // * Add ownernames if they are left blank;
@@ -25,6 +27,7 @@ const (
 	_EOF = iota // Don't let it start with zero
 	_STRING
 	_BLANK
+	_QUOTE
 	_NEWLINE
 	_RRTYPE
 	_OWNER
@@ -384,6 +387,12 @@ func zlexer(s scanner.Scanner, c chan lex) {
 		// avoids a len(x) that Go otherwise will perform when comparing strings.
 		switch x := s.TokenText(); x[0] {
 		case ' ', '\t':
+			if quote {
+				// Inside quotes this is legal
+				str[stri] = byte(x[0])
+				stri++
+				break
+			}
 			escape = false
 			if commt {
 				break
 			}
@@ -429,21 +438,28 @@ func zlexer(s scanner.Scanner, c chan lex) {
 			owner = false
 			space = true
 		case ';':
-			if escape {
-				escape = false
-				str[stri] = ';'
+			if quote {
+				// Inside quotes this is legal
+				str[stri] = byte(x[0])
 				stri++
 				break
 			}
-			if quote {
-				// Inside quoted text we allow ;
-				str[stri] = ';'
+			if escape {
+				escape = false
+				str[stri] = byte(x[0])
 				stri++
 				break
 			}
 			commt = true
 		case '\n':
 			// Hmmm, escape newline
+			if quote {
+				str[stri] = byte(x[0])
+				stri++
+				break
+			}
+
+			// inside quotes this is legal
 			escape = false
 			if commt {
 				// Reset a comment
@@ -490,16 +506,17 @@ func zlexer(s scanner.Scanner, c chan lex) {
 			rrtype = false
 			owner = true
 		case '\\':
+			// quote?
 			if commt {
 				break
 			}
 			if escape {
-				str[stri] = '\\'
+				str[stri] = byte(x[0])
 				stri++
 				escape = false
 				break
 			}
-			str[stri] = '\\'
+			str[stri] = byte(x[0])
 			stri++
 			escape = true
 		case '"':
@@ -507,30 +524,49 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			if escape {
-				str[stri] = '"'
+				str[stri] = byte(x[0])
 				stri++
 				escape = false
 				break
 			}
-			// str += "\"" don't add quoted quotes
+			// send previous gathered text and the quote
+			if stri != 0 {
+				l.value = _STRING
+				l.token = string(str[:stri])
+				c <-l
+				stri = 0
+			}
+			l.value = _QUOTE
+			l.token = "\""
+			c <- l
 			quote = !quote
 		case '(':
+			if quote {
+				str[stri] = byte(x[0])
+				stri++
+				break
+			}
 			if commt {
 				break
 			}
 			if escape {
-				str[stri] = '('
+				str[stri] = byte(x[0])
 				stri++
 				escape = false
 				break
 			}
 			brace++
 		case ')':
+			if quote {
+				str[stri] = byte(x[0])
+				stri++
+				break
+			}
 			if commt {
 				break
 			}
 			if escape {
-				str[stri] = ')'
+				str[stri] = byte(x[0])
 				stri++
 				escape = false
 				break
 			}
@@ -546,7 +582,7 @@ func zlexer(s scanner.Scanner, c chan lex) {
 				break
 			}
 			escape = false
-			str[stri] = byte(x[0]) // This should be ok...
+			str[stri] = byte(x[0])
 			stri++
 			space = false
 		}
diff --git a/zscan_rr.go b/zscan_rr.go
index c4c6b46b..829d6fc9 100644
--- a/zscan_rr.go
+++ b/zscan_rr.go
@@ -49,6 +49,9 @@ func setRR(h RR_Header, c chan lex, o, f string) (RR, *ParseError) {
 	case TypeSRV:
 		r, e = setSRV(h, c, o, f)
 		goto Slurp
+	case TypeNAPTR:
+		r, e = setNAPTR(h, c, o, f)
+		goto Slurp
 	// These types have a variable ending either chunks of txt or chunks/base64 or hex.
 	// They need to search for the end of the RR themselves, hence they look for the ending
 	// newline. Thus there is no need to slurp the remainder, because there is none.
@@ -64,8 +67,6 @@ func setRR(h RR_Header, c chan lex, o, f string) (RR, *ParseError) {
 		return setNSEC3PARAM(h, c, f)
 	case TypeDS:
 		return setDS(h, c, f)
-	case TypeNAPTR:
-		return setNAPTR(h, c, o, f)
 	case TypeTXT:
 		return setTXT(h, c, f)
 	default:
@@ -328,19 +329,23 @@ func setNAPTR(h RR_Header, c chan lex, o, f string) (RR, *ParseError) {
 	}
 	<-c // _BLANK
 	l = <-c // _STRING
+	println("Flags", l.token)
 	rr.Flags = l.token
 
 	<-c // _BLANK
 	l = <-c // _STRING
+	println("Service", l.token)
 	rr.Service = l.token
 
 	<-c // _BLANK
 	l = <-c // _STRING
+	println("Regexp", l.token)
 	rr.Regexp = l.token
 	<-c // _BLANK
 	l = <-c // _STRING
 	rr.Replacement = l.token
+	println("Replacement", l.token, "A")
 	_, ld, ok := IsDomainName(l.token)
 	if !ok {
 		return nil, &ParseError{f, "bad NAPTR Replacement", l}
 	}
@@ -703,14 +708,23 @@ func setTXT(h RR_Header, c chan lex, f string) (RR, *ParseError) {
 	rr.Hdr = h
 
 	// Get the remaining data until we see a NEWLINE
+	quote := false
 	l := <-c
 	var s string
 	for l.value != _NEWLINE && l.value != _EOF {
+		println("SEEN", l.value, l.token)
 		switch l.value {
 		case _STRING:
-			s += l.token
+			if quote {
+				s += l.token
+			}
 		case _BLANK:
-			s += l.token
+			if quote {
+				// _BLANK can only be seen in between txt parts.
+				return nil, &ParseError{f, "bad TXT Txt", l}
+			}
+		case _QUOTE:
+			quote = !quote
 		default:
 			return nil, &ParseError{f, "bad TXT", l}
 		}
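
Note on the approach (illustrative, not part of the patch): zlexer now forwards the '"' character to the parser as its own _QUOTE token instead of silently toggling state, and setTXT only keeps _STRING tokens seen between two _QUOTE tokens, so adjacent quoted chunks such as "abc" "DEF" are concatenated. The standalone Go sketch below mirrors that idea; the names (lexTXT, assembleTXT, tokQuote) are hypothetical, and it deliberately ignores escapes, parentheses and comments that the real lexer handles.

package main

import "fmt"

type tokenKind int

const (
	tokString tokenKind = iota // bare or quoted text
	tokBlank                   // whitespace outside quotes
	tokQuote                   // a single '"'
)

type token struct {
	kind tokenKind
	text string
}

// lexTXT splits rdata into tokens. A '"' is forwarded as its own token and
// blanks inside quotes stay part of the surrounding text.
func lexTXT(s string) []token {
	var toks []token
	var cur []rune
	quote := false
	flush := func() {
		if len(cur) > 0 {
			toks = append(toks, token{tokString, string(cur)})
			cur = cur[:0]
		}
	}
	for _, r := range s {
		switch {
		case r == '"':
			flush()
			toks = append(toks, token{tokQuote, `"`})
			quote = !quote
		case (r == ' ' || r == '\t') && !quote:
			flush()
			toks = append(toks, token{tokBlank, " "})
		default:
			cur = append(cur, r)
		}
	}
	flush()
	return toks
}

// assembleTXT keeps only text seen between quotes and concatenates adjacent
// quoted chunks, so `"abc" "DEF"` becomes "abcDEF".
func assembleTXT(toks []token) (string, error) {
	var s string
	quote := false
	for _, t := range toks {
		switch t.kind {
		case tokQuote:
			quote = !quote
		case tokString:
			if quote {
				s += t.text
			}
		case tokBlank:
			// a blank between quoted chunks is a separator, not data
		}
	}
	if quote {
		return "", fmt.Errorf("unbalanced quotes in TXT rdata")
	}
	return s, nil
}

func main() {
	txt, err := assembleTXT(lexTXT(`"abc" "DEF"`))
	fmt.Println(txt, err) // abcDEF <nil>
}

Running the sketch prints abcDEF, which matches the expected output for the `"abc" "DEF"` case in TestQuotes.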