Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Roland Shoemaker 2015-07-20 13:23:13 -07:00
commit eefb8a37ac
4 changed files with 2416 additions and 9 deletions

View File

@ -37,6 +37,7 @@ A not-so-up-to-date-list-that-may-be-actually-current:
* https://github.com/StalkR/dns-reverse-proxy * https://github.com/StalkR/dns-reverse-proxy
* https://github.com/tianon/rawdns * https://github.com/tianon/rawdns
* https://mesosphere.github.io/mesos-dns/ * https://mesosphere.github.io/mesos-dns/
* https://pulse.turbobytes.com/
Send pull request if you want to be listed here. Send pull request if you want to be listed here.

2346
idn/code_points.go Normal file

File diff suppressed because it is too large Load Diff

View File

@ -3,9 +3,10 @@ package idn
import ( import (
"bytes" "bytes"
"github.com/miekg/dns"
"strings" "strings"
"unicode" "unicode"
"github.com/miekg/dns"
) )
// Implementation idea from RFC itself and from IDNA::Punycode created by // Implementation idea from RFC itself and from IDNA::Punycode created by
@ -26,8 +27,8 @@ const (
) )
// ToPunycode converts unicode domain names to DNS-appropriate punycode names. // ToPunycode converts unicode domain names to DNS-appropriate punycode names.
// This function would return incorrect result for strings for non-canonical // This function would return an empty string result for domain names with
// unicode strings. // invalid unicode strings. This function expects domain names in lowercase.
func ToPunycode(s string) string { func ToPunycode(s string) string {
tokens := dns.SplitDomainName(s) tokens := dns.SplitDomainName(s)
switch { switch {
@ -40,7 +41,11 @@ func ToPunycode(s string) string {
} }
for i := range tokens { for i := range tokens {
tokens[i] = string(encode([]byte(tokens[i]))) t := encode([]byte(tokens[i]))
if t == nil {
return ""
}
tokens[i] = string(t)
} }
return strings.Join(tokens, ".") return strings.Join(tokens, ".")
} }
@ -138,12 +143,18 @@ func tfunc(k, bias rune) rune {
return k - bias return k - bias
} }
// encode transforms Unicode input bytes (that represent DNS label) into punycode bytestream // encode transforms Unicode input bytes (that represent DNS label) into
// punycode bytestream. This function would return nil if there's an invalid
// character in the label.
func encode(input []byte) []byte { func encode(input []byte) []byte {
n, bias := _N, _BIAS n, bias := _N, _BIAS
b := bytes.Runes(input) b := bytes.Runes(input)
for i := range b { for i := range b {
if !isValidRune(b[i]) {
return nil
}
b[i] = preprune(b[i]) b[i] = preprune(b[i])
} }
@ -267,3 +278,34 @@ func decode(b []byte) []byte {
} }
return ret.Bytes() return ret.Bytes()
} }
// isValidRune reports whether r is acceptable in a label. The decision is
// made by looking r up in the code point list and accepting it only when its
// property is PVALID. The special rules for code points with a contextual
// property are not evaluated yet, so such code points are rejected.
func isValidRune(r rune) bool {
	prop := findProperty(r)
	return prop == propertyPVALID
}
// findProperty returns the property recorded for r in the codePoints table,
// or propertyUnknown when no entry covers r.
//
// The table is probed with a binary search, which only gives correct answers
// if codePoints is ordered by code point (the original comment describes it
// as a slice of ordered ranges). An entry whose end is 0 stands for the
// single code point start; any other entry stands for the inclusive range
// start..end.
func findProperty(r rune) property {
	// Search the closed index interval [lo, hi]. hi has to start at the last
	// valid index: starting at len(codePoints) lets mid reach
	// len(codePoints) when r sorts after every entry (or the table is
	// empty), which indexes past the end of the slice and panics.
	lo, hi := 0, len(codePoints)-1

	for lo <= hi {
		mid := lo + (hi-lo)/2
		cp := codePoints[mid]

		if (cp.start == r && cp.end == 0) || (cp.start <= r && cp.end >= r) {
			return cp.state
		}

		// The entry lies entirely below r: continue in the upper half.
		// Otherwise it lies above r: continue in the lower half.
		if (cp.end > 0 && cp.end < r) || (cp.end == 0 && cp.start < r) {
			lo = mid + 1
		} else {
			hi = mid - 1
		}
	}

	return propertyUnknown
}

View File

@ -13,13 +13,13 @@ var testcases = [][2]string{
{"AbC", "abc"}, {"AbC", "abc"},
{"я", "xn--41a"}, {"я", "xn--41a"},
{"zя", "xn--z-0ub"}, {"zя", "xn--z-0ub"},
{"ЯZ", "xn--z-zub"}, {"яZ", "xn--z-zub"},
{"а-я", "xn----7sb8g"}, {"а-я", "xn----7sb8g"},
{"إختبار", "xn--kgbechtv"}, {"إختبار", "xn--kgbechtv"},
{"آزمایشی", "xn--hgbk6aj7f53bba"}, {"آزمایشی", "xn--hgbk6aj7f53bba"},
{"测试", "xn--0zwm56d"}, {"测试", "xn--0zwm56d"},
{"測試", "xn--g6w251d"}, {"測試", "xn--g6w251d"},
{"Испытание", "xn--80akhbyknj4f"}, {"испытание", "xn--80akhbyknj4f"},
{"परीक्षा", "xn--11b5bs3a9aj6g"}, {"परीक्षा", "xn--11b5bs3a9aj6g"},
{"δοκιμή", "xn--jxalpdlp"}, {"δοκιμή", "xn--jxalpdlp"},
{"테스트", "xn--9t4b11yi5a"}, {"테스트", "xn--9t4b11yi5a"},
@ -27,6 +27,7 @@ var testcases = [][2]string{
{"テスト", "xn--zckzah"}, {"テスト", "xn--zckzah"},
{"பரிட்சை", "xn--hlcj6aya9esc7a"}, {"பரிட்சை", "xn--hlcj6aya9esc7a"},
{"mamão-com-açúcar", "xn--mamo-com-acar-yeb1e6q"}, {"mamão-com-açúcar", "xn--mamo-com-acar-yeb1e6q"},
{"σ", "xn--4xa"},
} }
func TestEncodeDecodePunycode(t *testing.T) { func TestEncodeDecodePunycode(t *testing.T) {
@ -81,17 +82,34 @@ func TestEncodeDecodeFinalPeriod(t *testing.T) {
} }
} }
var invalid = []string{ var invalidACEs = []string{
"xn--*", "xn--*",
"xn--", "xn--",
"xn---", "xn---",
} }
func TestInvalidPunycode(t *testing.T) { func TestInvalidPunycode(t *testing.T) {
for _, d := range invalid { for _, d := range invalidACEs {
s := FromPunycode(d) s := FromPunycode(d)
if s != d { if s != d {
t.Errorf("Changed invalid name %s to %#v", d, s) t.Errorf("Changed invalid name %s to %#v", d, s)
} }
} }
} }
// invalidUnicodes lists names for which TestInvalidUnicodes expects
// ToPunycode to return an empty string. Every entry contains at least one
// uppercase letter.
//
// Whether a label is valid can be cross-checked against the Verisign
// website: http://mct.verisign-grs.com/
var invalidUnicodes = []string{
"Σ",
"ЯZ",
"Испытание",
}
// TestInvalidUnicodes checks that ToPunycode returns an empty string for
// every name listed in invalidUnicodes.
func TestInvalidUnicodes(t *testing.T) {
	for _, d := range invalidUnicodes {
		// The failure message states the call, the actual result and the
		// expected result; the previous wording ("Changed invalid name")
		// was copied from the FromPunycode test and described a different
		// expectation.
		if s := ToPunycode(d); s != "" {
			t.Errorf("ToPunycode(%q) = %q, want empty string", d, s)
		}
	}
}