Updated Punycode funcs

* docstrings
* changes to handling invalid punycode names
* updated tests
This commit is contained in:
Alex Sergeyev 2014-09-13 22:11:53 -04:00
parent e59305ec87
commit 61db5487dd
2 changed files with 48 additions and 10 deletions

View File

@ -23,6 +23,9 @@ const (
_PREFIX = "xn--"
)
// ToPunycode converts unicode domain names to DNS-appropriate punycode names.
// This function would return incorrect result for strings for non-canonical
// unicode strings.
func ToPunycode(s string) string {
tokens := bytes.Split([]byte(s), []byte{'.'})
for i := range tokens {
@ -31,6 +34,7 @@ func ToPunycode(s string) string {
return string(bytes.Join(tokens, []byte{'.'}))
}
// FromPunycode returns uncode domain name from provided punycode string.
func FromPunycode(s string) string {
tokens := bytes.Split([]byte(s), []byte{'.'})
for i := range tokens {
@ -40,6 +44,8 @@ func FromPunycode(s string) string {
}
// digitval converts single byte into meaningful value that's used to calculate decoded unicode character.
const errdigit = 0xffff
func digitval(code rune) rune {
switch {
case code >= 'A' && code <= 'Z':
@ -49,7 +55,7 @@ func digitval(code rune) rune {
case code >= '0' && code <= '9':
return code - '0' + 26
}
panic("dns: not reached")
return errdigit
}
// lettercode finds BASE36 byte (a-z0-9) based on calculated number.
@ -93,8 +99,9 @@ func next(b []rune, boundary rune) rune {
return m
}
// PrepRune should do actions recommended by stringprep (RFC3491) for each unicode char. TODO(asergeyev): work on actual implementation, currently just lowercases Unicode chars.
func PrepRune(r rune) rune {
// preprune converts unicode rune to lower case. At this time it's not
// supporting all things described RFC3454.
func preprune(r rune) rune {
if unicode.IsUpper(r) {
r = unicode.ToLower(r)
}
@ -118,7 +125,7 @@ func encodeBytes(input []byte) []byte {
b := bytes.Runes(input)
for i := range b {
b[i] = PrepRune(b[i])
b[i] = preprune(b[i])
}
basic := make([]byte, 0, len(b))
@ -175,22 +182,32 @@ func encodeBytes(input []byte) []byte {
// decodeBytes transforms punycode input bytes (that represent DNS label) into Unicode bytestream
func decodeBytes(b []byte) []byte {
src := b // b would move and we need to keep it
n, bias := _N, _BIAS
if !bytes.HasPrefix(b, []byte(_PREFIX)) {
return b
}
out := make([]rune, 0, len(b))
b = b[len(_PREFIX):]
pos := bytes.Index(b, []byte{_DELIMITER})
if pos >= 0 {
out = append(out, bytes.Runes(b[:pos])...)
b = b[pos+1:] // trim source string
for pos, x := range b {
if x == _DELIMITER {
out = append(out, bytes.Runes(b[:pos])...)
b = b[pos+1:] // trim source string
break
}
}
if len(b) == 0 {
return src
}
for i := rune(0); len(b) > 0; i++ {
oldi, w, ch := i, rune(1), byte(0)
for k := _BASE; ; k += _BASE {
for k := _BASE; len(b) > 0; k += _BASE {
ch, b = b[0], b[1:]
digit := digitval(rune(ch))
if digit == errdigit {
return src
}
i += digit * w
t := tfunc(k, bias)

View File

@ -25,7 +25,7 @@ var testcases = [][2]string{
{"பரிட்சை", "xn--hlcj6aya9esc7a"},
}
func TestEncodePunycode(t *testing.T) {
func TestEncodeDecodePunycode(t *testing.T) {
for _, tst := range testcases {
enc := encodeBytes([]byte(tst[0]))
if string(enc) != tst[1] {
@ -35,14 +35,35 @@ func TestEncodePunycode(t *testing.T) {
if string(dec) != strings.ToLower(tst[0]) {
t.Errorf("%s decoded as %s but should be %s", tst[1], dec, strings.ToLower(tst[0]))
}
}
}
func TestToFromPunycode(t *testing.T) {
for _, tst := range testcases {
// assert unicode.com == punycode.com
full := ToPunycode(tst[0] + ".com")
if full != tst[1]+".com" {
t.Errorf("invalid result from string conversion to punycode, %s and should be %s.com", full, tst[1])
}
// assert punycode.punycode == unicode.unicode
decoded := FromPunycode(tst[1] + "." + tst[1])
if decoded != strings.ToLower(tst[0]+"."+tst[0]) {
t.Errorf("invalid result from string conversion to punycode, %s and should be %s.%s", decoded, tst[0], tst[0])
}
}
}
var invalid = []string{
"xn--*",
"xn--",
"xn---",
}
func TestInvalidPunycode(t *testing.T) {
for _, d := range invalid {
s := FromPunycode(d)
if s != d {
t.Errorf("Changed invalid name %s to %#v", d, s)
}
}
}