2014-09-11 13:33:26 +10:00
|
|
|
package idn
|
2014-09-10 04:27:24 +10:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"unicode"
|
|
|
|
)
|
|
|
|
|
|
|
|
// See http://tools.ietf.org/html/rfc3492
|
|
|
|
// Implementation idea from RFC itself and from IDNA::Punycode created by
|
2014-09-11 13:33:26 +10:00
|
|
|
// Tatsuhiko Miyagawa <miyagawa@bulknews.net> and released under Perl Artistic
|
|
|
|
// License in 2002
|
2014-09-10 04:27:24 +10:00
|
|
|
|
|
|
|
// Bootstring parameters with the values RFC 3492 assigns for Punycode.
const (
	_MIN  rune = 1   // lower clamp for the per-digit threshold (see tfunc)
	_MAX  rune = 26  // upper clamp for the per-digit threshold (see tfunc)
	_SKEW rune = 38  // skew term in the bias adaptation (see adapt)
	_DAMP rune = 700 // divisor applied to the very first delta (see adapt)
	_BASE rune = 36  // number of distinct digits: a-z and 0-9
	_BIAS rune = 72  // bias the encoder and decoder start with
	_N    rune = 128 // first code point handled as non-basic
)

// Markers used inside an encoded label.
const (
	_DELIMITER = '-'    // written between the basic code points and the encoded tail
	_PREFIX    = "xn--" // written in front of every encoded label
)
|
|
|
|
|
2014-09-14 12:11:53 +10:00
|
|
|
// ToPunycode converts unicode domain names to DNS-appropriate punycode names.
|
|
|
|
// This function would return incorrect result for strings for non-canonical
|
|
|
|
// unicode strings.
|
2014-09-11 13:33:26 +10:00
|
|
|
func ToPunycode(s string) string {
|
|
|
|
tokens := bytes.Split([]byte(s), []byte{'.'})
|
|
|
|
for i := range tokens {
|
|
|
|
tokens[i] = encodeBytes(tokens[i])
|
|
|
|
}
|
|
|
|
return string(bytes.Join(tokens, []byte{'.'}))
|
2014-09-10 04:27:24 +10:00
|
|
|
}
|
|
|
|
|
2014-09-14 12:11:53 +10:00
|
|
|
// FromPunycode returns uncode domain name from provided punycode string.
|
2014-09-11 13:33:26 +10:00
|
|
|
func FromPunycode(s string) string {
|
|
|
|
tokens := bytes.Split([]byte(s), []byte{'.'})
|
|
|
|
for i := range tokens {
|
|
|
|
tokens[i] = decodeBytes(tokens[i])
|
|
|
|
}
|
|
|
|
return string(bytes.Join(tokens, []byte{'.'}))
|
2014-09-10 04:27:24 +10:00
|
|
|
}
|
|
|
|
|
2014-09-11 13:33:26 +10:00
|
|
|
// errdigit is the value digitval returns for a code point that is not one of
// the base-36 digits.
const errdigit = 0xffff

// digitval converts a single punycode digit into its numeric value:
// 'A'-'Z' and 'a'-'z' map to 0-25, '0'-'9' map to 26-35. Any other code
// point yields errdigit.
func digitval(code rune) rune {
	if 'A' <= code && code <= 'Z' {
		return code - 'A'
	}
	if 'a' <= code && code <= 'z' {
		return code - 'a'
	}
	if '0' <= code && code <= '9' {
		return code - '0' + 26
	}
	return errdigit
}
|
|
|
|
|
2014-09-11 13:33:26 +10:00
|
|
|
// lettercode returns the base-36 character for a punycode digit value:
// 0-25 map to 'a'-'z' and 26-35 map to '0'-'9'.
//
// It panics for any value outside 0..35. Such a value cannot be produced by
// a correct encoder, so reaching the panic indicates a programming error.
func lettercode(digit rune) rune {
	switch {
	case digit >= 0 && digit <= 25:
		return digit + 'a'
	case digit >= 26 && digit <= 35:
		// The upper bound is 35, not 36: 36 would map to ':' which is not a
		// valid base-36 character.
		return digit - 26 + '0'
	}
	panic("dns: not reached")
}
|
|
|
|
|
|
|
|
// adapt calculates next bias to be used for next iteration delta
|
2014-09-11 13:33:26 +10:00
|
|
|
func adapt(delta rune, numpoints rune, firsttime bool) rune {
|
2014-09-10 04:27:24 +10:00
|
|
|
if firsttime {
|
|
|
|
delta /= _DAMP
|
|
|
|
} else {
|
|
|
|
delta /= 2
|
|
|
|
}
|
|
|
|
|
|
|
|
var k rune
|
|
|
|
for delta = delta + delta/numpoints; delta > (_BASE-_MIN)*_MAX/2; k += _BASE {
|
|
|
|
delta /= _BASE - _MIN
|
|
|
|
}
|
|
|
|
|
|
|
|
return k + ((_BASE-_MIN+1)*delta)/(delta+_SKEW)
|
|
|
|
}
|
|
|
|
|
|
|
|
// next returns the rune with the lowest code point among those in b that are
// equal to or above boundary. When no later element qualifies, the first
// element of b is returned as is, even if it lies below boundary. It panics
// when b is empty.
func next(b []rune, boundary rune) rune {
	if len(b) == 0 {
		panic("invalid set of runes to determine next one")
	}
	best := b[0]
	for i := 1; i < len(b); i++ {
		candidate := b[i]
		if candidate < boundary {
			continue
		}
		// Take the candidate when nothing valid was found yet or when it
		// is smaller than the current best.
		if best < boundary || candidate < best {
			best = candidate
		}
	}
	return best
}
|
|
|
|
|
2014-09-14 12:11:53 +10:00
|
|
|
// preprune converts an upper case rune to lower case and returns every other
// rune unchanged. At this time it does not support everything described in
// RFC 3454.
func preprune(r rune) rune {
	if !unicode.IsUpper(r) {
		return r
	}
	return unicode.ToLower(r)
}
|
|
|
|
|
|
|
|
// tfunc is a function that helps calculate each character weight
|
|
|
|
func tfunc(k, bias rune) rune {
|
|
|
|
switch {
|
|
|
|
case k <= bias:
|
|
|
|
return _MIN
|
|
|
|
case k >= bias+_MAX:
|
|
|
|
return _MAX
|
|
|
|
}
|
|
|
|
return k - bias
|
|
|
|
}
|
|
|
|
|
2014-09-11 13:33:26 +10:00
|
|
|
// encodeBytes transforms Unicode input bytes (that represent DNS label) into punycode bytestream
|
|
|
|
func encodeBytes(input []byte) []byte {
|
2014-09-10 04:27:24 +10:00
|
|
|
n, delta, bias := _N, rune(0), _BIAS
|
|
|
|
|
|
|
|
b := bytes.Runes(input)
|
|
|
|
for i := range b {
|
2014-09-14 12:11:53 +10:00
|
|
|
b[i] = preprune(b[i])
|
2014-09-10 04:27:24 +10:00
|
|
|
}
|
|
|
|
|
|
|
|
basic := make([]byte, 0, len(b))
|
|
|
|
for _, ltr := range b {
|
|
|
|
if ltr <= 0x7f {
|
|
|
|
basic = append(basic, byte(ltr))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
basiclen := rune(len(basic))
|
|
|
|
fulllen := rune(len(b))
|
|
|
|
if basiclen == fulllen {
|
|
|
|
return basic
|
|
|
|
}
|
|
|
|
|
|
|
|
var out bytes.Buffer
|
|
|
|
|
2014-09-11 13:33:26 +10:00
|
|
|
out.WriteString(_PREFIX)
|
2014-09-10 04:27:24 +10:00
|
|
|
if basiclen > 0 {
|
|
|
|
out.Write(basic)
|
2014-09-11 13:33:26 +10:00
|
|
|
out.WriteByte(_DELIMITER)
|
2014-09-10 04:27:24 +10:00
|
|
|
}
|
|
|
|
|
|
|
|
for h := basiclen; h < fulllen; n, delta = n+1, delta+1 {
|
|
|
|
next := next(b, n)
|
|
|
|
s := &bytes.Buffer{}
|
|
|
|
s.WriteRune(next)
|
|
|
|
delta, n = delta+(next-n)*(h+1), next
|
|
|
|
|
|
|
|
for _, ltr := range b {
|
|
|
|
if ltr < n {
|
|
|
|
delta++
|
|
|
|
}
|
|
|
|
if ltr == n {
|
|
|
|
q := delta
|
|
|
|
for k := _BASE; ; k += _BASE {
|
|
|
|
t := tfunc(k, bias)
|
|
|
|
if q < t {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
cp := t + ((q - t) % (_BASE - t))
|
2014-09-11 13:33:26 +10:00
|
|
|
out.WriteRune(lettercode(cp))
|
2014-09-10 04:27:24 +10:00
|
|
|
q = (q - t) / (_BASE - t)
|
|
|
|
}
|
|
|
|
|
2014-09-11 13:33:26 +10:00
|
|
|
out.WriteRune(lettercode(q))
|
2014-09-10 04:27:24 +10:00
|
|
|
|
2014-09-11 13:33:26 +10:00
|
|
|
bias = adapt(delta, h+1, h == basiclen)
|
2014-09-10 04:27:24 +10:00
|
|
|
h, delta = h+1, 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return out.Bytes()
|
|
|
|
}
|
|
|
|
|
2014-09-11 13:33:26 +10:00
|
|
|
// decodeBytes transforms punycode input bytes (that represent DNS label) into Unicode bytestream
|
|
|
|
func decodeBytes(b []byte) []byte {
|
2014-09-14 12:11:53 +10:00
|
|
|
src := b // b would move and we need to keep it
|
|
|
|
|
2014-09-10 04:27:24 +10:00
|
|
|
n, bias := _N, _BIAS
|
2014-09-11 13:33:26 +10:00
|
|
|
if !bytes.HasPrefix(b, []byte(_PREFIX)) {
|
2014-09-10 04:27:24 +10:00
|
|
|
return b
|
|
|
|
}
|
|
|
|
out := make([]rune, 0, len(b))
|
2014-09-11 13:33:26 +10:00
|
|
|
b = b[len(_PREFIX):]
|
2014-09-14 12:11:53 +10:00
|
|
|
for pos, x := range b {
|
|
|
|
if x == _DELIMITER {
|
|
|
|
out = append(out, bytes.Runes(b[:pos])...)
|
|
|
|
b = b[pos+1:] // trim source string
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(b) == 0 {
|
|
|
|
return src
|
2014-09-10 04:27:24 +10:00
|
|
|
}
|
|
|
|
for i := rune(0); len(b) > 0; i++ {
|
|
|
|
oldi, w, ch := i, rune(1), byte(0)
|
2014-09-14 12:11:53 +10:00
|
|
|
for k := _BASE; len(b) > 0; k += _BASE {
|
2014-09-10 04:27:24 +10:00
|
|
|
ch, b = b[0], b[1:]
|
2014-09-11 13:33:26 +10:00
|
|
|
digit := digitval(rune(ch))
|
2014-09-14 12:11:53 +10:00
|
|
|
if digit == errdigit {
|
|
|
|
return src
|
|
|
|
}
|
2014-09-10 04:27:24 +10:00
|
|
|
i += digit * w
|
|
|
|
|
|
|
|
t := tfunc(k, bias)
|
|
|
|
if digit < t {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
w *= _BASE - t
|
|
|
|
}
|
|
|
|
ln := rune(len(out) + 1)
|
2014-09-11 13:33:26 +10:00
|
|
|
bias = adapt(i-oldi, ln, oldi == 0)
|
2014-09-10 04:27:24 +10:00
|
|
|
n += i / ln
|
|
|
|
i = i % ln
|
|
|
|
// insert
|
|
|
|
out = append(out, 0)
|
|
|
|
copy(out[i+1:], out[i:])
|
|
|
|
out[i] = n
|
|
|
|
}
|
|
|
|
|
|
|
|
var ret bytes.Buffer
|
|
|
|
for _, r := range out {
|
|
|
|
ret.WriteRune(r)
|
|
|
|
}
|
|
|
|
return ret.Bytes()
|
|
|
|
}
|