#ifndef lint static char *rcsid = "$Id: punycode.c,v 1.1.1.1 2003-06-04 00:26:06 marka Exp $"; #endif /* * Copyright (c) 2001,2002 Japan Network Information Center. * All rights reserved. * * By using this file, you agree to the terms and conditions set forth bellow. * * LICENSE TERMS AND CONDITIONS * * The following License Terms and Conditions apply, unless a different * license is obtained from Japan Network Information Center ("JPNIC"), * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, * Chiyoda-ku, Tokyo 101-0047, Japan. * * 1. Use, Modification and Redistribution (including distribution of any * modified or derived work) in source and/or binary forms is permitted * under this License Terms and Conditions. * * 2. Redistribution of source code must retain the copyright notices as they * appear in each source code file, this License Terms and Conditions. * * 3. Redistribution in binary form must reproduce the Copyright Notice, * this License Terms and Conditions, in the documentation and/or other * materials provided with the distribution. For the purposes of binary * distribution the "Copyright Notice" refers to the following language: * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." * * 4. The name of JPNIC may not be used to endorse or promote products * derived from this Software without specific prior written approval of * JPNIC. * * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. */ #include #include #include #include #include #include #include #include #include #include #include #include /* * Although draft-ietf-idn-punycode-00.txt doesn't specify the ACE * signature, we have to choose one. In order to prevent the converted * name from beginning with a hyphen, we should choose a prefix rather * than a suffix. */ #ifndef IDN_PUNYCODE_PREFIX #define IDN_PUNYCODE_PREFIX "xn--" #endif #define INVALID_UCS 0x80000000 #define MAX_UCS 0x10FFFF /* * As the draft states, it is possible that `delta' may overflow during * the encoding. The upper bound of 'delta' is: * <# of chars. of input string> + * * <# of chars. of input string + 1> * For this value not to be greater than 0xffffffff (since the calculation * is done using unsigned long, which is at least 32bit long), the maxmum * input string size is about 3850 characters, which is long enough for * a domain label... */ #define PUNYCODE_MAXINPUT 3800 /* * Parameters. */ #define PUNYCODE_BASE 36 #define PUNYCODE_TMIN 1 #define PUNYCODE_TMAX 26 #define PUNYCODE_SKEW 38 #define PUNYCODE_DAMP 700 #define PUNYCODE_INITIAL_BIAS 72 #define PUNYCODE_INITIAL_N 0x80 static int punycode_getwc(const char *s, size_t len, int bias, unsigned long *vp); static int punycode_putwc(char *s, size_t len, unsigned long delta, int bias); static int punycode_update_bias(unsigned long delta, size_t npoints, int first); idn_result_t idn__punycode_decode(idn_converter_t ctx, void *privdata, const char *from, unsigned long *to, size_t tolen) { unsigned long *to_org = to; unsigned long c, idx; size_t prefixlen = strlen(IDN_PUNYCODE_PREFIX); size_t fromlen; size_t uidx, fidx, ucslen; int first, bias; idn_result_t r; assert(ctx != NULL); TRACE(("idn__punycode_decode(from=\"%s\", tolen=%d)\n", idn__debug_xstring(from, 50), (int)tolen)); if (!idn__util_asciihaveaceprefix(from, IDN_PUNYCODE_PREFIX)) { if (*from == '\0') { r = idn_ucs4_utf8toucs4(from, to, tolen); goto ret; } r = idn_invalid_encoding; goto ret; } from += prefixlen; fromlen = strlen(from); /* * Find the last delimiter, and copy the characters * before it verbatim. */ ucslen = 0; for (fidx = fromlen; fidx > 0; fidx--) { if (from[fidx - 1] == '-') { if (tolen < fidx) { r = idn_buffer_overflow; goto ret; } for (uidx = 0; uidx < fidx - 1; uidx++) { to[uidx] = from[uidx]; } ucslen = uidx; break; } } first = 1; bias = PUNYCODE_INITIAL_BIAS; c = PUNYCODE_INITIAL_N; idx = 0; while (fidx < fromlen) { int len; unsigned long delta; int i; len = punycode_getwc(from + fidx, fromlen - fidx, bias, &delta); if (len == 0) { r = idn_invalid_encoding; goto ret; } fidx += len; bias = punycode_update_bias(delta, ucslen + 1, first); first = 0; idx += delta; c += idx / (ucslen + 1); uidx = idx % (ucslen + 1); /* Insert 'c' at uidx. */ if (tolen-- <= 0) { r = idn_buffer_overflow; goto ret; } for (i = ucslen; i > uidx; i--) to[i] = to[i - 1]; to[uidx] = c; ucslen++; idx = uidx + 1; } /* Terminate with NUL. */ if (tolen <= 0) { r = idn_buffer_overflow; goto ret; } to[ucslen] = '\0'; r = idn_success; ret: if (r == idn_success) { TRACE(("idn__punycode_decode(): succcess (to=\"%s\")\n", idn__debug_ucs4xstring(to_org, 50))); } else { TRACE(("idn__punycode_decode(): %s\n", idn_result_tostring(r))); } return (r); } idn_result_t idn__punycode_encode(idn_converter_t ctx, void *privdata, const unsigned long *from, char *to, size_t tolen) { char *to_org = to; unsigned long cur_code, next_code, delta; size_t prefixlen = strlen(IDN_PUNYCODE_PREFIX); size_t fromlen; size_t ucsdone; size_t toidx; int uidx, bias, first; idn_result_t r; assert(ctx != NULL); TRACE(("idn__punycode_encode(from=\"%s\", tolen=%d)\n", idn__debug_ucs4xstring(from, 50), (int)tolen)); if (*from == '\0') { r = idn_ucs4_ucs4toutf8(from, to, tolen); goto ret; } else if (idn__util_ucs4haveaceprefix(from, IDN_PUNYCODE_PREFIX)) { r = idn_prohibited; goto ret; } if (tolen < prefixlen) { r = idn_buffer_overflow; goto ret; } memcpy(to, IDN_PUNYCODE_PREFIX, prefixlen); to += prefixlen; tolen -= prefixlen; fromlen = idn_ucs4_strlen(from); /* * If the input string is too long (actually too long to be sane), * return failure in order to prevent possible overflow. */ if (fromlen > PUNYCODE_MAXINPUT) { ERROR(("idn__punycode_encode(): " "the input string is too long to convert Punycode\n", idn__debug_ucs4xstring(from, 50))); r = idn_failure; goto ret; } ucsdone = 0; /* number of characters processed */ toidx = 0; /* * First, pick up basic code points and copy them to 'to'. */ for (uidx = 0; uidx < fromlen; uidx++) { if (from[uidx] < 0x80) { if (toidx >= tolen) { r = idn_buffer_overflow; goto ret; } to[toidx++] = from[uidx]; ucsdone++; } } /* * If there are any basic code points, output a delimiter * (hyphen-minus). */ if (toidx > 0) { if (toidx >= tolen) { r = idn_buffer_overflow; goto ret; } to[toidx++] = '-'; to += toidx; tolen -= toidx; } /* * Then encode non-basic characters. */ first = 1; cur_code = PUNYCODE_INITIAL_N; bias = PUNYCODE_INITIAL_BIAS; delta = 0; while (ucsdone < fromlen) { int limit = -1, rest; /* * Find the smallest code point equal to or greater * than 'cur_code'. Also remember the index of the * last occurence of the code point. */ for (next_code = MAX_UCS, uidx = fromlen - 1; uidx >= 0; uidx--) { if (from[uidx] >= cur_code && from[uidx] < next_code) { next_code = from[uidx]; limit = uidx; } } /* There must be such code point. */ assert(limit >= 0); delta += (next_code - cur_code) * (ucsdone + 1); cur_code = next_code; /* * Scan the input string again, and encode characters * whose code point is 'cur_code'. Use 'limit' to avoid * unnecessary scan. */ for (uidx = 0, rest = ucsdone; uidx <= limit; uidx++) { if (from[uidx] < cur_code) { delta++; rest--; } else if (from[uidx] == cur_code) { int sz = punycode_putwc(to, tolen, delta, bias); if (sz == 0) { r = idn_buffer_overflow; goto ret; } to += sz; tolen -= sz; ucsdone++; bias = punycode_update_bias(delta, ucsdone, first); delta = 0; first = 0; } } delta += rest + 1; cur_code++; } /* * Terminate with NUL. */ if (tolen <= 0) { r = idn_buffer_overflow; goto ret; } *to = '\0'; r = idn_success; ret: if (r == idn_success) { TRACE(("idn__punycode_encode(): succcess (to=\"%s\")\n", idn__debug_xstring(to_org, 50))); } else { TRACE(("idn__punycode_encode(): %s\n", idn_result_tostring(r))); } return (r); } static int punycode_getwc(const char *s, size_t len, int bias, unsigned long *vp) { size_t orglen = len; unsigned long v = 0, w = 1; int k; for (k = PUNYCODE_BASE - bias; len > 0; k += PUNYCODE_BASE) { int c = *s++; int t = (k < PUNYCODE_TMIN) ? PUNYCODE_TMIN : (k > PUNYCODE_TMAX) ? PUNYCODE_TMAX : k; len--; if ('a' <= c && c <= 'z') c = c - 'a'; else if ('A' <= c && c <= 'Z') c = c - 'A'; else if ('0' <= c && c <= '9') c = c - '0' + 26; else c = -1; if (c < 0) return (0); /* invalid character */ v += c * w; if (c < t) { *vp = v; return (orglen - len); } w *= (PUNYCODE_BASE - t); } return (0); /* final character missing */ } static int punycode_putwc(char *s, size_t len, unsigned long delta, int bias) { const char *punycode_base36 = "abcdefghijklmnopqrstuvwxyz0123456789"; int k; char *sorg = s; for (k = PUNYCODE_BASE - bias; 1; k += PUNYCODE_BASE) { int t = (k < PUNYCODE_TMIN) ? PUNYCODE_TMIN : (k > PUNYCODE_TMAX) ? PUNYCODE_TMAX : k; if (delta < t) break; if (len < 1) return (0); *s++ = punycode_base36[t + ((delta - t) % (PUNYCODE_BASE - t))]; len--; delta = (delta - t) / (PUNYCODE_BASE - t); } if (len < 1) return (0); *s++ = punycode_base36[delta]; return (s - sorg); } static int punycode_update_bias(unsigned long delta, size_t npoints, int first) { int k = 0; delta /= first ? PUNYCODE_DAMP : 2; delta += delta / npoints; while (delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2) { delta /= PUNYCODE_BASE - PUNYCODE_TMIN; k++; } return (PUNYCODE_BASE * k + (((PUNYCODE_BASE - PUNYCODE_TMIN + 1) * delta) / (delta + PUNYCODE_SKEW))); }