monotone

monotone Mtn Source Tree

Root/idna/stringprep.c

/* stringprep.c --- Core stringprep implementation.
 * Copyright (C) 2002, 2003 Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
21
22#if HAVE_CONFIG_H
23# include "config.h"
24#endif
25
26#include <stdlib.h>
27#include <string.h>
28
29#include "idna/stringprep.h"
30
31static ssize_t
32stringprep_find_character_in_table (uint32_t ucs4,
33 const Stringprep_table_element * table)
34{
35 ssize_t i;
36
37 /* During self tests, this is where it spends its CPU time and
38 causes most cache misses. Do a binary search? */
39
40 for (i = 0; table[i].start; i++)
41 if (ucs4 >= table[i].start &&
42ucs4 <= (table[i].end ? table[i].end : table[i].start))
43 return i;
44
45 return -1;
46}
47
48static ssize_t
49stringprep_find_string_in_table (uint32_t * ucs4,
50 size_t ucs4len,
51 size_t * tablepos,
52 const Stringprep_table_element * table)
53{
54 size_t j;
55 ssize_t pos;
56
57 for (j = 0; j < ucs4len; j++)
58 if ((pos = stringprep_find_character_in_table (ucs4[j], table)) != -1)
59 {
60if (tablepos)
61 *tablepos = pos;
62return j;
63 }
64
65 return -1;
66}
67
68static int
69stringprep_apply_table_to_string (uint32_t * ucs4,
70 size_t * ucs4len,
71 size_t maxucs4len,
72 const Stringprep_table_element * table)
73{
74 ssize_t pos;
75 size_t i, maplen;
76
77 while ((pos = stringprep_find_string_in_table (ucs4, *ucs4len,
78 &i, table)) != -1)
79 {
80 for (maplen = STRINGPREP_MAX_MAP_CHARS;
81 maplen > 0 && table[i].map[maplen - 1] == 0; maplen--)
82;
83
84 if (*ucs4len - 1 + maplen >= maxucs4len)
85return STRINGPREP_TOO_SMALL_BUFFER;
86
87 memmove (&ucs4[pos + maplen], &ucs4[pos + 1],
88 sizeof (uint32_t) * (*ucs4len - pos - 1));
89 memcpy (&ucs4[pos], table[i].map, sizeof (uint32_t) * maplen);
90 *ucs4len = *ucs4len - 1 + maplen;
91 }
92
93 return STRINGPREP_OK;
94}
95
/* INVERTED(x) masks away the most significant bit of an unsigned long
   from X; the result is non-zero when any of the remaining bits of X
   is set.  */
#define INVERTED(x) ((x) & ((~0UL) >> 1))

/* UNAPPLICAPLEFLAGS(flags, profileflags) is non-zero in two cases:
     - INVERTED(profileflags) is zero, PROFILEFLAGS itself is non-zero,
       and PROFILEFLAGS has no bit in common with FLAGS; or
     - INVERTED(profileflags) is non-zero and PROFILEFLAGS has at least
       one bit in common with FLAGS.
   The profile steps in this file are skipped when it is non-zero.

   Every use of an argument is parenthesized so that expressions such
   as `A | B' can be passed safely.  Note that PROFILEFLAGS is still
   evaluated more than once, so it must not have side effects.  */
#define UNAPPLICAPLEFLAGS(flags, profileflags)                          \
  ((!INVERTED (profileflags) && !((profileflags) & (flags))             \
    && (profileflags))                                                  \
   || (INVERTED (profileflags) && ((profileflags) & (flags))))
100
101/**
102 * stringprep_4i:
103 * @ucs4: input/output array with string to prepare.
104 * @len: on input, length of input array with Unicode code points,
105 * on exit, length of output array with Unicode code points.
106 * @maxucs4len: maximum length of input/output array.
107 * @flags: stringprep profile flags, or 0.
108 * @profile: pointer to stringprep profile to use.
109 *
110 * Prepare the input UCS-4 string according to the stringprep profile,
111 * and write back the result to the input string.
112 *
113 * The input is not required to be zero terminated (@ucs4[@len] = 0).
114 * The output will not be zero terminated unless @ucs4[@len] = 0.
115 * Instead, see stringprep_4zi() if your input is zero terminated or
116 * if you want the output to be.
117 *
118 * Since the stringprep operation can expand the string, @maxucs4len
119 * indicate how large the buffer holding the string is. This function
120 * will not read or write to code points outside that size.
121 *
122 * The @flags are one of Stringprep_profile_flags, or 0.
123 *
124 * The @profile contain the instructions to perform. Your application
125 * can define new profiles, possibly re-using the generic stringprep
126 * tables that always will be part of the library, or use one of the
127 * currently supported profiles.
128 *
129 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
130 **/
131int
132stringprep_4i (uint32_t * ucs4, size_t * len, size_t maxucs4len,
133 Stringprep_profile_flags flags,
134 const Stringprep_profile * profile)
135{
136 size_t i, j;
137 ssize_t k;
138 size_t ucs4len = *len;
139 int rc;
140
141 for (i = 0; profile[i].operation; i++)
142 {
143 switch (profile[i].operation)
144{
145case STRINGPREP_NFKC:
146 {
147 uint32_t *q = 0;
148
149 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
150 break;
151
152 if (flags & STRINGPREP_NO_NFKC && !profile[i].flags)
153 /* Profile requires NFKC, but callee asked for no NFKC. */
154 return STRINGPREP_FLAG_ERROR;
155
156 q = stringprep_ucs4_nfkc_normalize (ucs4, ucs4len);
157 if (!q)
158 return STRINGPREP_NFKC_FAILED;
159
160 for (ucs4len = 0; q[ucs4len]; ucs4len++)
161 ;
162
163 if (ucs4len >= maxucs4len)
164 {
165free (q);
166return STRINGPREP_TOO_SMALL_BUFFER;
167 }
168
169 memcpy (ucs4, q, ucs4len * sizeof (ucs4[0]));
170
171 free (q);
172 }
173 break;
174
175case STRINGPREP_PROHIBIT_TABLE:
176 k = stringprep_find_string_in_table (ucs4, ucs4len,
177 NULL, profile[i].table);
178 if (k != -1)
179 return STRINGPREP_CONTAINS_PROHIBITED;
180 break;
181
182case STRINGPREP_UNASSIGNED_TABLE:
183 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
184 break;
185 if (flags & STRINGPREP_NO_UNASSIGNED)
186 {
187 k = stringprep_find_string_in_table
188(ucs4, ucs4len, NULL, profile[i].table);
189 if (k != -1)
190return STRINGPREP_CONTAINS_UNASSIGNED;
191 }
192 break;
193
194case STRINGPREP_MAP_TABLE:
195 if (UNAPPLICAPLEFLAGS (flags, profile[i].flags))
196 break;
197 rc = stringprep_apply_table_to_string
198 (ucs4, &ucs4len, maxucs4len, profile[i].table);
199 if (rc != STRINGPREP_OK)
200 return rc;
201 break;
202
203case STRINGPREP_BIDI_PROHIBIT_TABLE:
204case STRINGPREP_BIDI_RAL_TABLE:
205case STRINGPREP_BIDI_L_TABLE:
206 break;
207
208case STRINGPREP_BIDI:
209 {
210 int done_prohibited = 0;
211 int done_ral = 0;
212 int done_l = 0;
213 int contains_ral = -1;
214 int contains_l = -1;
215
216 for (j = 0; profile[j].operation; j++)
217 if (profile[j].operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
218{
219 done_prohibited = 1;
220 k = stringprep_find_string_in_table (ucs4, ucs4len,
221 NULL,
222 profile[j].table);
223 if (k != -1)
224 return STRINGPREP_BIDI_CONTAINS_PROHIBITED;
225}
226 else if (profile[j].operation == STRINGPREP_BIDI_RAL_TABLE)
227{
228 done_ral = 1;
229 if (stringprep_find_string_in_table
230 (ucs4, ucs4len, NULL, profile[j].table) != -1)
231 contains_ral = j;
232}
233 else if (profile[j].operation == STRINGPREP_BIDI_L_TABLE)
234{
235 done_l = 1;
236 if (stringprep_find_string_in_table
237 (ucs4, ucs4len, NULL, profile[j].table) != -1)
238 contains_l = j;
239}
240
241 if (!done_prohibited || !done_ral || !done_l)
242 return STRINGPREP_PROFILE_ERROR;
243
244 if (contains_ral != -1 && contains_l != -1)
245 return STRINGPREP_BIDI_BOTH_L_AND_RAL;
246
247 if (contains_ral != -1)
248 {
249if (!(stringprep_find_character_in_table
250 (ucs4[0], profile[contains_ral].table) != -1 &&
251 stringprep_find_character_in_table
252 (ucs4[ucs4len - 1], profile[contains_ral].table) != -1))
253 return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
254 }
255 }
256 break;
257
258default:
259 return STRINGPREP_PROFILE_ERROR;
260 break;
261}
262 }
263
264 *len = ucs4len;
265
266 return STRINGPREP_OK;
267}
268
269static int
270stringprep_4zi_1 (uint32_t * ucs4, size_t ucs4len, size_t maxucs4len,
271 Stringprep_profile_flags flags,
272 const Stringprep_profile * profile)
273{
274 int rc;
275
276 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
277 if (rc != STRINGPREP_OK)
278 return rc;
279
280 if (ucs4len >= maxucs4len)
281 return STRINGPREP_TOO_SMALL_BUFFER;
282
283 ucs4[ucs4len] = 0;
284
285 return STRINGPREP_OK;
286}
287
288/**
289 * stringprep_4zi:
290 * @ucs4: input/output array with zero terminated string to prepare.
291 * @maxucs4len: maximum length of input/output array.
292 * @flags: stringprep profile flags, or 0.
293 * @profile: pointer to stringprep profile to use.
294 *
295 * Prepare the input zero terminated UCS-4 string according to the
296 * stringprep profile, and write back the result to the input string.
297 *
298 * Since the stringprep operation can expand the string, @maxucs4len
299 * indicate how large the buffer holding the string is. This function
300 * will not read or write to code points outside that size.
301 *
302 * The @flags are one of Stringprep_profile_flags, or 0.
303 *
304 * The @profile contain the instructions to perform. Your application
305 * can define new profiles, possibly re-using the generic stringprep
306 * tables that always will be part of the library, or use one of the
307 * currently supported profiles.
308 *
309 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
310 **/
311int
312stringprep_4zi (uint32_t * ucs4, size_t maxucs4len,
313Stringprep_profile_flags flags,
314const Stringprep_profile * profile)
315{
316 size_t ucs4len;
317
318 for (ucs4len = 0; ucs4len < maxucs4len && ucs4[ucs4len] != 0; ucs4len++)
319 ;
320
321 return stringprep_4zi_1 (ucs4, ucs4len, maxucs4len, flags, profile);
322}
323
324/**
325 * stringprep:
326 * @in: input/ouput array with string to prepare.
327 * @maxlen: maximum length of input/output array.
328 * @flags: stringprep profile flags, or 0.
329 * @profile: pointer to stringprep profile to use.
330 *
331 * Prepare the input zero terminated UTF-8 string according to the
332 * stringprep profile, and write back the result to the input string.
333 *
334 * Note that you must convert strings entered in the systems locale
335 * into UTF-8 before using this function, see
336 * stringprep_locale_to_utf8().
337 *
338 * Since the stringprep operation can expand the string, @maxlen
339 * indicate how large the buffer holding the string is. This function
340 * will not read or write to characters outside that size.
341 *
342 * The @flags are one of Stringprep_profile_flags, or 0.
343 *
344 * The @profile contain the instructions to perform. Your application
345 * can define new profiles, possibly re-using the generic stringprep
346 * tables that always will be part of the library, or use one of the
347 * currently supported profiles.
348 *
349 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
350 **/
351int
352stringprep (char *in,
353 size_t maxlen,
354 Stringprep_profile_flags flags,
355 const Stringprep_profile * profile)
356{
357 int rc;
358 char *utf8 = NULL;
359 uint32_t *ucs4 = NULL;
360 size_t ucs4len, maxucs4len, adducs4len = 50;
361
362 do
363 {
364 if (ucs4)
365free (ucs4);
366 ucs4 = stringprep_utf8_to_ucs4 (in, -1, &ucs4len);
367 maxucs4len = ucs4len + adducs4len;
368 ucs4 = realloc (ucs4, maxucs4len * sizeof (uint32_t));
369 if (!ucs4)
370return STRINGPREP_MALLOC_ERROR;
371
372 rc = stringprep_4i (ucs4, &ucs4len, maxucs4len, flags, profile);
373 adducs4len += 50;
374 }
375 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
376 if (rc != STRINGPREP_OK)
377 {
378 free (ucs4);
379 return rc;
380 }
381
382 utf8 = stringprep_ucs4_to_utf8 (ucs4, ucs4len, 0, 0);
383 free (ucs4);
384 if (!utf8)
385 return STRINGPREP_MALLOC_ERROR;
386
387 if (strlen (utf8) >= maxlen)
388 {
389 free (utf8);
390 return STRINGPREP_TOO_SMALL_BUFFER;
391 }
392
393 strcpy (in, utf8);/* flawfinder: ignore */
394
395 free (utf8);
396
397 return STRINGPREP_OK;
398}
399
400/**
401 * stringprep_profile:
402 * @in: input array with UTF-8 string to prepare.
403 * @out: output variable with pointer to newly allocate string.
404 * @profile: name of stringprep profile to use.
405 * @flags: stringprep profile flags, or 0.
406 *
407 * Prepare the input zero terminated UTF-8 string according to the
408 * stringprep profile, and return the result in a newly allocated
409 * variable.
410 *
411 * Note that you must convert strings entered in the systems locale
412 * into UTF-8 before using this function, see
413 * stringprep_locale_to_utf8().
414 *
415 * The output @out variable must be deallocated by the caller.
416 *
417 * The @flags are one of Stringprep_profile_flags, or 0.
418 *
419 * The @profile specifies the name of the stringprep profile to use.
420 * It must be one of the internally supported stringprep profiles.
421 *
422 * Return value: Returns %STRINGPREP_OK iff successful, or an error code.
423 **/
424int
425stringprep_profile (const char *in,
426 char **out,
427 const char *profile,
428 Stringprep_profile_flags flags)
429{
430 const Stringprep_profiles *p;
431 char *str = NULL;
432 size_t len = strlen (in) + 1;
433 int rc;
434
435 for (p = &stringprep_profiles[0]; p->name; p++)
436 if (strcmp (p->name, profile) == 0)
437 break;
438
439 if (!p || !p->name || !p->tables)
440 return STRINGPREP_UNKNOWN_PROFILE;
441
442 do
443 {
444 if (str)
445free (str);
446 str = (char *) malloc (len);
447 if (str == NULL)
448return STRINGPREP_MALLOC_ERROR;
449
450 strcpy (str, in);
451
452 rc = stringprep (str, len, flags, p->tables);
453 len += 50;
454 }
455 while (rc == STRINGPREP_TOO_SMALL_BUFFER);
456
457 if (rc == STRINGPREP_OK)
458 *out = str;
459 else
460 free (str);
461
462 return rc;
463}
464
/*! \mainpage GNU Internationalized Domain Name Library
 *
 * \section intro Introduction
 *
 * GNU Libidn is an implementation of the Stringprep, Punycode and IDNA
 * specifications defined by the IETF Internationalized Domain Names
 * (IDN) working group, used for internationalized domain names.  The
 * package is available under the GNU Lesser General Public License.
 *
 * The library contains a generic Stringprep implementation that does
 * Unicode 3.2 NFKC normalization, mapping and prohibition of
 * characters, and bidirectional character handling.  Profiles for iSCSI,
 * Kerberos 5, Nameprep, SASL and XMPP are included.  Punycode and ASCII
 * Compatible Encoding (ACE) via IDNA are supported.
 *
 * The Stringprep API consists of two main functions, one for converting
 * data from the system's native representation into UTF-8, and one
 * function to perform the Stringprep processing.  Adding a new
 * Stringprep profile for your application within the API is
 * straightforward.  The Punycode API consists of one encoding function
 * and one decoding function.  The IDNA API consists of the ToASCII and
 * ToUnicode functions, as well as a high-level interface for converting
 * entire domain names to and from the ACE encoded form.
 *
 * The library is used by, e.g., GNU SASL and Shishi to process user
 * names and passwords.  Libidn can be built into GNU Libc to enable a
 * new system-wide getaddrinfo() flag for IDN processing.
 *
 * Libidn is developed for the GNU/Linux system, but runs on over 20 Unix
 * platforms (including Solaris, IRIX, AIX, and Tru64) and Windows.
 * Libidn is written in C and (parts of) the API is accessible from C,
 * C++, Emacs Lisp, Python and Java.
 *
 * The project web page:\n
 * http://www.gnu.org/software/libidn/
 *
 * The software archive:\n
 * ftp://alpha.gnu.org/pub/gnu/libidn/
 *
 * For more information see:\n
 * http://www.ietf.org/html.charters/idn-charter.html\n
 * http://www.ietf.org/rfc/rfc3454.txt (stringprep specification)\n
 * http://www.ietf.org/rfc/rfc3490.txt (idna specification)\n
 * http://www.ietf.org/rfc/rfc3491.txt (nameprep specification)\n
 * http://www.ietf.org/rfc/rfc3492.txt (punycode specification)\n
 * http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-04.txt\n
 * http://www.ietf.org/internet-drafts/draft-ietf-krb-wg-utf8-profile-01.txt\n
 * http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt\n
 * http://www.ietf.org/internet-drafts/draft-ietf-sasl-saslprep-00.txt\n
 * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt\n
 * http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt\n
 *
 * Further information and paid contract development:\n
 * Simon Josefsson <simon@josefsson.org>
 *
 * \section examples Examples
 *
 * \include example.c
 * \include example3.c
 * \include example4.c
 */
526
527/**
528 * STRINGPREP_VERSION
529 *
530 * String defined via CPP denoting the header file version number.
531 * Used together with stringprep_check_version() to verify header file
532 * and run-time library consistency.
533 */
534
535/**
536 * STRINGPREP_MAX_MAP_CHARS
537 *
538 * Maximum number of code points that can replace a single code point,
539 * during stringprep mapping.
540 */
541
/**
 * Stringprep_rc:
 * @STRINGPREP_OK: Successful operation.  This value is guaranteed to
 *   always be zero, the remaining ones are only guaranteed to hold
 *   non-zero values, for logical comparison purposes.
 * @STRINGPREP_CONTAINS_UNASSIGNED: String contains unassigned Unicode
 *   code points, which is forbidden by the profile.
 * @STRINGPREP_CONTAINS_PROHIBITED: String contains code points
 *   prohibited by the profile.
 * @STRINGPREP_BIDI_BOTH_L_AND_RAL: String contains code points with
 *   conflicting bidirectional category.
 * @STRINGPREP_BIDI_LEADTRAIL_NOT_RAL: Leading and trailing character
 *   in string not of proper bidirectional category.
 * @STRINGPREP_BIDI_CONTAINS_PROHIBITED: Contains prohibited code
 *   points detected by bidirectional code.
 * @STRINGPREP_TOO_SMALL_BUFFER: Buffer handed to function was too
 *   small.  This usually indicates a problem in the calling
 *   application.
 * @STRINGPREP_PROFILE_ERROR: The stringprep profile was inconsistent.
 *   This usually indicates an internal error in the library.
 * @STRINGPREP_FLAG_ERROR: The supplied flag conflicted with profile.
 *   This usually indicates a problem in the calling application.
 * @STRINGPREP_UNKNOWN_PROFILE: The supplied profile name was not
 *   known to the library.
 * @STRINGPREP_NFKC_FAILED: The Unicode NFKC operation failed.  This
 *   usually indicates an internal error in the library.
 * @STRINGPREP_MALLOC_ERROR: The malloc() was out of memory.  This is
 *   usually a fatal error.
 *
 * Enumerated return codes of stringprep(), stringprep_profile()
 * functions (and macros using those functions).  The value 0 is
 * guaranteed to always correspond to success.
 */
575
/**
 * Stringprep_profile_flags:
 * @STRINGPREP_NO_NFKC: Disable the NFKC normalization, as well as
 *   selecting the non-NFKC case folding tables.  Usually the profile
 *   specifies BIDI and NFKC settings, and applications should not
 *   override it unless in special situations.
 * @STRINGPREP_NO_BIDI: Disable the BIDI step.  Usually the profile
 *   specifies BIDI and NFKC settings, and applications should not
 *   override it unless in special situations.
 * @STRINGPREP_NO_UNASSIGNED: Make the library return with an error if
 *   the string contains unassigned characters according to the profile.
 *
 * Stringprep profile flags.
 */
590
591/**
592 * Stringprep_profile_steps:
593 *
594 * Various steps in the stringprep algorithm. You really want to
595 * study the source code to understand this one. Only useful if you
596 * want to add another profile.
597 */
598
/**
 * stringprep_nameprep:
 * @in: input/output array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the nameprep profile.
 * The AllowUnassigned flag is true, use
 * stringprep_nameprep_no_unassigned() if you want a false
 * AllowUnassigned.  Returns 0 iff successful, or an error code.
 **/
609
/**
 * stringprep_nameprep_no_unassigned:
 * @in: input/output array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the nameprep profile.
 * The AllowUnassigned flag is false, use stringprep_nameprep() for
 * true AllowUnassigned.  Returns 0 iff successful, or an error code.
 **/
619
/**
 * stringprep_iscsi:
 * @in: input/output array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the draft iSCSI
 * stringprep profile.  Returns 0 iff successful, or an error code.
 **/
628
/**
 * stringprep_kerberos5:
 * @in: input/output array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the draft Kerberos5
 * stringprep profile.  Returns 0 iff successful, or an error code.
 **/
637
/**
 * stringprep_plain:
 * @in: input/output array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the draft SASL
 * ANONYMOUS profile.  Returns 0 iff successful, or an error code.
 **/
646
/**
 * stringprep_xmpp_nodeprep:
 * @in: input/output array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the draft XMPP node
 * identifier profile.  Returns 0 iff successful, or an error code.
 **/
655
/**
 * stringprep_xmpp_resourceprep:
 * @in: input/output array with string to prepare.
 * @maxlen: maximum length of input/output array.
 *
 * Prepare the input UTF-8 string according to the draft XMPP resource
 * identifier profile.  Returns 0 iff successful, or an error code.
 **/

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status