monotone

monotone Mtn Source Tree

Root/pcre/pcre_ucp_searchfuncs.c

1/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2008 University of Cambridge
10
11-----------------------------------------------------------------------------
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36POSSIBILITY OF SUCH DAMAGE.
37-----------------------------------------------------------------------------
38*/
39
40
41/* This module contains code for searching the table of Unicode character
42properties. */
43
44#include "pcre_config.h"
45
46#include "pcre_internal.h"
47
48#include "ucp.h" /* Category definitions */
49#include "ucpinternal.h" /* Internal table details */
50#include "ucptable.h" /* The table itself */
51
52
53/* Table to translate from particular type value to the general value. */
54
55static const int ucp_gentype[] = {
56 ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
57 ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
58 ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
59 ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
60 ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
61 ucp_P, ucp_P, /* Ps, Po */
62 ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
63 ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
64};
65
66
67
68/*************************************************
69* Search table and return type *
70*************************************************/
71
72/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
73character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
74
75Arguments:
76 c the character value
77 type_ptr the detailed character type is returned here
78 script_ptr the script is returned here
79
80Returns: the character type category
81*/
82
83int
84_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
85{
86int bot = 0;
87int top = sizeof(ucp_table)/sizeof(cnode);
88int mid;
89
90/* The table is searched using a binary chop. You might think that using
91intermediate variables to hold some of the common expressions would speed
92things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
93makes things a lot slower. */
94
95for (;;)
96 {
97 if (top <= bot)
98 {
99 *type_ptr = ucp_Cn;
100 *script_ptr = ucp_Common;
101 return ucp_C;
102 }
103 mid = (bot + top) >> 1;
104 if (c == (ucp_table[mid].f0 & f0_charmask)) break;
105 if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
106 else
107 {
108 if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
109 c <= (ucp_table[mid].f0 & f0_charmask) +
110 (ucp_table[mid].f1 & f1_rangemask)) break;
111 bot = mid + 1;
112 }
113 }
114
115/* Found an entry in the table. Set the script and detailed type values, and
116return the general type. */
117
118*script_ptr = (ucp_table[mid].f0 & f0_scriptmask) >> f0_scriptshift;
119*type_ptr = (ucp_table[mid].f1 & f1_typemask) >> f1_typeshift;
120
121return ucp_gentype[*type_ptr];
122}
123
124
125
126/*************************************************
127* Search table and return other case *
128*************************************************/
129
130/* If the given character is a letter, and there is another case for the
131letter, return the other case. Otherwise, return -1.
132
133Arguments:
134 c the character value
135
136Returns: the other case or NOTACHAR if none
137*/
138
139unsigned int
140_pcre_ucp_othercase(const unsigned int c)
141{
142int bot = 0;
143int top = sizeof(ucp_table)/sizeof(cnode);
144int mid, offset;
145
146/* The table is searched using a binary chop. You might think that using
147intermediate variables to hold some of the common expressions would speed
148things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
149makes things a lot slower. */
150
151for (;;)
152 {
153 if (top <= bot) return -1;
154 mid = (bot + top) >> 1;
155 if (c == (ucp_table[mid].f0 & f0_charmask)) break;
156 if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
157 else
158 {
159 if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
160 c <= (ucp_table[mid].f0 & f0_charmask) +
161 (ucp_table[mid].f1 & f1_rangemask)) break;
162 bot = mid + 1;
163 }
164 }
165
166/* Found an entry in the table. Return NOTACHAR for a range entry. Otherwise
167return the other case if there is one, else NOTACHAR. */
168
169if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return NOTACHAR;
170
171offset = ucp_table[mid].f1 & f1_casemask;
172if ((offset & f1_caseneg) != 0) offset |= f1_caseneg;
173return (offset == 0)? NOTACHAR : c + offset;
174}
175
176
177/* End of pcre_ucp_searchfuncs.c */

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status