monotone

monotone Mtn Source Tree

Root/pcrewrap.cc

1// Copyright (C) 2007 Zack Weinberg <zackw@panix.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include "pcrewrap.hh"
12#include "sanity.hh"
13#include <cstring>
14
15// This dirty trick is necessary to prevent the 'pcre' typedef defined by
16// pcre.h from colliding with namespace pcre.
17#define pcre pcre_t
18#include "pcre.h"
19#undef pcre
20
21using std::string;
22
23static NORETURN(void pcre_compile_error(int errcode, char const * err,
24 int erroff, char const * pattern));
25static NORETURN(void pcre_study_error(char const * err, char const * pattern));
26static NORETURN(void pcre_match_error(int errcode));
27
28inline unsigned int
29flags_to_internal(pcre::flags f)
30{
31 using namespace pcre;
32#define C(f_, x) (((f_) & (x)) ? PCRE_##x : 0)
33 unsigned int i = 0;
34 i |= C(f, NEWLINE_CR);
35 i |= C(f, NEWLINE_LF);
36 // NEWLINE_CRLF == NEWLINE_CR|NEWLINE_LF and so is handled above
37 i |= C(f, ANCHORED);
38 i |= C(f, NOTBOL);
39 i |= C(f, NOTEOL);
40 i |= C(f, NOTEMPTY);
41 i |= C(f, CASELESS);
42 i |= C(f, DOLLAR_ENDONLY);
43 i |= C(f, DOTALL);
44 i |= C(f, DUPNAMES);
45 i |= C(f, EXTENDED);
46 i |= C(f, FIRSTLINE);
47 i |= C(f, MULTILINE);
48 i |= C(f, UNGREEDY);
49#undef C
50 return i;
51}
52
53inline unsigned int
54get_capturecount(void const * bd)
55{
56 unsigned int cc;
57 int err = pcre_fullinfo(static_cast<pcre_t const *>(bd), 0,
58 PCRE_INFO_CAPTURECOUNT,
59 static_cast<void *>(&cc));
60 I(err == 0);
61 return cc;
62}
63
64namespace pcre
65{
66 void regex::init(char const * pattern, flags options)
67 {
68 int errcode;
69 int erroff;
70 char const * err;
71 basedat = pcre_compile2(pattern, flags_to_internal(options),
72 &errcode, &err, &erroff, 0);
73 if (!basedat)
74 pcre_compile_error(errcode, err, erroff, pattern);
75
76 pcre_extra *ed = pcre_study(basedat, 0, &err);
77 if (err)
78 pcre_study_error(err, pattern);
79 if (!ed)
80 {
81 // I resent that C++ requires this cast.
82 ed = (pcre_extra *)pcre_malloc(sizeof(pcre_extra));
83 std::memset(ed, 0, sizeof(pcre_extra));
84 }
85
86 // We set a fairly low recursion depth to avoid stack overflow.
87 // Per pcrestack(3), one should assume 500 bytes per recursion;
88 // it should be safe to let pcre have a megabyte of stack, so
89 // that's a depth of 2000, give or take. (For reference, the
90 // default stack limit on Linux is 8MB.)
91 ed->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
92 ed->match_limit_recursion = 2000;
93 extradat = ed;
94 }
95
96 regex::regex(char const * pattern, flags options)
97 {
98 this->init(pattern, options);
99 }
100
101 regex::regex(string const & pattern, flags options)
102 {
103 this->init(pattern.c_str(), options);
104 }
105
106 regex::~regex()
107 {
108 if (basedat)
109 pcre_free(const_cast<pcre_t *>(basedat));
110 if (extradat)
111 pcre_free(const_cast<pcre_extra *>(extradat));
112 }
113
114 bool
115 regex::match(string const & subject, flags options) const
116 {
117 int rc = pcre_exec(basedat, extradat,
118 subject.data(), subject.size(),
119 0, flags_to_internal(options), 0, 0);
120 if (rc == 0)
121 return true;
122 else if (rc == PCRE_ERROR_NOMATCH)
123 return false;
124 else
125 pcre_match_error(rc);
126 }
127} // namespace pcre
128
129// When the library returns an error, these functions discriminate between
130// bugs in monotone and user errors in regexp writing.
131static void
132pcre_compile_error(int errcode, char const * err,
133 int erroff, char const * pattern)
134{
135 // One of the more entertaining things about the PCRE API is that
136 // while the numeric error codes are documented, they do not get
137 // symbolic names.
138
139 switch (errcode)
140 {
141 case 21: // failed to get memory
142 throw std::bad_alloc();
143
144 case 10: // [code allegedly not in use]
145 case 11: // internal error: unexpected repeat
146 case 16: // erroffset passed as NULL
147 case 17: // unknown option bit(s) set
148 case 19: // [code allegedly not in use]
149 case 23: // internal error: code overflow
150 case 33: // [code allegedly not in use]
151 case 50: // [code allegedly not in use]
152 case 52: // internal error: overran compiling workspace
153 case 53: // internal error: previously-checked referenced subpattern
154 // not found
155 throw oops((F("while compiling regex \"%s\": %s") % pattern % err)
156 .str().c_str());
157
158 default:
159 // PCRE fails to distinguish between errors at no position and errors at
160 // character offset 0 in the pattern, so in practice we give the
161 // position-ful variant for all errors, but I'm leaving the == -1 check
162 // here in case PCRE gets fixed.
163 throw informative_failure((erroff == -1
164 ? (F("error in regex \"%s\": %s")
165 % pattern % err)
166 : (F("error near char %d of regex \"%s\": %s")
167 % (erroff + 1) % pattern % err)
168 ).str().c_str());
169 }
170}
171
172static void
173pcre_study_error(char const * err, char const * pattern)
174{
175 // This interface doesn't even *have* error codes.
176 // If the error is not out-of-memory, it's a bug.
177 if (!std::strcmp(err, "failed to get memory"))
178 throw std::bad_alloc();
179 else
180 throw oops((F("while studying regex \"%s\": %s") % pattern % err)
181 .str().c_str());
182}
183
184static void
185pcre_match_error(int errcode)
186{
187 // This interface provides error codes with symbolic constants for them!
188 // But it doesn't provide string versions of them. As most of them
189 // indicate bugs in monotone, it's not worth defining our own strings.
190
191 switch(errcode)
192 {
193 case PCRE_ERROR_NOMEMORY:
194 throw std::bad_alloc();
195
196 case PCRE_ERROR_MATCHLIMIT:
197 throw informative_failure
198 (_("backtrack limit exceeded in regular expression matching"));
199
200 case PCRE_ERROR_RECURSIONLIMIT:
201 throw informative_failure
202 (_("recursion limit exceeded in regular expression matching"));
203
204 case PCRE_ERROR_BADUTF8:
205 case PCRE_ERROR_BADUTF8_OFFSET:
206 throw informative_failure
207 (_("invalid UTF-8 sequence found during regular expression matching"));
208
209 default:
210 throw oops((F("pcre_match returned %d") % errcode)
211 .str().c_str());
212 }
213}
214
215// Local Variables:
216// mode: C++
217// fill-column: 76
218// c-file-style: "gnu"
219// indent-tabs-mode: nil
220// End:
221// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status