monotone

monotone Mtn Source Tree

Root/basic_io.hh

1// Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>
2// 2008 Stephen Leake <stephen_leake@stephe-leake.org>
3//
4// This program is made available under the GNU GPL version 2.0 or
5// greater. See the accompanying file COPYING for details.
6//
7// This program is distributed WITHOUT ANY WARRANTY; without even the
8// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9// PURPOSE.
10
11#ifndef __BASIC_IO_HH__
12#define __BASIC_IO_HH__
13
14#include "vector.hh"
15#include <map>
16
17#include "paths.hh"
18#include "sanity.hh"
19#include "vocab.hh"
20#include "numeric_vocab.hh"
21#include "char_classifiers.hh"
22
23// This file provides parsing and printing primitives used by the
24// higher level parser and printer routines for the datatypes cset,
25// roster/marking_map and revision.
26
27namespace basic_io
28{
29
30 namespace
31 {
32 namespace syms
33 {
34 // general format symbol
35 symbol const format_version("format_version");
36
37 // common symbols
38 symbol const dir("dir");
39 symbol const file("file");
40 symbol const content("content");
41 symbol const size("size");
42 symbol const attr("attr");
43
44 symbol const content_mark("content_mark");
45 }
46 }
47
// Kinds of token reported by tokenizer::get_token().
enum token_type
{
  TOK_SYMBOL,  // identifier: a letter followed by letters, digits or '_'
  TOK_STRING,  // text enclosed in double quotes
  TOK_HEX,     // hex digits enclosed in '[' and ']'
  TOK_NONE     // end of input, or a character that starts no token
};
55
56 struct
57 input_source : public origin_aware
58 {
59 size_t line, col;
60 std::string const & in;
61 std::string::const_iterator curr;
62 std::string name;
63 int lookahead;
64 char c;
65 input_source(std::string const & in, std::string const & nm)
66 : line(1), col(1), in(in), curr(in.begin()),
67 name(nm), lookahead(0), c('\0')
68 {}
69 input_source(std::string const & in, std::string const & nm, origin::type w)
70 : origin_aware(w), line(1), col(1), in(in), curr(in.begin()),
71 name(nm), lookahead(0), c('\0')
72 {}
73
74 inline void peek()
75 {
76 if (LIKELY(curr != in.end()))
77 // we do want to distinguish between EOF and '\xff',
78 // so we translate '\xff' to 255u
79 lookahead = widen<unsigned int,char>(*curr);
80 else
81 lookahead = EOF;
82 }
83
84 inline void advance()
85 {
86 if (LIKELY(curr != in.end()))
87 {
88 c = *curr;
89 ++curr;
90 ++col;
91 if (c == '\n')
92 {
93 col = 1;
94 ++line;
95 }
96 }
97 peek();
98 }
99 void err(std::string const & s);
100 };
101
102 struct
103 tokenizer
104 {
105 input_source & in;
106 std::string::const_iterator begin;
107 std::string::const_iterator end;
108
109 tokenizer(input_source & i) : in(i), begin(in.curr), end(in.curr)
110 {}
111
112 inline void mark()
113 {
114 begin = in.curr;
115 end = begin;
116 }
117
118 inline void advance()
119 {
120 in.advance();
121 end = in.curr;
122 }
123
124 inline void store(std::string & val)
125 {
126 val.assign(begin, end);
127 }
128
129 inline token_type get_token(std::string & val)
130 {
131 in.peek();
132
133 while (true)
134 {
135 if (UNLIKELY(in.lookahead == EOF))
136 return TOK_NONE;
137 if (!is_space(in.lookahead))
138 break;
139 in.advance();
140 }
141
142 if (is_alpha(in.lookahead))
143 {
144 mark();
145 while (is_alnum(in.lookahead) || in.lookahead == '_')
146 advance();
147 store(val);
148 return basic_io::TOK_SYMBOL;
149 }
150 else if (in.lookahead == '[')
151 {
152 in.advance();
153 mark();
154 while (static_cast<char>(in.lookahead) != ']')
155 {
156 if (UNLIKELY(in.lookahead == EOF))
157 in.err("input stream ended in hex string");
158 if (UNLIKELY(!is_xdigit(in.lookahead)))
159 in.err("non-hex character in hex string");
160 advance();
161 }
162
163 store(val);
164
165 if (UNLIKELY(static_cast<char>(in.lookahead) != ']'))
166 in.err("hex string did not end with ']'");
167 in.advance();
168
169 return basic_io::TOK_HEX;
170 }
171 else if (in.lookahead == '"')
172 {
173 in.advance();
174 mark();
175 while (static_cast<char>(in.lookahead) != '"')
176 {
177 if (UNLIKELY(in.lookahead == EOF))
178 in.err("input stream ended in string");
179 if (UNLIKELY(static_cast<char>(in.lookahead) == '\\'))
180 {
181 // Possible escape: we understand escaped quotes and
182 // escaped backslashes. Nothing else. If we // happen to
183 // hit an escape, we stop doing the mark/store // thing
184 // and switch to copying and appending per-character
185 // until the // end of the token.
186
187 // So first, store what we have *before* the escape.
188 store(val);
189
190 // Then skip over the escape backslash.
191 in.advance();
192
193 // Make sure it's an escape we recognize.
194 if (UNLIKELY(!(static_cast<char>(in.lookahead) == '"'
195 ||
196 static_cast<char>(in.lookahead) == '\\')))
197 in.err("unrecognized character escape");
198
199 // Add the escaped character onto the accumulating token.
200 in.advance();
201 val += in.c;
202
203 // Now enter special slow loop for remainder.
204 while (static_cast<char>(in.lookahead) != '"')
205 {
206 if (UNLIKELY(in.lookahead == EOF))
207 in.err("input stream ended in string");
208 if (UNLIKELY(static_cast<char>(in.lookahead) == '\\'))
209 {
210 // Skip over any further escape marker.
211 in.advance();
212 if (UNLIKELY
213 (!(static_cast<char>(in.lookahead) == '"'
214 ||
215 static_cast<char>(in.lookahead) == '\\')))
216 in.err("unrecognized character escape");
217 }
218 in.advance();
219 val += in.c;
220 }
221 // When slow loop completes, return early.
222 if (static_cast<char>(in.lookahead) != '"')
223 in.err("string did not end with '\"'");
224 in.advance();
225
226 return basic_io::TOK_STRING;
227 }
228 advance();
229 }
230
231 store(val);
232
233 if (UNLIKELY(static_cast<char>(in.lookahead) != '"'))
234 in.err("string did not end with '\"'");
235 in.advance();
236
237 return basic_io::TOK_STRING;
238 }
239 else
240 return basic_io::TOK_NONE;
241 }
242 void err(std::string const & s);
243 };
244
// Returns an escaped copy of 's'.  NOTE(review): the definition is
// outside this header; presumably it backslash-escapes the '"' and
// '\' characters that tokenizer::get_token() un-escapes -- confirm
// against the definition.
std::string escape(const std::string & s);
246
247 struct
248 stanza
249 {
250 stanza();
251 size_t indent;
252 std::vector<std::pair<symbol, std::string> > entries;
253 void push_symbol(symbol const & k);
254 void push_hex_pair(symbol const & k, hexenc<id> const & v);
255 void push_binary_pair(symbol const & k, id const & v);
256 void push_binary_triple(symbol const & k, std::string const & n,
257 id const & v);
258 void push_str_pair(symbol const & k, std::string const & v);
259 void push_str_pair(symbol const & k, symbol const & v);
260 void push_str_triple(symbol const & k, std::string const & n,
261 std::string const & v);
262 void push_file_pair(symbol const & k, file_path const & v);
263 void push_str_multi(symbol const & k,
264 std::vector<std::string> const & v);
265 void push_str_multi(symbol const & k1,
266 symbol const & k2,
267 std::vector<std::string> const & v);
268 };
269
270
271 // Note: printer uses a static buffer; thus only one buffer
272 // may be referenced (globally). An invariant will be triggered
273 // if more than one basic_io::printer is instantiated.
274 struct
275 printer
276 {
277 static std::string buf;
278 static int count;
279 printer();
280 ~printer();
281 void print_stanza(stanza const & st);
282 };
283
284 struct
285 parser
286 {
287 tokenizer & tok;
288 parser(tokenizer & t) : tok(t)
289 {
290 token.reserve(128);
291 advance();
292 }
293
294 std::string token;
295 token_type ttype;
296
297 void err(std::string const & s);
298 std::string tt2str(token_type tt);
299
300 inline void advance()
301 {
302 ttype = tok.get_token(token);
303 }
304
305 inline void eat(token_type want)
306 {
307 if (ttype != want)
308 err("wanted "
309 + tt2str(want)
310 + ", got "
311 + tt2str(ttype)
312 + (token.empty()
313 ? std::string("")
314 : (std::string(" with value ") + token)));
315 advance();
316 }
317
318 inline void str() { eat(basic_io::TOK_STRING); }
319 inline void sym() { eat(basic_io::TOK_SYMBOL); }
320 inline void hex() { eat(basic_io::TOK_HEX); }
321
322 inline void str(std::string & v) { v = token; str(); }
323 inline void sym(std::string & v) { v = token; sym(); }
324 inline void hex(std::string & v) { v = token; hex(); }
325 inline bool symp() { return ttype == basic_io::TOK_SYMBOL; }
326 inline bool symp(symbol const & val)
327 {
328 return ttype == basic_io::TOK_SYMBOL && token == val();
329 }
330 inline void esym(symbol const & val)
331 {
332 if (!(ttype == basic_io::TOK_SYMBOL && token == val()))
333 err("wanted symbol '"
334 + val() +
335 + "', got "
336 + tt2str(ttype)
337 + (token.empty()
338 ? std::string("")
339 : (std::string(" with value ") + token)));
340 advance();
341 }
342 };
343
344}
345
346#endif // __BASIC_IO_HH__
347
348// Local Variables:
349// mode: C++
350// fill-column: 76
351// c-file-style: "gnu"
352// indent-tabs-mode: nil
353// End:
354// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status