monotone

monotone Mtn Source Tree

Root/basic_io.hh

1#ifndef __BASIC_IO_HH__
2#define __BASIC_IO_HH__
3
4// Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
13
14#include <iosfwd>
15#include <string>
16#include <vector>
17#include <map>
18
19#include "paths.hh"
20#include "sanity.hh"
21#include "vocab.hh"
22#include "numeric_vocab.hh"
23
24// This file provides parsing and printing primitives used by the
25// higher level parser and printer routines for the datatypes cset,
26// roster/marking_map and revision.
27
28namespace basic_io
29{
30
31 namespace
32 {
33 namespace syms
34 {
35 // general format symbol
36 symbol const format_version("format_version");
37
38 // roster symbols
39 symbol const dir("dir");
40 symbol const file("file");
41 symbol const content("content");
42 symbol const attr("attr");
43
44 // 'local' roster and marking symbols
45 // FIXME: should these be listed as "general" symbols here as well?
46 symbol const ident("ident");
47 symbol const birth("birth");
48 symbol const dormant_attr("dormant_attr");
49
50 symbol const path_mark("path_mark");
51 symbol const content_mark("content_mark");
52 symbol const attr_mark("attr_mark");
53 }
54 }
55
// Classification of a lexeme handed back by tokenizer::get_token().
enum token_type
{
  TOK_SYMBOL,   // bare word: a letter followed by letters, digits or '_'
  TOK_STRING,   // text enclosed in double quotes
  TOK_HEX,      // hex digits enclosed in '[' ... ']'
  TOK_NONE      // end of input, or a character that starts no token
};
63
64 struct
65 input_source
66 {
67 size_t line, col;
68 std::string const & in;
69 std::string::const_iterator curr;
70 std::string name;
71 int lookahead;
72 char c;
73 input_source(std::string const & in, std::string const & nm)
74 : line(1), col(1), in(in), curr(in.begin()),
75name(nm), lookahead(0), c('\0')
76 {}
77
78 inline void peek()
79 {
80 if (LIKELY(curr != in.end()))
81 // we do want to distinguish between EOF and '\xff',
82 // so we translate '\xff' to 255u
83lookahead = widen<unsigned int,char>(*curr);
84 else
85lookahead = EOF;
86 }
87
88 inline void advance()
89 {
90 if (LIKELY(curr != in.end()))
91 {
92 c = *curr;
93 ++curr;
94 ++col;
95 if (c == '\n')
96 {
97 col = 1;
98 ++line;
99 }
100 }
101 peek();
102 }
103 void err(std::string const & s);
104 };
105
106 struct
107 tokenizer
108 {
109 input_source & in;
110 std::string::const_iterator begin;
111 std::string::const_iterator end;
112
113 tokenizer(input_source & i) : in(i), begin(in.curr), end(in.curr)
114 {}
115
116 inline void mark()
117 {
118 begin = in.curr;
119 end = begin;
120 }
121
122 inline void advance()
123 {
124 in.advance();
125 end = in.curr;
126 }
127
128 inline void store(std::string & val)
129 {
130 val.assign(begin, end);
131 }
132
133 inline token_type get_token(std::string & val)
134 {
135 in.peek();
136
137 while (true)
138 {
139 if (UNLIKELY(in.lookahead == EOF))
140 return TOK_NONE;
141 if (!is_space(in.lookahead))
142 break;
143 in.advance();
144 }
145
146 if (is_alpha(in.lookahead))
147{
148 mark();
149 while (is_alnum(in.lookahead) || in.lookahead == '_')
150 advance();
151 store(val);
152 return basic_io::TOK_SYMBOL;
153}
154 else if (in.lookahead == '[')
155{
156 in.advance();
157 mark();
158 while (static_cast<char>(in.lookahead) != ']')
159 {
160 if (UNLIKELY(in.lookahead == EOF))
161in.err("input stream ended in hex string");
162 if (UNLIKELY(!is_xdigit(in.lookahead)))
163 in.err("non-hex character in hex string");
164 advance();
165 }
166
167 store(val);
168
169 if (UNLIKELY(static_cast<char>(in.lookahead) != ']'))
170 in.err("hex string did not end with ']'");
171 in.advance();
172
173 return basic_io::TOK_HEX;
174}
175 else if (in.lookahead == '"')
176{
177 in.advance();
178 mark();
179 while (static_cast<char>(in.lookahead) != '"')
180 {
181 if (UNLIKELY(in.lookahead == EOF))
182in.err("input stream ended in string");
183 if (UNLIKELY(static_cast<char>(in.lookahead) == '\\'))
184{
185 // Possible escape: we understand escaped quotes and
186 // escaped backslashes. Nothing else. If we // happen to
187 // hit an escape, we stop doing the mark/store // thing
188 // and switch to copying and appending per-character
189 // until the // end of the token.
190
191 // So first, store what we have *before* the escape.
192 store(val);
193
194 // Then skip over the escape backslash.
195 in.advance();
196
197 // Make sure it's an escape we recognize.
198 if (UNLIKELY(!(static_cast<char>(in.lookahead) == '"'
199 ||
200 static_cast<char>(in.lookahead) == '\\')))
201 in.err("unrecognized character escape");
202
203 // Add the escaped character onto the accumulating token.
204 in.advance();
205 val += in.c;
206
207 // Now enter special slow loop for remainder.
208 while (static_cast<char>(in.lookahead) != '"')
209 {
210 if (UNLIKELY(in.lookahead == EOF))
211 in.err("input stream ended in string");
212 if (UNLIKELY(static_cast<char>(in.lookahead) == '\\'))
213 {
214 // Skip over any further escape marker.
215 in.advance();
216 if (UNLIKELY
217 (!(static_cast<char>(in.lookahead) == '"'
218 ||
219 static_cast<char>(in.lookahead) == '\\')))
220 in.err("unrecognized character escape");
221 }
222 in.advance();
223 val += in.c;
224 }
225 // When slow loop completes, return early.
226 if (static_cast<char>(in.lookahead) != '"')
227 in.err("string did not end with '\"'");
228 in.advance();
229
230 return basic_io::TOK_STRING;
231}
232 advance();
233 }
234
235 store(val);
236
237 if (UNLIKELY(static_cast<char>(in.lookahead) != '"'))
238 in.err("string did not end with '\"'");
239 in.advance();
240
241 return basic_io::TOK_STRING;
242}
243 else
244return basic_io::TOK_NONE;
245 }
246 void err(std::string const & s);
247 };
248
// Returns an escaped copy of s; defined out of line.
// NOTE(review): presumably the inverse of the escape handling in
// tokenizer::get_token (backslash before '"' and '\\') -- confirm against
// the definition.
std::string escape(const std::string & s);
250
251 struct
252 stanza
253 {
254 stanza();
255 size_t indent;
256 std::vector<std::pair<symbol, std::string> > entries;
257 void push_hex_pair(symbol const & k, hexenc<id> const & v);
258 void push_hex_triple(symbol const & k, std::string const & n,
259 hexenc<id> const & v);
260 void push_str_pair(symbol const & k, std::string const & v);
261 void push_str_triple(symbol const & k, std::string const & n,
262 std::string const & v);
263 void push_file_pair(symbol const & k, file_path const & v);
264 void push_str_multi(symbol const & k,
265 std::vector<std::string> const & v);
266 };
267
268
269 // Note: printer uses a static buffer; thus only one buffer
270 // may be referenced (globally). An invariant will be triggered
271 // if more than one basic_io::printer is instantiated.
272 struct
273 printer
274 {
275 static std::string buf;
276 static int count;
277 printer();
278 ~printer();
279 void print_stanza(stanza const & st);
280 };
281
282 struct
283 parser
284 {
285 tokenizer & tok;
286 parser(tokenizer & t) : tok(t)
287 {
288 token.reserve(128);
289 advance();
290 }
291
292 std::string token;
293 token_type ttype;
294
295 void err(std::string const & s);
296 std::string tt2str(token_type tt);
297
298 inline void advance()
299 {
300 ttype = tok.get_token(token);
301 }
302
303 inline void eat(token_type want)
304 {
305 if (ttype != want)
306 err("wanted "
307 + tt2str(want)
308 + ", got "
309 + tt2str(ttype)
310 + (token.empty()
311 ? std::string("")
312 : (std::string(" with value ") + token)));
313 advance();
314 }
315
316 inline void str() { eat(basic_io::TOK_STRING); }
317 inline void sym() { eat(basic_io::TOK_SYMBOL); }
318 inline void hex() { eat(basic_io::TOK_HEX); }
319
320 inline void str(std::string & v) { v = token; str(); }
321 inline void sym(std::string & v) { v = token; sym(); }
322 inline void hex(std::string & v) { v = token; hex(); }
323 inline bool symp() { return ttype == basic_io::TOK_SYMBOL; }
324 inline bool symp(symbol const & val)
325 {
326 return ttype == basic_io::TOK_SYMBOL && token == val();
327 }
328 inline void esym(symbol const & val)
329 {
330 if (!(ttype == basic_io::TOK_SYMBOL && token == val()))
331 err("wanted symbol '"
332 + val() +
333 + "', got "
334 + tt2str(ttype)
335 + (token.empty()
336 ? std::string("")
337 : (std::string(" with value ") + token)));
338 advance();
339 }
340 };
341
342}
343
344// Local Variables:
345// mode: C++
346// fill-column: 76
347// c-file-style: "gnu"
348// indent-tabs-mode: nil
349// End:
350// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
351
352#endif // __BASIC_IO_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status