monotone

monotone Mtn Source Tree

Root/basic_io.hh

1#ifndef __BASIC_IO_HH__
2#define __BASIC_IO_HH__
3
4// Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
13
14#include <vector>
15#include <map>
16
17#include "paths.hh"
18#include "sanity.hh"
19#include "vocab.hh"
20#include "numeric_vocab.hh"
21
22// This file provides parsing and printing primitives used by the
23// higher level parser and printer routines for the datatypes cset,
24// roster/marking_map and revision.
25
26namespace basic_io
27{
28
29 namespace
30 {
31 namespace syms
32 {
33 // general format symbol
34 symbol const format_version("format_version");
35
36 // roster symbols
37 symbol const dir("dir");
38 symbol const file("file");
39 symbol const content("content");
40 symbol const attr("attr");
41
42 // 'local' roster and marking symbols
43 // FIXME: should these be listed as "general" symbols here as well?
44 symbol const ident("ident");
45 symbol const birth("birth");
46 symbol const dormant_attr("dormant_attr");
47
48 symbol const path_mark("path_mark");
49 symbol const content_mark("content_mark");
50 symbol const attr_mark("attr_mark");
51 }
52 }
53
// Kinds of token that tokenizer::get_token() can report.
enum token_type
{
  TOK_SYMBOL,  // run of alphanumerics / underscores starting with a letter
  TOK_STRING,  // text enclosed in double quotes
  TOK_HEX,     // hex digits enclosed in square brackets
  TOK_NONE     // end of input, or a character that starts no token
};
61
62 struct
63 input_source
64 {
65 size_t line, col;
66 std::string const & in;
67 std::string::const_iterator curr;
68 std::string name;
69 int lookahead;
70 char c;
71 input_source(std::string const & in, std::string const & nm)
72 : line(1), col(1), in(in), curr(in.begin()),
73name(nm), lookahead(0), c('\0')
74 {}
75
76 inline void peek()
77 {
78 if (LIKELY(curr != in.end()))
79 // we do want to distinguish between EOF and '\xff',
80 // so we translate '\xff' to 255u
81lookahead = widen<unsigned int,char>(*curr);
82 else
83lookahead = EOF;
84 }
85
86 inline void advance()
87 {
88 if (LIKELY(curr != in.end()))
89 {
90 c = *curr;
91 ++curr;
92 ++col;
93 if (c == '\n')
94 {
95 col = 1;
96 ++line;
97 }
98 }
99 peek();
100 }
101 void err(std::string const & s);
102 };
103
104 struct
105 tokenizer
106 {
107 input_source & in;
108 std::string::const_iterator begin;
109 std::string::const_iterator end;
110
111 tokenizer(input_source & i) : in(i), begin(in.curr), end(in.curr)
112 {}
113
114 inline void mark()
115 {
116 begin = in.curr;
117 end = begin;
118 }
119
120 inline void advance()
121 {
122 in.advance();
123 end = in.curr;
124 }
125
126 inline void store(std::string & val)
127 {
128 val.assign(begin, end);
129 }
130
131 inline token_type get_token(std::string & val)
132 {
133 in.peek();
134
135 while (true)
136 {
137 if (UNLIKELY(in.lookahead == EOF))
138 return TOK_NONE;
139 if (!is_space(in.lookahead))
140 break;
141 in.advance();
142 }
143
144 if (is_alpha(in.lookahead))
145{
146 mark();
147 while (is_alnum(in.lookahead) || in.lookahead == '_')
148 advance();
149 store(val);
150 return basic_io::TOK_SYMBOL;
151}
152 else if (in.lookahead == '[')
153{
154 in.advance();
155 mark();
156 while (static_cast<char>(in.lookahead) != ']')
157 {
158 if (UNLIKELY(in.lookahead == EOF))
159in.err("input stream ended in hex string");
160 if (UNLIKELY(!is_xdigit(in.lookahead)))
161 in.err("non-hex character in hex string");
162 advance();
163 }
164
165 store(val);
166
167 if (UNLIKELY(static_cast<char>(in.lookahead) != ']'))
168 in.err("hex string did not end with ']'");
169 in.advance();
170
171 return basic_io::TOK_HEX;
172}
173 else if (in.lookahead == '"')
174{
175 in.advance();
176 mark();
177 while (static_cast<char>(in.lookahead) != '"')
178 {
179 if (UNLIKELY(in.lookahead == EOF))
180in.err("input stream ended in string");
181 if (UNLIKELY(static_cast<char>(in.lookahead) == '\\'))
182{
183 // Possible escape: we understand escaped quotes and
184 // escaped backslashes. Nothing else. If we // happen to
185 // hit an escape, we stop doing the mark/store // thing
186 // and switch to copying and appending per-character
187 // until the // end of the token.
188
189 // So first, store what we have *before* the escape.
190 store(val);
191
192 // Then skip over the escape backslash.
193 in.advance();
194
195 // Make sure it's an escape we recognize.
196 if (UNLIKELY(!(static_cast<char>(in.lookahead) == '"'
197 ||
198 static_cast<char>(in.lookahead) == '\\')))
199 in.err("unrecognized character escape");
200
201 // Add the escaped character onto the accumulating token.
202 in.advance();
203 val += in.c;
204
205 // Now enter special slow loop for remainder.
206 while (static_cast<char>(in.lookahead) != '"')
207 {
208 if (UNLIKELY(in.lookahead == EOF))
209 in.err("input stream ended in string");
210 if (UNLIKELY(static_cast<char>(in.lookahead) == '\\'))
211 {
212 // Skip over any further escape marker.
213 in.advance();
214 if (UNLIKELY
215 (!(static_cast<char>(in.lookahead) == '"'
216 ||
217 static_cast<char>(in.lookahead) == '\\')))
218 in.err("unrecognized character escape");
219 }
220 in.advance();
221 val += in.c;
222 }
223 // When slow loop completes, return early.
224 if (static_cast<char>(in.lookahead) != '"')
225 in.err("string did not end with '\"'");
226 in.advance();
227
228 return basic_io::TOK_STRING;
229}
230 advance();
231 }
232
233 store(val);
234
235 if (UNLIKELY(static_cast<char>(in.lookahead) != '"'))
236 in.err("string did not end with '\"'");
237 in.advance();
238
239 return basic_io::TOK_STRING;
240}
241 else
242return basic_io::TOK_NONE;
243 }
244 void err(std::string const & s);
245 };
246
// Defined elsewhere.  NOTE(review): presumably produces the quoted and
// escaped form of s that the tokenizer's string rule reads back --
// confirm against the definition.
std::string escape(const std::string & s);
248
249 struct
250 stanza
251 {
252 stanza();
253 size_t indent;
254 std::vector<std::pair<symbol, std::string> > entries;
255 void push_hex_pair(symbol const & k, hexenc<id> const & v);
256 void push_hex_triple(symbol const & k, std::string const & n,
257 hexenc<id> const & v);
258 void push_str_pair(symbol const & k, std::string const & v);
259 void push_str_triple(symbol const & k, std::string const & n,
260 std::string const & v);
261 void push_file_pair(symbol const & k, file_path const & v);
262 void push_str_multi(symbol const & k,
263 std::vector<std::string> const & v);
264 };
265
266
267 // Note: printer uses a static buffer; thus only one buffer
268 // may be referenced (globally). An invariant will be triggered
269 // if more than one basic_io::printer is instantiated.
270 struct
271 printer
272 {
273 static std::string buf;
274 static int count;
275 printer();
276 ~printer();
277 void print_stanza(stanza const & st);
278 };
279
280 struct
281 parser
282 {
283 tokenizer & tok;
284 parser(tokenizer & t) : tok(t)
285 {
286 token.reserve(128);
287 advance();
288 }
289
290 std::string token;
291 token_type ttype;
292
293 void err(std::string const & s);
294 std::string tt2str(token_type tt);
295
296 inline void advance()
297 {
298 ttype = tok.get_token(token);
299 }
300
301 inline void eat(token_type want)
302 {
303 if (ttype != want)
304 err("wanted "
305 + tt2str(want)
306 + ", got "
307 + tt2str(ttype)
308 + (token.empty()
309 ? std::string("")
310 : (std::string(" with value ") + token)));
311 advance();
312 }
313
314 inline void str() { eat(basic_io::TOK_STRING); }
315 inline void sym() { eat(basic_io::TOK_SYMBOL); }
316 inline void hex() { eat(basic_io::TOK_HEX); }
317
318 inline void str(std::string & v) { v = token; str(); }
319 inline void sym(std::string & v) { v = token; sym(); }
320 inline void hex(std::string & v) { v = token; hex(); }
321 inline bool symp() { return ttype == basic_io::TOK_SYMBOL; }
322 inline bool symp(symbol const & val)
323 {
324 return ttype == basic_io::TOK_SYMBOL && token == val();
325 }
326 inline void esym(symbol const & val)
327 {
328 if (!(ttype == basic_io::TOK_SYMBOL && token == val()))
329 err("wanted symbol '"
330 + val() +
331 + "', got "
332 + tt2str(ttype)
333 + (token.empty()
334 ? std::string("")
335 : (std::string(" with value ") + token)));
336 advance();
337 }
338 };
339
340}
341
342// Local Variables:
343// mode: C++
344// fill-column: 76
345// c-file-style: "gnu"
346// indent-tabs-mode: nil
347// End:
348// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
349
350#endif // __BASIC_IO_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status