monotone

monotone Mtn Source Tree

Root/basic_io.hh

1#ifndef __BASIC_IO_HH__
2#define __BASIC_IO_HH__
3
4// Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
13
14#include "vector.hh"
15#include <map>
16
17#include "paths.hh"
18#include "sanity.hh"
19#include "vocab.hh"
20#include "numeric_vocab.hh"
21#include "char_classifiers.hh"
22
23// This file provides parsing and printing primitives used by the
24// higher level parser and printer routines for the datatypes cset,
25// roster/marking_map and revision.
26
27namespace basic_io
28{
29
30 namespace
31 {
32 namespace syms
33 {
34 // general format symbol
35 symbol const format_version("format_version");
36
37 // roster symbols
38 symbol const dir("dir");
39 symbol const file("file");
40 symbol const content("content");
41 symbol const attr("attr");
42
43 // 'local' roster and marking symbols
44 // FIXME: should these be listed as "general" symbols here as well?
45 symbol const ident("ident");
46 symbol const birth("birth");
47 symbol const dormant_attr("dormant_attr");
48
49 symbol const path_mark("path_mark");
50 symbol const content_mark("content_mark");
51 symbol const attr_mark("attr_mark");
52 }
53 }
54
// Kinds of token reported by tokenizer::get_token().
enum token_type
{
  // starts with an alphabetic character, continues with alphanumerics
  // or '_'
  TOK_SYMBOL,
  // text enclosed in double quotes
  TOK_STRING,
  // hex digits enclosed in '[' ... ']'
  TOK_HEX,
  // end of input, or a character that does not start any token
  TOK_NONE
};
62
63 struct
64 input_source
65 {
66 size_t line, col;
67 std::string const & in;
68 std::string::const_iterator curr;
69 std::string name;
70 int lookahead;
71 char c;
72 input_source(std::string const & in, std::string const & nm)
73 : line(1), col(1), in(in), curr(in.begin()),
74name(nm), lookahead(0), c('\0')
75 {}
76
77 inline void peek()
78 {
79 if (LIKELY(curr != in.end()))
80 // we do want to distinguish between EOF and '\xff',
81 // so we translate '\xff' to 255u
82lookahead = widen<unsigned int,char>(*curr);
83 else
84lookahead = EOF;
85 }
86
87 inline void advance()
88 {
89 if (LIKELY(curr != in.end()))
90 {
91 c = *curr;
92 ++curr;
93 ++col;
94 if (c == '\n')
95 {
96 col = 1;
97 ++line;
98 }
99 }
100 peek();
101 }
102 void err(std::string const & s);
103 };
104
105 struct
106 tokenizer
107 {
108 input_source & in;
109 std::string::const_iterator begin;
110 std::string::const_iterator end;
111
112 tokenizer(input_source & i) : in(i), begin(in.curr), end(in.curr)
113 {}
114
115 inline void mark()
116 {
117 begin = in.curr;
118 end = begin;
119 }
120
121 inline void advance()
122 {
123 in.advance();
124 end = in.curr;
125 }
126
127 inline void store(std::string & val)
128 {
129 val.assign(begin, end);
130 }
131
132 inline token_type get_token(std::string & val)
133 {
134 in.peek();
135
136 while (true)
137 {
138 if (UNLIKELY(in.lookahead == EOF))
139 return TOK_NONE;
140 if (!is_space(in.lookahead))
141 break;
142 in.advance();
143 }
144
145 if (is_alpha(in.lookahead))
146{
147 mark();
148 while (is_alnum(in.lookahead) || in.lookahead == '_')
149 advance();
150 store(val);
151 return basic_io::TOK_SYMBOL;
152}
153 else if (in.lookahead == '[')
154{
155 in.advance();
156 mark();
157 while (static_cast<char>(in.lookahead) != ']')
158 {
159 if (UNLIKELY(in.lookahead == EOF))
160in.err("input stream ended in hex string");
161 if (UNLIKELY(!is_xdigit(in.lookahead)))
162 in.err("non-hex character in hex string");
163 advance();
164 }
165
166 store(val);
167
168 if (UNLIKELY(static_cast<char>(in.lookahead) != ']'))
169 in.err("hex string did not end with ']'");
170 in.advance();
171
172 return basic_io::TOK_HEX;
173}
174 else if (in.lookahead == '"')
175{
176 in.advance();
177 mark();
178 while (static_cast<char>(in.lookahead) != '"')
179 {
180 if (UNLIKELY(in.lookahead == EOF))
181in.err("input stream ended in string");
182 if (UNLIKELY(static_cast<char>(in.lookahead) == '\\'))
183{
184 // Possible escape: we understand escaped quotes and
185 // escaped backslashes. Nothing else. If we // happen to
186 // hit an escape, we stop doing the mark/store // thing
187 // and switch to copying and appending per-character
188 // until the // end of the token.
189
190 // So first, store what we have *before* the escape.
191 store(val);
192
193 // Then skip over the escape backslash.
194 in.advance();
195
196 // Make sure it's an escape we recognize.
197 if (UNLIKELY(!(static_cast<char>(in.lookahead) == '"'
198 ||
199 static_cast<char>(in.lookahead) == '\\')))
200 in.err("unrecognized character escape");
201
202 // Add the escaped character onto the accumulating token.
203 in.advance();
204 val += in.c;
205
206 // Now enter special slow loop for remainder.
207 while (static_cast<char>(in.lookahead) != '"')
208 {
209 if (UNLIKELY(in.lookahead == EOF))
210 in.err("input stream ended in string");
211 if (UNLIKELY(static_cast<char>(in.lookahead) == '\\'))
212 {
213 // Skip over any further escape marker.
214 in.advance();
215 if (UNLIKELY
216 (!(static_cast<char>(in.lookahead) == '"'
217 ||
218 static_cast<char>(in.lookahead) == '\\')))
219 in.err("unrecognized character escape");
220 }
221 in.advance();
222 val += in.c;
223 }
224 // When slow loop completes, return early.
225 if (static_cast<char>(in.lookahead) != '"')
226 in.err("string did not end with '\"'");
227 in.advance();
228
229 return basic_io::TOK_STRING;
230}
231 advance();
232 }
233
234 store(val);
235
236 if (UNLIKELY(static_cast<char>(in.lookahead) != '"'))
237 in.err("string did not end with '\"'");
238 in.advance();
239
240 return basic_io::TOK_STRING;
241}
242 else
243return basic_io::TOK_NONE;
244 }
245 void err(std::string const & s);
246 };
247
// Declared here, defined elsewhere.
// NOTE(review): presumably produces the quoted/escaped form that
// tokenizer::get_token() reads back -- confirm against the definition.
std::string escape(const std::string & s);
249
250 struct
251 stanza
252 {
253 stanza();
254 size_t indent;
255 std::vector<std::pair<symbol, std::string> > entries;
256 void push_hex_pair(symbol const & k, hexenc<id> const & v);
257 void push_binary_pair(symbol const & k, id const & v);
258 void push_binary_triple(symbol const & k, std::string const & n,
259 id const & v);
260 void push_str_pair(symbol const & k, std::string const & v);
261 void push_str_triple(symbol const & k, std::string const & n,
262 std::string const & v);
263 void push_file_pair(symbol const & k, file_path const & v);
264 void push_str_multi(symbol const & k,
265 std::vector<std::string> const & v);
266 };
267
268
269 // Note: printer uses a static buffer; thus only one buffer
270 // may be referenced (globally). An invariant will be triggered
271 // if more than one basic_io::printer is instantiated.
272 struct
273 printer
274 {
275 static std::string buf;
276 static int count;
277 printer();
278 ~printer();
279 void print_stanza(stanza const & st);
280 };
281
282 struct
283 parser
284 {
285 tokenizer & tok;
286 parser(tokenizer & t) : tok(t)
287 {
288 token.reserve(128);
289 advance();
290 }
291
292 std::string token;
293 token_type ttype;
294
295 void err(std::string const & s);
296 std::string tt2str(token_type tt);
297
298 inline void advance()
299 {
300 ttype = tok.get_token(token);
301 }
302
303 inline void eat(token_type want)
304 {
305 if (ttype != want)
306 err("wanted "
307 + tt2str(want)
308 + ", got "
309 + tt2str(ttype)
310 + (token.empty()
311 ? std::string("")
312 : (std::string(" with value ") + token)));
313 advance();
314 }
315
316 inline void str() { eat(basic_io::TOK_STRING); }
317 inline void sym() { eat(basic_io::TOK_SYMBOL); }
318 inline void hex() { eat(basic_io::TOK_HEX); }
319
320 inline void str(std::string & v) { v = token; str(); }
321 inline void sym(std::string & v) { v = token; sym(); }
322 inline void hex(std::string & v) { v = token; hex(); }
323 inline bool symp() { return ttype == basic_io::TOK_SYMBOL; }
324 inline bool symp(symbol const & val)
325 {
326 return ttype == basic_io::TOK_SYMBOL && token == val();
327 }
328 inline void esym(symbol const & val)
329 {
330 if (!(ttype == basic_io::TOK_SYMBOL && token == val()))
331 err("wanted symbol '"
332 + val() +
333 + "', got "
334 + tt2str(ttype)
335 + (token.empty()
336 ? std::string("")
337 : (std::string(" with value ") + token)));
338 advance();
339 }
340 };
341
342}
343
344// Local Variables:
345// mode: C++
346// fill-column: 76
347// c-file-style: "gnu"
348// indent-tabs-mode: nil
349// End:
350// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
351
352#endif // __BASIC_IO_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status