monotone

monotone Mtn Source Tree

Root/basic_io.hh

1#ifndef __BASIC_IO_HH__
2#define __BASIC_IO_HH__
3
4// Copyright (C) 2004 Graydon Hoare <graydon@pobox.com>
5//
6// This program is made available under the GNU GPL version 2.0 or
7// greater. See the accompanying file COPYING for details.
8//
9// This program is distributed WITHOUT ANY WARRANTY; without even the
10// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
11// PURPOSE.
12
13
14#include <cstdio>
15#include <iosfwd>
16#include <string>
17#include <vector>
18#include <map>
19
20#include "paths.hh"
21#include "sanity.hh"
22#include "vocab.hh"
23
24// This file provides parsing and printing primitives used by the
25// higher level parser and printer routines for the datatypes cset,
26// roster/marking_map and revision.
27
28namespace basic_io
29{
30
// Kinds of token that tokenizer::get_token() can report.
//
//   TOK_SYMBOL  a bare word: starts with an alphabetic character and
//               continues with alphanumerics or '_'
//   TOK_STRING  a double-quoted string
//   TOK_HEX     hex digits enclosed in '[' ... ']'
//   TOK_NONE    no token: end of input, or a character that starts none
//               of the above
//
// Declared as a named enum (the C++ idiom) rather than a typedef of an
// anonymous enum; enumerator names, order and values are unchanged.
enum token_type
{
  TOK_SYMBOL,
  TOK_STRING,
  TOK_HEX,
  TOK_NONE
};
38
39 struct
40 input_source
41 {
42 size_t line, col;
43 std::string const & in;
44 std::string::const_iterator curr;
45 std::string name;
46 int lookahead;
47 char c;
48 input_source(std::string const & in, std::string const & nm)
49 : line(1), col(1), in(in), curr(in.begin()),
50name(nm), lookahead(0), c('\0')
51 {}
52
53 inline void peek()
54 {
55 if (LIKELY(curr != in.end()))
56lookahead = *curr;
57 else
58lookahead = EOF;
59 }
60
61 inline void advance()
62 {
63 if (LIKELY(curr != in.end()))
64 {
65 c = *curr;
66 ++curr;
67 ++col;
68 if (c == '\n')
69 {
70 col = 1;
71 ++line;
72 }
73 }
74 peek();
75 }
76 void err(std::string const & s);
77 };
78
79 struct
80 tokenizer
81 {
82 input_source & in;
83 std::string::const_iterator begin;
84 std::string::const_iterator end;
85
86 tokenizer(input_source & i) : in(i), begin(in.curr), end(in.curr)
87 {}
88
89 inline void mark()
90 {
91 begin = in.curr;
92 end = begin;
93 }
94
95 inline void advance()
96 {
97 in.advance();
98 end = in.curr;
99 }
100
101 inline void store(std::string & val)
102 {
103 val.assign(begin, end);
104 }
105
106 inline token_type get_token(std::string & val)
107 {
108 in.peek();
109
110 while (true)
111 {
112 if (UNLIKELY(in.lookahead == EOF))
113 return TOK_NONE;
114 if (!is_space(in.lookahead))
115 break;
116 in.advance();
117 }
118
119 if (is_alpha(in.lookahead))
120{
121 mark();
122 while (is_alnum(in.lookahead) || in.lookahead == '_')
123 advance();
124 store(val);
125 return basic_io::TOK_SYMBOL;
126}
127 else if (in.lookahead == '[')
128{
129 in.advance();
130 mark();
131 while (static_cast<char>(in.lookahead) != ']')
132 {
133 if (UNLIKELY(in.lookahead == EOF))
134in.err("input stream ended in hex string");
135 if (UNLIKELY(!is_xdigit(in.lookahead)))
136 in.err("non-hex character in hex string");
137 advance();
138 }
139
140 store(val);
141
142 if (UNLIKELY(static_cast<char>(in.lookahead) != ']'))
143 in.err("hex string did not end with ']'");
144 in.advance();
145
146 return basic_io::TOK_HEX;
147}
148 else if (in.lookahead == '"')
149{
150 in.advance();
151 mark();
152 while (static_cast<char>(in.lookahead) != '"')
153 {
154 if (UNLIKELY(in.lookahead == EOF))
155in.err("input stream ended in string");
156 if (UNLIKELY(static_cast<char>(in.lookahead) == '\\'))
157{
158 // Possible escape: we understand escaped quotes and
159 // escaped backslashes. Nothing else. If we // happen to
160 // hit an escape, we stop doing the mark/store // thing
161 // and switch to copying and appending per-character
162 // until the // end of the token.
163
164 // So first, store what we have *before* the escape.
165 store(val);
166
167 // Then skip over the escape backslash.
168 in.advance();
169
170 // Make sure it's an escape we recognize.
171 if (UNLIKELY(!(static_cast<char>(in.lookahead) == '"'
172 ||
173 static_cast<char>(in.lookahead) == '\\')))
174 in.err("unrecognized character escape");
175
176 // Add the escaped character onto the accumulating token.
177 in.advance();
178 val += in.c;
179
180 // Now enter special slow loop for remainder.
181 while (static_cast<char>(in.lookahead) != '"')
182 {
183 if (UNLIKELY(in.lookahead == EOF))
184 in.err("input stream ended in string");
185 if (UNLIKELY(static_cast<char>(in.lookahead) == '\\'))
186 {
187 // Skip over any further escape marker.
188 in.advance();
189 if (UNLIKELY
190 (!(static_cast<char>(in.lookahead) == '"'
191 ||
192 static_cast<char>(in.lookahead) == '\\')))
193 in.err("unrecognized character escape");
194 }
195 in.advance();
196 val += in.c;
197 }
198 // When slow loop completes, return early.
199 if (static_cast<char>(in.lookahead) != '"')
200 in.err("string did not end with '\"'");
201 in.advance();
202
203 return basic_io::TOK_STRING;
204}
205 advance();
206 }
207
208 store(val);
209
210 if (UNLIKELY(static_cast<char>(in.lookahead) != '"'))
211 in.err("string did not end with '\"'");
212 in.advance();
213
214 return basic_io::TOK_STRING;
215}
216 else
217return basic_io::TOK_NONE;
218 }
219 void err(std::string const & s);
220 };
221
// Returns an escaped copy of s.  Defined outside this header.
// NOTE(review): presumably the inverse of the unescaping done by
// tokenizer::get_token (backslash-escaping '"' and '\') -- confirm
// against the definition.
std::string escape(std::string const & s);
223
224 struct
225 stanza
226 {
227 stanza();
228 size_t indent;
229 std::vector<std::pair<symbol, std::string> > entries;
230 void push_hex_pair(symbol const & k, hexenc<id> const & v);
231 void push_hex_triple(symbol const & k, std::string const & n,
232 hexenc<id> const & v);
233 void push_str_pair(symbol const & k, std::string const & v);
234 void push_str_triple(symbol const & k, std::string const & n,
235 std::string const & v);
236 void push_file_pair(symbol const & k, file_path const & v);
237 void push_str_multi(symbol const & k,
238 std::vector<std::string> const & v);
239 };
240
241 struct
242 printer
243 {
244 static std::string buf;
245 printer();
246 void print_stanza(stanza const & st);
247 };
248
249 struct
250 parser
251 {
252 tokenizer & tok;
253 parser(tokenizer & t) : tok(t)
254 {
255 token.reserve(128);
256 advance();
257 }
258
259 std::string token;
260 token_type ttype;
261
262 void err(std::string const & s);
263 std::string tt2str(token_type tt);
264
265 inline void advance()
266 {
267 ttype = tok.get_token(token);
268 }
269
270 inline void eat(token_type want)
271 {
272 if (ttype != want)
273 err("wanted "
274 + tt2str(want)
275 + ", got "
276 + tt2str(ttype)
277 + (token.empty()
278 ? std::string("")
279 : (std::string(" with value ") + token)));
280 advance();
281 }
282
283 inline void str() { eat(basic_io::TOK_STRING); }
284 inline void sym() { eat(basic_io::TOK_SYMBOL); }
285 inline void hex() { eat(basic_io::TOK_HEX); }
286
287 inline void str(std::string & v) { v = token; str(); }
288 inline void sym(std::string & v) { v = token; sym(); }
289 inline void hex(std::string & v) { v = token; hex(); }
290 inline bool symp() { return ttype == basic_io::TOK_SYMBOL; }
291 inline bool symp(symbol const & val)
292 {
293 return ttype == basic_io::TOK_SYMBOL && token == val();
294 }
295 inline void esym(symbol const & val)
296 {
297 if (!(ttype == basic_io::TOK_SYMBOL && token == val()))
298 err("wanted symbol '"
299 + val() +
300 + "', got "
301 + tt2str(ttype)
302 + (token.empty()
303 ? std::string("")
304 : (std::string(" with value ") + token)));
305 advance();
306 }
307 };
308
309}
310
311// Local Variables:
312// mode: C++
313// fill-column: 76
314// c-file-style: "gnu"
315// indent-tabs-mode: nil
316// End:
317// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:
318
319#endif // __BASIC_IO_HH__

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status