monotone

monotone Mtn Source Tree

Root/simplestring_xform.cc

1#include "base.hh"
2#include "simplestring_xform.hh"
3#include "sanity.hh"
4#include "constants.hh"
5
6#include <set>
7#include <algorithm>
8#include <sstream>
9#include <iterator>
10
11using std::set;
12using std::string;
13using std::vector;
14using std::ostringstream;
15using std::ostream_iterator;
16using std::transform;
17
// Function object that maps a single char to its lowercase form using
// the C library's ::tolower (and therefore the current C locale).
struct
lowerize
{
  char operator()(char const & c) const
  {
    // The <ctype.h> functions require an argument that is representable
    // as unsigned char (or EOF).  Where plain char is signed, a byte with
    // the high bit set would be passed as a negative int, which is
    // undefined behaviour, so go through unsigned char first.
    return static_cast<char>(::tolower(static_cast<unsigned char>(c)));
  }
};
26
27string
28lowercase(string const & in)
29{
30 string n(in);
31 transform(n.begin(), n.end(), n.begin(), lowerize());
32 return n;
33}
34
// Function object that maps a single char to its uppercase form using
// the C library's ::toupper (and therefore the current C locale).
struct
upperize
{
  char operator()(char const & c) const
  {
    // The <ctype.h> functions require an argument that is representable
    // as unsigned char (or EOF).  Where plain char is signed, a byte with
    // the high bit set would be passed as a negative int, which is
    // undefined behaviour, so go through unsigned char first.
    return static_cast<char>(::toupper(static_cast<unsigned char>(c)));
  }
};
43
44string
45uppercase(string const & in)
46{
47 string n(in);
48 transform(n.begin(), n.end(), n.begin(), upperize());
49 return n;
50}
51
52void split_into_lines(string const & in,
53 vector<string> & out,
54 bool diff_compat)
55{
56 return split_into_lines(in, constants::default_encoding, out, diff_compat);
57}
58
59void split_into_lines(string const & in,
60 string const & encoding,
61 vector<string> & out)
62{
63 return split_into_lines(in, encoding, out, false);
64}
65
66void split_into_lines(string const & in,
67 string const & encoding,
68 vector<string> & out,
69 bool diff_compat)
70{
71 string lc_encoding = lowercase(encoding);
72 out.clear();
73
74 // note: this function does not handle ISO-2022-X, Shift-JIS, and
75 // probably a good deal of other encodings as well. please expand
76 // the logic here if you can work out an easy way of doing line
77 // breaking on these encodings. currently it's just designed to
78 // work with charsets in which 0x0a / 0x0d are *always* \n and \r
79 // respectively.
80 //
81 // as far as I know, this covers the EUC, ISO-8859-X, GB, Big5, KOI,
82 // ASCII, and UTF-8 families of encodings.
83
84 if (lc_encoding == constants::default_encoding
85 || lc_encoding.find("ascii") != string::npos
86 || lc_encoding.find("8859") != string::npos
87 || lc_encoding.find("euc") != string::npos
88 || lc_encoding.find("koi") != string::npos
89 || lc_encoding.find("gb") != string::npos
90 || lc_encoding == "utf-8"
91 || lc_encoding == "utf_8"
92 || lc_encoding == "utf8")
93 {
94 string::size_type begin = 0;
95 string::size_type end = in.find_first_of("\r\n", begin);
96
97 while (end != string::npos && end >= begin)
98 {
99 out.push_back(in.substr(begin, end-begin));
100 if (in.at(end) == '\r'
101 && in.size() > end+1
102 && in.at(end+1) == '\n')
103 begin = end + 2;
104 else
105 begin = end + 1;
106 if (begin >= in.size())
107 break;
108 end = in.find_first_of("\r\n", begin);
109 }
110 if (begin < in.size()) {
111 // special case: last line without trailing newline
112 string s = in.substr(begin, in.size() - begin);
113 if (diff_compat) {
114 // special handling: produce diff(1) compatible output
115 s += (in.find_first_of("\r") != string::npos ? "\r\n" : "\n");
116 s += "\\ No newline at end of file";
117 }
118 out.push_back(s);
119 }
120 }
121 else
122 {
123 out.push_back(in);
124 }
125}
126
127
128void
129split_into_lines(string const & in,
130 vector<string> & out)
131{
132 split_into_lines(in, constants::default_encoding, out);
133}
134
// Concatenate the elements of IN, writing LINESEP after *every* element
// (including the last), and store the result in OUT.  An empty IN gives
// an empty OUT.
void
join_lines(vector<string> const & in,
           string & out,
           string const & linesep)
{
  // The separator is used as a C string, so anything after an embedded
  // NUL in LINESEP is not emitted.
  char const * const sep = linesep.c_str();

  // Build into a temporary so OUT is only overwritten once we are done.
  string joined;
  for (vector<string>::const_iterator line = in.begin();
       line != in.end(); ++line)
    {
      joined += *line;
      joined += sep;
    }
  out = joined;
}
144
145void
146join_lines(vector<string> const & in,
147 string & out)
148{
149 join_lines(in, out, "\n");
150}
151
152void
153prefix_lines_with(string const & prefix, string const & lines, string & out)
154{
155 vector<string> msgs;
156 split_into_lines(lines, msgs);
157
158 ostringstream oss;
159 for (vector<string>::const_iterator i = msgs.begin();
160 i != msgs.end();)
161 {
162 oss << prefix << *i;
163 i++;
164 if (i != msgs.end())
165 oss << '\n';
166 }
167
168 out = oss.str();
169}
170
// Append S to APPENDTO, dropping every '\n', '\r', '\t' and ' '
// character of S on the way.  The existing contents of APPENDTO are
// left as they are.
//
// NOTE(review): APPENDTO is resized before S is iterated, so passing
// the same object for both arguments does not look supported.
void
append_without_ws(string & appendto, string const & s)
{
  // Use string::size_type for the write offset; a plain unsigned would
  // truncate the size where size_type is wider than unsigned.
  string::size_type pos = appendto.size();

  // Grow to the worst case (nothing dropped) up front, then write in
  // place; the surplus is trimmed off at the end.
  appendto.resize(pos + s.size());
  for (string::const_iterator i = s.begin();
       i != s.end(); ++i)
    {
      switch (*i)
        {
        case '\n':
        case '\r':
        case '\t':
        case ' ':
          // whitespace: skip
          break;
        default:
          appendto[pos] = *i;
          ++pos;
          break;
        }
    }
  appendto.resize(pos);
}
194
195string
196remove_ws(string const & s)
197{
198 string tmp;
199 append_without_ws(tmp, s);
200 return tmp;
201}
202
// Return a copy of S with leading and trailing whitespace ('\n', '\r',
// '\t' and ' ') removed.  Whitespace inside the string is kept.  A
// string that is empty or consists only of whitespace gives "".
string
trim_ws(string const & s)
{
  char const * const whitespace = "\n\r\t ";

  string::size_type const first = s.find_first_not_of(whitespace);
  if (first == string::npos)
    // Nothing but whitespace (or empty).  The previous implementation
    // returned such input unchanged because both of its searches
    // yielded npos and both trimming steps were skipped.
    return string();

  // A non-whitespace character exists, so this search cannot fail.
  string::size_type const last = s.find_last_not_of(whitespace);
  return s.substr(first, last - first + 1);
}
215
216#ifdef BUILD_UNIT_TESTS
217#include "unit_tests.hh"
218#include "vocab.hh"
219
220UNIT_TEST(simplestring_xform, caseconv)
221{
222 UNIT_TEST_CHECK(uppercase("hello") == "HELLO");
223 UNIT_TEST_CHECK(uppercase("heLlO") == "HELLO");
224 UNIT_TEST_CHECK(lowercase("POODLE DAY") == "poodle day");
225 UNIT_TEST_CHECK(lowercase("PooDLe DaY") == "poodle day");
226 UNIT_TEST_CHECK(uppercase("!@#$%^&*()") == "!@#$%^&*()");
227 UNIT_TEST_CHECK(lowercase("!@#$%^&*()") == "!@#$%^&*()");
228}
229
230UNIT_TEST(simplestring_xform, join_lines)
231{
232 vector<string> strs;
233 string joined;
234
235 strs.clear();
236 join_lines(strs, joined);
237 UNIT_TEST_CHECK(joined == "");
238
239 strs.push_back("hi");
240 join_lines(strs, joined);
241 UNIT_TEST_CHECK(joined == "hi\n");
242
243 strs.push_back("there");
244 join_lines(strs, joined);
245 UNIT_TEST_CHECK(joined == "hi\nthere\n");
246
247 strs.push_back("user");
248 join_lines(strs, joined);
249 UNIT_TEST_CHECK(joined == "hi\nthere\nuser\n");
250}
251
252UNIT_TEST(simplestring_xform, join_words)
253{
254 vector< utf8 > v;
255 set< utf8 > s;
256
257 v.clear();
258 UNIT_TEST_CHECK(join_words(v)() == "");
259
260 v.clear();
261 v.push_back(utf8("a"));
262 UNIT_TEST_CHECK(join_words(v)() == "a");
263 UNIT_TEST_CHECK(join_words(v, ", ")() == "a");
264
265 s.clear();
266 s.insert(utf8("a"));
267 UNIT_TEST_CHECK(join_words(s)() == "a");
268 UNIT_TEST_CHECK(join_words(s, ", ")() == "a");
269
270 v.clear();
271 v.push_back(utf8("a"));
272 v.push_back(utf8("b"));
273 UNIT_TEST_CHECK(join_words(v)() == "a b");
274 UNIT_TEST_CHECK(join_words(v, ", ")() == "a, b");
275
276 s.clear();
277 s.insert(utf8("b"));
278 s.insert(utf8("a"));
279 UNIT_TEST_CHECK(join_words(s)() == "a b");
280 UNIT_TEST_CHECK(join_words(s, ", ")() == "a, b");
281
282 v.clear();
283 v.push_back(utf8("a"));
284 v.push_back(utf8("b"));
285 v.push_back(utf8("c"));
286 UNIT_TEST_CHECK(join_words(v)() == "a b c");
287 UNIT_TEST_CHECK(join_words(v, ", ")() == "a, b, c");
288
289 s.clear();
290 s.insert(utf8("b"));
291 s.insert(utf8("a"));
292 s.insert(utf8("c"));
293 UNIT_TEST_CHECK(join_words(s)() == "a b c");
294 UNIT_TEST_CHECK(join_words(s, ", ")() == "a, b, c");
295}
296
297UNIT_TEST(simplestring_xform, split_into_words)
298{
299 vector< utf8 > words;
300
301 words = split_into_words(utf8(""));
302 UNIT_TEST_CHECK(words.empty());
303
304 words = split_into_words(utf8("foo"));
305 UNIT_TEST_CHECK(words.size() == 1);
306 UNIT_TEST_CHECK(words[0]() == "foo");
307
308 words = split_into_words(utf8("foo bar"));
309 UNIT_TEST_CHECK(words.size() == 2);
310 UNIT_TEST_CHECK(words[0]() == "foo");
311 UNIT_TEST_CHECK(words[1]() == "bar");
312
313 // describe() in commands.cc assumes this behavior. If it ever changes,
314 // remember to modify that function accordingly!
315 words = split_into_words(utf8("foo bar"));
316 UNIT_TEST_CHECK(words.size() == 3);
317 UNIT_TEST_CHECK(words[0]() == "foo");
318 UNIT_TEST_CHECK(words[1]() == "");
319 UNIT_TEST_CHECK(words[2]() == "bar");
320}
321
322UNIT_TEST(simplestring_xform, strip_ws)
323{
324 UNIT_TEST_CHECK(trim_ws("\n leading space") == "leading space");
325 UNIT_TEST_CHECK(trim_ws("trailing space \n") == "trailing space");
326 UNIT_TEST_CHECK(trim_ws("\t\n both \r \n\r\n") == "both");
327 UNIT_TEST_CHECK(remove_ws(" I like going\tfor walks\n ")
328 == "Ilikegoingforwalks");
329}
330
331#endif // BUILD_UNIT_TESTS
332
333// Local Variables:
334// mode: C++
335// fill-column: 76
336// c-file-style: "gnu"
337// indent-tabs-mode: nil
338// End:
339// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status