monotone

monotone Mtn Source Tree

Root/simplestring_xform.cc

1#include "simplestring_xform.hh"
2#include "sanity.hh"
3#include "constants.hh"
4
5#include <sstream>
6
7using std::string;
8using std::vector;
9using std::ostringstream;
10using std::ostream_iterator;
11using std::transform;
12
// Function object that maps one character to its lowercase form using the
// C library's tolower(); intended as the operation for std::transform.
struct
lowerize
{
  char operator()(char const & c) const
  {
    // tolower() is only defined for arguments representable as unsigned
    // char (or EOF).  Converting through unsigned char keeps bytes >= 0x80
    // from becoming negative ints where plain char is signed.
    return static_cast<char>(::tolower(static_cast<unsigned char>(c)));
  }
};
21
22string
23lowercase(string const & in)
24{
25 string n(in);
26 transform(n.begin(), n.end(), n.begin(), lowerize());
27 return n;
28}
29
// Function object that maps one character to its uppercase form using the
// C library's toupper(); intended as the operation for std::transform.
struct
upperize
{
  char operator()(char const & c) const
  {
    // toupper() is only defined for arguments representable as unsigned
    // char (or EOF).  Converting through unsigned char keeps bytes >= 0x80
    // from becoming negative ints where plain char is signed.
    return static_cast<char>(::toupper(static_cast<unsigned char>(c)));
  }
};
38
39string
40uppercase(string const & in)
41{
42 string n(in);
43 transform(n.begin(), n.end(), n.begin(), upperize());
44 return n;
45}
46
47void split_into_lines(string const & in,
48 string const & encoding,
49 vector<string> & out)
50{
51 string lc_encoding = lowercase(encoding);
52 out.clear();
53
54 // note: this function does not handle ISO-2022-X, Shift-JIS, and
55 // probably a good deal of other encodings as well. please expand
56 // the logic here if you can work out an easy way of doing line
57 // breaking on these encodings. currently it's just designed to
58 // work with charsets in which 0x0a / 0x0d are *always* \n and \r
59 // respectively.
60 //
61 // as far as I know, this covers the EUC, ISO-8859-X, GB, Big5, KOI,
62 // ASCII, and UTF-8 families of encodings.
63
64 if (lc_encoding == constants::default_encoding
65 || lc_encoding.find("ascii") != string::npos
66 || lc_encoding.find("8859") != string::npos
67 || lc_encoding.find("euc") != string::npos
68 || lc_encoding.find("koi") != string::npos
69 || lc_encoding.find("gb") != string::npos
70 || lc_encoding == "utf-8"
71 || lc_encoding == "utf_8"
72 || lc_encoding == "utf8")
73 {
74 string::size_type begin = 0;
75 string::size_type end = in.find_first_of("\r\n", begin);
76
77 while (end != string::npos && end >= begin)
78 {
79 out.push_back(in.substr(begin, end-begin));
80 if (in.at(end) == '\r'
81 && in.size() > end+1
82 && in.at(end+1) == '\n')
83 begin = end + 2;
84 else
85 begin = end + 1;
86 if (begin >= in.size())
87 break;
88 end = in.find_first_of("\r\n", begin);
89 }
90 if (begin < in.size())
91 out.push_back(in.substr(begin, in.size() - begin));
92 }
93 else
94 {
95 out.push_back(in);
96 }
97}
98
99
100void
101split_into_lines(string const & in,
102 vector<string> & out)
103{
104 split_into_lines(in, constants::default_encoding, out);
105}
106
// Concatenate the strings of IN into OUT, writing LINESEP after every
// element, including the last one.  An empty IN yields an empty OUT.
void
join_lines(vector<string> const & in,
           string & out,
           string const & linesep)
{
  // the separator is emitted as a C string, so anything after an embedded
  // NUL in LINESEP is not written
  char const * const sep = linesep.c_str();

  ostringstream joined;
  for (vector<string>::const_iterator line = in.begin();
       line != in.end(); ++line)
    joined << *line << sep;

  out = joined.str();
}
116
117void
118join_lines(vector<string> const & in,
119 string & out)
120{
121 join_lines(in, out, "\n");
122}
123
124void
125prefix_lines_with(string const & prefix, string const & lines, string & out)
126{
127 vector<string> msgs;
128 split_into_lines(lines, msgs);
129
130 ostringstream oss;
131 for (vector<string>::const_iterator i = msgs.begin();
132 i != msgs.end();)
133 {
134 oss << prefix << *i;
135 i++;
136 if (i != msgs.end())
137 oss << '\n';
138 }
139
140 out = oss.str();
141}
142
// Append to APPENDTO every character of S except '\n', '\r', '\t' and
// ' '.
//
// APPENDTO is grown once to the largest size it could need, filled in
// place, and then shrunk to the number of characters actually kept.
void
append_without_ws(string & appendto, string const & s)
{
  // Take the source length before resizing: if S and APPENDTO are the
  // same object, the resize below changes s.size(), and iterating the
  // grown string would write past its end.
  string::size_type const n = s.size();

  // size_type rather than unsigned, so the offset is not truncated for
  // very large strings on platforms where size_t is wider than unsigned.
  string::size_type pos = appendto.size();
  appendto.resize(pos + n);

  for (string::size_type i = 0; i < n; ++i)
    {
      char const c = s[i];
      switch (c)
        {
        case '\n':
        case '\r':
        case '\t':
        case ' ':
          break;
        default:
          // reads stay below the old size, writes at or above it, so the
          // two never overlap even when S aliases APPENDTO
          appendto[pos] = c;
          ++pos;
          break;
        }
    }
  appendto.resize(pos);
}
166
167string
168remove_ws(string const & s)
169{
170 string tmp;
171 append_without_ws(tmp, s);
172 return tmp;
173}
174
// Return a copy of S with leading and trailing '\n', '\r', '\t' and ' '
// characters removed.  Whitespace between other characters is kept.
// A string made up entirely of those characters (or an empty string)
// yields an empty result.
string
trim_ws(string const & s)
{
  char const * const ws = "\n\r\t ";

  string::size_type const first = s.find_first_not_of(ws);
  // npos here means there is nothing but whitespace; without this check
  // the input would come back untrimmed.
  if (first == string::npos)
    return string();

  // a non-whitespace character exists, so this search cannot fail
  string::size_type const last = s.find_last_not_of(ws);
  return s.substr(first, last - first + 1);
}
187
188#ifdef BUILD_UNIT_TESTS
189#include "unit_tests.hh"
190#include <stdlib.h>
191
192UNIT_TEST(simplestring_xform, caseconv)
193{
194 BOOST_CHECK(uppercase("hello") == "HELLO");
195 BOOST_CHECK(uppercase("heLlO") == "HELLO");
196 BOOST_CHECK(lowercase("POODLE DAY") == "poodle day");
197 BOOST_CHECK(lowercase("PooDLe DaY") == "poodle day");
198 BOOST_CHECK(uppercase("!@#$%^&*()") == "!@#$%^&*()");
199 BOOST_CHECK(lowercase("!@#$%^&*()") == "!@#$%^&*()");
200}
201
202UNIT_TEST(simplestring_xform, join_lines)
203{
204 vector<string> strs;
205 string joined;
206
207 strs.clear();
208 join_lines(strs, joined);
209 BOOST_CHECK(joined == "");
210
211 strs.push_back("hi");
212 join_lines(strs, joined);
213 BOOST_CHECK(joined == "hi\n");
214
215 strs.push_back("there");
216 join_lines(strs, joined);
217 BOOST_CHECK(joined == "hi\nthere\n");
218
219 strs.push_back("user");
220 join_lines(strs, joined);
221 BOOST_CHECK(joined == "hi\nthere\nuser\n");
222}
223
224UNIT_TEST(simplestring_xform, strip_ws)
225{
226 BOOST_CHECK(trim_ws("\n leading space") == "leading space");
227 BOOST_CHECK(trim_ws("trailing space \n") == "trailing space");
228 BOOST_CHECK(trim_ws("\t\n both \r \n\r\n") == "both");
229 BOOST_CHECK(remove_ws(" I like going\tfor walks\n ")
230 == "Ilikegoingforwalks");
231}
232
233#endif // BUILD_UNIT_TESTS
234
235// Local Variables:
236// mode: C++
237// fill-column: 76
238// c-file-style: "gnu"
239// indent-tabs-mode: nil
240// End:
241// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status