monotone

monotone Mtn Source Tree

Root/src/simplestring_xform.cc

1// Copyright (C) 2006 Timothy Brownawell <tbrownaw@gmail.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include "simplestring_xform.hh"
12#include "sanity.hh"
13#include "constants.hh"
14
15#include <set>
16#include <algorithm>
17#include <sstream>
18#include <iterator>
19
20using std::set;
21using std::string;
22using std::vector;
23using std::ostringstream;
24using std::ostream_iterator;
25using std::transform;
26
// Unary functor that folds one byte to lower case with ::tolower.  The
// argument is taken as unsigned char so that the value handed to ::tolower
// is never negative.
struct lowerize
{
  char operator()(unsigned char const & c) const
  {
    int const folded = ::tolower(static_cast<int>(c));
    return folded;
  }
};

// Return a copy of IN in which every byte has been passed through
// lowerize.  IN itself is left untouched.
string
lowercase(string const & in)
{
  string result;
  result.resize(in.size());
  transform(in.begin(), in.end(), result.begin(), lowerize());
  return result;
}
43
// Unary functor that folds one byte to upper case with ::toupper.  The
// argument is taken as unsigned char so that the value handed to ::toupper
// is never negative.
struct upperize
{
  char operator()(unsigned char const & c) const
  {
    int const folded = ::toupper(static_cast<int>(c));
    return folded;
  }
};

// Return a copy of IN in which every byte has been passed through
// upperize.  IN itself is left untouched.
string
uppercase(string const & in)
{
  string result;
  result.resize(in.size());
  transform(in.begin(), in.end(), result.begin(), upperize());
  return result;
}
60
61void split_into_lines(string const & in,
62 vector<string> & out,
63 split_flags::split_flags flags)
64{
65 return split_into_lines(in, constants::default_encoding, out, flags);
66}
67
68void split_into_lines(string const & in,
69 string const & encoding,
70 vector<string> & out,
71 split_flags::split_flags flags)
72{
73 string lc_encoding = lowercase(encoding);
74 out.clear();
75
76 // note: this function does not handle ISO-2022-X, Shift-JIS, and
77 // probably a good deal of other encodings as well. please expand
78 // the logic here if you can work out an easy way of doing line
79 // breaking on these encodings. currently it's just designed to
80 // work with charsets in which 0x0a / 0x0d are *always* \n and \r
81 // respectively.
82 //
83 // as far as I know, this covers the EUC, ISO-8859-X, GB, Big5, KOI,
84 // ASCII, and UTF-8 families of encodings.
85
86 if (lc_encoding == constants::default_encoding
87 || lc_encoding.find("ascii") != string::npos
88 || lc_encoding.find("8859") != string::npos
89 || lc_encoding.find("euc") != string::npos
90 || lc_encoding.find("koi") != string::npos
91 || lc_encoding.find("gb") != string::npos
92 || lc_encoding == "utf-8"
93 || lc_encoding == "utf_8"
94 || lc_encoding == "utf8")
95 {
96 string::size_type begin = 0;
97 string::size_type end = in.find_first_of("\r\n", begin);
98
99 while (end != string::npos && end >= begin)
100 {
101 string::size_type next_begin;
102
103 if (in.at(end) == '\r'
104 && in.size() > end+1
105 && in.at(end+1) == '\n')
106 next_begin = end + 2;
107 else
108 next_begin = end + 1;
109
110 if (flags & split_flags::keep_endings)
111 out.push_back(in.substr(begin, next_begin-begin));
112 else
113 out.push_back(in.substr(begin, end-begin));
114
115 begin = next_begin;
116
117 if (begin >= in.size())
118 break;
119 end = in.find_first_of("\r\n", begin);
120 }
121 if (begin < in.size()) {
122 // special case: last line without trailing newline
123 string s = in.substr(begin, in.size() - begin);
124 if (flags & split_flags::diff_compat) {
125 // special handling: produce diff(1) compatible output
126 s += (in.find_first_of("\r") != string::npos ? "\r\n" : "\n");
127 s += "\\ No newline at end of file";
128 }
129 out.push_back(s);
130 }
131 }
132 else
133 {
134 out.push_back(in);
135 }
136}
137
138
139
140void
141join_lines(vector<string> const & in,
142 string & out,
143 string const & linesep)
144{
145 join_lines(in.begin(), in.end(), out, linesep);
146}
147
// Concatenate the strings in [BEGIN, END) into OUT, writing LINESEP after
// every element -- including the last one.  The separator is written via
// its C-string form, so it stops at the first NUL byte it contains.  OUT is
// overwritten; an empty range leaves it empty.
void
join_lines(vector<string>::const_iterator begin,
           vector<string>::const_iterator end,
           string & out,
           string const & linesep)
{
  ostringstream buffer;
  char const * const separator = linesep.c_str();
  for (vector<string>::const_iterator line = begin; line != end; ++line)
    buffer << *line << separator;
  out = buffer.str();
}
158
159void
160prefix_lines_with(string const & prefix, string const & lines, string & out)
161{
162 vector<string> msgs;
163 split_into_lines(lines, msgs);
164
165 ostringstream oss;
166 for (vector<string>::const_iterator i = msgs.begin();
167 i != msgs.end();)
168 {
169 oss << prefix << *i;
170 i++;
171 if (i != msgs.end())
172 oss << '\n';
173 }
174
175 out = oss.str();
176}
177
// Append S to APPENDTO, skipping every '\n', '\r', '\t' and ' ' byte in S.
// The existing contents of APPENDTO are kept as they are.
//
// APPENDTO is first grown by the full length of S so the loop can write
// through operator[] without reallocating, and is then shrunk back to the
// number of bytes actually written.
//
// NOTE(review): APPENDTO and S are assumed to be distinct objects; the
// resize below would change S under the loop otherwise.
void
append_without_ws(string & appendto, string const & s)
{
  // string::size_type rather than unsigned: a narrower index would be
  // truncated for strings longer than UINT_MAX bytes.
  string::size_type pos = appendto.size();
  appendto.resize(pos + s.size());
  for (string::const_iterator i = s.begin();
       i != s.end(); ++i)
    {
      switch (*i)
        {
        case '\n':
        case '\r':
        case '\t':
        case ' ':
          // whitespace: drop it
          break;
        default:
          appendto[pos] = *i;
          ++pos;
          break;
        }
    }
  // discard the slots reserved for the bytes that were skipped
  appendto.resize(pos);
}
201
202string
203remove_ws(string const & s)
204{
205 string tmp;
206 append_without_ws(tmp, s);
207 return tmp;
208}
209
// Return S with every leading character that occurs in CHARS removed.
// A string consisting solely of such characters (or an empty string)
// yields the empty string.
string
trim_left(string const & s, string const & chars)
{
  string::size_type const first_kept = s.find_first_not_of(chars);

  // nothing survives: S is empty or made up entirely of CHARS
  if (first_kept == string::npos)
    return string();

  return s.substr(first_kept);
}
227
// Return S with every trailing character that occurs in CHARS removed.
// A string consisting solely of such characters (or an empty string)
// yields the empty string.
string
trim_right(string const & s, string const & chars)
{
  string::size_type const last_kept = s.find_last_not_of(chars);

  // nothing survives: S is empty or made up entirely of CHARS
  if (last_kept == string::npos)
    return string();

  return s.substr(0, last_kept + 1);
}
245
// Return S with every leading and trailing character that occurs in CHARS
// removed.  A string consisting solely of such characters (or an empty
// string) yields the empty string.
string
trim(string const & s, string const & chars)
{
  string::size_type const first_kept = s.find_first_not_of(chars);

  // nothing survives: S is empty or made up entirely of CHARS
  if (first_kept == string::npos)
    return string();

  // a kept character exists, so this search cannot fail and cannot land
  // before first_kept
  string::size_type const last_kept = s.find_last_not_of(chars);

  return s.substr(first_kept, last_kept - first_kept + 1);
}
266
267
268// Local Variables:
269// mode: C++
270// fill-column: 76
271// c-file-style: "gnu"
272// indent-tabs-mode: nil
273// End:
274// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status