monotone

monotone Mtn Source Tree

Root/src/file_io.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include <iostream>
12#include <fstream>
13
14#include <botan/botan.h>
15#include "botan_pipe_cache.hh"
16
17#include "file_io.hh"
18#include "sanity.hh"
19#include "simplestring_xform.hh"
20#include "charset.hh"
21#include "platform-wrapped.hh"
22#include "numeric_vocab.hh"
23#include "vocab_cast.hh"
24
25// this file deals with talking to the filesystem, loading and
26// saving files.
27
28using std::cin;
29using std::ifstream;
30using std::ios_base;
31using std::logic_error;
32using std::string;
33using std::vector;
34
35void
36assert_path_is_nonexistent(any_path const & path)
37{
38 I(get_path_status(path) == path::nonexistent);
39}
40
41void
42assert_path_is_file(any_path const & path)
43{
44 I(get_path_status(path) == path::file);
45}
46
47void
48assert_path_is_directory(any_path const & path)
49{
50 I(get_path_status(path) == path::directory);
51}
52
53void
54require_path_is_nonexistent(any_path const & path,
55 i18n_format const & message)
56{
57 E(!path_exists(path), origin::user, message);
58}
59
60void
61require_path_is_file(any_path const & path,
62 i18n_format const & message_if_nonexistent,
63 i18n_format const & message_if_directory)
64{
65 switch (get_path_status(path))
66 {
67 case path::nonexistent:
68 E(false, origin::user, message_if_nonexistent);
69 break;
70 case path::file:
71 return;
72 case path::directory:
73 E(false, origin::user, message_if_directory);
74 break;
75 }
76}
77
78void
79require_path_is_directory(any_path const & path,
80 i18n_format const & message_if_nonexistent,
81 i18n_format const & message_if_file)
82{
83 switch (get_path_status(path))
84 {
85 case path::nonexistent:
86 E(false, origin::user, message_if_nonexistent);
87 break;
88 case path::file:
89 E(false, origin::user, message_if_file);
90 case path::directory:
91 return;
92 break;
93 }
94}
95
96bool
97path_exists(any_path const & p)
98{
99 return get_path_status(p) != path::nonexistent;
100}
101
102bool
103directory_exists(any_path const & p)
104{
105 return get_path_status(p) == path::directory;
106}
107
108bool
109file_exists(any_path const & p)
110{
111 return get_path_status(p) == path::file;
112}
113
114bool
115directory_empty(any_path const & path)
116{
117 struct directory_not_empty_exception {};
118 struct directory_empty_helper : public dirent_consumer
119 {
120 virtual void consume(char const *)
121 { throw directory_not_empty_exception(); }
122 };
123
124 directory_empty_helper h;
125 try {
126 read_directory(path, h, h, h);
127 } catch (directory_not_empty_exception) {
128 return false;
129 }
130 return true;
131}
132
// This is not the greatest heuristic ever; it just looks for characters in
// the original ASCII control code range (00 - 1F, 7F) that are not among
// the text-formatting controls 07 - 0D (BEL, BS, TAB, LF, VT, FF, CR).
// But then, GNU diff just looks for NULs.  We could do
136// better if this was detecting character encoding (because then we could
137// report wide encodings as such instead of treating them as binary) but the
138// application proper isn't set up for that.
139//
140// Everything > 128 *can* be a valid text character, depending on encoding,
141// even in the 80 - 9F region that Unicode reserves for yet more useless
142// control characters.
143//
144// N.B. the obvious algorithmic version of the inner loop here
145// u8 c = s[i];
146// if (c <= 0x06 || (c >= 0x0E && c < 0x20) || c == 0x7F)
147// return true;
148// is about twice as slow on current hardware (Intel Core2 quad).
149
// Returns true if any byte of 's' is flagged in the lookup table below,
// i.e. is 00-06, 0E-1F or 7F; returns false otherwise (including for an
// empty string).  Bytes 80 and above are never flagged: only the first
// 129 table entries are spelled out and the remainder are zero-initialized.
bool guess_binary(std::string const & s)
{
  static const bool char_is_binary[256] = {
  //_0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _A _B _C _D _E _F
    1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, // 0_
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6_
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, // 7_
    0                                               // 8+
  };

  // Index with the unsigned byte value so that bytes >= 0x80 do not turn
  // into negative subscripts where plain char is signed.
  for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
    {
      unsigned char const byte = static_cast<unsigned char>(*it);
      if (char_is_binary[byte])
        return true;
    }
  return false;
}
172
173void
174mkdir_p(any_path const & p)
175{
176 switch (get_path_status(p))
177 {
178 case path::directory:
179 return;
180 case path::file:
181 E(false, origin::system,
182 F("could not create directory '%s': it is a file") % p);
183 case path::nonexistent:
184 std::string const current = p.as_external();
185 any_path const parent = p.dirname();
186 if (current != parent.as_external())
187 {
188 mkdir_p(parent);
189 }
190 do_mkdir(current);
191 }
192}
193
194void
195make_dir_for(any_path const & p)
196{
197 mkdir_p(p.dirname());
198}
199
200void
201delete_file(any_path const & p)
202{
203 require_path_is_file(p,
204 F("file to delete '%s' does not exist") % p,
205 F("file to delete, '%s', is not a file but a directory") % p);
206 do_remove(p.as_external());
207}
208
209void
210delete_dir_shallow(any_path const & p)
211{
212 require_path_is_directory(p,
213 F("directory to delete '%s' does not exist") % p,
214 F("directory to delete, '%s', is not a directory but a file") % p);
215 do_remove(p.as_external());
216}
217
218void
219delete_file_or_dir_shallow(any_path const & p)
220{
221 E(path_exists(p), origin::user,
222 F("object to delete, '%s', does not exist") % p);
223 do_remove(p.as_external());
224}
225
226void
227delete_dir_recursive(any_path const & p)
228{
229 require_path_is_directory(p,
230 F("directory to delete, '%s', does not exist") % p,
231 F("directory to delete, '%s', is a file") % p);
232
233 do_remove_recursive(p.as_external());
234}
235
236void
237move_file(any_path const & old_path,
238 any_path const & new_path)
239{
240 require_path_is_file(old_path,
241 F("rename source file '%s' does not exist") % old_path,
242 F("rename source file '%s' is a directory "
243 "-- bug in monotone?") % old_path);
244 require_path_is_nonexistent(new_path,
245 F("rename target '%s' already exists")
246 % new_path);
247 rename_clobberingly(old_path.as_external(), new_path.as_external());
248}
249
250void
251move_dir(any_path const & old_path,
252 any_path const & new_path)
253{
254 require_path_is_directory(old_path,
255 F("rename source dir '%s' does not exist")
256 % old_path,
257 F("rename source dir '%s' is a file "
258 "-- bug in monotone?") % old_path);
259 require_path_is_nonexistent(new_path,
260 F("rename target '%s' already exists")
261 % new_path);
262 rename_clobberingly(old_path.as_external(), new_path.as_external());
263}
264
265void
266move_path(any_path const & old_path,
267 any_path const & new_path)
268{
269 E(path_exists(old_path), origin::user,
270 F("rename source path '%s' does not exist") % old_path);
271 require_path_is_nonexistent(new_path,
272 F("rename target '%s' already exists")
273 % new_path);
274 rename_clobberingly(old_path.as_external(), new_path.as_external());
275}
276
277void
278read_data(any_path const & p, data & dat)
279{
280 require_path_is_file(p,
281 F("file '%s' does not exist") % p,
282 F("file '%s' cannot be read as data; it is a directory") % p);
283
284 ifstream file(p.as_external().c_str(),
285 ios_base::in | ios_base::binary);
286 E(file, origin::user, F("cannot open file '%s' for reading") % p);
287 unfiltered_pipe->start_msg();
288 file >> *unfiltered_pipe;
289 unfiltered_pipe->end_msg();
290 origin::type data_from = p.made_from;
291 if (data_from != origin::internal || data_from == origin::database)
292 data_from = origin::system;
293 dat = data(unfiltered_pipe->read_all_as_string(Botan::Pipe::LAST_MESSAGE),
294 data_from);
295}
296
297// This function can only be called once per run.
298void
299read_data_stdin(data & dat)
300{
301 static bool have_consumed_stdin = false;
302 E(!have_consumed_stdin, origin::user,
303 F("cannot read standard input multiple times"));
304 have_consumed_stdin = true;
305 unfiltered_pipe->start_msg();
306 cin >> *unfiltered_pipe;
307 unfiltered_pipe->end_msg();
308 dat = data(unfiltered_pipe->read_all_as_string(Botan::Pipe::LAST_MESSAGE),
309 origin::user);
310}
311
312void
313read_data_for_command_line(utf8 const & path, data & dat)
314{
315 if (path() == "-")
316 read_data_stdin(dat);
317 else
318 read_data(system_path(path), dat);
319}
320
321
322// FIXME: this is probably not enough brains to actually manage "atomic
323// filesystem writes". at some point you have to draw the line with even
324// trying, and I'm not sure it's really a strict requirement of this tool,
325// but you might want to make this code a bit tighter.
326
327static void
328write_data_impl(any_path const & p,
329 data const & dat,
330 any_path const & tmp,
331 bool user_private)
332{
333 E(!directory_exists(p), origin::user,
334 F("file '%s' cannot be overwritten as data; it is a directory") % p);
335
336 make_dir_for(p);
337
338 write_data_worker(p.as_external(), dat(), tmp.as_external(), user_private);
339}
340
341void
342write_data(file_path const & path, data const & dat)
343{
344 // use the bookkeeping root as the temporary directory.
345 assert_path_is_directory(bookkeeping_root);
346 write_data_impl(path, dat, bookkeeping_root, false);
347}
348
349void
350write_data(bookkeeping_path const & path, data const & dat)
351{
352 // use the bookkeeping root as the temporary directory.
353 assert_path_is_directory(bookkeeping_root);
354 write_data_impl(path, dat, bookkeeping_root, false);
355}
356
357void
358write_data(system_path const & path,
359 data const & data,
360 system_path const & tmpdir)
361{
362 write_data_impl(path, data, tmpdir, false);
363}
364
365void
366write_data_userprivate(system_path const & path,
367 data const & data,
368 system_path const & tmpdir)
369{
370 write_data_impl(path, data, tmpdir, true);
371}
372
373// recursive directory walking
374
375tree_walker::~tree_walker() {}
376
377bool
378tree_walker::visit_dir(file_path const & path)
379{
380 return true;
381}
382
383static void
384walk_tree_recursive(file_path const & path,
385 tree_walker & walker)
386{
387 // Read the directory up front, so that the directory handle is released
388 // before we recurse. This is important, because it can allocate rather a
389 // bit of memory (especially on ReiserFS, see [1]; opendir uses the
390 // filesystem's blocksize as a clue how much memory to allocate). We used
391 // to recurse into subdirectories on the fly; this left the memory
392 // describing _this_ directory pinned on the heap. Then our recursive
393 // call itself made another recursive call, etc., causing a huge spike in
394 // peak memory. By splitting the loop in half, we avoid this problem.
395 //
396 // [1] http://lkml.org/lkml/2006/2/24/215
397 vector<file_path> files, dirs;
398 fill_path_vec<file_path> fill_files(path, files, false);
399 fill_path_vec<file_path> fill_dirs(path, dirs, true);
400
401 read_directory(path, fill_files, fill_dirs);
402
403 for (vector<file_path>::const_iterator i = files.begin();
404 i != files.end(); ++i)
405 walker.visit_file(*i);
406
407 for (vector<file_path>::const_iterator i = dirs.begin();
408 i != dirs.end(); ++i)
409 if (walker.visit_dir(*i))
410 walk_tree_recursive(*i, walker);
411}
412
413// from some (safe) sub-entry of cwd
414void
415walk_tree(file_path const & path, tree_walker & walker)
416{
417 if (path.empty())
418 {
419 walk_tree_recursive(path, walker);
420 return;
421 }
422
423 switch (get_path_status(path))
424 {
425 case path::nonexistent:
426 E(false, origin::user, F("no such file or directory: '%s'") % path);
427 break;
428 case path::file:
429 walker.visit_file(path);
430 break;
431 case path::directory:
432 if (walker.visit_dir(path))
433 walk_tree_recursive(path, walker);
434 break;
435 }
436}
437
438bool
439ident_existing_file(file_path const & p, file_id & ident)
440{
441 return ident_existing_file(p, ident, get_path_status(p));
442}
443
444bool
445ident_existing_file(file_path const & p, file_id & ident, path::status status)
446{
447 switch (status)
448 {
449 case path::nonexistent:
450 return false;
451 case path::file:
452 break;
453 case path::directory:
454 W(F("expected file '%s', but it is a directory.") % p);
455 return false;
456 }
457
458 calculate_ident(p, ident);
459 return true;
460}
461
462void
463calculate_ident(file_path const & file,
464 file_id & ident)
465{
466 // no conversions necessary, use streaming form
467 static cached_botan_pipe
468 p(new Botan::Pipe(new Botan::Hash_Filter("SHA-160")));
469
470 // Best to be safe and check it isn't a dir.
471 assert_path_is_file(file);
472 Botan::DataSource_Stream infile(file.as_external(), true);
473 p->process_msg(infile);
474
475 ident = file_id(p->read_all_as_string(Botan::Pipe::LAST_MESSAGE),
476 origin::internal);
477}
478
479// Local Variables:
480// mode: C++
481// fill-column: 76
482// c-file-style: "gnu"
483// indent-tabs-mode: nil
484// End:
485// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status