monotone

monotone Mtn Source Tree

Root/file_io.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include <iostream>
12#include <fstream>
13
14#include <boost/shared_ptr.hpp>
15
16#include "botan/botan.h"
17#include "botan_pipe_cache.hh"
18
19#include "file_io.hh"
20#include "sanity.hh"
21#include "simplestring_xform.hh"
22#include "charset.hh"
23#include "platform-wrapped.hh"
24#include "numeric_vocab.hh"
25
26// this file deals with talking to the filesystem, loading and
27// saving files.
28
29using std::cin;
30using std::ifstream;
31using std::ios_base;
32using std::ofstream;
33using std::logic_error;
34using std::string;
35using std::vector;
36
37using boost::shared_ptr;
38
// Invariant check (via the I() macro): get_path_status() must report PATH
// as path::nonexistent.
void
assert_path_is_nonexistent(any_path const & path)
{
  I(get_path_status(path) == path::nonexistent);
}
44
// Invariant check (via the I() macro): get_path_status() must report PATH
// as path::file.
void
assert_path_is_file(any_path const & path)
{
  I(get_path_status(path) == path::file);
}
50
// Invariant check (via the I() macro): get_path_status() must report PATH
// as path::directory.
void
assert_path_is_directory(any_path const & path)
{
  I(get_path_status(path) == path::directory);
}
56
// Report MESSAGE through N() if PATH exists in any form; path_exists() is
// true for anything whose status is not path::nonexistent.
void
require_path_is_nonexistent(any_path const & path,
                            i18n_format const & message)
{
  N(!path_exists(path), message);
}
63
64void
65require_path_is_file(any_path const & path,
66 i18n_format const & message_if_nonexistent,
67 i18n_format const & message_if_directory)
68{
69 switch (get_path_status(path))
70 {
71 case path::nonexistent:
72 N(false, message_if_nonexistent);
73 break;
74 case path::file:
75 return;
76 case path::directory:
77 N(false, message_if_directory);
78 break;
79 }
80}
81
82void
83require_path_is_directory(any_path const & path,
84 i18n_format const & message_if_nonexistent,
85 i18n_format const & message_if_file)
86{
87 switch (get_path_status(path))
88 {
89 case path::nonexistent:
90 N(false, message_if_nonexistent);
91 break;
92 case path::file:
93 N(false, message_if_file);
94 case path::directory:
95 return;
96 break;
97 }
98}
99
100bool
101path_exists(any_path const & p)
102{
103 return get_path_status(p) != path::nonexistent;
104}
105
106bool
107directory_exists(any_path const & p)
108{
109 return get_path_status(p) == path::directory;
110}
111
112bool
113file_exists(any_path const & p)
114{
115 return get_path_status(p) == path::file;
116}
117
118namespace
119{
120 struct directory_not_empty_exception {};
121 struct directory_empty_helper : public dirent_consumer
122 {
123 virtual void consume(char const *)
124 { throw directory_not_empty_exception(); }
125 };
126}
127
128bool
129directory_empty(any_path const & path)
130{
131 directory_empty_helper h;
132 try {
133 do_read_directory(system_path(path).as_external(), h, h, h);
134 } catch (directory_not_empty_exception) {
135 return false;
136 }
137 return true;
138}
139
// Lookup table indexed by byte value: true for bytes that mark data as
// binary.  Filled in lazily by the first call to guess_binary().
static bool char_is_binary[256];
static bool did_char_is_binary_init;

// Record whether the byte C counts as a "binary" byte.
static void
set_char_is_binary(char c, bool is_binary)
{
  unsigned char const index = static_cast<unsigned char>(c);
  char_is_binary[index] = is_binary;
}

// Populate the table with NUL plus a fixed list of control bytes.
static void
init_char_is_binary()
{
  // these do not occur in ASCII text files
  // FIXME: this heuristic is (a) crap and (b) hardcoded. fix both these.
  // Should be calling a lua hook here that can use set_char_is_binary()
  // That will at least fix (b)
  static char const nontext_chars[] =
    "\x01\x02\x03\x04\x05\x06\x0e\x0f"
    "\x10\x11\x12\x13\x14\x15\x16\x17\x18"
    "\x19\x1a\x1c\x1d\x1e\x1f";

  // NUL terminates the list above, so it has to be flagged separately.
  set_char_is_binary('\0', true);
  for (char const * c = nontext_chars; *c != '\0'; ++c)
    set_char_is_binary(*c, true);
}

// Return true if S contains at least one byte flagged in the table, i.e.
// NUL or one of the listed control bytes; false otherwise (including for
// the empty string).
bool guess_binary(std::string const & s)
{
  if (!did_char_is_binary_init)
    {
      init_char_is_binary();
      did_char_is_binary_init = true;
    }

  for (std::string::const_iterator it = s.begin(); it != s.end(); ++it)
    {
      if (char_is_binary[static_cast<unsigned char>(*it)])
        return true;
    }
  return false;
}
181
182void
183mkdir_p(any_path const & p)
184{
185 switch (get_path_status(p))
186 {
187 case path::directory:
188 return;
189 case path::file:
190 E(false, F("could not create directory '%s': it is a file") % p);
191 case path::nonexistent:
192 std::string const current = p.as_external();
193 any_path const parent = p.dirname();
194 if (current != parent.as_external())
195 {
196 mkdir_p(parent);
197 }
198 do_mkdir(current);
199 }
200}
201
202void
203make_dir_for(any_path const & p)
204{
205 mkdir_p(p.dirname());
206}
207
208void
209delete_file(any_path const & p)
210{
211 require_path_is_file(p,
212 F("file to delete '%s' does not exist") % p,
213 F("file to delete, '%s', is not a file but a directory") % p);
214 do_remove(p.as_external());
215}
216
217void
218delete_dir_shallow(any_path const & p)
219{
220 require_path_is_directory(p,
221 F("directory to delete '%s' does not exist") % p,
222 F("directory to delete, '%s', is not a directory but a file") % p);
223 do_remove(p.as_external());
224}
225
226void
227delete_file_or_dir_shallow(any_path const & p)
228{
229 N(path_exists(p), F("object to delete, '%s', does not exist") % p);
230 do_remove(p.as_external());
231}
232
233namespace
234{
235 struct fill_pc_vec : public dirent_consumer
236 {
237 fill_pc_vec(vector<path_component> & v) : v(v) { v.clear(); }
238
239 // FIXME BUG: this treats 's' as being already utf8, but it is actually
240 // in the external character set. Also, will I() out on invalid
241 // pathnames, when it should N() or perhaps W() and skip.
242 virtual void consume(char const * s)
243 { v.push_back(path_component(s)); }
244
245 private:
246 vector<path_component> & v;
247 };
248
249 struct file_deleter : public dirent_consumer
250 {
251 file_deleter(any_path const & p) : parent(p) {}
252 virtual void consume(char const * f)
253 {
254 // FIXME: same bug as above.
255 do_remove((parent / path_component(f)).as_external());
256 }
257 private:
258 any_path const & parent;
259 };
260}
261
262static void
263do_remove_recursive(any_path const & p)
264{
265 // for the reasons described in walk_tree_recursive, we read the entire
266 // directory before recursing into any subdirs. however, it is safe to
267 // delete files as we encounter them, and we do so.
268 vector<path_component> subdirs;
269 fill_pc_vec subdir_fill(subdirs);
270 file_deleter delete_files(p);
271
272 do_read_directory(p.as_external(), delete_files, subdir_fill, delete_files);
273 for (vector<path_component>::const_iterator i = subdirs.begin();
274 i != subdirs.end(); i++)
275 do_remove_recursive(p / *i);
276
277 do_remove(p.as_external());
278}
279
280
// Remove directory P together with all of its contents.  Reports an error
// (via require_path_is_directory) if P does not exist or is a file; the
// actual removal is done by do_remove_recursive().
void
delete_dir_recursive(any_path const & p)
{
  require_path_is_directory(p,
                            F("directory to delete, '%s', does not exist") % p,
                            F("directory to delete, '%s', is a file") % p);

  do_remove_recursive(p);
}
290
291void
292move_file(any_path const & old_path,
293 any_path const & new_path)
294{
295 require_path_is_file(old_path,
296 F("rename source file '%s' does not exist") % old_path,
297 F("rename source file '%s' is a directory "
298 "-- bug in monotone?") % old_path);
299 require_path_is_nonexistent(new_path,
300 F("rename target '%s' already exists")
301 % new_path);
302 rename_clobberingly(old_path.as_external(), new_path.as_external());
303}
304
305void
306move_dir(any_path const & old_path,
307 any_path const & new_path)
308{
309 require_path_is_directory(old_path,
310 F("rename source dir '%s' does not exist")
311 % old_path,
312 F("rename source dir '%s' is a file "
313 "-- bug in monotone?") % old_path);
314 require_path_is_nonexistent(new_path,
315 F("rename target '%s' already exists")
316 % new_path);
317 rename_clobberingly(old_path.as_external(), new_path.as_external());
318}
319
320void
321move_path(any_path const & old_path,
322 any_path const & new_path)
323{
324 N(path_exists(old_path),
325 F("rename source path '%s' does not exist") % old_path);
326 require_path_is_nonexistent(new_path,
327 F("rename target '%s' already exists")
328 % new_path);
329 rename_clobberingly(old_path.as_external(), new_path.as_external());
330}
331
332void
333read_data(any_path const & p, data & dat)
334{
335 require_path_is_file(p,
336 F("file %s does not exist") % p,
337 F("file %s cannot be read as data; it is a directory") % p);
338
339 ifstream file(p.as_external().c_str(),
340 ios_base::in | ios_base::binary);
341 N(file, F("cannot open file %s for reading") % p);
342 unfiltered_pipe->start_msg();
343 file >> *unfiltered_pipe;
344 unfiltered_pipe->end_msg();
345 dat = data(unfiltered_pipe->read_all_as_string(Botan::Pipe::LAST_MESSAGE));
346}
347
348void read_directory(any_path const & path,
349 vector<path_component> & files,
350 vector<path_component> & dirs)
351{
352 vector<path_component> special_files;
353 fill_pc_vec ff(files), df(dirs), sf(special_files);
354 do_read_directory(path.as_external(), ff, df, sf);
355 E(special_files.empty(), F("cannot handle special files in dir '%s'") % path);
356}
357
358// This function can only be called once per run.
359void
360read_data_stdin(data & dat)
361{
362 static bool have_consumed_stdin = false;
363 N(!have_consumed_stdin, F("Cannot read standard input multiple times"));
364 have_consumed_stdin = true;
365 unfiltered_pipe->start_msg();
366 cin >> *unfiltered_pipe;
367 unfiltered_pipe->end_msg();
368 dat = data(unfiltered_pipe->read_all_as_string(Botan::Pipe::LAST_MESSAGE));
369}
370
371void
372read_data_for_command_line(utf8 const & path, data & dat)
373{
374 if (path() == "-")
375 read_data_stdin(dat);
376 else
377 read_data(system_path(path), dat);
378}
379
380
381// FIXME: this is probably not enough brains to actually manage "atomic
382// filesystem writes". at some point you have to draw the line with even
383// trying, and I'm not sure it's really a strict requirement of this tool,
384// but you might want to make this code a bit tighter.
385
386static void
387write_data_impl(any_path const & p,
388 data const & dat,
389 any_path const & tmp,
390 bool user_private)
391{
392 N(!directory_exists(p),
393 F("file '%s' cannot be overwritten as data; it is a directory") % p);
394
395 make_dir_for(p);
396
397 write_data_worker(p.as_external(), dat(), tmp.as_external(), user_private);
398}
399
400void
401write_data(file_path const & path, data const & dat)
402{
403 // use the bookkeeping root as the temporary directory.
404 assert_path_is_directory(bookkeeping_root);
405 write_data_impl(path, dat, bookkeeping_root, false);
406}
407
408void
409write_data(bookkeeping_path const & path, data const & dat)
410{
411 // use the bookkeeping root as the temporary directory.
412 assert_path_is_directory(bookkeeping_root);
413 write_data_impl(path, dat, bookkeeping_root, false);
414}
415
416void
417write_data(system_path const & path,
418 data const & data,
419 system_path const & tmpdir)
420{
421 write_data_impl(path, data, tmpdir, false);
422}
423
424void
425write_data_userprivate(system_path const & path,
426 data const & data,
427 system_path const & tmpdir)
428{
429 write_data_impl(path, data, tmpdir, true);
430}
431
432// recursive directory walking
433
// Out-of-line definition of the tree_walker destructor; it has nothing to
// release.
tree_walker::~tree_walker() {}
435
436bool
437tree_walker::visit_dir(file_path const & path)
438{
439 return true;
440}
441
442// subroutine of walk_tree_recursive: if the path composition of PATH and PC
443// is a valid file_path, write it to ENTRY and return true. otherwise,
444// generate an appropriate diagnostic and return false. in this context, an
445// invalid path is *not* an invariant failure, because it came from a
446// directory scan. ??? arguably belongs as a file_path method.
447static bool
448safe_compose(file_path const & path, path_component const & pc, bool isdir,
449 file_path & entry)
450{
451 try
452 {
453 entry = path / pc;
454 return true;
455 }
456 catch (logic_error)
457 {
458 // do what the above operator/ did, by hand, and then figure out what
459 // sort of diagnostic to issue.
460 utf8 badpth;
461 if (path.empty())
462 badpth = utf8(pc());
463 else
464 badpth = utf8(path.as_internal() + "/" + pc());
465
466 if (!isdir)
467 W(F("skipping file '%s' with unsupported name") % badpth);
468 else if (bookkeeping_path::internal_string_is_bookkeeping_path(badpth))
469 L(FL("ignoring bookkeeping directory '%s'") % badpth);
470 else
471 W(F("skipping directory '%s' with unsupported name") % badpth);
472 return false;
473 }
474}
475
476static void
477walk_tree_recursive(file_path const & path,
478 tree_walker & walker)
479{
480 // Read the directory up front, so that the directory handle is released
481 // before we recurse. This is important, because it can allocate rather a
482 // bit of memory (especially on ReiserFS, see [1]; opendir uses the
483 // filesystem's blocksize as a clue how much memory to allocate). We used
484 // to recurse into subdirectories on the fly; this left the memory
485 // describing _this_ directory pinned on the heap. Then our recursive
486 // call itself made another recursive call, etc., causing a huge spike in
487 // peak memory. By splitting the loop in half, we avoid this problem.
488 //
489 // [1] http://lkml.org/lkml/2006/2/24/215
490 vector<path_component> files, dirs;
491 read_directory(path, files, dirs);
492
493 for (vector<path_component>::const_iterator i = files.begin();
494 i != files.end(); ++i)
495 {
496 file_path entry;
497 if (safe_compose(path, *i, false, entry))
498 walker.visit_file(entry);
499 }
500
501 for (vector<path_component>::const_iterator i = dirs.begin();
502 i != dirs.end(); ++i)
503 {
504 file_path entry;
505 if (safe_compose(path, *i, true, entry))
506 if (walker.visit_dir(entry))
507 walk_tree_recursive(entry, walker);
508 }
509}
510
511// from some (safe) sub-entry of cwd
512void
513walk_tree(file_path const & path, tree_walker & walker)
514{
515 if (path.empty())
516 {
517 walk_tree_recursive(path, walker);
518 return;
519 }
520
521 switch (get_path_status(path))
522 {
523 case path::nonexistent:
524 N(false, F("no such file or directory: '%s'") % path);
525 break;
526 case path::file:
527 walker.visit_file(path);
528 break;
529 case path::directory:
530 if (walker.visit_dir(path))
531 walk_tree_recursive(path, walker);
532 break;
533 }
534}
535
536class file_hash_calc_task
537 : public threaded_task
538{
539 shared_ptr<file_path> path;
540 shared_ptr<file_id> ident;
541
542public:
543 file_hash_calc_task(shared_ptr<file_path> path, shared_ptr<file_id> ident)
544 : path(path),
545 ident(ident)
546 { }
547
548 virtual void operator()()
549 {
550 calculate_ident(*path, *ident);
551 }
552};
553
554bool
555ident_existing_file(worker_pool & pool, shared_ptr<file_path> p,
556 shared_ptr<file_id> ident)
557{
558 return ident_existing_file(pool, p, ident, get_path_status(*p));
559}
560
561bool
562ident_existing_file(worker_pool & pool, shared_ptr<file_path> p,
563 shared_ptr<file_id> ident, path::status status)
564{
565 switch (status)
566 {
567 case path::nonexistent:
568 return false;
569 case path::file:
570 break;
571 case path::directory:
572 W(F("expected file '%s', but it is a directory.") % p);
573 return false;
574 }
575
576 pool.add_job(new file_hash_calc_task(p, ident));
577 return true;
578}
579
580void
581calculate_ident(file_path const & file,
582 file_id & ident)
583{
584 // no conversions necessary, use streaming form
585 static cached_botan_pipe
586 p(new Botan::Pipe(new Botan::Hash_Filter("SHA-160")));
587
588 // Best to be safe and check it isn't a dir.
589 assert_path_is_file(file);
590 Botan::DataSource_Stream infile(file.as_external(), true);
591 p->process_msg(infile);
592
593 ident = file_id(p->read_all_as_string(Botan::Pipe::LAST_MESSAGE));
594}
595
596// Local Variables:
597// mode: C++
598// fill-column: 76
599// c-file-style: "gnu"
600// indent-tabs-mode: nil
601// End:
602// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status