monotone

monotone Mtn Source Tree

Root/file_io.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include <iostream>
12#include <fstream>
13
14#include "botan/botan.h"
15
16#include "file_io.hh"
17#include "sanity.hh"
18#include "simplestring_xform.hh"
19#include "charset.hh"
20#include "platform-wrapped.hh"
21#include "numeric_vocab.hh"
22
23// this file deals with talking to the filesystem, loading and
24// saving files.
25
26using std::cin;
27using std::ifstream;
28using std::ios_base;
29using std::ofstream;
30using std::logic_error;
31using std::string;
32using std::vector;
33
34void
35assert_path_is_nonexistent(any_path const & path)
36{
37 I(get_path_status(path) == path::nonexistent);
38}
39
40void
41assert_path_is_file(any_path const & path)
42{
43 I(get_path_status(path) == path::file);
44}
45
46void
47assert_path_is_directory(any_path const & path)
48{
49 I(get_path_status(path) == path::directory);
50}
51
52void
53require_path_is_nonexistent(any_path const & path,
54 i18n_format const & message)
55{
56 N(!path_exists(path), message);
57}
58
59void
60require_path_is_file(any_path const & path,
61 i18n_format const & message_if_nonexistent,
62 i18n_format const & message_if_directory)
63{
64 switch (get_path_status(path))
65 {
66 case path::nonexistent:
67 N(false, message_if_nonexistent);
68 break;
69 case path::file:
70 return;
71 case path::directory:
72 N(false, message_if_directory);
73 break;
74 }
75}
76
77void
78require_path_is_directory(any_path const & path,
79 i18n_format const & message_if_nonexistent,
80 i18n_format const & message_if_file)
81{
82 switch (get_path_status(path))
83 {
84 case path::nonexistent:
85 N(false, message_if_nonexistent);
86 break;
87 case path::file:
88 N(false, message_if_file);
89 case path::directory:
90 return;
91 break;
92 }
93}
94
95bool
96path_exists(any_path const & p)
97{
98 return get_path_status(p) != path::nonexistent;
99}
100
101bool
102directory_exists(any_path const & p)
103{
104 return get_path_status(p) == path::directory;
105}
106
107bool
108file_exists(any_path const & p)
109{
110 return get_path_status(p) == path::file;
111}
112
113namespace
114{
115 struct directory_not_empty_exception {};
116 struct directory_empty_helper : public dirent_consumer
117 {
118 virtual void consume(char const *)
119 { throw directory_not_empty_exception(); }
120 };
121}
122
123bool
124directory_empty(any_path const & path)
125{
126 directory_empty_helper h;
127 try {
128 do_read_directory(system_path(path).as_external(), h, h, h);
129 } catch (directory_not_empty_exception) {
130 return false;
131 }
132 return true;
133}
134
135static bool did_char_is_binary_init;
136static bool char_is_binary[256];
137
138static void
139set_char_is_binary(char c, bool is_binary)
140{
141 char_is_binary[static_cast<u8>(c)] = is_binary;
142}
143
144static void
145init_char_is_binary()
146{
147 // these do not occur in ASCII text files
148 // FIXME: this heuristic is (a) crap and (b) hardcoded. fix both these.
149 // Should be calling a lua hook here that can use set_char_is_binary()
150 // That will at least fix (b)
151 string nontext_chars("\x01\x02\x03\x04\x05\x06\x0e\x0f"
152 "\x10\x11\x12\x13\x14\x15\x16\x17\x18"
153 "\x19\x1a\x1c\x1d\x1e\x1f");
154 set_char_is_binary('\0', true);
155 for(size_t i = 0; i < nontext_chars.size(); ++i)
156 {
157 set_char_is_binary(nontext_chars[i], true);
158 }
159}
160
161bool guess_binary(string const & s)
162{
163 if (did_char_is_binary_init == false)
164 {
165 init_char_is_binary();
166 did_char_is_binary_init = true;
167 }
168
169 for (size_t i = 0; i < s.size(); ++i)
170 {
171 if (char_is_binary[ static_cast<u8>(s[i]) ])
172 return true;
173 }
174 return false;
175}
176
177void
178mkdir_p(any_path const & p)
179{
180 switch (get_path_status(p))
181 {
182 case path::directory:
183 return;
184 case path::file:
185 E(false, F("could not create directory '%s': it is a file") % p);
186 case path::nonexistent:
187 std::string const current = p.as_external();
188 any_path const parent = p.dirname();
189 if (current != parent.as_external())
190 {
191 mkdir_p(parent);
192 }
193 do_mkdir(current);
194 }
195}
196
197void
198make_dir_for(any_path const & p)
199{
200 mkdir_p(p.dirname());
201}
202
203void
204delete_file(any_path const & p)
205{
206 require_path_is_file(p,
207 F("file to delete '%s' does not exist") % p,
208 F("file to delete, '%s', is not a file but a directory") % p);
209 do_remove(p.as_external());
210}
211
212void
213delete_dir_shallow(any_path const & p)
214{
215 require_path_is_directory(p,
216 F("directory to delete '%s' does not exist") % p,
217 F("directory to delete, '%s', is not a directory but a file") % p);
218 do_remove(p.as_external());
219}
220
221void
222delete_file_or_dir_shallow(any_path const & p)
223{
224 N(path_exists(p), F("object to delete, '%s', does not exist") % p);
225 do_remove(p.as_external());
226}
227
228namespace
229{
230 struct fill_pc_vec : public dirent_consumer
231 {
232 fill_pc_vec(vector<path_component> & v) : v(v) { v.clear(); }
233
234 // FIXME BUG: this treats 's' as being already utf8, but it is actually
235 // in the external character set. Also, will I() out on invalid
236 // pathnames, when it should N() or perhaps W() and skip.
237 virtual void consume(char const * s)
238 { v.push_back(path_component(s)); }
239
240 private:
241 vector<path_component> & v;
242 };
243
244 struct file_deleter : public dirent_consumer
245 {
246 file_deleter(any_path const & p) : parent(p) {}
247 virtual void consume(char const * f)
248 {
249 // FIXME: same bug as above.
250 do_remove((parent / path_component(f)).as_external());
251 }
252 private:
253 any_path const & parent;
254 };
255}
256
257static void
258do_remove_recursive(any_path const & p)
259{
260 // for the reasons described in walk_tree_recursive, we read the entire
261 // directory before recursing into any subdirs. however, it is safe to
262 // delete files as we encounter them, and we do so.
263 vector<path_component> subdirs;
264 fill_pc_vec subdir_fill(subdirs);
265 file_deleter delete_files(p);
266
267 do_read_directory(p.as_external(), delete_files, subdir_fill, delete_files);
268 for (vector<path_component>::const_iterator i = subdirs.begin();
269 i != subdirs.end(); i++)
270 do_remove_recursive(p / *i);
271
272 do_remove(p.as_external());
273}
274
275
276void
277delete_dir_recursive(any_path const & p)
278{
279 require_path_is_directory(p,
280 F("directory to delete, '%s', does not exist") % p,
281 F("directory to delete, '%s', is a file") % p);
282
283 do_remove_recursive(p);
284}
285
286void
287move_file(any_path const & old_path,
288 any_path const & new_path)
289{
290 require_path_is_file(old_path,
291 F("rename source file '%s' does not exist") % old_path,
292 F("rename source file '%s' is a directory "
293 "-- bug in monotone?") % old_path);
294 require_path_is_nonexistent(new_path,
295 F("rename target '%s' already exists")
296 % new_path);
297 rename_clobberingly(old_path.as_external(), new_path.as_external());
298}
299
300void
301move_dir(any_path const & old_path,
302 any_path const & new_path)
303{
304 require_path_is_directory(old_path,
305 F("rename source dir '%s' does not exist")
306 % old_path,
307 F("rename source dir '%s' is a file "
308 "-- bug in monotone?") % old_path);
309 require_path_is_nonexistent(new_path,
310 F("rename target '%s' already exists")
311 % new_path);
312 rename_clobberingly(old_path.as_external(), new_path.as_external());
313}
314
315void
316move_path(any_path const & old_path,
317 any_path const & new_path)
318{
319 N(path_exists(old_path),
320 F("rename source path '%s' does not exist") % old_path);
321 require_path_is_nonexistent(new_path,
322 F("rename target '%s' already exists")
323 % new_path);
324 rename_clobberingly(old_path.as_external(), new_path.as_external());
325}
326
327void
328read_data(any_path const & p, data & dat)
329{
330 require_path_is_file(p,
331 F("file %s does not exist") % p,
332 F("file %s cannot be read as data; it is a directory") % p);
333
334 ifstream file(p.as_external().c_str(),
335 ios_base::in | ios_base::binary);
336 N(file, F("cannot open file %s for reading") % p);
337 Botan::Pipe pipe;
338 pipe.start_msg();
339 file >> pipe;
340 pipe.end_msg();
341 dat = data(pipe.read_all_as_string());
342}
343
344void read_directory(any_path const & path,
345 vector<path_component> & files,
346 vector<path_component> & dirs)
347{
348 vector<path_component> special_files;
349 fill_pc_vec ff(files), df(dirs), sf(special_files);
350 do_read_directory(path.as_external(), ff, df, sf);
351 E(special_files.empty(), F("cannot handle special files in dir '%s'") % path);
352}
353
354// This function can only be called once per run.
355void
356read_data_stdin(data & dat)
357{
358 static bool have_consumed_stdin = false;
359 N(!have_consumed_stdin, F("Cannot read standard input multiple times"));
360 have_consumed_stdin = true;
361 Botan::Pipe pipe;
362 pipe.start_msg();
363 cin >> pipe;
364 pipe.end_msg();
365 dat = data(pipe.read_all_as_string());
366}
367
368void
369read_data_for_command_line(utf8 const & path, data & dat)
370{
371 if (path() == "-")
372 read_data_stdin(dat);
373 else
374 read_data(system_path(path), dat);
375}
376
377
378// FIXME: this is probably not enough brains to actually manage "atomic
379// filesystem writes". at some point you have to draw the line with even
380// trying, and I'm not sure it's really a strict requirement of this tool,
381// but you might want to make this code a bit tighter.
382
383static void
384write_data_impl(any_path const & p,
385 data const & dat,
386 any_path const & tmp,
387 bool user_private)
388{
389 N(!directory_exists(p),
390 F("file '%s' cannot be overwritten as data; it is a directory") % p);
391
392 make_dir_for(p);
393
394 write_data_worker(p.as_external(), dat(), tmp.as_external(), user_private);
395}
396
397void
398write_data(file_path const & path, data const & dat)
399{
400 // use the bookkeeping root as the temporary directory.
401 assert_path_is_directory(bookkeeping_root);
402 write_data_impl(path, dat, bookkeeping_root, false);
403}
404
405void
406write_data(bookkeeping_path const & path, data const & dat)
407{
408 // use the bookkeeping root as the temporary directory.
409 assert_path_is_directory(bookkeeping_root);
410 write_data_impl(path, dat, bookkeeping_root, false);
411}
412
413void
414write_data(system_path const & path,
415 data const & data,
416 system_path const & tmpdir)
417{
418 write_data_impl(path, data, tmpdir, false);
419}
420
421void
422write_data_userprivate(system_path const & path,
423 data const & data,
424 system_path const & tmpdir)
425{
426 write_data_impl(path, data, tmpdir, true);
427}
428
429// recursive directory walking
430
431tree_walker::~tree_walker() {}
432
433bool
434tree_walker::visit_dir(file_path const & path)
435{
436 return true;
437}
438
439// subroutine of walk_tree_recursive: if the path composition of PATH and PC
440// is a valid file_path, write it to ENTRY and return true. otherwise,
441// generate an appropriate diagnostic and return false. in this context, an
442// invalid path is *not* an invariant failure, because it came from a
443// directory scan. ??? arguably belongs as a file_path method.
444static bool
445safe_compose(file_path const & path, path_component const & pc, bool isdir,
446 file_path & entry)
447{
448 try
449 {
450 entry = path / pc;
451 return true;
452 }
453 catch (logic_error)
454 {
455 // do what the above operator/ did, by hand, and then figure out what
456 // sort of diagnostic to issue.
457 utf8 badpth;
458 if (path.empty())
459 badpth = utf8(pc());
460 else
461 badpth = utf8(path.as_internal() + "/" + pc());
462
463 if (!isdir)
464 W(F("skipping file '%s' with unsupported name") % badpth);
465 else if (bookkeeping_path::internal_string_is_bookkeeping_path(badpth))
466 L(FL("ignoring bookkeeping directory '%s'") % badpth);
467 else
468 W(F("skipping directory '%s' with unsupported name") % badpth);
469 return false;
470 }
471}
472
473static void
474walk_tree_recursive(file_path const & path,
475 tree_walker & walker)
476{
477 // Read the directory up front, so that the directory handle is released
478 // before we recurse. This is important, because it can allocate rather a
479 // bit of memory (especially on ReiserFS, see [1]; opendir uses the
480 // filesystem's blocksize as a clue how much memory to allocate). We used
481 // to recurse into subdirectories on the fly; this left the memory
482 // describing _this_ directory pinned on the heap. Then our recursive
483 // call itself made another recursive call, etc., causing a huge spike in
484 // peak memory. By splitting the loop in half, we avoid this problem.
485 //
486 // [1] http://lkml.org/lkml/2006/2/24/215
487 vector<path_component> files, dirs;
488 read_directory(path, files, dirs);
489
490 for (vector<path_component>::const_iterator i = files.begin();
491 i != files.end(); ++i)
492 {
493 file_path entry;
494 if (safe_compose(path, *i, false, entry))
495 walker.visit_file(entry);
496 }
497
498 for (vector<path_component>::const_iterator i = dirs.begin();
499 i != dirs.end(); ++i)
500 {
501 file_path entry;
502 if (safe_compose(path, *i, true, entry))
503 if (walker.visit_dir(entry))
504 walk_tree_recursive(entry, walker);
505 }
506}
507
508// from some (safe) sub-entry of cwd
509void
510walk_tree(file_path const & path, tree_walker & walker)
511{
512 if (path.empty())
513 {
514 walk_tree_recursive(path, walker);
515 return;
516 }
517
518 switch (get_path_status(path))
519 {
520 case path::nonexistent:
521 N(false, F("no such file or directory: '%s'") % path);
522 break;
523 case path::file:
524 walker.visit_file(path);
525 break;
526 case path::directory:
527 if (walker.visit_dir(path))
528 walk_tree_recursive(path, walker);
529 break;
530 }
531}
532
533bool
534ident_existing_file(file_path const & p, file_id & ident)
535{
536 return ident_existing_file(p, ident, get_path_status(p));
537}
538
539bool
540ident_existing_file(file_path const & p, file_id & ident, path::status status)
541{
542 switch (status)
543 {
544 case path::nonexistent:
545 return false;
546 case path::file:
547 break;
548 case path::directory:
549 W(F("expected file '%s', but it is a directory.") % p);
550 return false;
551 }
552
553 hexenc<id> id;
554 calculate_ident(p, id);
555 ident = file_id(id);
556
557 return true;
558}
559
560void
561calculate_ident(file_path const & file,
562 hexenc<id> & ident)
563{
564 // no conversions necessary, use streaming form
565 // Best to be safe and check it isn't a dir.
566 assert_path_is_file(file);
567 Botan::Pipe p(new Botan::Hash_Filter("SHA-160"), new Botan::Hex_Encoder());
568 Botan::DataSource_Stream infile(file.as_external(), true);
569 p.process_msg(infile);
570
571 ident = hexenc<id>(lowercase(p.read_all_as_string()));
572}
573
574// Local Variables:
575// mode: C++
576// fill-column: 76
577// c-file-style: "gnu"
578// indent-tabs-mode: nil
579// End:
580// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status