monotone

monotone Mtn Source Tree

Root/file_io.cc

1// Copyright (C) 2002 Graydon Hoare <graydon@pobox.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include <iostream>
12#include <fstream>
13
14#include "botan/botan.h"
15#include "botan_pipe_cache.hh"
16
17#include "file_io.hh"
18#include "sanity.hh"
19#include "simplestring_xform.hh"
20#include "charset.hh"
21#include "platform-wrapped.hh"
22#include "numeric_vocab.hh"
23
24// this file deals with talking to the filesystem, loading and
25// saving files.
26
27using std::cin;
28using std::ifstream;
29using std::ios_base;
30using std::ofstream;
31using std::logic_error;
32using std::string;
33using std::vector;
34
35void
36assert_path_is_nonexistent(any_path const & path)
37{
38 I(get_path_status(path) == path::nonexistent);
39}
40
41void
42assert_path_is_file(any_path const & path)
43{
44 I(get_path_status(path) == path::file);
45}
46
47void
48assert_path_is_directory(any_path const & path)
49{
50 I(get_path_status(path) == path::directory);
51}
52
53void
54require_path_is_nonexistent(any_path const & path,
55 i18n_format const & message)
56{
57 N(!path_exists(path), message);
58}
59
60void
61require_path_is_file(any_path const & path,
62 i18n_format const & message_if_nonexistent,
63 i18n_format const & message_if_directory)
64{
65 switch (get_path_status(path))
66 {
67 case path::nonexistent:
68 N(false, message_if_nonexistent);
69 break;
70 case path::file:
71 return;
72 case path::directory:
73 N(false, message_if_directory);
74 break;
75 }
76}
77
78void
79require_path_is_directory(any_path const & path,
80 i18n_format const & message_if_nonexistent,
81 i18n_format const & message_if_file)
82{
83 switch (get_path_status(path))
84 {
85 case path::nonexistent:
86 N(false, message_if_nonexistent);
87 break;
88 case path::file:
89 N(false, message_if_file);
90 case path::directory:
91 return;
92 break;
93 }
94}
95
96bool
97path_exists(any_path const & p)
98{
99 return get_path_status(p) != path::nonexistent;
100}
101
102bool
103directory_exists(any_path const & p)
104{
105 return get_path_status(p) == path::directory;
106}
107
108bool
109file_exists(any_path const & p)
110{
111 return get_path_status(p) == path::file;
112}
113
114namespace
115{
116 struct directory_not_empty_exception {};
117 struct directory_empty_helper : public dirent_consumer
118 {
119 virtual void consume(char const *)
120 { throw directory_not_empty_exception(); }
121 };
122}
123
124bool
125directory_empty(any_path const & path)
126{
127 directory_empty_helper h;
128 try {
129 do_read_directory(system_path(path).as_external(), h, h, h);
130 } catch (directory_not_empty_exception) {
131 return false;
132 }
133 return true;
134}
135
// Set to true by guess_binary() once the table below has been filled in.
static bool did_char_is_binary_init = false;
// One flag per possible byte value; a true entry marks that byte as an
// indicator of binary content.  Starts out all-false.
static bool char_is_binary[256] = { false };
138
139static void
140set_char_is_binary(char c, bool is_binary)
141{
142 char_is_binary[static_cast<u8>(c)] = is_binary;
143}
144
145static void
146init_char_is_binary()
147{
148 // these do not occur in ASCII text files
149 // FIXME: this heuristic is (a) crap and (b) hardcoded. fix both these.
150 // Should be calling a lua hook here that can use set_char_is_binary()
151 // That will at least fix (b)
152 string nontext_chars("\x01\x02\x03\x04\x05\x06\x0e\x0f"
153 "\x10\x11\x12\x13\x14\x15\x16\x17\x18"
154 "\x19\x1a\x1c\x1d\x1e\x1f");
155 set_char_is_binary('\0', true);
156 for(size_t i = 0; i < nontext_chars.size(); ++i)
157 {
158 set_char_is_binary(nontext_chars[i], true);
159 }
160}
161
162bool guess_binary(string const & s)
163{
164 if (did_char_is_binary_init == false)
165 {
166 init_char_is_binary();
167 did_char_is_binary_init = true;
168 }
169
170 for (size_t i = 0; i < s.size(); ++i)
171 {
172 if (char_is_binary[ static_cast<u8>(s[i]) ])
173 return true;
174 }
175 return false;
176}
177
178void
179mkdir_p(any_path const & p)
180{
181 switch (get_path_status(p))
182 {
183 case path::directory:
184 return;
185 case path::file:
186 E(false, F("could not create directory '%s': it is a file") % p);
187 case path::nonexistent:
188 std::string const current = p.as_external();
189 any_path const parent = p.dirname();
190 if (current != parent.as_external())
191 {
192 mkdir_p(parent);
193 }
194 do_mkdir(current);
195 }
196}
197
198void
199make_dir_for(any_path const & p)
200{
201 mkdir_p(p.dirname());
202}
203
204void
205delete_file(any_path const & p)
206{
207 require_path_is_file(p,
208 F("file to delete '%s' does not exist") % p,
209 F("file to delete, '%s', is not a file but a directory") % p);
210 do_remove(p.as_external());
211}
212
213void
214delete_dir_shallow(any_path const & p)
215{
216 require_path_is_directory(p,
217 F("directory to delete '%s' does not exist") % p,
218 F("directory to delete, '%s', is not a directory but a file") % p);
219 do_remove(p.as_external());
220}
221
222void
223delete_file_or_dir_shallow(any_path const & p)
224{
225 N(path_exists(p), F("object to delete, '%s', does not exist") % p);
226 do_remove(p.as_external());
227}
228
229namespace
230{
231 struct fill_pc_vec : public dirent_consumer
232 {
233 fill_pc_vec(vector<path_component> & v) : v(v) { v.clear(); }
234
235 // FIXME BUG: this treats 's' as being already utf8, but it is actually
236 // in the external character set. Also, will I() out on invalid
237 // pathnames, when it should N() or perhaps W() and skip.
238 virtual void consume(char const * s)
239 { v.push_back(path_component(s)); }
240
241 private:
242 vector<path_component> & v;
243 };
244
245 struct file_deleter : public dirent_consumer
246 {
247 file_deleter(any_path const & p) : parent(p) {}
248 virtual void consume(char const * f)
249 {
250 // FIXME: same bug as above.
251 do_remove((parent / path_component(f)).as_external());
252 }
253 private:
254 any_path const & parent;
255 };
256}
257
258static void
259do_remove_recursive(any_path const & p)
260{
261 // for the reasons described in walk_tree_recursive, we read the entire
262 // directory before recursing into any subdirs. however, it is safe to
263 // delete files as we encounter them, and we do so.
264 vector<path_component> subdirs;
265 fill_pc_vec subdir_fill(subdirs);
266 file_deleter delete_files(p);
267
268 do_read_directory(p.as_external(), delete_files, subdir_fill, delete_files);
269 for (vector<path_component>::const_iterator i = subdirs.begin();
270 i != subdirs.end(); i++)
271 do_remove_recursive(p / *i);
272
273 do_remove(p.as_external());
274}
275
276
277void
278delete_dir_recursive(any_path const & p)
279{
280 require_path_is_directory(p,
281 F("directory to delete, '%s', does not exist") % p,
282 F("directory to delete, '%s', is a file") % p);
283
284 do_remove_recursive(p);
285}
286
287void
288move_file(any_path const & old_path,
289 any_path const & new_path)
290{
291 require_path_is_file(old_path,
292 F("rename source file '%s' does not exist") % old_path,
293 F("rename source file '%s' is a directory "
294 "-- bug in monotone?") % old_path);
295 require_path_is_nonexistent(new_path,
296 F("rename target '%s' already exists")
297 % new_path);
298 rename_clobberingly(old_path.as_external(), new_path.as_external());
299}
300
301void
302move_dir(any_path const & old_path,
303 any_path const & new_path)
304{
305 require_path_is_directory(old_path,
306 F("rename source dir '%s' does not exist")
307 % old_path,
308 F("rename source dir '%s' is a file "
309 "-- bug in monotone?") % old_path);
310 require_path_is_nonexistent(new_path,
311 F("rename target '%s' already exists")
312 % new_path);
313 rename_clobberingly(old_path.as_external(), new_path.as_external());
314}
315
316void
317move_path(any_path const & old_path,
318 any_path const & new_path)
319{
320 N(path_exists(old_path),
321 F("rename source path '%s' does not exist") % old_path);
322 require_path_is_nonexistent(new_path,
323 F("rename target '%s' already exists")
324 % new_path);
325 rename_clobberingly(old_path.as_external(), new_path.as_external());
326}
327
328void
329read_data(any_path const & p, data & dat)
330{
331 require_path_is_file(p,
332 F("file %s does not exist") % p,
333 F("file %s cannot be read as data; it is a directory") % p);
334
335 ifstream file(p.as_external().c_str(),
336 ios_base::in | ios_base::binary);
337 N(file, F("cannot open file %s for reading") % p);
338 unfiltered_pipe->start_msg();
339 file >> *unfiltered_pipe;
340 unfiltered_pipe->end_msg();
341 dat = data(unfiltered_pipe->read_all_as_string(Botan::Pipe::LAST_MESSAGE));
342}
343
344void read_directory(any_path const & path,
345 vector<path_component> & files,
346 vector<path_component> & dirs)
347{
348 vector<path_component> special_files;
349 fill_pc_vec ff(files), df(dirs), sf(special_files);
350 do_read_directory(path.as_external(), ff, df, sf);
351 E(special_files.empty(), F("cannot handle special files in dir '%s'") % path);
352}
353
354// This function can only be called once per run.
355void
356read_data_stdin(data & dat)
357{
358 static bool have_consumed_stdin = false;
359 N(!have_consumed_stdin, F("Cannot read standard input multiple times"));
360 have_consumed_stdin = true;
361 unfiltered_pipe->start_msg();
362 cin >> *unfiltered_pipe;
363 unfiltered_pipe->end_msg();
364 dat = data(unfiltered_pipe->read_all_as_string(Botan::Pipe::LAST_MESSAGE));
365}
366
367void
368read_data_for_command_line(utf8 const & path, data & dat)
369{
370 if (path() == "-")
371 read_data_stdin(dat);
372 else
373 read_data(system_path(path), dat);
374}
375
376
377// FIXME: this is probably not enough brains to actually manage "atomic
378// filesystem writes". at some point you have to draw the line with even
379// trying, and I'm not sure it's really a strict requirement of this tool,
380// but you might want to make this code a bit tighter.
381
382static void
383write_data_impl(any_path const & p,
384 data const & dat,
385 any_path const & tmp,
386 bool user_private)
387{
388 N(!directory_exists(p),
389 F("file '%s' cannot be overwritten as data; it is a directory") % p);
390
391 make_dir_for(p);
392
393 write_data_worker(p.as_external(), dat(), tmp.as_external(), user_private);
394}
395
396void
397write_data(file_path const & path, data const & dat)
398{
399 // use the bookkeeping root as the temporary directory.
400 assert_path_is_directory(bookkeeping_root);
401 write_data_impl(path, dat, bookkeeping_root, false);
402}
403
404void
405write_data(bookkeeping_path const & path, data const & dat)
406{
407 // use the bookkeeping root as the temporary directory.
408 assert_path_is_directory(bookkeeping_root);
409 write_data_impl(path, dat, bookkeeping_root, false);
410}
411
412void
413write_data(system_path const & path,
414 data const & data,
415 system_path const & tmpdir)
416{
417 write_data_impl(path, data, tmpdir, false);
418}
419
420void
421write_data_userprivate(system_path const & path,
422 data const & data,
423 system_path const & tmpdir)
424{
425 write_data_impl(path, data, tmpdir, true);
426}
427
428// recursive directory walking
429
430tree_walker::~tree_walker() {}
431
432bool
433tree_walker::visit_dir(file_path const & path)
434{
435 return true;
436}
437
438// subroutine of walk_tree_recursive: if the path composition of PATH and PC
439// is a valid file_path, write it to ENTRY and return true. otherwise,
440// generate an appropriate diagnostic and return false. in this context, an
441// invalid path is *not* an invariant failure, because it came from a
442// directory scan. ??? arguably belongs as a file_path method.
443static bool
444safe_compose(file_path const & path, path_component const & pc, bool isdir,
445 file_path & entry)
446{
447 try
448 {
449 entry = path / pc;
450 return true;
451 }
452 catch (logic_error)
453 {
454 // do what the above operator/ did, by hand, and then figure out what
455 // sort of diagnostic to issue.
456 utf8 badpth;
457 if (path.empty())
458 badpth = utf8(pc());
459 else
460 badpth = utf8(path.as_internal() + "/" + pc());
461
462 if (!isdir)
463 W(F("skipping file '%s' with unsupported name") % badpth);
464 else if (bookkeeping_path::internal_string_is_bookkeeping_path(badpth))
465 L(FL("ignoring bookkeeping directory '%s'") % badpth);
466 else
467 W(F("skipping directory '%s' with unsupported name") % badpth);
468 return false;
469 }
470}
471
472static void
473walk_tree_recursive(file_path const & path,
474 tree_walker & walker)
475{
476 // Read the directory up front, so that the directory handle is released
477 // before we recurse. This is important, because it can allocate rather a
478 // bit of memory (especially on ReiserFS, see [1]; opendir uses the
479 // filesystem's blocksize as a clue how much memory to allocate). We used
480 // to recurse into subdirectories on the fly; this left the memory
481 // describing _this_ directory pinned on the heap. Then our recursive
482 // call itself made another recursive call, etc., causing a huge spike in
483 // peak memory. By splitting the loop in half, we avoid this problem.
484 //
485 // [1] http://lkml.org/lkml/2006/2/24/215
486 vector<path_component> files, dirs;
487 read_directory(path, files, dirs);
488
489 for (vector<path_component>::const_iterator i = files.begin();
490 i != files.end(); ++i)
491 {
492 file_path entry;
493 if (safe_compose(path, *i, false, entry))
494 walker.visit_file(entry);
495 }
496
497 for (vector<path_component>::const_iterator i = dirs.begin();
498 i != dirs.end(); ++i)
499 {
500 file_path entry;
501 if (safe_compose(path, *i, true, entry))
502 if (walker.visit_dir(entry))
503 walk_tree_recursive(entry, walker);
504 }
505}
506
507// from some (safe) sub-entry of cwd
508void
509walk_tree(file_path const & path, tree_walker & walker)
510{
511 if (path.empty())
512 {
513 walk_tree_recursive(path, walker);
514 return;
515 }
516
517 switch (get_path_status(path))
518 {
519 case path::nonexistent:
520 N(false, F("no such file or directory: '%s'") % path);
521 break;
522 case path::file:
523 walker.visit_file(path);
524 break;
525 case path::directory:
526 if (walker.visit_dir(path))
527 walk_tree_recursive(path, walker);
528 break;
529 }
530}
531
532bool
533ident_existing_file(file_path const & p, file_id & ident)
534{
535 return ident_existing_file(p, ident, get_path_status(p));
536}
537
538bool
539ident_existing_file(file_path const & p, file_id & ident, path::status status)
540{
541 switch (status)
542 {
543 case path::nonexistent:
544 return false;
545 case path::file:
546 break;
547 case path::directory:
548 W(F("expected file '%s', but it is a directory.") % p);
549 return false;
550 }
551
552 calculate_ident(p, ident);
553 return true;
554}
555
556void
557calculate_ident(file_path const & file,
558 file_id & ident)
559{
560 // no conversions necessary, use streaming form
561 static cached_botan_pipe
562 p(new Botan::Pipe(new Botan::Hash_Filter("SHA-160")));
563
564 // Best to be safe and check it isn't a dir.
565 assert_path_is_file(file);
566 Botan::DataSource_Stream infile(file.as_external(), true);
567 p->process_msg(infile);
568
569 ident = file_id(p->read_all_as_string(Botan::Pipe::LAST_MESSAGE));
570}
571
572// Local Variables:
573// mode: C++
574// fill-column: 76
575// c-file-style: "gnu"
576// indent-tabs-mode: nil
577// End:
578// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status