monotone

monotone Mtn Source Tree

Root/src/dates.cc

1// Copyright (C) 2007-2009 Zack Weinberg <zackw@panix.com>
2// Markus Wanner <markus@bluegap.ch>
3//
4// This program is made available under the GNU GPL version 2.0 or
5// greater. See the accompanying file COPYING for details.
6//
7// This program is distributed WITHOUT ANY WARRANTY; without even the
8// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9// PURPOSE.
10
11#include "base.hh"
12#include "dates.hh"
13#include "sanity.hh"
14#include "platform.hh"
15
16#include <ctime>
17#include <climits>
18
19// Generic date handling routines for Monotone.
20//
21// The routines in this file substantively duplicate functionality of the
22// standard C library, so one might wonder why they are needed. There are
23// three fundamental portability problems which together force us to
24// implement our own date handling:
25//
26// 1. We want millisecond precision in our dates, and, at the same time, the
27// ability to represent dates far in the future. Support for dates far
28// in the future (in particular, past 2038) is currently only common on
29// 64-bit systems. Support for sub-second resolution is not available at
30// all in the standard 'broken-down time' format (struct tm).
31//
32// 2. There is no standardized way to convert from 'struct tm' to 'time_t'
33// without treating the 'struct tm' as local time. Some systems do
34// provide a 'timegm' function but it is not widespread.
35//
36// 3. Some (rare, nowadays) systems do not use the Unix epoch as the epoch
37// for 'time_t'. This is only a problem because we support reading
38// CVS/RCS ,v files, which encode times as decimal seconds since the Unix
39// epoch; so we must support that epoch regardless of what the system does.
40//
41// Note that while we track dates to the millisecond in memory, we do not
42// record milliseconds in the database, nor do we ask the system for
43// sub-second resolution when retrieving the current time, nor do we display
44// milliseconds to the user. There isn't much point in fixing one of these
45// problems if we don't fix all of them, and while the first two would be
46// straightforward, the third is very hard -- it would require us to
47// reimplement strftime() with our own extension for the purpose.
48
49// On Solaris, these macros are already defined by system includes. We want
50// to use our own, so we undef them here.
51#undef SEC
52#undef MILLISEC
53
54using std::localtime;
55using std::mktime;
56using std::numeric_limits;
57using std::ostream;
58using std::string;
59using std::time_t;
60using std::tm;
61
// A "struct tm" work-alike used by this file to hold a timestamp split
// into calendar fields; unlike struct tm it has a milliseconds field,
// months count from 1, and the year is the full year number.
struct broken_down_time {
  int millisec;   // milliseconds, 0 - 999
  int sec;        // seconds, 0 - 59
  int min;        // minutes, 0 - 59
  int hour;       // hours, 0 - 23
  int day;        // day of the month, 1 - 31
  int month;      // month, 1 - 12
  int year;       // year anno Domini, e.g. 1999
};
72
73// The Unix epoch is 1970-01-01T00:00:00 (in UTC). As we cannot safely
74// assume that the system's epoch is the Unix epoch, we implement the
75// conversion to broken-down time by hand instead of relying on gmtime().
76//
77// Unix time_t values are a linear count of seconds since the epoch,
78// and should be interpreted according to the Gregorian calendar:
79//
80// - There are 60 seconds in a minute, 3600 seconds in an hour,
81// 86400 seconds in a day.
82// - Years not divisible by 4 have 365 days, or 31536000 seconds.
83// - Years divisible by 4 have 366 days, or 31622400 seconds, except ...
84// - Years divisible by 100 have only 365 days, except ...
85// - Years divisible by 400 have 366 days.
86//
87// The last two rules are the Gregorian correction to the Julian calendar.
88// Note that dates before 1582 are treated as if the Gregorian calendar had
89// been in effect on that day in history (the 'proleptic' calendar). Also,
90// we make no attempt to handle leap seconds.
91
92s64 const INVALID = PROBABLE_S64_MAX;
93
94// This is the date 292278994-01-01T00:00:00.000. The year 292,278,994
95// overflows a signed 64-bit millisecond counter somewhere in August, so
96// we've rounded down to the last whole year that fits.
97s64 const LATEST_SUPPORTED_DATE = s64_C(9223372017129600000);
98
99// This is the date 0001-01-01T00:00:00.000. There is no year zero in the
100// Gregorian calendar, and what are you doing using monotone to version
101// data from before the common era, anyway?
102s64 const EARLIEST_SUPPORTED_DATE = s64_C(-62135596800000);
103
104// These constants are all in seconds.
105u32 const SEC = 1;
106u32 const MIN = 60*SEC;
107u32 const HOUR = 60*MIN;
108u64 const DAY = 24*HOUR;
109u64 const YEAR = 365*DAY;
110
111inline s64 MILLISEC(s64 n) { return n * 1000; }
112
// Number of days in each month of a non-leap year; index with (month - 1).
// Callers add one to February's entry themselves for leap years.
unsigned char const DAYS_PER_MONTH[] = {
  31, 28, 31,   // jan, feb (non-leap), mar
  30, 31, 30,   // apr, may, jun
  31, 31, 30,   // jul, aug, sep
  31, 30, 31,   // oct, nov, dec
};
127
128inline bool
129is_leap_year(s32 year)
130{
131 return (year % 4 == 0
132 && (year % 100 != 0 || year % 400 == 0));
133}
134inline s32
135days_in_year(s32 year)
136{
137 return is_leap_year(year) ? 366 : 365;
138}
139
140inline bool
141valid_ms_count(s64 d)
142{
143 return (d >= EARLIEST_SUPPORTED_DATE && d <= LATEST_SUPPORTED_DATE);
144}
145
146static void
147our_gmtime(s64 ts, broken_down_time & tb)
148{
149 // validate our assumptions about which basic type is u64 (see above).
150 I(PROBABLE_S64_MAX == numeric_limits<s64>::max());
151 I(LATEST_SUPPORTED_DATE < PROBABLE_S64_MAX);
152
153 I(valid_ms_count(ts));
154
155 // All subsequent calculations are easier if 't' is always positive, so we
156 // make zero be EARLIEST_SUPPORTED_DATE, which happens to be
157 // 0001-01-01T00:00:00 and is thus a convenient fixed point for leap year
158 // calculations.
159
160 u64 t = u64(ts) - u64(EARLIEST_SUPPORTED_DATE);
161
162 // sub-day components
163 u64 days = t / MILLISEC(DAY);
164 u32 ms_in_day = t % MILLISEC(DAY);
165
166 tb.millisec = ms_in_day % 1000;
167 ms_in_day /= 1000;
168
169 tb.sec = ms_in_day % 60;
170 ms_in_day /= 60;
171
172 tb.min = ms_in_day % 60;
173 tb.hour = ms_in_day / 60;
174
175 // This is the result of inverting the equation
176 // yb = y*365 + y/4 - y/100 + y/400
177 // it approximates years since the epoch for any day count.
178 u32 year = (400*days / 146097);
179
180 // Compute the _exact_ number of days from the epoch to the beginning of
181 // the approximate year determined above.
182 u64 yearbeg;
183 yearbeg = widen<u64,u32>(year)*365 + year/4 - year/100 + year/400;
184
185 // Our epoch is year 1, not year 0 (there is no year 0).
186 year++;
187
188 s64 delta = days - yearbeg;
189 // The approximation above occasionally guesses the year before the
190 // correct one, but never the year after, or any further off than that.
191 if (delta >= days_in_year(year))
192 {
193 delta -= days_in_year(year);
194 year++;
195 }
196 I(0 <= delta && delta < days_in_year(year));
197
198 tb.year = year;
199 days = delta;
200
201 // <yakko> Now, the months digit!
202 u32 month = 1;
203 for (;;)
204 {
205 u32 this_month = DAYS_PER_MONTH[month-1];
206 if (month == 2 && is_leap_year(year))
207 this_month += 1;
208 if (days < this_month)
209 break;
210
211 days -= this_month;
212 month++;
213 I(month <= 12);
214 }
215 tb.month = month;
216 tb.day = days + 1;
217}
218
219static s64
220our_timegm(broken_down_time const & tb)
221{
222 s64 d;
223
224 // range checks
225 I(tb.year > 0 && tb.year <= 292278994);
226 I(tb.month >= 1 && tb.month <= 12);
227 I(tb.day >= 1 && tb.day <= 31);
228 I(tb.hour >= 0 && tb.hour <= 23);
229 I(tb.min >= 0 && tb.min <= 59);
230 I(tb.sec >= 0 && tb.sec <= 60);
231 I(tb.millisec >= 0 && tb.millisec <= 999);
232
233 // years (since 1970)
234 d = YEAR * (tb.year - 1970);
235 // leap days to add (or subtract)
236 int add_leap_days = (tb.year - 1) / 4 - 492;
237 add_leap_days -= (tb.year - 1) / 100 - 19;
238 add_leap_days += (tb.year - 1) / 400 - 4;
239 d += add_leap_days * DAY;
240
241 // months
242 for (int m = 1; m < tb.month; ++m)
243 {
244 d += DAYS_PER_MONTH[m-1] * DAY;
245 if (m == 2 && is_leap_year(tb.year))
246 d += DAY;
247 }
248
249 // days within month, and so on
250 d += (tb.day - 1) * DAY;
251 d += tb.hour * HOUR;
252 d += tb.min * MIN;
253 d += tb.sec * SEC;
254
255 return MILLISEC(d) + tb.millisec;
256}
257
258// In a few places we need to know the offset between the Unix epoch and the
259// system epoch.
260static s64
261get_epoch_offset()
262{
263 static s64 epoch_offset;
264 static bool know_epoch_offset = false;
265 broken_down_time our_t;
266
267 if (know_epoch_offset)
268 return epoch_offset;
269
270 time_t epoch = 0;
271 tm t = *std::gmtime(&epoch);
272
273 our_t.millisec = 0;
274 our_t.sec = t.tm_sec;
275 our_t.min = t.tm_min;
276 our_t.hour = t.tm_hour;
277 our_t.day = t.tm_mday;
278 our_t.month = t.tm_mon + 1;
279 our_t.year = t.tm_year + 1900;
280
281 epoch_offset = our_timegm(our_t);
282
283 know_epoch_offset = true;
284 return epoch_offset;
285}
286
287
288//
289// date_t methods
290//
291bool
292date_t::valid() const
293{
294 return valid_ms_count(d);
295}
296
297// initialize to an invalid date
298date_t::date_t()
299 : d(INVALID)
300{
301 I(!valid());
302}
303
304date_t::date_t(s64 d)
305 : d(d)
306{
307 // When initialized from a millisecods since Unix epoch value, we require
308 // it to be in a valid range. Use the constructor without any argument to
309 // generate an invalid date.
310 I(valid());
311}
312
313date_t::date_t(int year, int month, int day,
314 int hour, int min, int sec, int millisec)
315{
316 broken_down_time t;
317 t.millisec = millisec;
318 t.sec = sec;
319 t.min = min;
320 t.hour = hour;
321 t.day = day;
322 t.month = month;
323 t.year = year;
324
325 d = our_timegm(t);
326 I(valid());
327}
328
329// WARNING: do not log anything within this function; since this is used in
330// user_interface::output_prefix() this might lead to an indefinite loop!
331date_t
332date_t::now()
333{
334 time_t t = std::time(0);
335 s64 tu = MILLISEC(t) + get_epoch_offset();
336 E(valid_ms_count(tu), origin::system,
337 F("current date '%s' is outside usable range\n"
338 "(your system clock may not be set correctly)")
339 % std::ctime(&t));
340 return date_t(tu);
341}
342
343string
344date_t::as_iso_8601_extended() const
345{
346 broken_down_time tb;
347 I(valid());
348 our_gmtime(d, tb);
349 return (FL("%04u-%02u-%02uT%02u:%02u:%02u")
350 % tb.year % tb.month % tb.day
351 % tb.hour % tb.min % tb.sec).str();
352}
353
354ostream &
355operator<< (ostream & o, date_t const & d)
356{
357 return o << d.as_iso_8601_extended();
358}
359
360template <> void
361dump(date_t const & d, string & s)
362{
363 s = d.as_iso_8601_extended();
364}
365
366string
367date_t::as_formatted_localtime(string const & fmt) const
368{
369 L(FL("formatting date '%s' with format '%s'") % *this % fmt);
370
371 // note that the time_t value here may underflow or overflow if our date
372 // is outside of the representable range. for 32 bit time_t's the earliest
373 // representable time is 1901-12-13 20:45:52 UTC and the latest
374 // representable time is 2038-01-19 03:14:07 UTC. assert that the value is
375 // within range for the current time_t type so that localtime doesn't
376 // produce a bad result.
377
378 s64 seconds = d/1000 - get_epoch_offset();
379
380 L(FL("%s seconds UTC since unix epoch") % seconds);
381
382 E(seconds >= numeric_limits<time_t>::min(), origin::user,
383 F("date '%s' is out of range and cannot be formatted")
384 % as_iso_8601_extended());
385
386 E(seconds <= numeric_limits<time_t>::max(), origin::user,
387 F("date '%s' is out of range and cannot be formatted")
388 % as_iso_8601_extended());
389
390 time_t t(seconds); // seconds since unix epoch in UTC
391 tm tb(*localtime(&t)); // converted to local timezone values
392
393 L(FL("localtime %4s/%02s/%02s %02s:%02s:%02s WD %s YD %s DST %d")
394 % (tb.tm_year + 1900)
395 % (tb.tm_mon + 1)
396 % tb.tm_mday
397 % tb.tm_hour
398 % tb.tm_min
399 % tb.tm_sec
400 % tb.tm_wday
401 % tb.tm_yday
402 % tb.tm_isdst);
403
404 char buf[128];
405
406 // Poison the buffer so we can tell whether strftime() produced
407 // no output at all.
408 buf[0] = '#';
409
410 size_t wrote = strftime(buf, sizeof buf, fmt.c_str(), &tb);
411
412 if (wrote > 0)
413 {
414 string formatted(buf);
415 L(FL("formatted date '%s'") % formatted);
416 return formatted; // yay, it worked
417 }
418
419 if (wrote == 0 && buf[0] == '\0') // no output
420 {
421 static bool warned = false;
422 if (!warned)
423 {
424 warned = true;
425 W(F("time format specification '%s' produces no output") % fmt);
426 }
427 return string();
428 }
429
430 E(false, origin::user,
431 F("date '%s' is too long when formatted using '%s'"
432 " (the result must fit in %d characters)")
433 % (sizeof buf - 1));
434}
435
436date_t
437date_t::from_formatted_localtime(string const & s, string const & fmt)
438{
439 tm tb;
440 memset(&tb, 0, sizeof(tb));
441
442 L(FL("parsing date '%s' with format '%s'") % s % fmt);
443
444 // get local timezone values
445 parse_date(s, fmt, &tb);
446
447 // strptime does *not* set the tm_isdst field in the broken down time
448 // struct. setting it to -1 is apparently the way to tell mktime to
449 // determine whether DST is in effect or not.
450
451 tb.tm_isdst = -1;
452
453 L(FL("localtime %4s/%02s/%02s %02s:%02s:%02s WD %s YD %s DST %d")
454 % (tb.tm_year + 1900)
455 % (tb.tm_mon + 1)
456 % tb.tm_mday
457 % tb.tm_hour
458 % tb.tm_min
459 % tb.tm_sec
460 % tb.tm_wday
461 % tb.tm_yday
462 % tb.tm_isdst);
463
464 // note that the time_t value here may underflow or overflow if our date
465 // is outside of the representable range. for 32 bit time_t's the earliest
466 // representable time is 1901-12-13 20:45:52 UTC and the latest
467 // representable time is 2038-01-19 03:14:07 UTC. mktime seems to detect
468 // this and return -1 for values it cannot handle, which strptime will
469 // happily produce.
470
471 time_t t = mktime(&tb); // convert to seconds since unix epoch in UTC
472
473 L(FL("%s seconds UTC since unix epoch") % t);
474
475 // mktime may return a time_t that has the value -1 to indicate an error.
476 // however this is also the valid date 1969-12-31 23:59:59. so we ignore this
477 // error indication and convert the resulting time_t back to a struct tm
478 // for comparison with the input to mktime to detect out of range errors.
479
480 tm check(*localtime(&t)); // back to local timezone values
481
482 E(tb.tm_sec == check.tm_sec &&
483 tb.tm_min == check.tm_min &&
484 tb.tm_hour == check.tm_hour &&
485 tb.tm_mday == check.tm_mday &&
486 tb.tm_mon == check.tm_mon &&
487 tb.tm_year == check.tm_year &&
488 tb.tm_wday == check.tm_wday &&
489 tb.tm_yday == check.tm_yday &&
490 tb.tm_isdst == check.tm_isdst,
491 origin::user,
492 F("date '%s' is out of range and cannot be parsed")
493 % s);
494
495 date_t date(MILLISEC(t) + get_epoch_offset());
496
497 L(FL("parsed date '%s'") % date);
498
499 return date;
500}
501
502s64
503date_t::as_millisecs_since_unix_epoch() const
504{
505 return d;
506}
507
508// We might want to consider teaching this routine more time formats.
509// gnulib has a rather nice date parser, except that it requires Bison
510// (not even just yacc).
511
512date_t::date_t(string const & s)
513{
514 try
515 {
516 size_t i = s.size() - 1; // last character of the array
517
518 // check the first character which is not a digit
519 while (s.at(i) >= '0' && s.at(i) <= '9')
520 i--;
521
522 // ignore fractional seconds, if present, or go back to the end of the
523 // date string to parse the digits for seconds.
524 if (s.at(i) == '.')
525 i--;
526 else
527 i = s.size() - 1;
528
529 // seconds
530 u8 sec;
531 E(s.at(i) >= '0' && s.at(i) <= '9'
532 && s.at(i-1) >= '0' && s.at(i-1) <= '5', origin::user,
533 F("unrecognized date (monotone only understands ISO 8601 format)"));
534 sec = (s.at(i-1) - '0')*10 + (s.at(i) - '0');
535 i -= 2;
536 E(sec <= 60, origin::user,
537 F("seconds out of range"));
538
539 // optional colon
540 if (s.at(i) == ':')
541 i--;
542
543 // minutes
544 u8 min;
545 E(s.at(i) >= '0' && s.at(i) <= '9'
546 && s.at(i-1) >= '0' && s.at(i-1) <= '5', origin::user,
547 F("unrecognized date (monotone only understands ISO 8601 format)"));
548 min = (s.at(i-1) - '0')*10 + (s.at(i) - '0');
549 i -= 2;
550 E(min < 60, origin::user,
551 F("minutes out of range"));
552
553 // optional colon
554 if (s.at(i) == ':')
555 i--;
556
557 // hours
558 u8 hour;
559 E((s.at(i-1) >= '0' && s.at(i-1) <= '1'
560 && s.at(i) >= '0' && s.at(i) <= '9')
561 || (s.at(i-1) == '2' && s.at(i) >= '0' && s.at(i) <= '3'), origin::user,
562 F("unrecognized date (monotone only understands ISO 8601 format)"));
563 hour = (s.at(i-1) - '0')*10 + (s.at(i) - '0');
564 i -= 2;
565 E(hour < 24, origin::user,
566 F("hour out of range"));
567
568 // We accept 'T' as well as spaces between the date and the time
569 E(s.at(i) == 'T' || s.at(i) == ' ', origin::user,
570 F("unrecognized date (monotone only understands ISO 8601 format)"));
571 i--;
572
573 // day
574 u8 day;
575 E(s.at(i-1) >= '0' && s.at(i-1) <= '3'
576 && s.at(i) >= '0' && s.at(i) <= '9', origin::user,
577 F("unrecognized date (monotone only understands ISO 8601 format)"));
578 day = (s.at(i-1) - '0')*10 + (s.at(i) - '0');
579 i -= 2;
580
581 // optional dash
582 if (s.at(i) == '-')
583 i--;
584
585 // month
586 u8 month;
587 E(s.at(i-1) >= '0' && s.at(i-1) <= '1'
588 && s.at(i) >= '0' && s.at(i) <= '9', origin::user,
589 F("unrecognized date (monotone only understands ISO 8601 format)"));
590 month = (s.at(i-1) - '0')*10 + (s.at(i) - '0');
591 E(month >= 1 && month <= 12, origin::user,
592 F("month out of range in '%s'") % s);
593 i -= 2;
594
595 // optional dash
596 if (s.at(i) == '-')
597 i--;
598
599 // year
600 E(i >= 3, origin::user,
601 F("unrecognized date (monotone only understands ISO 8601 format)"));
602
603 // this counts down through zero and stops when it wraps around
604 // (size_t being unsigned)
605 u32 year = 0;
606 u32 digit = 1;
607 while (i < s.size())
608 {
609 E(s.at(i) >= '0' && s.at(i) <= '9', origin::user,
610 F("unrecognized date (monotone only understands ISO 8601 format)"));
611 year += (s.at(i) - '0')*digit;
612 i--;
613 digit *= 10;
614 }
615
616 E(year >= 1, origin::user,
617 F("date too early (monotone only goes back to 0001-01-01T00:00:00)"));
618 E(year <= 292278994, origin::user,
619 F("date too late (monotone only goes forward to year 292,278,993)"));
620
621 u8 mdays;
622 if (month == 2 && is_leap_year(year))
623 mdays = DAYS_PER_MONTH[month-1] + 1;
624 else
625 mdays = DAYS_PER_MONTH[month-1];
626
627 E(day >= 1 && day <= mdays, origin::user,
628 F("day out of range for its month in '%s'") % s);
629
630 broken_down_time t;
631 t.millisec = 0;
632 t.sec = sec;
633 t.min = min;
634 t.hour = hour;
635 t.day = day;
636 t.month = month;
637 t.year = year;
638
639 d = our_timegm(t);
640 }
641 catch (std::out_of_range)
642 {
643 E(false, origin::user,
644 F("unrecognized date (monotone only understands ISO 8601 format)"));
645 }
646}
647
648date_t &
649date_t::operator +=(s64 const other)
650{
651 // only operate on vaild dates, prevent turning an invalid
652 // date into a valid one.
653 I(valid());
654
655 d += other;
656
657 I(valid());
658
659 return *this;
660}
661
662date_t &
663date_t::operator -=(s64 const other)
664{
665 // simply use the addition operator with inversed sign
666 return operator+=(-other);
667}
668
669date_t
670date_t::operator +(s64 const other) const
671{
672 date_t result(d);
673 result += other;
674 return result;
675}
676
677date_t
678date_t::operator -(s64 const other) const
679{
680 date_t result(d);
681 result += -other;
682 return result;
683}
684
685s64
686date_t::operator -(date_t const & other) const
687{
688 return d - other.d;
689}
690
691
692// Local Variables:
693// mode: C++
694// fill-column: 76
695// c-file-style: "gnu"
696// indent-tabs-mode: nil
697// End:
698// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status