monotone

monotone Mtn Source Tree

Root/dates.cc

1// Copyright (C) 2007 Zack Weinberg <zackw@panix.com>
2//
3// This program is made available under the GNU GPL version 2.0 or
4// greater. See the accompanying file COPYING for details.
5//
6// This program is distributed WITHOUT ANY WARRANTY; without even the
7// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
8// PURPOSE.
9
10#include "base.hh"
11#include "dates.hh"
12
13#include <ctime>
14#include <climits>
15
16using std::string;
17
18// Writing a 64-bit constant is tricky. We cannot use the macros that
19// <stdint.h> provides in C99 (UINT64_C, or even UINT64_MAX) because those
20// macros are not in C++'s version of <stdint.h>. std::numeric_limits<u64>
21// cannot be used directly, so we have to resort to #ifdef chains on the old
22// skool C limits macros. BOOST_STATIC_ASSERT is defined in a way that
23// doesn't let us use std::numeric_limits<u64>::max(), so we have to
24// postpone checking it until runtime (date_t::from_unix_epoch), bleah.
25// However, the check will be optimized out, and the unit tests exercise it.
// Pick the first unsigned type that is wider than unsigned int and treat
// it as u64.  PROBABLE_U64_MAX is that type's maximum value, and u64_C(x)
// pastes the matching literal suffix onto x.
#if defined(ULONG_MAX) && (ULONG_MAX > UINT_MAX)
  // unsigned long
  #define PROBABLE_U64_MAX ULONG_MAX
  #define u64_C(x) x##UL
#elif defined(ULLONG_MAX) && (ULLONG_MAX > UINT_MAX)
  // unsigned long long, standard spelling of the limit macro
  #define PROBABLE_U64_MAX ULLONG_MAX
  #define u64_C(x) x##ULL
#elif defined(ULONG_LONG_MAX) && (ULONG_LONG_MAX > UINT_MAX)
  // unsigned long long, alternative spelling of the limit macro
  #define PROBABLE_U64_MAX ULONG_LONG_MAX
  #define u64_C(x) x##ULL
#else
  #error "How do I write a constant of type u64?"
#endif
38
39const string &
40date_t::as_iso_8601_extended() const
41{
42 I(this->valid());
43 return d;
44}
45
46std::ostream &
47operator<< (std::ostream & o, date_t const & d)
48{
49 return o << d.as_iso_8601_extended();
50}
51
52template <> void
53dump(date_t const & d, std::string & s)
54{
55 s = d.as_iso_8601_extended();
56}
57
58date_t
59date_t::now()
60{
61 using std::time_t;
62 using std::time;
63 using std::tm;
64 using std::gmtime;
65 using std::strftime;
66
67 time_t t = time(0);
68 struct tm b = *gmtime(&t);
69
70 // in CE 10000, you will need to increase the size of 'buf'.
71 I(b.tm_year <= 9999);
72
73 char buf[20];
74 strftime(buf, sizeof buf, "%Y-%m-%dT%H:%M:%S", &b);
75 return date_t(string(buf));
76}
77
78// The Unix epoch is 1970-01-01T00:00:00 (in UTC). As we cannot safely
79// assume that the system's epoch is the Unix epoch, we implement the
80// conversion to broken-down time by hand instead of relying on gmtime().
81// The algorithm below has been tested on one value from every day in the
82// range [1970-01-01T00:00:00, 36812-02-20T00:36:16) -- that is, [0, 2**40).
83//
84// Unix time_t values are a linear count of seconds since the epoch,
85// and should be interpreted according to the Gregorian calendar:
86//
87// - There are 60 seconds in a minute, 3600 seconds in an hour,
88// 86400 seconds in a day.
89// - Years not divisible by 4 have 365 days, or 31536000 seconds.
90// - Years divisible by 4 have 366 days, or 31622400 seconds, except ...
91// - Years divisible by 100 have only 365 days, except ...
92// - Years divisible by 400 have 366 days.
93//
94// The last two rules are the Gregorian correction to the Julian calendar.
95// We make no attempt to handle leap seconds.
96
// Lengths of the calendar units used by the conversion code, in seconds.
// YEAR is a 365-day year and LEAP a 366-day year.
unsigned int const MIN  = 60;
unsigned int const HOUR = 60 * MIN;
unsigned int const DAY  = 24 * HOUR;
unsigned int const YEAR = 365 * DAY;
unsigned int const LEAP = 366 * DAY;
102
// Number of days in each month, indexed from 0 (January) to 11
// (December).  February holds its non-leap length; users of the table
// add the extra day themselves in leap years.
unsigned char const MONTHS[] = {
  31, 28, 31,   // jan, feb (non-leap), mar
  30, 31, 30,   // apr, may, jun
  31, 31, 30,   // jul, aug, sep
  31, 30, 31,   // oct, nov, dec
};
117
118
// True if YEAR has 366 days under the Gregorian rules: divisible by 4,
// except that century years count only when divisible by 400.
inline bool
is_leap_year(unsigned int year)
{
  if (year % 4 != 0)
    return false;
  if (year % 100 != 0)
    return true;
  return year % 400 == 0;
}
125inline u32
126secs_in_year(unsigned int year)
127{
128 return is_leap_year(year) ? LEAP : YEAR;
129}
130
131date_t
132date_t::from_unix_epoch(u64 t)
133{
134 // these types hint to the compiler that narrowing divides are safe
135 u64 yearbeg;
136 u32 year;
137 u32 month;
138 u32 day;
139 u32 secofday;
140 u16 hour;
141 u16 secofhour;
142 u8 min;
143 u8 sec;
144
145 // validate our assumptions about which basic type is u64 (see above).
146 I(PROBABLE_U64_MAX == std::numeric_limits<u64>::max());
147
148 // time_t values after this point will overflow a signed 32-bit year
149 // counter. 'year' above is unsigned, but the system's struct tm almost
150 // certainly uses a signed tm_year; it is best to be consistent.
151 I(t <= u64_C(67767976233532799));
152
153 // There are 31556952 seconds (365d 5h 43m 12s) in the average Gregorian
154 // year. This will therefore approximate the correct year (minus 1970).
155 // It may be off in either direction, but by no more than one year
156 // (empirically tested for every year from 1970 to 2**32 - 1).
157 year = t/31556952;
158
159 // Given the above approximation, recalculate the _exact_ number of
160 // seconds to the beginning of that year. For this to work correctly
161 // (i.e. for the year/4, year/100, year/400 terms to increment exactly
162 // when they ought to) it is necessary to count years from 1601 (as if the
163 // Gregorian calendar had been in effect at that time) and then correct
164 // the final number of seconds back to the 1970 epoch.
165 year += 369;
166
167 yearbeg = (widen<u64,u32>(year)*365 + year/4 - year/100 + year/400)*DAY;
168 yearbeg -= (widen<u64,u32>(369)*365 + 369/4 - 369/100 + 369/400)*DAY;
169
170 // *now* we want year to have its true value.
171 year += 1601;
172
173 // Linear search for the range of seconds that really contains t.
174 // At most one of these loops should iterate, and only once.
175 while (yearbeg > t)
176 yearbeg -= secs_in_year(--year);
177 while (yearbeg + secs_in_year(year) <= t)
178 yearbeg += secs_in_year(year++);
179
180 t -= yearbeg;
181
182 // <yakko> Now, the months digit!
183 month = 0;
184 for (;;)
185 {
186 unsigned int this_month = MONTHS[month] * DAY;
187 if (month == 1 && is_leap_year(year))
188this_month += DAY;
189 if (t < this_month)
190break;
191
192 t -= this_month;
193 month++;
194 L(FL("from_unix_epoch: month >= %u, t now %llu") % month % t);
195 I(month < 12);
196 }
197
198 // the rest is straightforward.
199 day = t / DAY;
200 secofday = t % DAY;
201
202 hour = secofday / HOUR;
203 secofhour = secofday % HOUR;
204
205 min = secofhour / MIN;
206 sec = secofhour % MIN;
207
208 // the widen<>s here are necessary because boost::format *ignores the
209 // format specification* and prints u8s as characters.
210 return date_t((FL("%u-%02u-%02uT%02u:%02u:%02u")
211 % year % (month + 1) % (day + 1)
212 % hour % widen<u32,u8>(min) % widen<u32,u8>(sec)).str());
213}
214
215// We might want to consider teaching this routine more time formats.
216// gnulib has a rather nice date parser, except that it requires Bison
217// (not even just yacc).
218
219date_t
220date_t::from_string(string const & s)
221{
222 try
223 {
224 string d = s;
225 size_t i = d.size() - 1; // last character of the array
226
227 // seconds
228 N(d.at(i) >= '0' && d.at(i) <= '9'
229 && d.at(i-1) >= '0' && d.at(i-1) <= '5',
230 F("unrecognized date (monotone only understands ISO 8601 format)"));
231 i -= 2;
232
233 // optional colon
234 if (d.at(i) == ':')
235 i--;
236 else
237 d.insert(i+1, 1, ':');
238
239 // minutes
240 N(d.at(i) >= '0' && d.at(i) <= '9'
241 && d.at(i-1) >= '0' && d.at(i-1) <= '5',
242 F("unrecognized date (monotone only understands ISO 8601 format)"));
243 i -= 2;
244
245 // optional colon
246 if (d.at(i) == ':')
247 i--;
248 else
249 d.insert(i+1, 1, ':');
250
251 // hours
252 N((d.at(i-1) >= '0' && d.at(i-1) <= '1'
253 && d.at(i) >= '0' && d.at(i) <= '9')
254 || (d.at(i-1) == '2' && d.at(i) >= '0' && d.at(i) <= '3'),
255 F("unrecognized date (monotone only understands ISO 8601 format)"));
256 i -= 2;
257
258 // 'T' is required at this point; we also accept a space
259 N(d.at(i) == 'T' || d.at(i) == ' ',
260 F("unrecognized date (monotone only understands ISO 8601 format)"));
261
262 if (d.at(i) == ' ')
263 d.at(i) = 'T';
264 i--;
265
266 // day
267 u8 day;
268 N(d.at(i-1) >= '0' && d.at(i-1) <= '3'
269 && d.at(i) >= '0' && d.at(i) <= '9',
270 F("unrecognized date (monotone only understands ISO 8601 format)"));
271 day = (d.at(i-1) - '0')*10 + (d.at(i) - '0');
272 i -= 2;
273
274 // optional dash
275 if (d.at(i) == '-')
276 i--;
277 else
278 d.insert(i+1, 1, '-');
279
280 // month
281 u8 month;
282 N(d.at(i-1) >= '0' && d.at(i-1) <= '1'
283 && d.at(i) >= '0' && d.at(i) <= '9',
284 F("unrecognized date (monotone only understands ISO 8601 format)"));
285 month = (d.at(i-1) - '0')*10 + (d.at(i) - '0');
286 N(month >= 1 && month <= 12,
287 F("month out of range in '%s'") % d);
288 i -= 2;
289
290 // optional dash
291 if (d.at(i) == '-')
292 i--;
293 else
294 d.insert(i+1, 1, '-');
295
296 // year
297 N(i >= 3,
298 F("unrecognized date (monotone only understands ISO 8601 format)"));
299
300 // this counts down through zero and stops when it wraps around
301 // (size_t being unsigned)
302 u32 year = 0;
303 u32 digit = 1;
304 while (i < d.size())
305 {
306 N(d.at(i) >= '0' && d.at(i) <= '9',
307 F("unrecognized date (monotone only understands ISO 8601 format)"));
308 year += (d.at(i) - '0')*digit;
309 i--;
310 digit *= 10;
311 }
312
313 N(year >= 1970,
314 F("date too early (monotone only goes back to 1970-01-01T00:00:00)"));
315
316 u8 mdays;
317 if (month == 2 && is_leap_year(year))
318 mdays = MONTHS[month-1] + 1;
319 else
320 mdays = MONTHS[month-1];
321
322 N(day >= 1 && day <= mdays,
323 F("day out of range for its month in '%s'") % d);
324
325 return date_t(d);
326 }
327 catch (std::out_of_range)
328 {
329 N(false,
330 F("unrecognized date (monotone only understands ISO 8601 format)"));
331 }
332}
333
334#ifdef BUILD_UNIT_TESTS
335#include "unit_tests.hh"
336
337UNIT_TEST(date, from_string)
338{
339#define OK(x,y) UNIT_TEST_CHECK(date_t::from_string(x).as_iso_8601_extended() \
340 == (y))
341#define NO(x) UNIT_TEST_CHECK_THROW(date_t::from_string(x), informative_failure)
342
343 // canonical format
344 OK("2007-03-01T18:41:13", "2007-03-01T18:41:13");
345 // squashed format
346 OK("20070301T184113", "2007-03-01T18:41:13");
347 // space between date and time
348 OK("2007-03-01 18:41:13", "2007-03-01T18:41:13");
349 // squashed, space
350 OK("20070301 184113", "2007-03-01T18:41:13");
351 // more than four digits in the year
352 OK("120070301T184113", "12007-03-01T18:41:13");
353
354 // inappropriate character at every possible position
355 NO("x007-03-01T18:41:13");
356 NO("2x07-03-01T18:41:13");
357 NO("20x7-03-01T18:41:13");
358 NO("200x-03-01T18:41:13");
359 NO("2007x03-01T18:41:13");
360 NO("2007-x3-01T18:41:13");
361 NO("2007-0x-01T18:41:13");
362 NO("2007-03x01T18:41:13");
363 NO("2007-03-x1T18:41:13");
364 NO("2007-03-0xT18:41:13");
365 NO("2007-03-01x18:41:13");
366 NO("2007-03-01Tx8:41:13");
367 NO("2007-03-01T1x:41:13");
368 NO("2007-03-01T18x41:13");
369 NO("2007-03-01T18:x1:13");
370 NO("2007-03-01T18:4x:13");
371 NO("2007-03-01T18:41x13");
372 NO("2007-03-01T18:41:x3");
373 NO("2007-03-01T18:41:1x");
374
375 NO("x0070301T184113");
376 NO("2x070301T184113");
377 NO("20x70301T184113");
378 NO("200x0301T184113");
379 NO("2007x301T184113");
380 NO("20070x01T184113");
381 NO("200703x1T184113");
382 NO("2007030xT184113");
383 NO("20070301x184113");
384 NO("20070301Tx84113");
385 NO("20070301T1x4113");
386 NO("20070301T18x113");
387 NO("20070301T184x13");
388 NO("20070301T1841x3");
389 NO("20070301T18411x");
390
391 // two digit years are not accepted
392 NO("07-03-01T18:41:13");
393
394 // components out of range
395 NO("1969-03-01T18:41:13");
396
397 NO("2007-00-01T18:41:13");
398 NO("2007-13-01T18:41:13");
399
400 NO("2007-01-00T18:41:13");
401 NO("2007-01-32T18:41:13");
402 NO("2007-02-29T18:41:13");
403 NO("2007-03-32T18:41:13");
404 NO("2007-04-31T18:41:13");
405 NO("2007-05-32T18:41:13");
406 NO("2007-06-31T18:41:13");
407 NO("2007-07-32T18:41:13");
408 NO("2007-08-32T18:41:13");
409 NO("2007-09-31T18:41:13");
410 NO("2007-10-32T18:41:13");
411 NO("2007-11-31T18:41:13");
412 NO("2007-03-32T18:41:13");
413
414 NO("2007-03-01T24:41:13");
415 NO("2007-03-01T18:60:13");
416 NO("2007-03-01T18:41:60");
417
418 // leap year February
419 OK("2008-02-29T18:41:13", "2008-02-29T18:41:13");
420 NO("2008-02-30T18:41:13");
421
422 // maybe we should support these, but we don't
423 NO("2007-03-01");
424 NO("18:41");
425 NO("18:41:13");
426 NO("Thu Mar 1 18:41:13 PST 2007");
427 NO("Thu, 01 Mar 2007 18:47:22");
428 NO("Thu, 01 Mar 2007 18:47:22 -0800");
429 NO("torsdag, mars 01, 2007, 18.50.10");
430 // et cetera
431#undef OK
432#undef NO
433}
434
435UNIT_TEST(date, from_unix_epoch)
436{
437#define OK(x,y) do { \
438 string s_ = date_t::from_unix_epoch(x).as_iso_8601_extended(); \
439 L(FL("from_unix_epoch: %lu -> %s") % (x) % s_); \
440 UNIT_TEST_CHECK(s_ == (y)); \
441 } while (0)
442
443 // every month boundary in 1970
444 OK(0, "1970-01-01T00:00:00");
445 OK(2678399, "1970-01-31T23:59:59");
446 OK(2678400, "1970-02-01T00:00:00");
447 OK(5097599, "1970-02-28T23:59:59");
448 OK(5097600, "1970-03-01T00:00:00");
449 OK(7775999, "1970-03-31T23:59:59");
450 OK(7776000, "1970-04-01T00:00:00");
451 OK(10367999, "1970-04-30T23:59:59");
452 OK(10368000, "1970-05-01T00:00:00");
453 OK(13046399, "1970-05-31T23:59:59");
454 OK(13046400, "1970-06-01T00:00:00");
455 OK(15638399, "1970-06-30T23:59:59");
456 OK(15638400, "1970-07-01T00:00:00");
457 OK(18316799, "1970-07-31T23:59:59");
458 OK(18316800, "1970-08-01T00:00:00");
459 OK(20995199, "1970-08-31T23:59:59");
460 OK(20995200, "1970-09-01T00:00:00");
461 OK(23587199, "1970-09-30T23:59:59");
462 OK(23587200, "1970-10-01T00:00:00");
463 OK(26265599, "1970-10-31T23:59:59");
464 OK(26265600, "1970-11-01T00:00:00");
465 OK(28857599, "1970-11-30T23:59:59");
466 OK(28857600, "1970-12-01T00:00:00");
467 OK(31535999, "1970-12-31T23:59:59");
468 OK(31536000, "1971-01-01T00:00:00");
469
470 // every month boundary in 1972 (an ordinary leap year)
471 OK(63071999, "1971-12-31T23:59:59");
472 OK(63072000, "1972-01-01T00:00:00");
473 OK(65750399, "1972-01-31T23:59:59");
474 OK(65750400, "1972-02-01T00:00:00");
475 OK(68255999, "1972-02-29T23:59:59");
476 OK(68256000, "1972-03-01T00:00:00");
477 OK(70934399, "1972-03-31T23:59:59");
478 OK(70934400, "1972-04-01T00:00:00");
479 OK(73526399, "1972-04-30T23:59:59");
480 OK(73526400, "1972-05-01T00:00:00");
481 OK(76204799, "1972-05-31T23:59:59");
482 OK(76204800, "1972-06-01T00:00:00");
483 OK(78796799, "1972-06-30T23:59:59");
484 OK(78796800, "1972-07-01T00:00:00");
485 OK(81475199, "1972-07-31T23:59:59");
486 OK(81475200, "1972-08-01T00:00:00");
487 OK(84153599, "1972-08-31T23:59:59");
488 OK(84153600, "1972-09-01T00:00:00");
489 OK(86745599, "1972-09-30T23:59:59");
490 OK(86745600, "1972-10-01T00:00:00");
491 OK(89423999, "1972-10-31T23:59:59");
492 OK(89424000, "1972-11-01T00:00:00");
493 OK(92015999, "1972-11-30T23:59:59");
494 OK(92016000, "1972-12-01T00:00:00");
495 OK(94694399, "1972-12-31T23:59:59");
496 OK(94694400, "1973-01-01T00:00:00");
497
498 // every month boundary in 2000 (a leap year per rule 5)
499 OK(946684799, "1999-12-31T23:59:59");
500 OK(946684800, "2000-01-01T00:00:00");
501 OK(949363199, "2000-01-31T23:59:59");
502 OK(949363200, "2000-02-01T00:00:00");
503 OK(951868799, "2000-02-29T23:59:59");
504 OK(951868800, "2000-03-01T00:00:00");
505 OK(954547199, "2000-03-31T23:59:59");
506 OK(954547200, "2000-04-01T00:00:00");
507 OK(957139199, "2000-04-30T23:59:59");
508 OK(957139200, "2000-05-01T00:00:00");
509 OK(959817599, "2000-05-31T23:59:59");
510 OK(959817600, "2000-06-01T00:00:00");
511 OK(962409599, "2000-06-30T23:59:59");
512 OK(962409600, "2000-07-01T00:00:00");
513 OK(965087999, "2000-07-31T23:59:59");
514 OK(965088000, "2000-08-01T00:00:00");
515 OK(967766399, "2000-08-31T23:59:59");
516 OK(967766400, "2000-09-01T00:00:00");
517 OK(970358399, "2000-09-30T23:59:59");
518 OK(970358400, "2000-10-01T00:00:00");
519 OK(973036799, "2000-10-31T23:59:59");
520 OK(973036800, "2000-11-01T00:00:00");
521 OK(975628799, "2000-11-30T23:59:59");
522 OK(975628800, "2000-12-01T00:00:00");
523 OK(978307199, "2000-12-31T23:59:59");
524 OK(978307200, "2001-01-01T00:00:00");
525
526 // every month boundary in 2100 (a normal year per rule 4)
527 OK(u64_C(4102444800), "2100-01-01T00:00:00");
528 OK(u64_C(4105123199), "2100-01-31T23:59:59");
529 OK(u64_C(4105123200), "2100-02-01T00:00:00");
530 OK(u64_C(4107542399), "2100-02-28T23:59:59");
531 OK(u64_C(4107542400), "2100-03-01T00:00:00");
532 OK(u64_C(4110220799), "2100-03-31T23:59:59");
533 OK(u64_C(4110220800), "2100-04-01T00:00:00");
534 OK(u64_C(4112812799), "2100-04-30T23:59:59");
535 OK(u64_C(4112812800), "2100-05-01T00:00:00");
536 OK(u64_C(4115491199), "2100-05-31T23:59:59");
537 OK(u64_C(4115491200), "2100-06-01T00:00:00");
538 OK(u64_C(4118083199), "2100-06-30T23:59:59");
539 OK(u64_C(4118083200), "2100-07-01T00:00:00");
540 OK(u64_C(4120761599), "2100-07-31T23:59:59");
541 OK(u64_C(4120761600), "2100-08-01T00:00:00");
542 OK(u64_C(4123439999), "2100-08-31T23:59:59");
543 OK(u64_C(4123440000), "2100-09-01T00:00:00");
544 OK(u64_C(4126031999), "2100-09-30T23:59:59");
545 OK(u64_C(4126032000), "2100-10-01T00:00:00");
546 OK(u64_C(4128710399), "2100-10-31T23:59:59");
547 OK(u64_C(4128710400), "2100-11-01T00:00:00");
548 OK(u64_C(4131302399), "2100-11-30T23:59:59");
549 OK(u64_C(4131302400), "2100-12-01T00:00:00");
550 OK(u64_C(4133980799), "2100-12-31T23:59:59");
551
552 // limit of a (signed) 32-bit year counter
553 OK(u64_C(67767976233532799), "2147483647-12-31T23:59:59");
554 UNIT_TEST_CHECK_THROW(date_t::from_unix_epoch(u64_C(67768036191676800)),
555 std::logic_error);
556
557#undef OK
558}
559
560#endif
561
562// Local Variables:
563// mode: C++
564// fill-column: 76
565// c-file-style: "gnu"
566// indent-tabs-mode: nil
567// End:
568// vim: et:sw=2:sts=2:ts=2:cino=>2s,{s,\:s,+s,t0,g0,^-2,e-2,n-2,p2s,(0,=s:

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status