Ruby 2.7.2p137 (2020-10-01 revision 5445e0435260b449decf2ac16f9d09bae3cafe72)
date_strptime.c
Go to the documentation of this file.
/*
  date_strptime.c: Coded by Tadayoshi Funaba 2011,2012
*/
4 
5 #include "ruby.h"
6 #include "ruby/encoding.h"
7 #include "ruby/re.h"
8 #include <ctype.h>
9 
/* Weekday names: indices 0-6 are the full names starting with Sunday,
 * indices 7-13 the three-letter forms in the same order, so the matcher
 * can derive the weekday number as (index % 7). */
static const char *const day_names[] = {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday",
    "Sun", "Mon", "Tue", "Wed",
    "Thu", "Fri", "Sat"
};

/* Month names: indices 0-11 are the full names, 12-23 the three-letter
 * forms, so the month number is (index % 12) + 1.  Full names come first
 * so that e.g. "June" is tried before "Jun". */
static const char *const month_names[] = {
    "January", "February", "March", "April",
    "May", "June", "July", "August", "September",
    "October", "November", "December",
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};

/* Meridian indicators; even indices are the "am" forms, odd the "pm" forms. */
static const char *const merid_names[] = {
    "am", "pm",
    "a.m.", "p.m."
};

/* Colon-prefixed zone conversions; entry i starts with i + 1 colons. */
static const char *const extz_pats[] = {
    ":z",
    "::z",
    ":::z"
};
35 
/* Element count of a true array (must not be applied to a pointer). */
#define sizeof_array(o) (sizeof(o) / sizeof((o)[0]))

/* Arithmetic on Ruby values, dispatched through rb_funcall.  Operators that
 * are a single character pass that character directly as the method ID. */
#define f_negate(x) rb_funcall(x, rb_intern("-@"), 0)
#define f_add(x,y) rb_funcall(x, '+', 1, y)
#define f_sub(x,y) rb_funcall(x, '-', 1, y)
#define f_mul(x,y) rb_funcall(x, '*', 1, y)
#define f_div(x,y) rb_funcall(x, '/', 1, y)
#define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y)
#define f_mod(x,y) rb_funcall(x, '%', 1, y)
#define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y)

/* Comparisons; each expands to an rb_funcall, so the result is a VALUE. */
#define f_lt_p(x,y) rb_funcall(x, '<', 1, y)
#define f_gt_p(x,y) rb_funcall(x, '>', 1, y)
#define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y)
#define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y)

/* Regexp / MatchData helpers: r.match(s), o[i], o.end(i). */
#define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s)
#define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i)
#define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i)

/* True when c is an explicit sign character. */
#define issign(c) ((c) == '-' || (c) == '+')
57 
/*
 * Report whether the format text at `s` starts with a numeric item: either
 * a literal decimal digit, or a '%' conversion (optionally carrying an 'E'
 * or 'O' modifier) whose conversion character is one of the listed letters
 * or is itself a digit.
 *
 * Returns 1 for a numeric item and 0 otherwise, including for an empty
 * string or a lone "%".
 */
static int
num_pattern_p(const char *s)
{
    if (isdigit((unsigned char)*s))
        return 1;
    if (*s != '%')
        return 0;

    s++;
    if (*s == 'E' || *s == 'O')
        s++;

    /* strchr() would also match the terminating NUL, so rule it out first. */
    if (*s == '\0')
        return 0;
    if (strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) != NULL)
        return 1;
    return isdigit((unsigned char)*s) ? 1 : 0;
}

/* Is the format item right after the current conversion character numeric?
 * Expects `fmt` and `fi` to be in scope where the macro is expanded. */
#define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1])
76 
77 static long
78 read_digits(const char *s, VALUE *n, size_t width)
79 {
80  size_t l;
81 
82  if (!width)
83  return 0;
84 
85  l = 0;
86  while (ISDIGIT(s[l])) {
87  if (++l == width) break;
88  }
89 
90  if (l == 0)
91  return 0;
92 
93  if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) {
94  const char *os = s;
95  long v;
96 
97  v = 0;
98  while ((size_t)(s - os) < l) {
99  v *= 10;
100  v += *s - '0';
101  s++;
102  }
103  if (os == s)
104  return 0;
105  *n = LONG2NUM(v);
106  return l;
107  }
108  else {
109  VALUE vbuf = 0;
110  char *s2 = ALLOCV_N(char, vbuf, l + 1);
111  memcpy(s2, s, l);
112  s2[l] = '\0';
113  *n = rb_cstr_to_inum(s2, 10, 0);
114  ALLOCV_END(vbuf);
115  return l;
116  }
117 }
118 
/* Accessors for the result hash; each expects a VALUE named `hash` in scope.
 * The key is interned and wrapped as a Symbol.  `k""` relies on adjacent
 * string-literal concatenation, so the key expression must end in a string
 * literal (and therefore cannot be parenthesized here). */
#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k"")), v)
#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k"")))
#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k"")))

/* Record a failure under "_fail" and return 0 from the enclosing function. */
#define fail() \
do { \
    set_hash("_fail", Qtrue); \
    return 0; \
} while (0)

/* True once a non-nil "_fail" entry is present in the hash. */
#define fail_p() (!NIL_P(ref_hash("_fail")))

/* Read up to `w` digits at str[si] into `n` and advance `si` past them;
 * calls fail() when no digit is present.  Expects `str` and `si` in scope. */
#define READ_DIGITS(n,w) \
do { \
    size_t l; \
    l = read_digits(&str[si], &(n), w); \
    if (l == 0) \
        fail(); \
    si += l; \
} while (0)

/* READ_DIGITS with LONG_MAX as the width limit. */
#define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX)
141 
142 static int
143 valid_range_p(VALUE v, int a, int b)
144 {
145  if (FIXNUM_P(v)) {
146  int vi = FIX2INT(v);
147  return !(vi < a || vi > b);
148  }
149  return !(f_lt_p(v, INT2NUM(a)) || f_gt_p(v, INT2NUM(b)));
150 }
151 
/* Expand a composite conversion: match the string literal `subfmt` against
 * the remaining input by calling date__strptime_internal, return 0 from the
 * enclosing function if that recorded a failure, otherwise advance `si` by
 * the amount consumed.  `subfmt` must be a string literal because its length
 * is taken with sizeof.  Expects `str`, `slen`, `si` and `hash` in scope. */
#define recur(subfmt) \
do { \
    size_t l; \
    l = date__strptime_internal(&str[si], slen - si, \
                                subfmt, sizeof(subfmt) - 1, hash); \
    if (fail_p()) \
        return 0; \
    si += l; \
} while (0)
161 
163 
164 static size_t
165 date__strptime_internal(const char *str, size_t slen,
166  const char *fmt, size_t flen, VALUE hash)
167 {
168  size_t si, fi;
169  int c;
170 
171  si = fi = 0;
172 
173  while (fi < flen) {
174 
175  switch (fmt[fi]) {
176  case '%':
177 
178  again:
179  fi++;
180  c = fmt[fi];
181 
182  switch (c) {
183  case 'E':
184  if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1]))
185  goto again;
186  fi--;
187  goto ordinal;
188  case 'O':
189  if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1]))
190  goto again;
191  fi--;
192  goto ordinal;
193  case ':':
194  {
195  int i;
196 
197  for (i = 0; i < (int)sizeof_array(extz_pats); i++)
198  if (strncmp(extz_pats[i], &fmt[fi],
199  strlen(extz_pats[i])) == 0) {
200  fi += i;
201  goto again;
202  }
203  fail();
204  }
205 
206  case 'A':
207  case 'a':
208  {
209  int i;
210 
211  for (i = 0; i < (int)sizeof_array(day_names); i++) {
212  size_t l = strlen(day_names[i]);
213  if (strncasecmp(day_names[i], &str[si], l) == 0) {
214  si += l;
215  set_hash("wday", INT2FIX(i % 7));
216  goto matched;
217  }
218  }
219  fail();
220  }
221  case 'B':
222  case 'b':
223  case 'h':
224  {
225  int i;
226 
227  for (i = 0; i < (int)sizeof_array(month_names); i++) {
228  size_t l = strlen(month_names[i]);
229  if (strncasecmp(month_names[i], &str[si], l) == 0) {
230  si += l;
231  set_hash("mon", INT2FIX((i % 12) + 1));
232  goto matched;
233  }
234  }
235  fail();
236  }
237 
238  case 'C':
239  {
240  VALUE n;
241 
242  if (NUM_PATTERN_P())
243  READ_DIGITS(n, 2);
244  else
246  set_hash("_cent", n);
247  goto matched;
248  }
249 
250  case 'c':
251  recur("%a %b %e %H:%M:%S %Y");
252  goto matched;
253 
254  case 'D':
255  recur("%m/%d/%y");
256  goto matched;
257 
258  case 'd':
259  case 'e':
260  {
261  VALUE n;
262 
263  if (str[si] == ' ') {
264  si++;
265  READ_DIGITS(n, 1);
266  } else {
267  READ_DIGITS(n, 2);
268  }
269  if (!valid_range_p(n, 1, 31))
270  fail();
271  set_hash("mday", n);
272  goto matched;
273  }
274 
275  case 'F':
276  recur("%Y-%m-%d");
277  goto matched;
278 
279  case 'G':
280  {
281  VALUE n;
282 
283  if (NUM_PATTERN_P())
284  READ_DIGITS(n, 4);
285  else
287  set_hash("cwyear", n);
288  goto matched;
289  }
290 
291  case 'g':
292  {
293  VALUE n;
294 
295  READ_DIGITS(n, 2);
296  if (!valid_range_p(n, 0, 99))
297  fail();
298  set_hash("cwyear",n);
299  if (NIL_P(ref_hash("_cent")))
300  set_hash("_cent",
301  INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
302  goto matched;
303  }
304 
305  case 'H':
306  case 'k':
307  {
308  VALUE n;
309 
310  if (str[si] == ' ') {
311  si++;
312  READ_DIGITS(n, 1);
313  } else {
314  READ_DIGITS(n, 2);
315  }
316  if (!valid_range_p(n, 0, 24))
317  fail();
318  set_hash("hour", n);
319  goto matched;
320  }
321 
322  case 'I':
323  case 'l':
324  {
325  VALUE n;
326 
327  if (str[si] == ' ') {
328  si++;
329  READ_DIGITS(n, 1);
330  } else {
331  READ_DIGITS(n, 2);
332  }
333  if (!valid_range_p(n, 1, 12))
334  fail();
335  set_hash("hour", n);
336  goto matched;
337  }
338 
339  case 'j':
340  {
341  VALUE n;
342 
343  READ_DIGITS(n, 3);
344  if (!valid_range_p(n, 1, 366))
345  fail();
346  set_hash("yday", n);
347  goto matched;
348  }
349 
350  case 'L':
351  case 'N':
352  {
353  VALUE n;
354  int sign = 1;
355  size_t osi;
356 
357  if (issign(str[si])) {
358  if (str[si] == '-')
359  sign = -1;
360  si++;
361  }
362  osi = si;
363  if (NUM_PATTERN_P())
364  READ_DIGITS(n, c == 'L' ? 3 : 9);
365  else
367  if (sign == -1)
368  n = f_negate(n);
369  set_hash("sec_fraction",
371  f_expt(INT2FIX(10),
372  ULONG2NUM(si - osi))));
373  goto matched;
374  }
375 
376  case 'M':
377  {
378  VALUE n;
379 
380  READ_DIGITS(n, 2);
381  if (!valid_range_p(n, 0, 59))
382  fail();
383  set_hash("min", n);
384  goto matched;
385  }
386 
387  case 'm':
388  {
389  VALUE n;
390 
391  READ_DIGITS(n, 2);
392  if (!valid_range_p(n, 1, 12))
393  fail();
394  set_hash("mon", n);
395  goto matched;
396  }
397 
398  case 'n':
399  case 't':
400  recur(" ");
401  goto matched;
402 
403  case 'P':
404  case 'p':
405  {
406  int i;
407 
408  for (i = 0; i < 4; i++) {
409  size_t l = strlen(merid_names[i]);
410  if (strncasecmp(merid_names[i], &str[si], l) == 0) {
411  si += l;
412  set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12));
413  goto matched;
414  }
415  }
416  fail();
417  }
418 
419  case 'Q':
420  {
421  VALUE n;
422  int sign = 1;
423 
424  if (str[si] == '-') {
425  sign = -1;
426  si++;
427  }
429  if (sign == -1)
430  n = f_negate(n);
431  set_hash("seconds",
432  rb_rational_new2(n, INT2FIX(1000)));
433  goto matched;
434  }
435 
436  case 'R':
437  recur("%H:%M");
438  goto matched;
439 
440  case 'r':
441  recur("%I:%M:%S %p");
442  goto matched;
443 
444  case 'S':
445  {
446  VALUE n;
447 
448  READ_DIGITS(n, 2);
449  if (!valid_range_p(n, 0, 60))
450  fail();
451  set_hash("sec", n);
452  goto matched;
453  }
454 
455  case 's':
456  {
457  VALUE n;
458  int sign = 1;
459 
460  if (str[si] == '-') {
461  sign = -1;
462  si++;
463  }
465  if (sign == -1)
466  n = f_negate(n);
467  set_hash("seconds", n);
468  goto matched;
469  }
470 
471  case 'T':
472  recur("%H:%M:%S");
473  goto matched;
474 
475  case 'U':
476  case 'W':
477  {
478  VALUE n;
479 
480  READ_DIGITS(n, 2);
481  if (!valid_range_p(n, 0, 53))
482  fail();
483  set_hash(c == 'U' ? "wnum0" : "wnum1", n);
484  goto matched;
485  }
486 
487  case 'u':
488  {
489  VALUE n;
490 
491  READ_DIGITS(n, 1);
492  if (!valid_range_p(n, 1, 7))
493  fail();
494  set_hash("cwday", n);
495  goto matched;
496  }
497 
498  case 'V':
499  {
500  VALUE n;
501 
502  READ_DIGITS(n, 2);
503  if (!valid_range_p(n, 1, 53))
504  fail();
505  set_hash("cweek", n);
506  goto matched;
507  }
508 
509  case 'v':
510  recur("%e-%b-%Y");
511  goto matched;
512 
513  case 'w':
514  {
515  VALUE n;
516 
517  READ_DIGITS(n, 1);
518  if (!valid_range_p(n, 0, 6))
519  fail();
520  set_hash("wday", n);
521  goto matched;
522  }
523 
524  case 'X':
525  recur("%H:%M:%S");
526  goto matched;
527 
528  case 'x':
529  recur("%m/%d/%y");
530  goto matched;
531 
532  case 'Y':
533  {
534  VALUE n;
535  int sign = 1;
536 
537  if (issign(str[si])) {
538  if (str[si] == '-')
539  sign = -1;
540  si++;
541  }
542  if (NUM_PATTERN_P())
543  READ_DIGITS(n, 4);
544  else
546  if (sign == -1)
547  n = f_negate(n);
548  set_hash("year", n);
549  goto matched;
550  }
551 
552  case 'y':
553  {
554  VALUE n;
555  int sign = 1;
556 
557  READ_DIGITS(n, 2);
558  if (!valid_range_p(n, 0, 99))
559  fail();
560  if (sign == -1)
561  n = f_negate(n);
562  set_hash("year", n);
563  if (NIL_P(ref_hash("_cent")))
564  set_hash("_cent",
565  INT2FIX(f_ge_p(n, INT2FIX(69)) ? 19 : 20));
566  goto matched;
567  }
568 
569  case 'Z':
570  case 'z':
571  {
572  static const char pat_source[] =
573  "\\A("
574  "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
575  "|(?-i:[[:alpha:].\\s]+)(?:standard|daylight)\\s+time\\b"
576  "|(?-i:[[:alpha:]]+)(?:\\s+dst)?\\b"
577  ")";
578  static VALUE pat = Qnil;
579  VALUE m, b;
580 
581  if (NIL_P(pat)) {
582  pat = rb_reg_new(pat_source, sizeof pat_source - 1,
585  }
586 
587  b = rb_backref_get();
588  rb_match_busy(b);
589  m = f_match(pat, rb_usascii_str_new2(&str[si]));
590 
591  if (!NIL_P(m)) {
592  VALUE s, l, o;
593 
594  s = rb_reg_nth_match(1, m);
595  l = f_end(m, INT2FIX(0));
596  o = date_zone_to_diff(s);
597  si += NUM2LONG(l);
598  set_hash("zone", s);
599  set_hash("offset", o);
600  rb_backref_set(b);
601  goto matched;
602  }
603  rb_backref_set(b);
604  fail();
605  }
606 
607  case '%':
608  if (str[si] != '%')
609  fail();
610  si++;
611  goto matched;
612 
613  case '+':
614  recur("%a %b %e %H:%M:%S %Z %Y");
615  goto matched;
616 
617  default:
618  if (str[si] != '%')
619  fail();
620  si++;
621  if (fi < flen)
622  if (str[si] != fmt[fi])
623  fail();
624  si++;
625  goto matched;
626  }
627  case ' ':
628  case '\t':
629  case '\n':
630  case '\v':
631  case '\f':
632  case '\r':
633  while (isspace((unsigned char)str[si]))
634  si++;
635  fi++;
636  break;
637  default:
638  ordinal:
639  if (str[si] != fmt[fi])
640  fail();
641  si++;
642  fi++;
643  break;
644  matched:
645  fi++;
646  break;
647  }
648  }
649 
650  return si;
651 }
652 
653 VALUE
654 date__strptime(const char *str, size_t slen,
655  const char *fmt, size_t flen, VALUE hash)
656 {
657  size_t si;
658  VALUE cent, merid;
659 
660  si = date__strptime_internal(str, slen, fmt, flen, hash);
661 
662  if (slen > si) {
663  VALUE s;
664 
665  s = rb_usascii_str_new(&str[si], slen - si);
666  set_hash("leftover", s);
667  }
668 
669  if (fail_p())
670  return Qnil;
671 
672  cent = del_hash("_cent");
673  if (!NIL_P(cent)) {
674  VALUE year;
675 
676  year = ref_hash("cwyear");
677  if (!NIL_P(year))
678  set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100))));
679  year = ref_hash("year");
680  if (!NIL_P(year))
681  set_hash("year", f_add(year, f_mul(cent, INT2FIX(100))));
682  }
683 
684  merid = del_hash("_merid");
685  if (!NIL_P(merid)) {
686  VALUE hour;
687 
688  hour = ref_hash("hour");
689  if (!NIL_P(hour)) {
690  hour = f_mod(hour, INT2FIX(12));
691  set_hash("hour", f_add(hour, merid));
692  }
693  }
694 
695  return hash;
696 }
697 
/*
Local variables:
c-file-style: "ruby"
End:
*/
strncmp
int strncmp(const char *, const char *, size_t)
rb_match_busy
void rb_match_busy(VALUE)
Definition: re.c:1287
f_end
#define f_end(o, i)
Definition: date_strptime.c:54
FIX2INT
#define FIX2INT(x)
Definition: ruby.h:717
rb_gc_register_mark_object
void rb_gc_register_mark_object(VALUE obj)
Definition: gc.c:7066
ONIG_OPTION_IGNORECASE
#define ONIG_OPTION_IGNORECASE
Definition: onigmo.h:451
s2
const char * s2
Definition: rb_mjit_min_header-2.7.2.h:5489
ISDIGIT
#define ISDIGIT(c)
Definition: ruby.h:2312
int
__inline__ int
Definition: rb_mjit_min_header-2.7.2.h:2877
INT2FIX
#define INT2FIX(i)
Definition: ruby.h:263
strchr
char * strchr(char *, char)
re.h
i
uint32_t i
Definition: rb_mjit_min_header-2.7.2.h:5499
NUM2LONG
#define NUM2LONG(x)
Definition: ruby.h:679
VALUE
unsigned long VALUE
Definition: ruby.h:102
encoding.h
ref_hash
#define ref_hash(k)
Definition: date_strptime.c:120
issign
#define issign(c)
Definition: date_strptime.c:56
f_add
#define f_add(x, y)
Definition: date_strptime.c:39
READ_DIGITS_MAX
#define READ_DIGITS_MAX(n)
Definition: date_strptime.c:140
fail_p
#define fail_p()
Definition: date_strptime.c:129
CHAR_BIT
#define CHAR_BIT
Definition: ruby.h:227
rb_usascii_str_new
#define rb_usascii_str_new(str, len)
Definition: rb_mjit_min_header-2.7.2.h:6153
INT2NUM
#define INT2NUM(x)
Definition: ruby.h:1609
rb_cstr_to_inum
VALUE rb_cstr_to_inum(const char *str, int base, int badcheck)
Definition: bignum.c:4012
sizeof_array
#define sizeof_array(o)
Definition: date_strptime.c:36
set_hash
#define set_hash(k, v)
Definition: date_strptime.c:119
rb_reg_new
VALUE rb_reg_new(const char *, long, int)
Definition: re.c:2947
fmt
const VALUE int int int int int int VALUE char * fmt
Definition: rb_mjit_min_header-2.7.2.h:6497
strlen
size_t strlen(const char *)
del_hash
#define del_hash(k)
Definition: date_strptime.c:121
f_gt_p
#define f_gt_p(x, y)
Definition: date_strptime.c:48
LONG2NUM
#define LONG2NUM(x)
Definition: ruby.h:1644
rb_rational_new2
#define rb_rational_new2(x, y)
Definition: intern.h:180
ULONG2NUM
#define ULONG2NUM(x)
Definition: ruby.h:1645
READ_DIGITS
#define READ_DIGITS(n, w)
Definition: date_strptime.c:131
ALLOCV_END
#define ALLOCV_END(v)
Definition: ruby.h:1750
f_lt_p
#define f_lt_p(x, y)
Definition: date_strptime.c:47
ALLOCV_N
#define ALLOCV_N(type, v, n)
Definition: ruby.h:1749
f_match
#define f_match(r, s)
Definition: date_strptime.c:52
ruby.h
date_zone_to_diff
VALUE date_zone_to_diff(VALUE)
Definition: date_parse.c:410
rb_backref_get
VALUE rb_backref_get(void)
Definition: vm.c:1304
FIXNUM_P
#define FIXNUM_P(f)
Definition: ruby.h:396
strncasecmp
int strncasecmp(const char *, const char *, size_t) __attribute__((__pure__))
f_ge_p
#define f_ge_p(x, y)
Definition: date_strptime.c:50
rb_backref_set
void rb_backref_set(VALUE)
Definition: vm.c:1310
f_mod
#define f_mod(x, y)
Definition: date_strptime.c:44
str
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
f_mul
#define f_mul(x, y)
Definition: date_strptime.c:41
memcpy
void * memcpy(void *__restrict, const void *__restrict, size_t)
f_expt
#define f_expt(x, y)
Definition: date_strptime.c:45
NIL_P
#define NIL_P(v)
Definition: ruby.h:482
fail
#define fail()
Definition: date_strptime.c:123
recur
#define recur(fmt)
Definition: date_strptime.c:152
rb_reg_nth_match
VALUE rb_reg_nth_match(int, VALUE)
Definition: re.c:1706
v
int VALUE v
Definition: rb_mjit_min_header-2.7.2.h:12380
rb_usascii_str_new2
#define rb_usascii_str_new2
Definition: intern.h:909
Qnil
#define Qnil
Definition: ruby.h:469
f_negate
#define f_negate(x)
Definition: date_strptime.c:38
NUM_PATTERN_P
#define NUM_PATTERN_P()
Definition: date_strptime.c:75
date__strptime
VALUE date__strptime(const char *str, size_t slen, const char *fmt, size_t flen, VALUE hash)
n
const char size_t n
Definition: rb_mjit_min_header-2.7.2.h:5491