Ruby  2.7.2p137(2020-10-01revision5445e0435260b449decf2ac16f9d09bae3cafe72)
strscan.c
Go to the documentation of this file.
1 /*
2  $Id$
3 
4  Copyright (c) 1999-2006 Minero Aoki
5 
6  This program is free software.
7  You can distribute/modify this program under the terms of
8  the Ruby License. For details, see the file COPYING.
9 */
10 
11 #include "ruby/ruby.h"
12 #include "ruby/re.h"
13 #include "ruby/encoding.h"
14 
15 #ifdef RUBY_EXTCONF_H
16 # include RUBY_EXTCONF_H
17 #endif
18 
19 #ifdef HAVE_ONIG_REGION_MEMSIZE
20 extern size_t onig_region_memsize(const struct re_registers *regs);
21 #endif
22 
23 #include <stdbool.h>
24 
25 #define STRSCAN_VERSION "1.0.3"
26 
27 /* =======================================================================
28  Data Type Definitions
29  ======================================================================= */
30 
31 static VALUE StringScanner;
32 static VALUE ScanError;
33 static ID id_byteslice;
34 
35 struct strscanner
36 {
37  /* multi-purpose flags */
38  unsigned long flags;
39 #define FLAG_MATCHED (1 << 0)
40 
41  /* the string to scan */
43 
44  /* scan pointers */
45  long prev; /* legal only when MATCHED_P(s) */
46  long curr; /* always legal */
47 
48  /* the regexp register; legal only when MATCHED_P(s) */
49  struct re_registers regs;
50 
51  /* regexp used for last scan */
53 
54  /* anchor mode */
56 };
57 
58 #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
59 #define MATCHED(s) (s)->flags |= FLAG_MATCHED
60 #define CLEAR_MATCH_STATUS(s) (s)->flags &= ~FLAG_MATCHED
61 
62 #define S_PBEG(s) (RSTRING_PTR((s)->str))
63 #define S_LEN(s) (RSTRING_LEN((s)->str))
64 #define S_PEND(s) (S_PBEG(s) + S_LEN(s))
65 #define CURPTR(s) (S_PBEG(s) + (s)->curr)
66 #define S_RESTLEN(s) (S_LEN(s) - (s)->curr)
67 
/* True when the scan pointer of scanner `s' is at or beyond the end of the
   string being scanned.  Both operands are taken from the macro argument,
   so the macro does not depend on the caller's variable being named `p'. */
#define EOS_P(s) ((s)->curr >= RSTRING_LEN((s)->str))
69 
70 #define GET_SCANNER(obj,var) do {\
71  (var) = check_strscan(obj);\
72  if (NIL_P((var)->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");\
73 } while (0)
74 
75 /* =======================================================================
76  Function Prototypes
77  ======================================================================= */
78 
79 static inline long minl _((const long n, const long x));
80 static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
81 static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));
82 
83 static struct strscanner *check_strscan _((VALUE obj));
84 static void strscan_mark _((void *p));
85 static void strscan_free _((void *p));
86 static size_t strscan_memsize _((const void *p));
87 static VALUE strscan_s_allocate _((VALUE klass));
88 static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self));
89 static VALUE strscan_init_copy _((VALUE vself, VALUE vorig));
90 
91 static VALUE strscan_s_mustc _((VALUE self));
92 static VALUE strscan_terminate _((VALUE self));
93 static VALUE strscan_clear _((VALUE self));
94 static VALUE strscan_get_string _((VALUE self));
95 static VALUE strscan_set_string _((VALUE self, VALUE str));
96 static VALUE strscan_concat _((VALUE self, VALUE str));
97 static VALUE strscan_get_pos _((VALUE self));
98 static VALUE strscan_set_pos _((VALUE self, VALUE pos));
99 static VALUE strscan_do_scan _((VALUE self, VALUE regex,
100  int succptr, int getstr, int headonly));
101 static VALUE strscan_scan _((VALUE self, VALUE re));
102 static VALUE strscan_match_p _((VALUE self, VALUE re));
103 static VALUE strscan_skip _((VALUE self, VALUE re));
104 static VALUE strscan_check _((VALUE self, VALUE re));
105 static VALUE strscan_scan_full _((VALUE self, VALUE re,
106  VALUE succp, VALUE getp));
107 static VALUE strscan_scan_until _((VALUE self, VALUE re));
108 static VALUE strscan_skip_until _((VALUE self, VALUE re));
109 static VALUE strscan_check_until _((VALUE self, VALUE re));
110 static VALUE strscan_search_full _((VALUE self, VALUE re,
111  VALUE succp, VALUE getp));
112 static void adjust_registers_to_matched _((struct strscanner *p));
113 static VALUE strscan_getch _((VALUE self));
114 static VALUE strscan_get_byte _((VALUE self));
115 static VALUE strscan_getbyte _((VALUE self));
116 static VALUE strscan_peek _((VALUE self, VALUE len));
117 static VALUE strscan_peep _((VALUE self, VALUE len));
118 static VALUE strscan_unscan _((VALUE self));
119 static VALUE strscan_bol_p _((VALUE self));
120 static VALUE strscan_eos_p _((VALUE self));
121 static VALUE strscan_empty_p _((VALUE self));
122 static VALUE strscan_rest_p _((VALUE self));
123 static VALUE strscan_matched_p _((VALUE self));
124 static VALUE strscan_matched _((VALUE self));
125 static VALUE strscan_matched_size _((VALUE self));
126 static VALUE strscan_aref _((VALUE self, VALUE idx));
127 static VALUE strscan_pre_match _((VALUE self));
128 static VALUE strscan_post_match _((VALUE self));
129 static VALUE strscan_rest _((VALUE self));
130 static VALUE strscan_rest_size _((VALUE self));
131 
132 static VALUE strscan_inspect _((VALUE self));
133 static VALUE inspect1 _((struct strscanner *p));
134 static VALUE inspect2 _((struct strscanner *p));
135 
136 /* =======================================================================
137  Utils
138  ======================================================================= */
139 
140 static VALUE
141 str_new(struct strscanner *p, const char *ptr, long len)
142 {
143  VALUE str = rb_str_new(ptr, len);
144  rb_enc_copy(str, p->str);
145  return str;
146 }
147 
/* Return the smaller of two longs (`y' when they compare equal). */
static inline long
minl(const long x, const long y)
{
    if (x < y) {
        return x;
    }
    return y;
}
153 
154 static VALUE
155 extract_range(struct strscanner *p, long beg_i, long end_i)
156 {
157  if (beg_i > S_LEN(p)) return Qnil;
158  end_i = minl(end_i, S_LEN(p));
159  return str_new(p, S_PBEG(p) + beg_i, end_i - beg_i);
160 }
161 
162 static VALUE
163 extract_beg_len(struct strscanner *p, long beg_i, long len)
164 {
165  if (beg_i > S_LEN(p)) return Qnil;
166  len = minl(len, S_LEN(p) - beg_i);
167  return str_new(p, S_PBEG(p) + beg_i, len);
168 }
169 
170 /* =======================================================================
171  Constructor
172  ======================================================================= */
173 
174 static void
175 strscan_mark(void *ptr)
176 {
177  struct strscanner *p = ptr;
178  rb_gc_mark(p->str);
179 }
180 
181 static void
182 strscan_free(void *ptr)
183 {
184  struct strscanner *p = ptr;
185  onig_region_free(&(p->regs), 0);
186  ruby_xfree(p);
187 }
188 
189 static size_t
190 strscan_memsize(const void *ptr)
191 {
192  const struct strscanner *p = ptr;
193  size_t size = sizeof(*p) - sizeof(p->regs);
194 #ifdef HAVE_ONIG_REGION_MEMSIZE
195  size += onig_region_memsize(&p->regs);
196 #endif
197  return size;
198 }
199 
200 static const rb_data_type_t strscanner_type = {
201  "StringScanner",
202  {strscan_mark, strscan_free, strscan_memsize},
204 };
205 
206 static VALUE
207 strscan_s_allocate(VALUE klass)
208 {
209  struct strscanner *p;
210  VALUE obj = TypedData_Make_Struct(klass, struct strscanner, &strscanner_type, p);
211 
213  onig_region_init(&(p->regs));
214  p->str = Qnil;
215  return obj;
216 }
217 
218 /*
219  * call-seq:
220  * StringScanner.new(string, fixed_anchor: false)
221  * StringScanner.new(string, dup = false)
222  *
223  * Creates a new StringScanner object to scan over the given +string+.
224  *
225  * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
226  * the string. Otherwise, +\A+ always matches the current position.
227  *
228  * +dup+ argument is obsolete and not used now.
229  */
230 static VALUE
231 strscan_initialize(int argc, VALUE *argv, VALUE self)
232 {
233  struct strscanner *p;
234  VALUE str, options;
235 
236  p = check_strscan(self);
237  rb_scan_args(argc, argv, "11", &str, &options);
238  options = rb_check_hash_type(options);
239  if (!NIL_P(options)) {
240  VALUE fixed_anchor;
241  ID keyword_ids[1];
242  keyword_ids[0] = rb_intern("fixed_anchor");
243  rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
244  if (fixed_anchor == Qundef) {
245  p->fixed_anchor_p = false;
246  }
247  else {
248  p->fixed_anchor_p = RTEST(fixed_anchor);
249  }
250  }
251  else {
252  p->fixed_anchor_p = false;
253  }
254  StringValue(str);
255  p->str = str;
256 
257  return self;
258 }
259 
260 static struct strscanner *
261 check_strscan(VALUE obj)
262 {
263  return rb_check_typeddata(obj, &strscanner_type);
264 }
265 
266 /*
267  * call-seq:
268  * dup
269  * clone
270  *
271  * Duplicates a StringScanner object.
272  */
273 static VALUE
274 strscan_init_copy(VALUE vself, VALUE vorig)
275 {
276  struct strscanner *self, *orig;
277 
278  self = check_strscan(vself);
279  orig = check_strscan(vorig);
280  if (self != orig) {
281  self->flags = orig->flags;
282  self->str = orig->str;
283  self->prev = orig->prev;
284  self->curr = orig->curr;
285  if (rb_reg_region_copy(&self->regs, &orig->regs))
286  rb_memerror();
287  RB_GC_GUARD(vorig);
288  }
289 
290  return vself;
291 }
292 
293 /* =======================================================================
294  Instance Methods
295  ======================================================================= */
296 
297 /*
298  * call-seq: StringScanner.must_C_version
299  *
300  * This method is defined for backward compatibility.
301  */
302 static VALUE
303 strscan_s_mustc(VALUE self)
304 {
305  return self;
306 }
307 
308 /*
309  * Reset the scan pointer (index 0) and clear matching data.
310  */
311 static VALUE
312 strscan_reset(VALUE self)
313 {
314  struct strscanner *p;
315 
316  GET_SCANNER(self, p);
317  p->curr = 0;
319  return self;
320 }
321 
322 /*
323  * call-seq:
324  * terminate
325  * clear
326  *
327  * Sets the scan pointer to the end of the string and clear matching data.
328  */
329 static VALUE
330 strscan_terminate(VALUE self)
331 {
332  struct strscanner *p;
333 
334  GET_SCANNER(self, p);
335  p->curr = S_LEN(p);
337  return self;
338 }
339 
340 /*
341  * Equivalent to #terminate.
342  * This method is obsolete; use #terminate instead.
343  */
344 static VALUE
345 strscan_clear(VALUE self)
346 {
347  rb_warning("StringScanner#clear is obsolete; use #terminate instead");
348  return strscan_terminate(self);
349 }
350 
351 /*
352  * Returns the string being scanned.
353  */
354 static VALUE
355 strscan_get_string(VALUE self)
356 {
357  struct strscanner *p;
358 
359  GET_SCANNER(self, p);
360  return p->str;
361 }
362 
363 /*
364  * call-seq: string=(str)
365  *
366  * Changes the string being scanned to +str+ and resets the scanner.
367  * Returns +str+.
368  */
369 static VALUE
370 strscan_set_string(VALUE self, VALUE str)
371 {
372  struct strscanner *p = check_strscan(self);
373 
374  StringValue(str);
375  p->str = str;
376  p->curr = 0;
378  return str;
379 }
380 
381 /*
382  * call-seq:
383  * concat(str)
384  * <<(str)
385  *
386  * Appends +str+ to the string being scanned.
387  * This method does not affect scan pointer.
388  *
389  * s = StringScanner.new("Fri Dec 12 1975 14:39")
390  * s.scan(/Fri /)
391  * s << " +1000 GMT"
392  * s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
393  * s.scan(/Dec/) # -> "Dec"
394  */
395 static VALUE
396 strscan_concat(VALUE self, VALUE str)
397 {
398  struct strscanner *p;
399 
400  GET_SCANNER(self, p);
401  StringValue(str);
402  rb_str_append(p->str, str);
403  return self;
404 }
405 
406 /*
407  * Returns the byte position of the scan pointer. In the 'reset' position, this
408  * value is zero. In the 'terminated' position (i.e. the string is exhausted),
409  * this value is the bytesize of the string.
410  *
411  * In short, it's a 0-based index into bytes of the string.
412  *
413  * s = StringScanner.new('test string')
414  * s.pos # -> 0
415  * s.scan_until /str/ # -> "test str"
416  * s.pos # -> 8
417  * s.terminate # -> #<StringScanner fin>
418  * s.pos # -> 11
419  */
420 static VALUE
421 strscan_get_pos(VALUE self)
422 {
423  struct strscanner *p;
424 
425  GET_SCANNER(self, p);
426  return INT2FIX(p->curr);
427 }
428 
429 /*
430  * Returns the character position of the scan pointer. In the 'reset' position, this
431  * value is zero. In the 'terminated' position (i.e. the string is exhausted),
432  * this value is the size of the string.
433  *
434  * In short, it's a 0-based index into the string.
435  *
436  * s = StringScanner.new("abcädeföghi")
437  * s.charpos # -> 0
438  * s.scan_until(/ä/) # -> "abcä"
439  * s.pos # -> 5
440  * s.charpos # -> 4
441  */
442 static VALUE
443 strscan_get_charpos(VALUE self)
444 {
445  struct strscanner *p;
446  VALUE substr;
447 
448  GET_SCANNER(self, p);
449 
450  substr = rb_funcall(p->str, id_byteslice, 2, INT2FIX(0), INT2NUM(p->curr));
451 
452  return rb_str_length(substr);
453 }
454 
455 /*
456  * call-seq: pos=(n)
457  *
458  * Sets the byte position of the scan pointer.
459  *
460  * s = StringScanner.new('test string')
461  * s.pos = 7 # -> 7
462  * s.rest # -> "ring"
463  */
464 static VALUE
465 strscan_set_pos(VALUE self, VALUE v)
466 {
467  struct strscanner *p;
468  long i;
469 
470  GET_SCANNER(self, p);
471  i = NUM2INT(v);
472  if (i < 0) i += S_LEN(p);
473  if (i < 0) rb_raise(rb_eRangeError, "index out of range");
474  if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
475  p->curr = i;
476  return INT2NUM(i);
477 }
478 
479 static inline UChar *
480 match_target(struct strscanner *p)
481 {
482  if (p->fixed_anchor_p) {
483  return (UChar *)S_PBEG(p);
484  }
485  else
486  {
487  return (UChar *)CURPTR(p);
488  }
489 }
490 
491 static inline void
492 set_registers(struct strscanner *p, size_t length)
493 {
494  const int at = 0;
495  OnigRegion *regs = &(p->regs);
497  if (onig_region_set(regs, at, 0, 0)) return;
498  if (p->fixed_anchor_p) {
499  regs->beg[at] = p->curr;
500  regs->end[at] = p->curr + length;
501  }
502  else
503  {
504  regs->end[at] = length;
505  }
506 }
507 
508 static inline void
509 succ(struct strscanner *p)
510 {
511  if (p->fixed_anchor_p) {
512  p->curr = p->regs.end[0];
513  }
514  else
515  {
516  p->curr += p->regs.end[0];
517  }
518 }
519 
520 static inline long
521 last_match_length(struct strscanner *p)
522 {
523  if (p->fixed_anchor_p) {
524  return p->regs.end[0] - p->prev;
525  }
526  else
527  {
528  return p->regs.end[0];
529  }
530 }
531 
532 static inline long
533 adjust_register_position(struct strscanner *p, long position)
534 {
535  if (p->fixed_anchor_p) {
536  return position;
537  }
538  else {
539  return p->prev + position;
540  }
541 }
542 
543 static VALUE
544 strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
545 {
546  struct strscanner *p;
547 
548  if (headonly) {
549  if (!RB_TYPE_P(pattern, T_REGEXP)) {
550  StringValue(pattern);
551  }
552  }
553  else {
554  Check_Type(pattern, T_REGEXP);
555  }
556  GET_SCANNER(self, p);
557 
559  if (S_RESTLEN(p) < 0) {
560  return Qnil;
561  }
562 
563  if (RB_TYPE_P(pattern, T_REGEXP)) {
565  regex_t *re;
566  long ret;
567  int tmpreg;
568 
569  p->regex = pattern;
570  re = rb_reg_prepare_re(pattern, p->str);
571  tmpreg = re != RREGEXP_PTR(pattern);
572  if (!tmpreg) RREGEXP(pattern)->usecnt++;
573 
574  if (headonly) {
575  ret = onig_match(re,
576  match_target(p),
577  (UChar* )(CURPTR(p) + S_RESTLEN(p)),
578  (UChar* )CURPTR(p),
579  &(p->regs),
581  }
582  else {
583  ret = onig_search(re,
584  match_target(p),
585  (UChar* )(CURPTR(p) + S_RESTLEN(p)),
586  (UChar* )CURPTR(p),
587  (UChar* )(CURPTR(p) + S_RESTLEN(p)),
588  &(p->regs),
590  }
591  if (!tmpreg) RREGEXP(pattern)->usecnt--;
592  if (tmpreg) {
593  if (RREGEXP(pattern)->usecnt) {
594  onig_free(re);
595  }
596  else {
597  onig_free(RREGEXP_PTR(pattern));
598  RREGEXP_PTR(pattern) = re;
599  }
600  }
601 
602  if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
603  if (ret < 0) {
604  /* not matched */
605  return Qnil;
606  }
607  }
608  else {
609  rb_enc_check(p->str, pattern);
610  if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
611  return Qnil;
612  }
613  if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
614  return Qnil;
615  }
616  set_registers(p, RSTRING_LEN(pattern));
617  }
618 
619  MATCHED(p);
620  p->prev = p->curr;
621 
622  if (succptr) {
623  succ(p);
624  }
625  {
626  const long length = last_match_length(p);
627  if (getstr) {
628  return extract_beg_len(p, p->prev, length);
629  }
630  else {
631  return INT2FIX(length);
632  }
633  }
634 }
635 
636 /*
637  * call-seq: scan(pattern) => String
638  *
639  * Tries to match with +pattern+ at the current position. If there's a match,
640  * the scanner advances the "scan pointer" and returns the matched string.
641  * Otherwise, the scanner returns +nil+.
642  *
643  * s = StringScanner.new('test string')
644  * p s.scan(/\w+/) # -> "test"
645  * p s.scan(/\w+/) # -> nil
646  * p s.scan(/\s+/) # -> " "
647  * p s.scan("str") # -> "str"
648  * p s.scan(/\w+/) # -> "ing"
649  * p s.scan(/./) # -> nil
650  *
651  */
652 static VALUE
653 strscan_scan(VALUE self, VALUE re)
654 {
655  return strscan_do_scan(self, re, 1, 1, 1);
656 }
657 
658 /*
659  * call-seq: match?(pattern)
660  *
661  * Tests whether the given +pattern+ is matched from the current scan pointer.
662  * Returns the length of the match, or +nil+. The scan pointer is not advanced.
663  *
664  * s = StringScanner.new('test string')
665  * p s.match?(/\w+/) # -> 4
666  * p s.match?(/\w+/) # -> 4
667  * p s.match?("test") # -> 4
668  * p s.match?(/\s+/) # -> nil
669  */
670 static VALUE
671 strscan_match_p(VALUE self, VALUE re)
672 {
673  return strscan_do_scan(self, re, 0, 0, 1);
674 }
675 
676 /*
677  * call-seq: skip(pattern)
678  *
679  * Attempts to skip over the given +pattern+ beginning with the scan pointer.
680  * If it matches, the scan pointer is advanced to the end of the match, and the
681  * length of the match is returned. Otherwise, +nil+ is returned.
682  *
683  * It's similar to #scan, but without returning the matched string.
684  *
685  * s = StringScanner.new('test string')
686  * p s.skip(/\w+/) # -> 4
687  * p s.skip(/\w+/) # -> nil
688  * p s.skip(/\s+/) # -> 1
689  * p s.skip("st") # -> 2
690  * p s.skip(/\w+/) # -> 4
691  * p s.skip(/./) # -> nil
692  *
693  */
694 static VALUE
695 strscan_skip(VALUE self, VALUE re)
696 {
697  return strscan_do_scan(self, re, 1, 0, 1);
698 }
699 
700 /*
701  * call-seq: check(pattern)
702  *
703  * This returns the value that #scan would return, without advancing the scan
704  * pointer. The match register is affected, though.
705  *
706  * s = StringScanner.new("Fri Dec 12 1975 14:39")
707  * s.check /Fri/ # -> "Fri"
708  * s.pos # -> 0
709  * s.matched # -> "Fri"
710  * s.check /12/ # -> nil
711  * s.matched # -> nil
712  *
713  * Mnemonic: it "checks" to see whether a #scan will return a value.
714  */
715 static VALUE
716 strscan_check(VALUE self, VALUE re)
717 {
718  return strscan_do_scan(self, re, 0, 1, 1);
719 }
720 
721 /*
722  * call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
723  *
724  * Tests whether the given +pattern+ is matched from the current scan pointer.
725  * Advances the scan pointer if +advance_pointer_p+ is true.
726  * Returns the matched string if +return_string_p+ is true.
727  * The match register is affected.
728  *
729  * "full" means "#scan with full parameters".
730  */
731 static VALUE
732 strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
733 {
734  return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1);
735 }
736 
737 /*
738  * call-seq: scan_until(pattern)
739  *
740  * Scans the string _until_ the +pattern+ is matched. Returns the substring up
741  * to and including the end of the match, advancing the scan pointer to that
742  * location. If there is no match, +nil+ is returned.
743  *
744  * s = StringScanner.new("Fri Dec 12 1975 14:39")
745  * s.scan_until(/1/) # -> "Fri Dec 1"
746  * s.pre_match # -> "Fri Dec "
747  * s.scan_until(/XYZ/) # -> nil
748  */
749 static VALUE
750 strscan_scan_until(VALUE self, VALUE re)
751 {
752  return strscan_do_scan(self, re, 1, 1, 0);
753 }
754 
755 /*
756  * call-seq: exist?(pattern)
757  *
758  * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string,
759  * without advancing the scan pointer. This predicates whether a #scan_until
760  * will return a value.
761  *
762  * s = StringScanner.new('test string')
763  * s.exist? /s/ # -> 3
764  * s.scan /test/ # -> "test"
765  * s.exist? /s/ # -> 2
766  * s.exist? /e/ # -> nil
767  */
768 static VALUE
769 strscan_exist_p(VALUE self, VALUE re)
770 {
771  return strscan_do_scan(self, re, 0, 0, 0);
772 }
773 
774 /*
775  * call-seq: skip_until(pattern)
776  *
777  * Advances the scan pointer until +pattern+ is matched and consumed. Returns
778  * the number of bytes advanced, or +nil+ if no match was found.
779  *
780  * Look ahead to match +pattern+, and advance the scan pointer to the _end_
781  * of the match. Return the number of bytes advanced, or +nil+ if the
782  * match was unsuccessful.
783  *
784  * It's similar to #scan_until, but without returning the intervening string.
785  *
786  * s = StringScanner.new("Fri Dec 12 1975 14:39")
787  * s.skip_until /12/ # -> 10
788  * s.pos # -> 10
789  */
790 static VALUE
791 strscan_skip_until(VALUE self, VALUE re)
792 {
793  return strscan_do_scan(self, re, 1, 0, 0);
794 }
795 
796 /*
797  * call-seq: check_until(pattern)
798  *
799  * This returns the value that #scan_until would return, without advancing the
800  * scan pointer. The match register is affected, though.
801  *
802  * s = StringScanner.new("Fri Dec 12 1975 14:39")
803  * s.check_until /12/ # -> "Fri Dec 12"
804  * s.pos # -> 0
805  * s.matched # -> "12"
806  *
807  * Mnemonic: it "checks" to see whether a #scan_until will return a value.
808  */
809 static VALUE
810 strscan_check_until(VALUE self, VALUE re)
811 {
812  return strscan_do_scan(self, re, 0, 1, 0);
813 }
814 
815 /*
816  * call-seq: search_full(pattern, advance_pointer_p, return_string_p)
817  *
818  * Scans the string _until_ the +pattern+ is matched.
819  * Advances the scan pointer if +advance_pointer_p+, otherwise not.
820  * Returns the matched string if +return_string_p+ is true, otherwise
821  * returns the number of bytes advanced.
822  * This method does affect the match register.
823  */
824 static VALUE
825 strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
826 {
827  return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0);
828 }
829 
830 static void
831 adjust_registers_to_matched(struct strscanner *p)
832 {
833  onig_region_clear(&(p->regs));
834  if (p->fixed_anchor_p) {
835  onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
836  }
837  else {
838  onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
839  }
840 }
841 
842 /*
843  * Scans one character and returns it.
844  * This method is multibyte character sensitive.
845  *
846  * s = StringScanner.new("ab")
847  * s.getch # => "a"
848  * s.getch # => "b"
849  * s.getch # => nil
850  *
851  * $KCODE = 'EUC'
852  * s = StringScanner.new("\244\242")
853  * s.getch # => "\244\242" # Japanese hira-kana "A" in EUC-JP
854  * s.getch # => nil
855  */
856 static VALUE
857 strscan_getch(VALUE self)
858 {
859  struct strscanner *p;
860  long len;
861 
862  GET_SCANNER(self, p);
864  if (EOS_P(p))
865  return Qnil;
866 
867  len = rb_enc_mbclen(CURPTR(p), S_PEND(p), rb_enc_get(p->str));
868  len = minl(len, S_RESTLEN(p));
869  p->prev = p->curr;
870  p->curr += len;
871  MATCHED(p);
872  adjust_registers_to_matched(p);
873  return extract_range(p,
874  adjust_register_position(p, p->regs.beg[0]),
875  adjust_register_position(p, p->regs.end[0]));
876 }
877 
878 /*
879  * Scans one byte and returns it.
880  * This method is not multibyte character sensitive.
881  * See also: #getch.
882  *
883  * s = StringScanner.new('ab')
884  * s.get_byte # => "a"
885  * s.get_byte # => "b"
886  * s.get_byte # => nil
887  *
888  * $KCODE = 'EUC'
889  * s = StringScanner.new("\244\242")
890  * s.get_byte # => "\244"
891  * s.get_byte # => "\242"
892  * s.get_byte # => nil
893  */
894 static VALUE
895 strscan_get_byte(VALUE self)
896 {
897  struct strscanner *p;
898 
899  GET_SCANNER(self, p);
901  if (EOS_P(p))
902  return Qnil;
903 
904  p->prev = p->curr;
905  p->curr++;
906  MATCHED(p);
907  adjust_registers_to_matched(p);
908  return extract_range(p,
909  adjust_register_position(p, p->regs.beg[0]),
910  adjust_register_position(p, p->regs.end[0]));
911 }
912 
913 /*
914  * Equivalent to #get_byte.
915  * This method is obsolete; use #get_byte instead.
916  */
917 static VALUE
918 strscan_getbyte(VALUE self)
919 {
920  rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead");
921  return strscan_get_byte(self);
922 }
923 
924 /*
925  * call-seq: peek(len)
926  *
927  * Extracts a string corresponding to <tt>string[pos,len]</tt>, without
928  * advancing the scan pointer.
929  *
930  * s = StringScanner.new('test string')
931  * s.peek(7) # => "test st"
932  * s.peek(7) # => "test st"
933  *
934  */
935 static VALUE
936 strscan_peek(VALUE self, VALUE vlen)
937 {
938  struct strscanner *p;
939  long len;
940 
941  GET_SCANNER(self, p);
942 
943  len = NUM2LONG(vlen);
944  if (EOS_P(p))
945  return str_new(p, "", 0);
946 
947  len = minl(len, S_RESTLEN(p));
948  return extract_beg_len(p, p->curr, len);
949 }
950 
951 /*
952  * Equivalent to #peek.
953  * This method is obsolete; use #peek instead.
954  */
955 static VALUE
956 strscan_peep(VALUE self, VALUE vlen)
957 {
958  rb_warning("StringScanner#peep is obsolete; use #peek instead");
959  return strscan_peek(self, vlen);
960 }
961 
962 /*
963  * Sets the scan pointer to the previous position. Only one previous position is
964  * remembered, and it changes with each scanning operation.
965  *
966  * s = StringScanner.new('test string')
967  * s.scan(/\w+/) # => "test"
968  * s.unscan
969  * s.scan(/../) # => "te"
970  * s.scan(/\d/) # => nil
971  * s.unscan # ScanError: unscan failed: previous match record not exist
972  */
973 static VALUE
974 strscan_unscan(VALUE self)
975 {
976  struct strscanner *p;
977 
978  GET_SCANNER(self, p);
979  if (! MATCHED_P(p))
980  rb_raise(ScanError, "unscan failed: previous match record not exist");
981  p->curr = p->prev;
983  return self;
984 }
985 
986 /*
987  * Returns +true+ iff the scan pointer is at the beginning of the line.
988  *
989  * s = StringScanner.new("test\ntest\n")
990  * s.bol? # => true
991  * s.scan(/te/)
992  * s.bol? # => false
993  * s.scan(/st\n/)
994  * s.bol? # => true
995  * s.terminate
996  * s.bol? # => true
997  */
998 static VALUE
999 strscan_bol_p(VALUE self)
1000 {
1001  struct strscanner *p;
1002 
1003  GET_SCANNER(self, p);
1004  if (CURPTR(p) > S_PEND(p)) return Qnil;
1005  if (p->curr == 0) return Qtrue;
1006  return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
1007 }
1008 
1009 /*
1010  * Returns +true+ if the scan pointer is at the end of the string.
1011  *
1012  * s = StringScanner.new('test string')
1013  * p s.eos? # => false
1014  * s.scan(/test/)
1015  * p s.eos? # => false
1016  * s.terminate
1017  * p s.eos? # => true
1018  */
1019 static VALUE
1020 strscan_eos_p(VALUE self)
1021 {
1022  struct strscanner *p;
1023 
1024  GET_SCANNER(self, p);
1025  return EOS_P(p) ? Qtrue : Qfalse;
1026 }
1027 
1028 /*
1029  * Equivalent to #eos?.
1030  * This method is obsolete, use #eos? instead.
1031  */
1032 static VALUE
1033 strscan_empty_p(VALUE self)
1034 {
1035  rb_warning("StringScanner#empty? is obsolete; use #eos? instead");
1036  return strscan_eos_p(self);
1037 }
1038 
1039 /*
1040  * Returns true iff there is more data in the string. See #eos?.
1041  * This method is obsolete; use #eos? instead.
1042  *
1043  * s = StringScanner.new('test string')
1044  * s.eos? # These two
1045  * s.rest? # are opposites.
1046  */
1047 static VALUE
1048 strscan_rest_p(VALUE self)
1049 {
1050  struct strscanner *p;
1051 
1052  GET_SCANNER(self, p);
1053  return EOS_P(p) ? Qfalse : Qtrue;
1054 }
1055 
1056 /*
1057  * Returns +true+ iff the last match was successful.
1058  *
1059  * s = StringScanner.new('test string')
1060  * s.match?(/\w+/) # => 4
1061  * s.matched? # => true
1062  * s.match?(/\d+/) # => nil
1063  * s.matched? # => false
1064  */
1065 static VALUE
1066 strscan_matched_p(VALUE self)
1067 {
1068  struct strscanner *p;
1069 
1070  GET_SCANNER(self, p);
1071  return MATCHED_P(p) ? Qtrue : Qfalse;
1072 }
1073 
1074 /*
1075  * Returns the last matched string.
1076  *
1077  * s = StringScanner.new('test string')
1078  * s.match?(/\w+/) # -> 4
1079  * s.matched # -> "test"
1080  */
1081 static VALUE
1082 strscan_matched(VALUE self)
1083 {
1084  struct strscanner *p;
1085 
1086  GET_SCANNER(self, p);
1087  if (! MATCHED_P(p)) return Qnil;
1088  return extract_range(p,
1089  adjust_register_position(p, p->regs.beg[0]),
1090  adjust_register_position(p, p->regs.end[0]));
1091 }
1092 
1093 /*
1094  * Returns the size of the most recent match (see #matched), or +nil+ if there
1095  * was no recent match.
1096  *
1097  * s = StringScanner.new('test string')
1098  * s.check /\w+/ # -> "test"
1099  * s.matched_size # -> 4
1100  * s.check /\d+/ # -> nil
1101  * s.matched_size # -> nil
1102  */
1103 static VALUE
1104 strscan_matched_size(VALUE self)
1105 {
1106  struct strscanner *p;
1107 
1108  GET_SCANNER(self, p);
1109  if (! MATCHED_P(p)) return Qnil;
1110  return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
1111 }
1112 
1113 static int
1114 name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end, rb_encoding *enc)
1115 {
1116  int num;
1117 
1119  (const unsigned char* )name, (const unsigned char* )name_end, regs);
1120  if (num >= 1) {
1121  return num;
1122  }
1123  else {
1124  rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s",
1125  rb_long2int(name_end - name), name);
1126  }
1127 
1128  UNREACHABLE;
1129 }
1130 
1131 /*
1132  * call-seq: [](n)
1133  *
1134  * Returns the n-th subgroup in the most recent match.
1135  *
1136  * s = StringScanner.new("Fri Dec 12 1975 14:39")
1137  * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1138  * s[0] # -> "Fri Dec 12 "
1139  * s[1] # -> "Fri"
1140  * s[2] # -> "Dec"
1141  * s[3] # -> "12"
1142  * s.post_match # -> "1975 14:39"
1143  * s.pre_match # -> ""
1144  *
1145  * s.reset
1146  * s.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /) # -> "Fri Dec 12 "
1147  * s[0] # -> "Fri Dec 12 "
1148  * s[1] # -> "Fri"
1149  * s[2] # -> "Dec"
1150  * s[3] # -> "12"
1151  * s[:wday] # -> "Fri"
1152  * s[:month] # -> "Dec"
1153  * s[:day] # -> "12"
1154  * s.post_match # -> "1975 14:39"
1155  * s.pre_match # -> ""
1156  */
1157 static VALUE
1158 strscan_aref(VALUE self, VALUE idx)
1159 {
1160  const char *name;
1161  struct strscanner *p;
1162  long i;
1163 
1164  GET_SCANNER(self, p);
1165  if (! MATCHED_P(p)) return Qnil;
1166 
1167  switch (TYPE(idx)) {
1168  case T_SYMBOL:
1169  idx = rb_sym2str(idx);
1170  /* fall through */
1171  case T_STRING:
1172  if (!p->regex) return Qnil;
1173  RSTRING_GETMEM(idx, name, i);
1174  i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
1175  break;
1176  default:
1177  i = NUM2LONG(idx);
1178  }
1179 
1180  if (i < 0)
1181  i += p->regs.num_regs;
1182  if (i < 0) return Qnil;
1183  if (i >= p->regs.num_regs) return Qnil;
1184  if (p->regs.beg[i] == -1) return Qnil;
1185 
1186  return extract_range(p,
1187  adjust_register_position(p, p->regs.beg[i]),
1188  adjust_register_position(p, p->regs.end[i]));
1189 }
1190 
1191 /*
1192  * call-seq: size
1193  *
1194  * Returns the number of subgroups in the most recent match.
1195  * The full match counts as a subgroup.
1196  *
1197  * s = StringScanner.new("Fri Dec 12 1975 14:39")
1198  * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1199  * s.size # -> 4
1200  */
1201 static VALUE
1202 strscan_size(VALUE self)
1203 {
1204  struct strscanner *p;
1205 
1206  GET_SCANNER(self, p);
1207  if (! MATCHED_P(p)) return Qnil;
1208  return INT2FIX(p->regs.num_regs);
1209 }
1210 
1211 /*
1212  * call-seq: captures
1213  *
1214  * Returns the subgroups in the most recent match (not including the full match).
1215  * If the most recent match attempt failed (or none was made), it returns nil.
1216  *
1217  * s = StringScanner.new("Fri Dec 12 1975 14:39")
1218  * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1219  * s.captures # -> ["Fri", "Dec", "12"]
1220  * s.scan(/(\w+) (\w+) (\d+) /) # -> nil
1221  * s.captures # -> nil
1222  */
1223 static VALUE
1224 strscan_captures(VALUE self)
1225 {
1226  struct strscanner *p;
1227  int i, num_regs;
1228  VALUE new_ary;
1229 
1230  GET_SCANNER(self, p);
1231  if (! MATCHED_P(p)) return Qnil;
1232 
1233  num_regs = p->regs.num_regs;
1234  new_ary = rb_ary_new2(num_regs);
1235 
1236  for (i = 1; i < num_regs; i++) {
1237  VALUE str = extract_range(p,
1238  adjust_register_position(p, p->regs.beg[i]),
1239  adjust_register_position(p, p->regs.end[i]));
1240  rb_ary_push(new_ary, str);
1241  }
1242 
1243  return new_ary;
1244 }
1245 
1246 /*
1247  * call-seq:
1248  * scanner.values_at( i1, i2, ... iN ) -> an_array
1249  *
1250  * Returns the subgroups in the most recent match at the given indices.
1251  * If the most recent match attempt failed (or none was made), it returns nil.
1252  *
1253  * s = StringScanner.new("Fri Dec 12 1975 14:39")
1254  * s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
1255  * s.values_at 0, -1, 5, 2 # -> ["Fri Dec 12 ", "12", nil, "Dec"]
1256  * s.scan(/(\w+) (\w+) (\d+) /) # -> nil
1257  * s.values_at 0, -1, 5, 2 # -> nil
1258  */
1259 
1260 static VALUE
1261 strscan_values_at(int argc, VALUE *argv, VALUE self)
1262 {
1263  struct strscanner *p;
1264  long i;
1265  VALUE new_ary;
1266 
1267  GET_SCANNER(self, p);
1268  if (! MATCHED_P(p)) return Qnil;
1269 
1270  new_ary = rb_ary_new2(argc);
1271  for (i = 0; i<argc; i++) {
1272  rb_ary_push(new_ary, strscan_aref(self, argv[i]));
1273  }
1274 
1275  return new_ary;
1276 }
1277 
1278 /*
1279  * Returns the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
1280  *
1281  * s = StringScanner.new('test string')
1282  * s.scan(/\w+/) # -> "test"
1283  * s.scan(/\s+/) # -> " "
1284  * s.pre_match # -> "test"
1285  * s.post_match # -> "string"
1286  */
1287 static VALUE
1288 strscan_pre_match(VALUE self)
1289 {
1290  struct strscanner *p;
1291 
1292  GET_SCANNER(self, p);
1293  if (! MATCHED_P(p)) return Qnil;
1294  return extract_range(p,
1295  0,
1296  adjust_register_position(p, p->regs.beg[0]));
1297 }
1298 
1299 /*
1300  * Returns the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
1301  *
1302  * s = StringScanner.new('test string')
1303  * s.scan(/\w+/) # -> "test"
1304  * s.scan(/\s+/) # -> " "
1305  * s.pre_match # -> "test"
1306  * s.post_match # -> "string"
1307  */
1308 static VALUE
1309 strscan_post_match(VALUE self)
1310 {
1311  struct strscanner *p;
1312 
1313  GET_SCANNER(self, p);
1314  if (! MATCHED_P(p)) return Qnil;
1315  return extract_range(p,
1316  adjust_register_position(p, p->regs.end[0]),
1317  S_LEN(p));
1318 }
1319 
1320 /*
1321  * Returns the "rest" of the string (i.e. everything after the scan pointer).
1322  * If there is no more data (eos? = true), it returns <tt>""</tt>.
1323  */
1324 static VALUE
1325 strscan_rest(VALUE self)
1326 {
1327  struct strscanner *p;
1328 
1329  GET_SCANNER(self, p);
1330  if (EOS_P(p)) {
1331  return str_new(p, "", 0);
1332  }
1333  return extract_range(p, p->curr, S_LEN(p));
1334 }
1335 
1336 /*
1337  * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
1338  */
1339 static VALUE
1340 strscan_rest_size(VALUE self)
1341 {
1342  struct strscanner *p;
1343  long i;
1344 
1345  GET_SCANNER(self, p);
1346  if (EOS_P(p)) {
1347  return INT2FIX(0);
1348  }
1349  i = S_RESTLEN(p);
1350  return INT2FIX(i);
1351 }
1352 
1353 /*
1354  * <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
1355  * This method is obsolete; use #rest_size instead.
1356  */
1357 static VALUE
1358 strscan_restsize(VALUE self)
1359 {
1360  rb_warning("StringScanner#restsize is obsolete; use #rest_size instead");
1361  return strscan_rest_size(self);
1362 }
1363 
1364 #define INSPECT_LENGTH 5
1365 
1366 /*
1367  * Returns a string that represents the StringScanner object, showing:
1368  * - the current position
1369  * - the size of the string
1370  * - the characters surrounding the scan pointer
1371  *
1372  * s = StringScanner.new("Fri Dec 12 1975 14:39")
1373  * s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">'
1374  * s.scan_until /12/ # -> "Fri Dec 12"
1375  * s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
1376  */
1377 static VALUE
1378 strscan_inspect(VALUE self)
1379 {
1380  struct strscanner *p;
1381  VALUE a, b;
1382 
1383  p = check_strscan(self);
1384  if (NIL_P(p->str)) {
1385  a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self));
1386  return a;
1387  }
1388  if (EOS_P(p)) {
1389  a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self));
1390  return a;
1391  }
1392  if (p->curr == 0) {
1393  b = inspect2(p);
1394  a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld @ %"PRIsVALUE">",
1395  rb_obj_class(self),
1396  p->curr, S_LEN(p),
1397  b);
1398  return a;
1399  }
1400  a = inspect1(p);
1401  b = inspect2(p);
1402  a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld %"PRIsVALUE" @ %"PRIsVALUE">",
1403  rb_obj_class(self),
1404  p->curr, S_LEN(p),
1405  a, b);
1406  return a;
1407 }
1408 
1409 static VALUE
1410 inspect1(struct strscanner *p)
1411 {
1412  VALUE str;
1413  long len;
1414 
1415  if (p->curr == 0) return rb_str_new2("");
1416  if (p->curr > INSPECT_LENGTH) {
1417  str = rb_str_new_cstr("...");
1418  len = INSPECT_LENGTH;
1419  }
1420  else {
1421  str = rb_str_new(0, 0);
1422  len = p->curr;
1423  }
1424  rb_str_cat(str, CURPTR(p) - len, len);
1425  return rb_str_dump(str);
1426 }
1427 
1428 static VALUE
1429 inspect2(struct strscanner *p)
1430 {
1431  VALUE str;
1432  long len;
1433 
1434  if (EOS_P(p)) return rb_str_new2("");
1435  len = S_RESTLEN(p);
1436  if (len > INSPECT_LENGTH) {
1438  rb_str_cat2(str, "...");
1439  }
1440  else {
1441  str = rb_str_new(CURPTR(p), len);
1442  }
1443  return rb_str_dump(str);
1444 }
1445 
1446 /*
1447  * call-seq:
1448  * scanner.fixed_anchor? -> true or false
1449  *
1450  * Whether +scanner+ uses fixed anchor mode or not.
1451  *
1452  * If fixed anchor mode is used, +\A+ always matches the beginning of
1453  * the string. Otherwise, +\A+ always matches the current position.
1454  */
1455 static VALUE
1456 strscan_fixed_anchor_p(VALUE self)
1457 {
1458  struct strscanner *p;
1459  p = check_strscan(self);
1460  return p->fixed_anchor_p ? Qtrue : Qfalse;
1461 }
1462 
1463 /* =======================================================================
1464  Ruby Interface
1465  ======================================================================= */
1466 
1467 /*
1468  * Document-class: StringScanner
1469  *
1470  * StringScanner provides for lexical scanning operations on a String. Here is
1471  * an example of its usage:
1472  *
1473  * s = StringScanner.new('This is an example string')
1474  * s.eos? # -> false
1475  *
1476  * p s.scan(/\w+/) # -> "This"
1477  * p s.scan(/\w+/) # -> nil
1478  * p s.scan(/\s+/) # -> " "
1479  * p s.scan(/\s+/) # -> nil
1480  * p s.scan(/\w+/) # -> "is"
1481  * s.eos? # -> false
1482  *
1483  * p s.scan(/\s+/) # -> " "
1484  * p s.scan(/\w+/) # -> "an"
1485  * p s.scan(/\s+/) # -> " "
1486  * p s.scan(/\w+/) # -> "example"
1487  * p s.scan(/\s+/) # -> " "
1488  * p s.scan(/\w+/) # -> "string"
1489  * s.eos? # -> true
1490  *
1491  * p s.scan(/\s+/) # -> nil
1492  * p s.scan(/\w+/) # -> nil
1493  *
1494  * Scanning a string means remembering the position of a <i>scan pointer</i>,
1495  * which is just an index. The point of scanning is to move forward a bit at
1496  * a time, so matches are sought after the scan pointer; usually immediately
1497  * after it.
1498  *
1499  * Given the string "test string", here are the pertinent scan pointer
1500  * positions:
1501  *
1502  *   t e s t   s t r i n g
1503  *   0 1 2 ...             1
1504  *                         0
1505  *
1506  * When you #scan for a pattern (a regular expression), the match must occur
1507  * at the character after the scan pointer. If you use #scan_until, then the
1508  * match can occur anywhere after the scan pointer. In both cases, the scan
1509  * pointer moves <i>just beyond</i> the last character of the match, ready to
1510  * scan again from the next character onwards. This is demonstrated by the
1511  * example above.
1512  *
1513  * == Method Categories
1514  *
1515  * There are other methods besides the plain scanners. You can look ahead in
1516  * the string without actually scanning. You can access the most recent match.
1517  * You can modify the string being scanned, reset or terminate the scanner,
1518  * find out or change the position of the scan pointer, skip ahead, and so on.
1519  *
1520  * === Advancing the Scan Pointer
1521  *
1522  * - #getch
1523  * - #get_byte
1524  * - #scan
1525  * - #scan_until
1526  * - #skip
1527  * - #skip_until
1528  *
1529  * === Looking Ahead
1530  *
1531  * - #check
1532  * - #check_until
1533  * - #exist?
1534  * - #match?
1535  * - #peek
1536  *
1537  * === Finding Where we Are
1538  *
1539  * - #beginning_of_line? (#bol?)
1540  * - #eos?
1541  * - #rest?
1542  * - #rest_size
1543  * - #pos
1544  *
1545  * === Setting Where we Are
1546  *
1547  * - #reset
1548  * - #terminate
1549  * - #pos=
1550  *
1551  * === Match Data
1552  *
1553  * - #matched
1554  * - #matched?
1555  * - #matched_size
1556  * - []
1557  * - #pre_match
1558  * - #post_match
1559  *
1560  * === Miscellaneous
1561  *
1562  * - <<
1563  * - #concat
1564  * - #string
1565  * - #string=
1566  * - #unscan
1567  *
1568  * There are aliases to several of the methods.
1569  */
1570 void
1572 {
1573 #undef rb_intern
1574  ID id_scanerr = rb_intern("ScanError");
1575  VALUE tmp;
1576 
1577  id_byteslice = rb_intern("byteslice");
1578 
1579  StringScanner = rb_define_class("StringScanner", rb_cObject);
1580  ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
1581  if (!rb_const_defined(rb_cObject, id_scanerr)) {
1582  rb_const_set(rb_cObject, id_scanerr, ScanError);
1583  }
1585  rb_obj_freeze(tmp);
1586  rb_const_set(StringScanner, rb_intern("Version"), tmp);
1587  tmp = rb_str_new2("$Id$");
1588  rb_obj_freeze(tmp);
1589  rb_const_set(StringScanner, rb_intern("Id"), tmp);
1590 
1591  rb_define_alloc_func(StringScanner, strscan_s_allocate);
1592  rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1);
1593  rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1);
1594  rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0);
1595  rb_define_method(StringScanner, "reset", strscan_reset, 0);
1596  rb_define_method(StringScanner, "terminate", strscan_terminate, 0);
1597  rb_define_method(StringScanner, "clear", strscan_clear, 0);
1598  rb_define_method(StringScanner, "string", strscan_get_string, 0);
1599  rb_define_method(StringScanner, "string=", strscan_set_string, 1);
1600  rb_define_method(StringScanner, "concat", strscan_concat, 1);
1601  rb_define_method(StringScanner, "<<", strscan_concat, 1);
1602  rb_define_method(StringScanner, "pos", strscan_get_pos, 0);
1603  rb_define_method(StringScanner, "pos=", strscan_set_pos, 1);
1604  rb_define_method(StringScanner, "charpos", strscan_get_charpos, 0);
1605  rb_define_method(StringScanner, "pointer", strscan_get_pos, 0);
1606  rb_define_method(StringScanner, "pointer=", strscan_set_pos, 1);
1607 
1608  rb_define_method(StringScanner, "scan", strscan_scan, 1);
1609  rb_define_method(StringScanner, "skip", strscan_skip, 1);
1610  rb_define_method(StringScanner, "match?", strscan_match_p, 1);
1611  rb_define_method(StringScanner, "check", strscan_check, 1);
1612  rb_define_method(StringScanner, "scan_full", strscan_scan_full, 3);
1613 
1614  rb_define_method(StringScanner, "scan_until", strscan_scan_until, 1);
1615  rb_define_method(StringScanner, "skip_until", strscan_skip_until, 1);
1616  rb_define_method(StringScanner, "exist?", strscan_exist_p, 1);
1617  rb_define_method(StringScanner, "check_until", strscan_check_until, 1);
1618  rb_define_method(StringScanner, "search_full", strscan_search_full, 3);
1619 
1620  rb_define_method(StringScanner, "getch", strscan_getch, 0);
1621  rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0);
1622  rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0);
1623  rb_define_method(StringScanner, "peek", strscan_peek, 1);
1624  rb_define_method(StringScanner, "peep", strscan_peep, 1);
1625 
1626  rb_define_method(StringScanner, "unscan", strscan_unscan, 0);
1627 
1628  rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
1629  rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?"));
1630  rb_define_method(StringScanner, "eos?", strscan_eos_p, 0);
1631  rb_define_method(StringScanner, "empty?", strscan_empty_p, 0);
1632  rb_define_method(StringScanner, "rest?", strscan_rest_p, 0);
1633 
1634  rb_define_method(StringScanner, "matched?", strscan_matched_p, 0);
1635  rb_define_method(StringScanner, "matched", strscan_matched, 0);
1636  rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
1637  rb_define_method(StringScanner, "[]", strscan_aref, 1);
1638  rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0);
1639  rb_define_method(StringScanner, "post_match", strscan_post_match, 0);
1640  rb_define_method(StringScanner, "size", strscan_size, 0);
1641  rb_define_method(StringScanner, "captures", strscan_captures, 0);
1642  rb_define_method(StringScanner, "values_at", strscan_values_at, -1);
1643 
1644  rb_define_method(StringScanner, "rest", strscan_rest, 0);
1645  rb_define_method(StringScanner, "rest_size", strscan_rest_size, 0);
1646  rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
1647 
1648  rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
1649 
1650  rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
1651 }
EOS_P
#define EOS_P(s)
Definition: strscan.c:68
rb_get_kwargs
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Definition: class.c:1886
ID
unsigned long ID
Definition: ruby.h:103
ruby_xfree
void ruby_xfree(void *x)
Definition: gc.c:10170
rb_define_class
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:649
re_registers::end
OnigPosition * end
Definition: onigmo.h:720
MATCHED
#define MATCHED(s)
Definition: strscan.c:59
TypedData_Make_Struct
#define TypedData_Make_Struct(klass, type, data_type, sval)
Definition: ruby.h:1244
Check_Type
#define Check_Type(v, t)
Definition: ruby.h:595
RSTRING_GETMEM
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Definition: ruby.h:1018
onig_region_set
ONIG_EXTERN int onig_region_set(OnigRegion *region, int at, int beg, int end)
Definition: regexec.c:305
rb_str_new2
#define rb_str_new2
Definition: intern.h:903
rb_const_defined
int rb_const_defined(VALUE, ID)
Definition: variable.c:2686
rb_enc_mbclen
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1020
ONIG_OPTION_NONE
#define ONIG_OPTION_NONE
Definition: onigmo.h:450
rb_warning
void rb_warning(const char *fmt,...)
Definition: error.c:336
INT2FIX
#define INT2FIX(i)
Definition: ruby.h:263
RSTRING_PTR
#define RSTRING_PTR(str)
Definition: ruby.h:1009
re.h
i
uint32_t i
Definition: rb_mjit_min_header-2.7.2.h:5499
NUM2LONG
#define NUM2LONG(x)
Definition: ruby.h:679
STRSCAN_VERSION
#define STRSCAN_VERSION
Definition: strscan.c:25
VALUE
unsigned long VALUE
Definition: ruby.h:102
encoding.h
strscanner::regs
struct re_registers regs
Definition: strscan.c:49
rb_intern
#define rb_intern(str)
RB_TYPE_P
#define RB_TYPE_P(obj, type)
Definition: ruby.h:560
TYPE
#define TYPE(x)
Definition: ruby.h:554
rb_enc_get
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:872
rb_enc_check
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:891
rb_str_length
VALUE rb_str_length(VALUE)
Definition: string.c:1843
rb_reg_region_copy
int rb_reg_region_copy(struct re_registers *, const struct re_registers *)
Definition: re.c:946
rb_reg_prepare_re
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Definition: re.c:1491
rb_eIndexError
VALUE rb_eIndexError
Definition: error.c:926
rb_str_cat2
#define rb_str_cat2
Definition: intern.h:912
Qundef
#define Qundef
Definition: ruby.h:470
rb_define_singleton_method
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1755
rb_const_set
void rb_const_set(VALUE, ID, VALUE)
Definition: variable.c:2756
rb_define_method
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1551
rb_long2int
#define rb_long2int(n)
Definition: ruby.h:350
INT2NUM
#define INT2NUM(x)
Definition: ruby.h:1609
ptr
struct RIMemo * ptr
Definition: debug.c:65
stdbool.h
Qfalse
#define Qfalse
Definition: ruby.h:467
onig_name_to_backref_number
ONIG_EXTERN int onig_name_to_backref_number(OnigRegex reg, const OnigUChar *name, const OnigUChar *name_end, const OnigRegion *region)
onig_search
ONIG_EXTERN OnigPosition onig_search(OnigRegex, const OnigUChar *str, const OnigUChar *end, const OnigUChar *start, const OnigUChar *range, OnigRegion *region, OnigOptionType option)
onig_region_init
ONIG_EXTERN void onig_region_init(OnigRegion *region)
Definition: regexec.c:320
PRIsVALUE
#define PRIsVALUE
Definition: ruby.h:166
S_LEN
#define S_LEN(s)
Definition: strscan.c:63
onig_match
ONIG_EXTERN OnigPosition onig_match(OnigRegex, const OnigUChar *str, const OnigUChar *end, const OnigUChar *at, OnigRegion *region, OnigOptionType option)
ruby.h
T_SYMBOL
#define T_SYMBOL
Definition: ruby.h:540
strscanner::regex
VALUE regex
Definition: strscan.c:52
onig_region_free
ONIG_EXTERN void onig_region_free(OnigRegion *region, int free_self)
Definition: regexec.c:343
rb_raise
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2671
rb_eRangeError
VALUE rb_eRangeError
Definition: error.c:928
rb_obj_class
VALUE rb_obj_class(VALUE)
Equivalent to Object#class in Ruby.
Definition: object.c:217
onig_free
ONIG_EXTERN void onig_free(OnigRegex)
rb_alias
void rb_alias(VALUE, ID, ID)
Definition: vm_method.c:1581
rb_str_dump
VALUE rb_str_dump(VALUE)
Definition: string.c:6042
strscanner::curr
long curr
Definition: strscan.c:46
klass
VALUE klass
Definition: rb_mjit_min_header-2.7.2.h:13302
GET_SCANNER
#define GET_SCANNER(obj, var)
Definition: strscan.c:70
strscanner::fixed_anchor_p
bool fixed_anchor_p
Definition: strscan.c:55
OnigEncodingTypeST
Definition: onigmo.h:160
RREGEXP
#define RREGEXP(obj)
Definition: ruby.h:1272
T_REGEXP
#define T_REGEXP
Definition: ruby.h:529
rb_ary_push
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:1195
rb_obj_freeze
VALUE rb_obj_freeze(VALUE)
Make the object unmodifiable.
Definition: object.c:1080
CLEAR_MATCH_STATUS
#define CLEAR_MATCH_STATUS(s)
Definition: strscan.c:60
onig_region_memsize
size_t onig_region_memsize(const OnigRegion *regs)
Definition: regcomp.c:5669
Init_strscan
void Init_strscan(void)
Definition: strscan.c:1571
S_RESTLEN
#define S_RESTLEN(s)
Definition: strscan.c:66
rb_enc_copy
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:990
size
int size
Definition: encoding.c:58
memcmp
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
strscanner::str
VALUE str
Definition: strscan.c:42
rb_scan_args
#define rb_scan_args(argc, argvp, fmt,...)
Definition: rb_mjit_min_header-2.7.2.h:6407
rb_check_hash_type
VALUE rb_check_hash_type(VALUE hash)
Definition: hash.c:1852
rb_cObject
RUBY_EXTERN VALUE rb_cObject
Definition: ruby.h:2010
rb_ary_new2
#define rb_ary_new2
Definition: intern.h:103
obj
const VALUE VALUE obj
Definition: rb_mjit_min_header-2.7.2.h:5777
re_pattern_buffer
Definition: onigmo.h:755
rb_str_append
VALUE rb_str_append(VALUE, VALUE)
Definition: string.c:2965
UChar
#define UChar
Definition: onigmo.h:76
argv
char ** argv
Definition: ruby.c:223
f
#define f
CURPTR
#define CURPTR(s)
Definition: strscan.c:65
UNREACHABLE
#define UNREACHABLE
Definition: ruby.h:63
rb_sprintf
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1197
StringValue
use StringValue() instead")))
str
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
strscanner::flags
unsigned long flags
Definition: strscan.c:38
RUBY_TYPED_FREE_IMMEDIATELY
#define RUBY_TYPED_FREE_IMMEDIATELY
Definition: ruby.h:1207
INSPECT_LENGTH
#define INSPECT_LENGTH
Definition: strscan.c:1364
NIL_P
#define NIL_P(v)
Definition: ruby.h:482
rb_funcall
#define rb_funcall(recv, mid, argc,...)
Definition: rb_mjit_min_header-2.7.2.h:6620
rb_enc_raise
void rb_enc_raise(rb_encoding *enc, VALUE exc, const char *fmt,...)
Definition: error.c:2652
argc
int argc
Definition: ruby.c:222
re_registers::beg
OnigPosition * beg
Definition: onigmo.h:719
re_registers
Definition: onigmo.h:716
strscanner
Definition: strscan.c:36
rb_data_type_struct
Definition: ruby.h:1148
MATCHED_P
#define MATCHED_P(s)
Definition: strscan.c:58
strscanner::prev
long prev
Definition: strscan.c:45
rb_gc_mark
void rb_gc_mark(VALUE ptr)
Definition: gc.c:5215
rb_check_typeddata
void * rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:891
_
#define _(args)
Definition: dln.h:28
Qtrue
#define Qtrue
Definition: ruby.h:468
re_registers::num_regs
int num_regs
Definition: onigmo.h:718
v
int VALUE v
Definition: rb_mjit_min_header-2.7.2.h:12380
len
uint8_t len
Definition: escape.c:17
rb_memerror
void rb_memerror(void)
Definition: gc.c:9598
S_PBEG
#define S_PBEG(s)
Definition: strscan.c:62
T_STRING
#define T_STRING
Definition: ruby.h:528
rb_define_class_under
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
Definition: class.c:698
rb_sym2str
VALUE rb_sym2str(VALUE)
Definition: symbol.c:784
rb_str_new_cstr
#define rb_str_new_cstr(str)
Definition: rb_mjit_min_header-2.7.2.h:6152
NUM2INT
#define NUM2INT(x)
Definition: ruby.h:715
Qnil
#define Qnil
Definition: ruby.h:469
rb_str_new
#define rb_str_new(str, len)
Definition: rb_mjit_min_header-2.7.2.h:6151
S_PEND
#define S_PEND(s)
Definition: strscan.c:64
rb_eStandardError
VALUE rb_eStandardError
Definition: error.c:921
RB_GC_GUARD
#define RB_GC_GUARD(v)
Definition: ruby.h:585
onig_region_clear
ONIG_EXTERN void onig_region_clear(OnigRegion *region)
Definition: regexec.c:235
RSTRING_LEN
#define RSTRING_LEN(str)
Definition: ruby.h:1005
rb_define_private_method
void rb_define_private_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1569
rb_str_cat
VALUE rb_str_cat(VALUE, const char *, long)
Definition: string.c:2812
RREGEXP_PTR
#define RREGEXP_PTR(r)
Definition: ruby.h:1118
rb_define_alloc_func
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
RTEST
#define RTEST(v)
Definition: ruby.h:481
name
const char * name
Definition: nkf.c:208
n
const char size_t n
Definition: rb_mjit_min_header-2.7.2.h:5491