Ruby 2.1.4p265 (2014-10-27 revision 48166)
re.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  re.c -
4 
5  $Author: nagachika $
6  created at: Mon Aug 9 18:24:49 JST 1993
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/ruby.h"
13 #include "ruby/re.h"
14 #include "ruby/encoding.h"
15 #include "ruby/util.h"
16 #include "internal.h"
17 #include "regint.h"
18 #include <ctype.h>
19 
21 
23 #define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
24 
25 #define BEG(no) (regs->beg[(no)])
26 #define END(no) (regs->end[(no)])
27 
28 #if 'a' == 97 /* it's ascii */
/*
 * Case-folding table indexed by byte value (256 entries).
 * The entries for 'A'..'Z' hold the codes of 'a'..'z'; every other byte
 * maps to itself.  Only defined when the execution character set is ASCII
 * (see the #if guard); other character sets hit the #error below.
 * The element type is plain char, so entries from '\200' upwards are
 * negative on platforms where char is signed.
 */
29 static const char casetable[] = {
30  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
31  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
32  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
33  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
34  /* ' ' '!' '"' '#' '$' '%' '&' ''' */
35  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
36  /* '(' ')' '*' '+' ',' '-' '.' '/' */
37  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
38  /* '0' '1' '2' '3' '4' '5' '6' '7' */
39  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
40  /* '8' '9' ':' ';' '<' '=' '>' '?' */
41  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
42  /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */
43  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
44  /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */
45  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
46  /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */
47  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
48  /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */
49  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
50  /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */
51  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
52  /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */
53  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
54  /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */
55  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
56  /* 'x' 'y' 'z' '{' '|' '}' '~' */
57  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
58  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
59  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
60  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
61  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
62  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
63  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
64  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
65  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
66  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
67  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
68  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
69  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
70  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
71  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
72  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
73  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
74 };
75 #else
76 # error >>> "You lose. You will need a translation table for your character set." <<<
77 #endif
78 
79 int
80 rb_memcicmp(const void *x, const void *y, long len)
81 {
82  const unsigned char *p1 = x, *p2 = y;
83  int tmp;
84 
85  while (len--) {
86  if ((tmp = casetable[(unsigned)*p1++] - casetable[(unsigned)*p2++]))
87  return tmp;
88  }
89  return 0;
90 }
91 
92 #undef rb_memcmp
93 
/*
 * Out-of-line rb_memcmp: byte-wise comparison of the first len bytes of
 * p1 and p2, with the same result convention as memcmp().
 */
int
rb_memcmp(const void *p1, const void *p2, long len)
{
    const int order = memcmp(p1, p2, len);

    return order;
}
99 
100 #ifdef HAVE_MEMMEM
101 static inline long
102 rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
103 {
104  const unsigned char *y;
105 
106  if (y = memmem(ys, n, xs, m))
107  return y - ys;
108  else
109  return -1;
110 }
111 #else
112 static inline long
113 rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
114 {
115  const unsigned char *x = xs, *xe = xs + m;
116  const unsigned char *y = ys, *ye = ys + n;
117 #ifndef VALUE_MAX
118 # if SIZEOF_VALUE == 8
119 # define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL
120 # elif SIZEOF_VALUE == 4
121 # define VALUE_MAX 0xFFFFFFFFUL
122 # endif
123 #endif
124  VALUE hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT);
125 
126  if (m > SIZEOF_VALUE)
127  rb_bug("!!too long pattern string!!");
128 
129  if (!(y = memchr(y, *x, n - m + 1)))
130  return -1;
131 
132  /* Prepare hash value */
133  for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
134  hx <<= CHAR_BIT;
135  hy <<= CHAR_BIT;
136  hx |= *x;
137  hy |= *y;
138  }
139  /* Searching */
140  while (hx != hy) {
141  if (y == ye)
142  return -1;
143  hy <<= CHAR_BIT;
144  hy |= *y;
145  hy &= mask;
146  y++;
147  }
148  return y - ys - m;
149 }
150 #endif
151 
/*
 * Quick-search (Sunday) substring search over raw bytes.
 *
 * xs/m - pattern and its length in bytes
 * ys/n - haystack and its length in bytes
 *
 * Returns the byte offset of the first occurrence of the pattern in the
 * haystack, or -1 when it does not occur.  An empty pattern matches at
 * offset 0; a pattern longer than the haystack never matches.
 *
 * The shift for each window is chosen from the byte just past the window.
 * That byte is only read while it still lies inside the haystack, so the
 * function never touches ys[n].
 */
static inline long
rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    long shift[256];
    long last;          /* start offset of the final candidate window */
    long pos;
    long k;

    if (m < 1)
        return 0;
    if (m > n)
        return -1;
    last = n - m;

    /* Preprocessing: bytes absent from the pattern skip the whole window. */
    for (k = 0; k < 256; ++k)
        shift[k] = m + 1;
    /* Later occurrences overwrite earlier ones, keeping the smallest shift. */
    for (k = 0; k < m; ++k)
        shift[xs[k]] = m - k;

    /* Searching */
    for (pos = 0; pos <= last; pos += shift[ys[pos + m]]) {
        if (xs[0] == ys[pos] && memcmp(xs, ys + pos, m) == 0)
            return pos;
        if (pos == last)
            break;      /* ys[pos + m] would be one past the haystack */
    }
    return -1;
}
171 
/*
 * Maps the character starting at x to a slot number below 512 for the
 * UTF-8 quick-search shift table.
 *
 * Lead bytes below 0xC0 and from 0xF5 upwards are treated as single bytes
 * and land in slots 256..511 (byte value + 256).  Lead bytes 0xC0..0xF4
 * are mixed with the 1, 2 or 3 bytes that follow them and the low eight
 * bits of the result select a slot in 0..255.
 */
static inline unsigned int
rb_memsearch_qs_utf8_hash(const unsigned char *x)
{
    const unsigned int mix = 8353;
    unsigned int acc = x[0];
    int trailing;
    int k;

    if (acc < 0xC0 || acc >= 0xF5) {
        return acc + 256;
    }

    /* Number of bytes following the lead byte that take part in the hash. */
    if (acc < 0xE0) {
        trailing = 1;
    }
    else if (acc < 0xF0) {
        trailing = 2;
    }
    else {
        trailing = 3;
    }

    for (k = 1; k <= trailing; ++k) {
        acc = acc * mix + x[k];
    }
    return (unsigned char)acc;
}
203 
204 static inline long
205 rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
206 {
207  const unsigned char *x = xs, *xe = xs + m;
208  const unsigned char *y = ys;
209  VALUE i, qstable[512];
210 
211  /* Preprocessing */
212  for (i = 0; i < 512; ++i) {
213  qstable[i] = m + 1;
214  }
215  for (; x < xe; ++x) {
216  qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
217  }
218  /* Searching */
219  for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
220  if (*xs == *y && memcmp(xs, y, m) == 0)
221  return y - ys;
222  }
223  return -1;
224 }
225 
226 long
227 rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
228 {
229  const unsigned char *x = x0, *y = y0;
230 
231  if (m > n) return -1;
232  else if (m == n) {
233  return memcmp(x0, y0, m) == 0 ? 0 : -1;
234  }
235  else if (m < 1) {
236  return 0;
237  }
238  else if (m == 1) {
239  const unsigned char *ys = memchr(y, *x, n);
240 
241  if (ys)
242  return ys - y;
243  else
244  return -1;
245  }
246  else if (m <= SIZEOF_VALUE) {
247  return rb_memsearch_ss(x0, m, y0, n);
248  }
249  else if (enc == rb_utf8_encoding()){
250  return rb_memsearch_qs_utf8(x0, m, y0, n);
251  }
252  else {
253  return rb_memsearch_qs(x0, m, y0, n);
254  }
255 }
256 
257 #define REG_LITERAL FL_USER5
258 #define REG_ENCODING_NONE FL_USER6
259 
260 #define KCODE_FIXED FL_USER4
261 
262 #define ARG_REG_OPTION_MASK \
263  (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
264 #define ARG_ENCODING_FIXED 16
265 #define ARG_ENCODING_NONE 32
266 
267 static int
269 {
270  int val;
271 
272  switch (c) {
273  case 'i':
275  break;
276  case 'x':
277  val = ONIG_OPTION_EXTEND;
278  break;
279  case 'm':
280  val = ONIG_OPTION_MULTILINE;
281  break;
282  default:
283  val = 0;
284  break;
285  }
286  return val;
287 }
288 
289 static char *
290 option_to_str(char str[4], int options)
291 {
292  char *p = str;
293  if (options & ONIG_OPTION_MULTILINE) *p++ = 'm';
294  if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i';
295  if (options & ONIG_OPTION_EXTEND) *p++ = 'x';
296  *p = 0;
297  return str;
298 }
299 
300 extern int
301 rb_char_to_option_kcode(int c, int *option, int *kcode)
302 {
303  *option = 0;
304 
305  switch (c) {
306  case 'n':
307  *kcode = rb_ascii8bit_encindex();
308  return (*option = ARG_ENCODING_NONE);
309  case 'e':
310  *kcode = ENCINDEX_EUC_JP;
311  break;
312  case 's':
313  *kcode = ENCINDEX_Windows_31J;
314  break;
315  case 'u':
316  *kcode = rb_utf8_encindex();
317  break;
318  default:
319  *kcode = -1;
320  return (*option = char_to_option(c));
321  }
322  *option = ARG_ENCODING_FIXED;
323  return 1;
324 }
325 
326 static void
328 {
329  if (!RREGEXP(re)->ptr || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
330  rb_raise(rb_eTypeError, "uninitialized Regexp");
331  }
332 }
333 
/*
 * Appends a printable form of the regexp source s (len bytes, encoding enc)
 * to str.
 *
 * A first pass decides whether the source can be appended verbatim.  If it
 * cannot, a second pass appends it character by character, escaping '/',
 * bytes that do not form a valid character, ASCII characters that are
 * neither printable nor whitespace, and (when resenc is non-NULL)
 * non-ASCII characters.
 *
 * resenc is compared against enc in the first pass and only tested for
 * NULL in the second pass.
 */
334 static void
335 rb_reg_expr_str(VALUE str, const char *s, long len,
336  rb_encoding *enc, rb_encoding *resenc)
337 {
338  const char *p, *pend;
339  int cr = ENC_CODERANGE_UNKNOWN;
340  int need_escape = 0;
341  int c, clen;
342 
343  p = s; pend = p + len;
344  rb_str_coderange_scan_restartable(p, pend, enc, &cr);
 /* Pass 1: verbatim copy is only considered for ASCII-compatible encodings
    whose scan reported a valid or 7-bit code range. */
345  if (rb_enc_asciicompat(enc) &&
346  (cr == ENC_CODERANGE_VALID || cr == ENC_CODERANGE_7BIT)) {
347  while (p < pend) {
348  c = rb_enc_ascget(p, pend, &clen, enc);
349  if (c == -1) {
 /* Not an ASCII character: acceptable only when enc == resenc. */
350  if (enc == resenc) {
351  p += mbclen(p, pend, enc);
352  }
353  else {
354  need_escape = 1;
355  break;
356  }
357  }
358  else if (c != '/' && rb_enc_isprint(c, enc)) {
359  p += clen;
360  }
361  else {
 /* A '/' or a non-printable ASCII character forces the escaping pass. */
362  need_escape = 1;
363  break;
364  }
365  }
366  }
367  else {
368  need_escape = 1;
369  }
370 
371  if (!need_escape) {
372  rb_str_buf_cat(str, s, len);
373  }
374  else {
 /* Pass 2: restart from the beginning and emit one character at a time. */
375  int unicode_p = rb_enc_unicode_p(enc);
376  p = s;
377  while (p<pend) {
378  c = rb_enc_ascget(p, pend, &clen, enc);
379  if (c == '\\' && p+clen < pend) {
 /* A backslash and the character after it are copied together unchanged. */
380  int n = clen + mbclen(p+clen, pend, enc);
381  rb_str_buf_cat(str, p, n);
382  p += n;
383  continue;
384  }
385  else if (c == '/') {
 /* This local c shadows the outer int c; it only holds the backslash. */
386  char c = '\\';
387  rb_str_buf_cat(str, &c, 1);
388  rb_str_buf_cat(str, p, clen);
389  }
390  else if (c == -1) {
391  clen = rb_enc_precise_mbclen(p, pend, enc);
392  if (!MBCLEN_CHARFOUND_P(clen)) {
 /* No valid character here: hex-escape this single byte. */
393  c = (unsigned char)*p;
394  clen = 1;
395  goto hex;
396  }
397  if (resenc) {
 /* NOTE(review): on this path clen is not passed through
    MBCLEN_CHARFOUND_LEN before "p += clen" below -- presumably the
    two values coincide for a found character; confirm. */
398  unsigned int c = rb_enc_mbc_to_codepoint(p, pend, enc);
399  rb_str_buf_cat_escaped_char(str, c, unicode_p);
400  }
401  else {
402  clen = MBCLEN_CHARFOUND_LEN(clen);
403  rb_str_buf_cat(str, p, clen);
404  }
405  }
406  else if (rb_enc_isprint(c, enc)) {
407  rb_str_buf_cat(str, p, clen);
408  }
409  else if (!rb_enc_isspace(c, enc)) {
410  char b[8];
411 
 /* Also entered by the goto above, which jumps into this block. */
412  hex:
413  snprintf(b, sizeof(b), "\\x%02X", c);
414  rb_str_buf_cat(str, b, 4);
415  }
416  else {
 /* Non-printable whitespace is copied as is. */
417  rb_str_buf_cat(str, p, clen);
418  }
419  p += clen;
420  }
421  }
422 }
423 
424 static VALUE
425 rb_reg_desc(const char *s, long len, VALUE re)
426 {
427  rb_encoding *enc = rb_enc_get(re);
428  VALUE str = rb_str_buf_new2("/");
430  if (resenc == NULL) resenc = rb_default_external_encoding();
431 
432  if (re && rb_enc_asciicompat(enc)) {
433  rb_enc_copy(str, re);
434  }
435  else {
437  }
438  rb_reg_expr_str(str, s, len, enc, resenc);
439  rb_str_buf_cat2(str, "/");
440  if (re) {
441  char opts[4];
442  rb_reg_check(re);
443  if (*option_to_str(opts, RREGEXP(re)->ptr->options))
444  rb_str_buf_cat2(str, opts);
445  if (RBASIC(re)->flags & REG_ENCODING_NONE)
446  rb_str_buf_cat2(str, "n");
447  }
448  OBJ_INFECT(str, re);
449  return str;
450 }
451 
452 
453 /*
454  * call-seq:
455  * rxp.source -> str
456  *
457  * Returns the original string of the pattern.
458  *
459  * /ab+c/ix.source #=> "ab+c"
460  *
461  * Note that escape sequences are retained as is.
462  *
463  * /\x20\+/.source #=> "\\x20\\+"
464  *
465  */
466 
467 static VALUE
469 {
470  VALUE str;
471 
472  rb_reg_check(re);
474  if (OBJ_TAINTED(re)) OBJ_TAINT(str);
475  return str;
476 }
477 
478 /*
479  * call-seq:
480  * rxp.inspect -> string
481  *
482  * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly,
483  * <code>#inspect</code> actually produces the more natural version of
484  * the string than <code>#to_s</code>.
485  *
486  * /ab+c/ix.inspect #=> "/ab+c/ix"
487  *
488  */
489 
490 static VALUE
492 {
493  if (!RREGEXP(re)->ptr || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
494  return rb_any_to_s(re);
495  }
496  return rb_reg_desc(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
497 }
498 
499 
500 /*
501  * call-seq:
502  * rxp.to_s -> str
503  *
504  * Returns a string containing the regular expression and its options (using the
505  * <code>(?opts:source)</code> notation). This string can be fed back in to
506  * <code>Regexp::new</code> to produce a regular expression with the same semantics as
507  * the original. (However, <code>Regexp#==</code> may not return true when
508  * comparing the two, as the source of the regular expression itself may
509  * differ, as the example shows). <code>Regexp#inspect</code> produces a
510  * generally more readable version of <i>rxp</i>.
511  *
512  * r1 = /ab+c/ix #=> /ab+c/ix
513  * s1 = r1.to_s #=> "(?ix-m:ab+c)"
514  * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
515  * r1 == r2 #=> false
516  * r1.source #=> "ab+c"
517  * r2.source #=> "(?ix-m:ab+c)"
518  */
519 
520 static VALUE
522 {
523  int options, opt;
525  long len;
526  const UChar* ptr;
527  VALUE str = rb_str_buf_new2("(?");
528  char optbuf[5];
529  rb_encoding *enc = rb_enc_get(re);
530 
531  rb_reg_check(re);
532 
533  rb_enc_copy(str, re);
534  options = RREGEXP(re)->ptr->options;
535  ptr = (UChar*)RREGEXP_SRC_PTR(re);
536  len = RREGEXP_SRC_LEN(re);
537  again:
538  if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
539  int err = 1;
540  ptr += 2;
541  if ((len -= 2) > 0) {
542  do {
543  opt = char_to_option((int )*ptr);
544  if (opt != 0) {
545  options |= opt;
546  }
547  else {
548  break;
549  }
550  ++ptr;
551  } while (--len > 0);
552  }
553  if (len > 1 && *ptr == '-') {
554  ++ptr;
555  --len;
556  do {
557  opt = char_to_option((int )*ptr);
558  if (opt != 0) {
559  options &= ~opt;
560  }
561  else {
562  break;
563  }
564  ++ptr;
565  } while (--len > 0);
566  }
567  if (*ptr == ')') {
568  --len;
569  ++ptr;
570  goto again;
571  }
572  if (*ptr == ':' && ptr[len-1] == ')') {
573  Regexp *rp;
574  VALUE verbose = ruby_verbose;
576 
577  ++ptr;
578  len -= 2;
579  err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
580  enc, OnigDefaultSyntax, NULL);
581  onig_free(rp);
582  ruby_verbose = verbose;
583  }
584  if (err) {
585  options = RREGEXP(re)->ptr->options;
586  ptr = (UChar*)RREGEXP_SRC_PTR(re);
587  len = RREGEXP_SRC_LEN(re);
588  }
589  }
590 
591  if (*option_to_str(optbuf, options)) rb_str_buf_cat2(str, optbuf);
592 
593  if ((options & embeddable) != embeddable) {
594  optbuf[0] = '-';
595  option_to_str(optbuf + 1, ~options);
596  rb_str_buf_cat2(str, optbuf);
597  }
598 
599  rb_str_buf_cat2(str, ":");
600  if (rb_enc_asciicompat(enc)) {
601  rb_reg_expr_str(str, (char*)ptr, len, enc, NULL);
602  rb_str_buf_cat2(str, ")");
603  }
604  else {
605  const char *s, *e;
606  char *paren;
607  ptrdiff_t n;
608  rb_str_buf_cat2(str, ")");
610  str = rb_str_encode(str, rb_enc_from_encoding(enc), 0, Qnil);
611 
612  /* backup encoded ")" to paren */
613  s = RSTRING_PTR(str);
614  e = RSTRING_END(str);
615  s = rb_enc_left_char_head(s, e-1, e, enc);
616  n = e - s;
617  paren = ALLOCA_N(char, n);
618  memcpy(paren, s, n);
619  rb_str_resize(str, RSTRING_LEN(str) - n);
620 
621  rb_reg_expr_str(str, (char*)ptr, len, enc, NULL);
622  rb_str_buf_cat(str, paren, n);
623  }
624  rb_enc_copy(str, re);
625 
626  OBJ_INFECT(str, re);
627  return str;
628 }
629 
/*
 * Raises rb_eRegexpError with a message of the form "<err>: <desc>", where
 * desc is the description built by rb_reg_desc() from s, len and re.
 */
630 static void
631 rb_reg_raise(const char *s, long len, const char *err, VALUE re)
632 {
 /* volatile: presumably keeps desc visible to the GC until rb_raise() has
    formatted it -- confirm. */
633  volatile VALUE desc = rb_reg_desc(s, len, re);
634 
635  rb_raise(rb_eRegexpError, "%s: %"PRIsVALUE, err, desc);
636 }
637 
638 static VALUE
639 rb_enc_reg_error_desc(const char *s, long len, rb_encoding *enc, int options, const char *err)
640 {
641  char opts[6];
642  VALUE desc = rb_str_buf_new2(err);
644  if (resenc == NULL) resenc = rb_default_external_encoding();
645 
646  rb_enc_associate(desc, enc);
647  rb_str_buf_cat2(desc, ": /");
648  rb_reg_expr_str(desc, s, len, enc, resenc);
649  opts[0] = '/';
650  option_to_str(opts + 1, options);
651  rb_str_buf_cat2(desc, opts);
652  return rb_exc_new3(rb_eRegexpError, desc);
653 }
654 
655 static void
656 rb_enc_reg_raise(const char *s, long len, rb_encoding *enc, int options, const char *err)
657 {
658  rb_exc_raise(rb_enc_reg_error_desc(s, len, enc, options, err));
659 }
660 
661 static VALUE
662 rb_reg_error_desc(VALUE str, int options, const char *err)
663 {
665  rb_enc_get(str), options, err);
666 }
667 
668 static void
669 rb_reg_raise_str(VALUE str, int options, const char *err)
670 {
671  rb_exc_raise(rb_reg_error_desc(str, options, err));
672 }
673 
674 
675 /*
676  * call-seq:
677  * rxp.casefold? -> true or false
678  *
679  * Returns the value of the case-insensitive flag.
680  *
681  * /a/.casefold? #=> false
682  * /a/i.casefold? #=> true
683  * /(?i:a)/.casefold? #=> false
684  */
685 
686 static VALUE
688 {
689  rb_reg_check(re);
690  if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) return Qtrue;
691  return Qfalse;
692 }
693 
694 
695 /*
696  * call-seq:
697  * rxp.options -> fixnum
698  *
699  * Returns the set of bits corresponding to the options used when creating this
700  * Regexp (see <code>Regexp::new</code> for details). Note that additional bits
701  * may be set in the returned options: these are used internally by the regular
702  * expression code. These extra bits are ignored if the options are passed to
703  * <code>Regexp::new</code>.
704  *
705  * Regexp::IGNORECASE #=> 1
706  * Regexp::EXTENDED #=> 2
707  * Regexp::MULTILINE #=> 4
708  *
709  * /cat/.options #=> 0
710  * /cat/ix.options #=> 3
711  * Regexp.new('cat', true).options #=> 1
712  * /\xa1\xa2/e.options #=> 16
713  *
714  * r = /cat/ix
715  * Regexp.new(r.source, r.options) #=> /cat/ix
716  */
717 
718 static VALUE
720 {
721  int options = rb_reg_options(re);
722  return INT2NUM(options);
723 }
724 
725 static int
726 reg_names_iter(const OnigUChar *name, const OnigUChar *name_end,
727  int back_num, int *back_refs, OnigRegex regex, void *arg)
728 {
729  VALUE ary = (VALUE)arg;
730  rb_ary_push(ary, rb_str_new((const char *)name, name_end-name));
731  return 0;
732 }
733 
734 /*
735  * call-seq:
736  * rxp.names -> [name1, name2, ...]
737  *
738  * Returns a list of names of captures as an array of strings.
739  *
740  * /(?<foo>.)(?<bar>.)(?<baz>.)/.names
741  * #=> ["foo", "bar", "baz"]
742  *
743  * /(?<foo>.)(?<foo>.)/.names
744  * #=> ["foo"]
745  *
746  * /(.)(.)/.names
747  * #=> []
748  */
749 
750 static VALUE
752 {
753  VALUE ary = rb_ary_new();
754  rb_reg_check(re);
755  onig_foreach_name(RREGEXP(re)->ptr, reg_names_iter, (void*)ary);
756  return ary;
757 }
758 
759 static int
761  int back_num, int *back_refs, OnigRegex regex, void *arg)
762 {
763  VALUE hash = (VALUE)arg;
764  VALUE ary = rb_ary_new2(back_num);
765  int i;
766 
767  for (i = 0; i < back_num; i++)
768  rb_ary_store(ary, i, INT2NUM(back_refs[i]));
769 
770  rb_hash_aset(hash, rb_str_new((const char*)name, name_end-name),ary);
771 
772  return 0;
773 }
774 
775 /*
776  * call-seq:
777  * rxp.named_captures -> hash
778  *
779  * Returns a hash representing information about named captures of <i>rxp</i>.
780  *
781  * A key of the hash is a name of the named captures.
782  * A value of the hash is an array listing the indexes of the corresponding
783  * named captures.
784  *
785  * /(?<foo>.)(?<bar>.)/.named_captures
786  * #=> {"foo"=>[1], "bar"=>[2]}
787  *
788  * /(?<foo>.)(?<foo>.)/.named_captures
789  * #=> {"foo"=>[1, 2]}
790  *
791  * If there are no named captures, an empty hash is returned.
792  *
793  * /(.)(.)/.named_captures
794  * #=> {}
795  */
796 
797 static VALUE
799 {
800  VALUE hash = rb_hash_new();
801  rb_reg_check(re);
802  onig_foreach_name(RREGEXP(re)->ptr, reg_named_captures_iter, (void*)hash);
803  return hash;
804 }
805 
806 static int
807 onig_new_with_source(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
808  OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
809  OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
810 {
811  int r;
812 
813  *reg = (regex_t* )malloc(sizeof(regex_t));
814  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
815 
816  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
817  if (r) goto err;
818 
819  r = onig_compile(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
820  if (r) {
821  err:
822  onig_free(*reg);
823  *reg = NULL;
824  }
825  return r;
826 }
827 
828 static Regexp*
829 make_regexp(const char *s, long len, rb_encoding *enc, int flags, onig_errmsg_buffer err,
830  const char *sourcefile, int sourceline)
831 {
832  Regexp *rp;
833  int r;
834  OnigErrorInfo einfo;
835 
836  /* Handle escaped characters first. */
837 
838  /* Build a copy of the string (in dest) with the
839  escaped characters translated, and generate the regex
840  from that.
841  */
842 
843  r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s + len), flags,
844  enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
845  if (r) {
846  onig_error_code_to_str((UChar*)err, r, &einfo);
847  return 0;
848  }
849  return rp;
850 }
851 
852 
853 /*
854  * Document-class: MatchData
855  *
856  * <code>MatchData</code> is the type of the special variable <code>$~</code>,
857  * and is the type of the object returned by <code>Regexp#match</code> and
858  * <code>Regexp.last_match</code>. It encapsulates all the results of a pattern
859  * match, results normally accessed through the special variables
860  * <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>,
861  * <code>$2</code>, and so on.
862  *
863  */
864 
866 
867 static VALUE
869 {
870  NEWOBJ_OF(match, struct RMatch, klass, T_MATCH);
871 
872  match->str = 0;
873  match->rmatch = 0;
874  match->regexp = 0;
875  match->rmatch = ALLOC(struct rmatch);
876  MEMZERO(match->rmatch, struct rmatch, 1);
877 
878  return (VALUE)match;
879 }
880 
/* Associates a byte offset with the character offset computed for it. */
typedef struct {
    long byte_pos;      /* offset in bytes */
    long char_pos;      /* offset in characters */
} pair_t;

/*
 * Comparator for qsort()/bsearch() over pair_t, ordering by byte_pos only.
 *
 * Returns -1, 0 or 1 when pair1's byte_pos is less than, equal to or
 * greater than pair2's.  The result is derived from comparisons rather
 * than from a subtraction, so it cannot overflow and does not depend on
 * the relative sizes of int and long.
 */
static int
pair_byte_cmp(const void *pair1, const void *pair2)
{
    const long lhs = ((const pair_t *)pair1)->byte_pos;
    const long rhs = ((const pair_t *)pair2)->byte_pos;

    return (lhs > rhs) - (lhs < rhs);
}
896 
897 static void
899 {
900  struct rmatch *rm = RMATCH(match)->rmatch;
901  struct re_registers *regs;
902  int i, num_regs, num_pos;
903  long c;
904  char *s, *p, *q;
905  rb_encoding *enc;
906  pair_t *pairs;
907 
908  if (rm->char_offset_updated)
909  return;
910 
911  regs = &rm->regs;
912  num_regs = rm->regs.num_regs;
913 
914  if (rm->char_offset_num_allocated < num_regs) {
915  REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs);
917  }
918 
919  enc = rb_enc_get(RMATCH(match)->str);
920  if (rb_enc_mbmaxlen(enc) == 1) {
921  for (i = 0; i < num_regs; i++) {
922  rm->char_offset[i].beg = BEG(i);
923  rm->char_offset[i].end = END(i);
924  }
925  rm->char_offset_updated = 1;
926  return;
927  }
928 
929  pairs = ALLOCA_N(pair_t, num_regs*2);
930  num_pos = 0;
931  for (i = 0; i < num_regs; i++) {
932  if (BEG(i) < 0)
933  continue;
934  pairs[num_pos++].byte_pos = BEG(i);
935  pairs[num_pos++].byte_pos = END(i);
936  }
937  qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
938 
939  s = p = RSTRING_PTR(RMATCH(match)->str);
940  c = 0;
941  for (i = 0; i < num_pos; i++) {
942  q = s + pairs[i].byte_pos;
943  c += rb_enc_strlen(p, q, enc);
944  pairs[i].char_pos = c;
945  p = q;
946  }
947 
948  for (i = 0; i < num_regs; i++) {
949  pair_t key, *found;
950  if (BEG(i) < 0) {
951  rm->char_offset[i].beg = -1;
952  rm->char_offset[i].end = -1;
953  continue;
954  }
955 
956  key.byte_pos = BEG(i);
957  found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
958  rm->char_offset[i].beg = found->char_pos;
959 
960  key.byte_pos = END(i);
961  found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
962  rm->char_offset[i].end = found->char_pos;
963  }
964 
965  rm->char_offset_updated = 1;
966 }
967 
968 static void
970 {
971  if (!RMATCH(match)->regexp) {
972  rb_raise(rb_eTypeError, "uninitialized Match");
973  }
974 }
975 
976 /* :nodoc: */
977 static VALUE
979 {
980  struct rmatch *rm;
981 
982  if (!OBJ_INIT_COPY(obj, orig)) return obj;
983 
984  RMATCH(obj)->str = RMATCH(orig)->str;
985  RMATCH(obj)->regexp = RMATCH(orig)->regexp;
986 
987  rm = RMATCH(obj)->rmatch;
988  onig_region_copy(&rm->regs, RMATCH_REGS(orig));
989 
990  if (!RMATCH(orig)->rmatch->char_offset_updated) {
991  rm->char_offset_updated = 0;
992  }
993  else {
994  if (rm->char_offset_num_allocated < rm->regs.num_regs) {
995  REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
997  }
999  struct rmatch_offset, rm->regs.num_regs);
1000  rm->char_offset_updated = 1;
1001  }
1002 
1003  return obj;
1004 }
1005 
1006 
1007 /*
1008  * call-seq:
1009  * mtch.regexp -> regexp
1010  *
1011  * Returns the regexp.
1012  *
1013  * m = /a.*b/.match("abc")
1014  * m.regexp #=> /a.*b/
1015  */
1016 
1017 static VALUE
1019 {
1020  match_check(match);
1021  return RMATCH(match)->regexp;
1022 }
1023 
1024 /*
1025  * call-seq:
1026  * mtch.names -> [name1, name2, ...]
1027  *
1028  * Returns a list of names of captures as an array of strings.
1029  * It is the same as mtch.regexp.names.
1030  *
1031  * /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names
1032  * #=> ["foo", "bar", "baz"]
1033  *
1034  * m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil>
1035  * m.names #=> ["x", "y"]
1036  */
1037 
1038 static VALUE
1040 {
1041  match_check(match);
1042  return rb_reg_names(RMATCH(match)->regexp);
1043 }
1044 
1045 /*
1046  * call-seq:
1047  * mtch.length -> integer
1048  * mtch.size -> integer
1049  *
1050  * Returns the number of elements in the match array.
1051  *
1052  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1053  * m.length #=> 5
1054  * m.size #=> 5
1055  */
1056 
1057 static VALUE
1059 {
1060  match_check(match);
1061  return INT2FIX(RMATCH_REGS(match)->num_regs);
1062 }
1063 
1064 static int
1066 {
1067  const char *name;
1068  int num;
1069 
1070  struct re_registers *regs = RMATCH_REGS(match);
1071  VALUE regexp = RMATCH(match)->regexp;
1072 
1073  match_check(match);
1074  switch (TYPE(backref)) {
1075  default:
1076  return NUM2INT(backref);
1077 
1078  case T_SYMBOL:
1079  name = rb_id2name(SYM2ID(backref));
1080  break;
1081 
1082  case T_STRING:
1083  name = StringValueCStr(backref);
1084  break;
1085  }
1086 
1087  num = onig_name_to_backref_number(RREGEXP(regexp)->ptr,
1088  (const unsigned char*)name,
1089  (const unsigned char*)name + strlen(name),
1090  regs);
1091 
1092  if (num < 1) {
1093  rb_raise(rb_eIndexError, "undefined group name reference: %s", name);
1094  }
1095 
1096  return num;
1097 }
1098 
1099 int
1101 {
1102  return match_backref_number(match, backref);
1103 }
1104 
1105 /*
1106  * call-seq:
1107  * mtch.offset(n) -> array
1108  *
1109  * Returns a two-element array containing the beginning and ending offsets of
1110  * the <em>n</em>th match.
1111  * <em>n</em> can be a string or symbol to reference a named capture.
1112  *
1113  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1114  * m.offset(0) #=> [1, 7]
1115  * m.offset(4) #=> [6, 7]
1116  *
1117  * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
1118  * p m.offset(:foo) #=> [0, 1]
1119  * p m.offset(:bar) #=> [2, 3]
1120  *
1121  */
1122 
1123 static VALUE
1125 {
1126  int i = match_backref_number(match, n);
1127  struct re_registers *regs = RMATCH_REGS(match);
1128 
1129  match_check(match);
1130  if (i < 0 || regs->num_regs <= i)
1131  rb_raise(rb_eIndexError, "index %d out of matches", i);
1132 
1133  if (BEG(i) < 0)
1134  return rb_assoc_new(Qnil, Qnil);
1135 
1136  update_char_offset(match);
1137  return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg),
1138  INT2FIX(RMATCH(match)->rmatch->char_offset[i].end));
1139 }
1140 
1141 
1142 /*
1143  * call-seq:
1144  * mtch.begin(n) -> integer
1145  *
1146  * Returns the offset of the start of the <em>n</em>th element of the match
1147  * array in the string.
1148  * <em>n</em> can be a string or symbol to reference a named capture.
1149  *
1150  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1151  * m.begin(0) #=> 1
1152  * m.begin(2) #=> 2
1153  *
1154  * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
1155  * p m.begin(:foo) #=> 0
1156  * p m.begin(:bar) #=> 2
1157  */
1158 
1159 static VALUE
1161 {
1162  int i = match_backref_number(match, n);
1163  struct re_registers *regs = RMATCH_REGS(match);
1164 
1165  match_check(match);
1166  if (i < 0 || regs->num_regs <= i)
1167  rb_raise(rb_eIndexError, "index %d out of matches", i);
1168 
1169  if (BEG(i) < 0)
1170  return Qnil;
1171 
1172  update_char_offset(match);
1173  return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg);
1174 }
1175 
1176 
1177 /*
1178  * call-seq:
1179  * mtch.end(n) -> integer
1180  *
1181  * Returns the offset of the character immediately following the end of the
1182  * <em>n</em>th element of the match array in the string.
1183  * <em>n</em> can be a string or symbol to reference a named capture.
1184  *
1185  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1186  * m.end(0) #=> 7
1187  * m.end(2) #=> 3
1188  *
1189  * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
1190  * p m.end(:foo) #=> 1
1191  * p m.end(:bar) #=> 3
1192  */
1193 
1194 static VALUE
1196 {
1197  int i = match_backref_number(match, n);
1198  struct re_registers *regs = RMATCH_REGS(match);
1199 
1200  match_check(match);
1201  if (i < 0 || regs->num_regs <= i)
1202  rb_raise(rb_eIndexError, "index %d out of matches", i);
1203 
1204  if (BEG(i) < 0)
1205  return Qnil;
1206 
1207  update_char_offset(match);
1208  return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end);
1209 }
1210 
1211 #define MATCH_BUSY FL_USER2
1212 
1213 void
1215 {
1216  FL_SET(match, MATCH_BUSY);
1217 }
1218 
1219 /*
1220  * call-seq:
1221  * rxp.fixed_encoding? -> true or false
1222  *
1223  * Returns false if rxp is applicable to
1224  * a string with any ASCII compatible encoding.
1225  * Returns true otherwise.
1226  *
1227  * r = /a/
1228  * r.fixed_encoding? #=> false
1229  * r =~ "\u{6666} a" #=> 2
1230  * r =~ "\xa1\xa2 a".force_encoding("euc-jp") #=> 2
1231  * r =~ "abc".force_encoding("euc-jp") #=> 0
1232  *
1233  * r = /a/u
1234  * r.fixed_encoding? #=> true
1235  * r.encoding #=> #<Encoding:UTF-8>
1236  * r =~ "\u{6666} a" #=> 2
1237  * r =~ "\xa1\xa2".force_encoding("euc-jp") #=> ArgumentError
1238  * r =~ "abc".force_encoding("euc-jp") #=> 0
1239  *
1240  * r = /\u{6666}/
1241  * r.fixed_encoding? #=> true
1242  * r.encoding #=> #<Encoding:UTF-8>
1243  * r =~ "\u{6666} a" #=> 0
1244  * r =~ "\xa1\xa2".force_encoding("euc-jp") #=> ArgumentError
1245  * r =~ "abc".force_encoding("euc-jp") #=> nil
1246  */
1247 
1248 static VALUE
1250 {
1251  if (FL_TEST(re, KCODE_FIXED))
1252  return Qtrue;
1253  else
1254  return Qfalse;
1255 }
1256 
1257 static VALUE
1258 rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
1259  rb_encoding **fixed_enc, onig_errmsg_buffer err);
1260 
1261 
1262 static void
1264 {
1266  "incompatible encoding regexp match (%s regexp with %s string)",
1267  rb_enc_name(rb_enc_get(re)),
1268  rb_enc_name(rb_enc_get(str)));
1269 }
1270 
1271 static rb_encoding*
1272 rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
1273 {
1274  rb_encoding *enc = 0;
1275 
1278  "invalid byte sequence in %s",
1279  rb_enc_name(rb_enc_get(str)));
1280  }
1281 
1282  rb_reg_check(re);
1283  enc = rb_enc_get(str);
1284  if (!rb_enc_str_asciicompat_p(str)) {
1285  if (RREGEXP(re)->ptr->enc != enc) {
1286  reg_enc_error(re, str);
1287  }
1288  }
1289  else if (rb_reg_fixed_encoding_p(re)) {
1290  if (RREGEXP(re)->ptr->enc != enc &&
1291  (!rb_enc_asciicompat(RREGEXP(re)->ptr->enc) ||
1293  reg_enc_error(re, str);
1294  }
1295  enc = RREGEXP(re)->ptr->enc;
1296  }
1297  if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) &&
1298  enc != rb_ascii8bit_encoding() &&
1300  rb_warn("regexp match /.../n against to %s string",
1301  rb_enc_name(enc));
1302  }
1303  return enc;
1304 }
1305 
1306 regex_t *
1308 {
1309  regex_t *reg = RREGEXP(re)->ptr;
1310  onig_errmsg_buffer err = "";
1311  int r;
1312  OnigErrorInfo einfo;
1313  const char *pattern;
1314  VALUE unescaped;
1315  rb_encoding *fixed_enc = 0;
1316  rb_encoding *enc = rb_reg_prepare_enc(re, str, 1);
1317 
1318  if (reg->enc == enc) return reg;
1319 
1320  rb_reg_check(re);
1321  reg = RREGEXP(re)->ptr;
1322  pattern = RREGEXP_SRC_PTR(re);
1323 
1324  unescaped = rb_reg_preprocess(
1325  pattern, pattern + RREGEXP_SRC_LEN(re), enc,
1326  &fixed_enc, err);
1327 
1328  if (unescaped == Qnil) {
1329  rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
1330  }
1331 
1332  r = onig_new(&reg, (UChar* )RSTRING_PTR(unescaped),
1333  (UChar* )(RSTRING_PTR(unescaped) + RSTRING_LEN(unescaped)),
1334  reg->options, enc,
1335  OnigDefaultSyntax, &einfo);
1336  if (r) {
1337  onig_error_code_to_str((UChar*)err, r, &einfo);
1338  rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
1339  }
1340 
1341  RB_GC_GUARD(unescaped);
1342  return reg;
1343 }
1344 
1345 long
1346 rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse)
1347 {
1348  long range;
1349  rb_encoding *enc;
1350  UChar *p, *string;
1351 
1352  enc = rb_reg_prepare_enc(re, str, 0);
1353 
1354  if (reverse) {
1355  range = -pos;
1356  }
1357  else {
1358  range = RSTRING_LEN(str) - pos;
1359  }
1360 
1361  if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(str)) {
1362  string = (UChar*)RSTRING_PTR(str);
1363 
1364  if (range > 0) {
1365  p = onigenc_get_right_adjust_char_head(enc, string, string + pos, string + RSTRING_LEN(str));
1366  }
1367  else {
1368  p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos, string + RSTRING_LEN(str));
1369  }
1370  return p - string;
1371  }
1372 
1373  return pos;
1374 }
1375 
1376 /* returns byte offset */
1377 long
1378 rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
1379 {
1380  long result;
1381  VALUE match;
1382  struct re_registers regi, *regs = &regi;
1383  char *range = RSTRING_PTR(str);
1384  regex_t *reg;
1385  int tmpreg;
1386 
1387  if (pos > RSTRING_LEN(str) || pos < 0) {
1389  return -1;
1390  }
1391 
1392  reg = rb_reg_prepare_re(re, str);
1393  tmpreg = reg != RREGEXP(re)->ptr;
1394  if (!tmpreg) RREGEXP(re)->usecnt++;
1395 
1396  match = rb_backref_get();
1397  if (!NIL_P(match)) {
1398  if (FL_TEST(match, MATCH_BUSY)) {
1399  match = Qnil;
1400  }
1401  else {
1402  regs = RMATCH_REGS(match);
1403  }
1404  }
1405  if (NIL_P(match)) {
1406  MEMZERO(regs, struct re_registers, 1);
1407  }
1408  if (!reverse) {
1409  range += RSTRING_LEN(str);
1410  }
1411  result = onig_search(reg,
1412  (UChar*)(RSTRING_PTR(str)),
1413  ((UChar*)(RSTRING_PTR(str)) + RSTRING_LEN(str)),
1414  ((UChar*)(RSTRING_PTR(str)) + pos),
1415  ((UChar*)range),
1416  regs, ONIG_OPTION_NONE);
1417  if (!tmpreg) RREGEXP(re)->usecnt--;
1418  if (tmpreg) {
1419  if (RREGEXP(re)->usecnt) {
1420  onig_free(reg);
1421  }
1422  else {
1423  onig_free(RREGEXP(re)->ptr);
1424  RREGEXP(re)->ptr = reg;
1425  }
1426  }
1427  if (result < 0) {
1428  if (regs == &regi)
1429  onig_region_free(regs, 0);
1430  if (result == ONIG_MISMATCH) {
1432  return result;
1433  }
1434  else {
1435  onig_errmsg_buffer err = "";
1436  onig_error_code_to_str((UChar*)err, (int)result);
1437  rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
1438  }
1439  }
1440 
1441  if (NIL_P(match)) {
1442  match = match_alloc(rb_cMatch);
1443  onig_region_copy(RMATCH_REGS(match), regs);
1444  onig_region_free(regs, 0);
1445  }
1446  else {
1447  if (rb_safe_level() >= 3)
1448  OBJ_TAINT(match);
1449  else
1450  FL_UNSET(match, FL_TAINT);
1451  }
1452 
1453  RMATCH(match)->str = rb_str_new4(str);
1454  RMATCH(match)->regexp = re;
1455  RMATCH(match)->rmatch->char_offset_updated = 0;
1456  rb_backref_set(match);
1457 
1458  OBJ_INFECT(match, re);
1459  OBJ_INFECT(match, str);
1460 
1461  return result;
1462 }
1463 
1464 VALUE
1466 {
1467  struct re_registers *regs;
1468  if (NIL_P(match)) return Qnil;
1469  match_check(match);
1470  regs = RMATCH_REGS(match);
1471  if (nth >= regs->num_regs) {
1472  return Qnil;
1473  }
1474  if (nth < 0) {
1475  nth += regs->num_regs;
1476  if (nth <= 0) return Qnil;
1477  }
1478  if (BEG(nth) == -1) return Qfalse;
1479  return Qtrue;
1480 }
1481 
1482 VALUE
1484 {
1485  VALUE str;
1486  long start, end, len;
1487  struct re_registers *regs;
1488 
1489  if (NIL_P(match)) return Qnil;
1490  match_check(match);
1491  regs = RMATCH_REGS(match);
1492  if (nth >= regs->num_regs) {
1493  return Qnil;
1494  }
1495  if (nth < 0) {
1496  nth += regs->num_regs;
1497  if (nth <= 0) return Qnil;
1498  }
1499  start = BEG(nth);
1500  if (start == -1) return Qnil;
1501  end = END(nth);
1502  len = end - start;
1503  str = rb_str_subseq(RMATCH(match)->str, start, len);
1504  OBJ_INFECT(str, match);
1505  return str;
1506 }
1507 
1508 VALUE
1510 {
1511  return rb_reg_nth_match(0, match);
1512 }
1513 
1514 
1515 /*
1516  * call-seq:
1517  * mtch.pre_match -> str
1518  *
1519  * Returns the portion of the original string before the current match.
1520  * Equivalent to the special variable <code>$`</code>.
1521  *
1522  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1523  * m.pre_match #=> "T"
1524  */
1525 
1526 VALUE
1528 {
1529  VALUE str;
1530  struct re_registers *regs;
1531 
1532  if (NIL_P(match)) return Qnil;
1533  match_check(match);
1534  regs = RMATCH_REGS(match);
1535  if (BEG(0) == -1) return Qnil;
1536  str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
1537  if (OBJ_TAINTED(match)) OBJ_TAINT(str);
1538  return str;
1539 }
1540 
1541 
1542 /*
1543  * call-seq:
1544  * mtch.post_match -> str
1545  *
1546  * Returns the portion of the original string after the current match.
1547  * Equivalent to the special variable <code>$'</code>.
1548  *
1549  * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
1550  * m.post_match #=> ": The Movie"
1551  */
1552 
1553 VALUE
1555 {
1556  VALUE str;
1557  long pos;
1558  struct re_registers *regs;
1559 
1560  if (NIL_P(match)) return Qnil;
1561  match_check(match);
1562  regs = RMATCH_REGS(match);
1563  if (BEG(0) == -1) return Qnil;
1564  str = RMATCH(match)->str;
1565  pos = END(0);
1566  str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
1567  if (OBJ_TAINTED(match)) OBJ_TAINT(str);
1568  return str;
1569 }
1570 
1571 VALUE
1573 {
1574  int i;
1575  struct re_registers *regs;
1576 
1577  if (NIL_P(match)) return Qnil;
1578  match_check(match);
1579  regs = RMATCH_REGS(match);
1580  if (BEG(0) == -1) return Qnil;
1581 
1582  for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
1583  ;
1584  if (i == 0) return Qnil;
1585  return rb_reg_nth_match(i, match);
1586 }
1587 
1588 static VALUE
1590 {
1592 }
1593 
1594 static VALUE
1596 {
1597  return rb_reg_match_pre(rb_backref_get());
1598 }
1599 
1600 static VALUE
1602 {
1604 }
1605 
1606 static VALUE
1608 {
1610 }
1611 
1612 static VALUE
1614 {
1615  struct re_registers *regs;
1616  VALUE ary;
1617  VALUE target;
1618  int i;
1619  int taint = OBJ_TAINTED(match);
1620 
1621  match_check(match);
1622  regs = RMATCH_REGS(match);
1623  ary = rb_ary_new2(regs->num_regs);
1624  target = RMATCH(match)->str;
1625 
1626  for (i=start; i<regs->num_regs; i++) {
1627  if (regs->beg[i] == -1) {
1628  rb_ary_push(ary, Qnil);
1629  }
1630  else {
1631  VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]);
1632  if (taint) OBJ_TAINT(str);
1633  rb_ary_push(ary, str);
1634  }
1635  }
1636  return ary;
1637 }
1638 
1639 
1640 /* [MG]:FIXME: I put parens around the /.../.match() in the first line of the
1641  second example to prevent the '*' followed by a '/' from ending the
1642  comment. */
1643 
1644 /*
1645  * call-seq:
1646  * mtch.to_a -> anArray
1647  *
1648  * Returns the array of matches.
1649  *
1650  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1651  * m.to_a #=> ["HX1138", "H", "X", "113", "8"]
1652  *
1653  * Because <code>to_a</code> is called when expanding
1654  * <code>*</code><em>variable</em>, there's a useful assignment
1655  * shortcut for extracting matched fields. This is slightly slower than
1656  * accessing the fields directly (as an intermediate array is
1657  * generated).
1658  *
1659  * all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
1660  * all #=> "HX1138"
1661  * f1 #=> "H"
1662  * f2 #=> "X"
1663  * f3 #=> "113"
1664  */
1665 
1666 static VALUE
1668 {
1669  return match_array(match, 0);
1670 }
1671 
1672 
1673 /*
1674  * call-seq:
1675  * mtch.captures -> array
1676  *
1677  * Returns the array of captures; equivalent to <code>mtch.to_a[1..-1]</code>.
1678  *
1679  * f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
1680  * f1 #=> "H"
1681  * f2 #=> "X"
1682  * f3 #=> "113"
1683  * f4 #=> "8"
1684  */
1685 static VALUE
1687 {
1688  return match_array(match, 1);
1689 }
1690 
1691 static int
1692 name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end)
1693 {
1694  return onig_name_to_backref_number(RREGEXP(regexp)->ptr,
1695  (const unsigned char* )name, (const unsigned char* )name_end, regs);
1696 }
1697 
1699 static void
1701 {
1702  rb_raise(rb_eIndexError, "undefined group name reference: % "PRIsVALUE,
1703  name);
1704 }
1705 
1706 /*
1707  * call-seq:
1708  * mtch[i] -> str or nil
1709  * mtch[start, length] -> array
1710  * mtch[range] -> array
1711  * mtch[name] -> str or nil
1712  *
1713  * Match Reference -- <code>MatchData</code> acts as an array, and may be
1714  * accessed using the normal array indexing techniques. <code>mtch[0]</code>
1715  * is equivalent to the special variable <code>$&</code>, and returns the
1716  * entire matched string. <code>mtch[1]</code>, <code>mtch[2]</code>, and so
1717  * on return the values of the matched backreferences (portions of the
1718  * pattern between parentheses).
1719  *
1720  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1721  * m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
1722  * m[0] #=> "HX1138"
1723  * m[1, 2] #=> ["H", "X"]
1724  * m[1..3] #=> ["H", "X", "113"]
1725  * m[-3, 2] #=> ["X", "113"]
1726  *
1727  * m = /(?<foo>a+)b/.match("ccaaab")
1728  * m #=> #<MatchData "aaab" foo:"aaa">
1729  * m["foo"] #=> "aaa"
1730  * m[:foo] #=> "aaa"
1731  */
1732 
1733 static VALUE
1735 {
1736  VALUE idx, rest;
1737 
1738  match_check(match);
1739  rb_scan_args(argc, argv, "11", &idx, &rest);
1740 
1741  if (NIL_P(rest)) {
1742  if (FIXNUM_P(idx)) {
1743  if (FIX2INT(idx) >= 0) {
1744  return rb_reg_nth_match(FIX2INT(idx), match);
1745  }
1746  }
1747  else {
1748  const char *p;
1749  int num;
1750 
1751  switch (TYPE(idx)) {
1752  case T_SYMBOL:
1753  idx = rb_id2str(SYM2ID(idx));
1754  /* fall through */
1755  case T_STRING:
1756  p = StringValuePtr(idx);
1757  if (!rb_enc_compatible(RREGEXP(RMATCH(match)->regexp)->src, idx) ||
1758  (num = name_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp,
1759  p, p + RSTRING_LEN(idx))) < 1) {
1760  name_to_backref_error(idx);
1761  }
1762  return rb_reg_nth_match(num, match);
1763 
1764  default:
1765  break;
1766  }
1767  }
1768  }
1769 
1770  return rb_ary_aref(argc, argv, match_to_a(match));
1771 }
1772 
1773 static VALUE
1775 {
1776  /* n should not exceed num_regs */
1777  return rb_reg_nth_match((int)n, match);
1778 }
1779 
1780 
1781 /*
1782  * call-seq:
1783  *
1784  * mtch.values_at([index]*) -> array
1785  *
1786  * Uses each <i>index</i> to access the matching values, returning an array of
1787  * the corresponding matches.
1788  *
1789  * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
1790  * m.to_a #=> ["HX1138", "H", "X", "113", "8"]
1791  * m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"]
1792  */
1793 
1794 static VALUE
1796 {
1797  struct re_registers *regs;
1798 
1799  match_check(match);
1800  regs = RMATCH_REGS(match);
1801  return rb_get_values_at(match, regs->num_regs, argc, argv, match_entry);
1802 }
1803 
1804 
1805 /*
1806  * call-seq:
1807  * mtch.to_s -> str
1808  *
1809  * Returns the entire matched string.
1810  *
1811  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1812  * m.to_s #=> "HX1138"
1813  */
1814 
1815 static VALUE
1817 {
1818  VALUE str = rb_reg_last_match(match);
1819 
1820  match_check(match);
1821  if (NIL_P(str)) str = rb_str_new(0,0);
1822  if (OBJ_TAINTED(match)) OBJ_TAINT(str);
1823  if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str);
1824  return str;
1825 }
1826 
1827 
1828 /*
1829  * call-seq:
1830  * mtch.string -> str
1831  *
1832  * Returns a frozen copy of the string passed in to <code>match</code>.
1833  *
1834  * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
1835  * m.string #=> "THX1138."
1836  */
1837 
1838 static VALUE
1840 {
1841  match_check(match);
1842  return RMATCH(match)->str; /* str is frozen */
1843 }
1844 
1846  const UChar *name;
1847  long len;
1848 };
1849 
1850 static int
1852  int back_num, int *back_refs, OnigRegex regex, void *arg0)
1853 {
1854  struct backref_name_tag *arg = (struct backref_name_tag *)arg0;
1855  int i;
1856 
1857  for (i = 0; i < back_num; i++) {
1858  arg[back_refs[i]].name = name;
1859  arg[back_refs[i]].len = name_end - name;
1860  }
1861  return 0;
1862 }
1863 
1864 /*
1865  * call-seq:
1866  * mtch.inspect -> str
1867  *
1868  * Returns a printable version of <i>mtch</i>.
1869  *
1870  * puts /.$/.match("foo").inspect
1871  * #=> #<MatchData "o">
1872  *
1873  * puts /(.)(.)(.)/.match("foo").inspect
1874  * #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
1875  *
1876  * puts /(.)(.)?(.)/.match("fo").inspect
1877  * #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
1878  *
1879  * puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
1880  * #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
1881  *
1882  */
1883 
1884 static VALUE
1886 {
1887  const char *cname = rb_obj_classname(match);
1888  VALUE str;
1889  int i;
1890  struct re_registers *regs = RMATCH_REGS(match);
1891  int num_regs = regs->num_regs;
1892  struct backref_name_tag *names;
1893  VALUE regexp = RMATCH(match)->regexp;
1894 
1895  if (regexp == 0) {
1896  return rb_sprintf("#<%s:%p>", cname, (void*)match);
1897  }
1898 
1899  names = ALLOCA_N(struct backref_name_tag, num_regs);
1900  MEMZERO(names, struct backref_name_tag, num_regs);
1901 
1902  onig_foreach_name(RREGEXP(regexp)->ptr,
1903  match_inspect_name_iter, names);
1904 
1905  str = rb_str_buf_new2("#<");
1906  rb_str_buf_cat2(str, cname);
1907 
1908  for (i = 0; i < num_regs; i++) {
1909  VALUE v;
1910  rb_str_buf_cat2(str, " ");
1911  if (0 < i) {
1912  if (names[i].name)
1913  rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
1914  else {
1915  rb_str_catf(str, "%d", i);
1916  }
1917  rb_str_buf_cat2(str, ":");
1918  }
1919  v = rb_reg_nth_match(i, match);
1920  if (v == Qnil)
1921  rb_str_buf_cat2(str, "nil");
1922  else
1924  }
1925  rb_str_buf_cat2(str, ">");
1926 
1927  return str;
1928 }
1929 
1931 
1932 static int
1933 read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err)
1934 {
1935  const char *p = *pp;
1936  int code;
1937  int meta_prefix = 0, ctrl_prefix = 0;
1938  size_t len;
1939 
1940  if (p == end || *p++ != '\\') {
1941  errcpy(err, "too short escaped multibyte character");
1942  return -1;
1943  }
1944 
1945 again:
1946  if (p == end) {
1947  errcpy(err, "too short escape sequence");
1948  return -1;
1949  }
1950  switch (*p++) {
1951  case '\\': code = '\\'; break;
1952  case 'n': code = '\n'; break;
1953  case 't': code = '\t'; break;
1954  case 'r': code = '\r'; break;
1955  case 'f': code = '\f'; break;
1956  case 'v': code = '\013'; break;
1957  case 'a': code = '\007'; break;
1958  case 'e': code = '\033'; break;
1959 
1960  /* \OOO */
1961  case '0': case '1': case '2': case '3':
1962  case '4': case '5': case '6': case '7':
1963  p--;
1964  code = scan_oct(p, end < p+3 ? end-p : 3, &len);
1965  p += len;
1966  break;
1967 
1968  case 'x': /* \xHH */
1969  code = scan_hex(p, end < p+2 ? end-p : 2, &len);
1970  if (len < 1) {
1971  errcpy(err, "invalid hex escape");
1972  return -1;
1973  }
1974  p += len;
1975  break;
1976 
1977  case 'M': /* \M-X, \M-\C-X, \M-\cX */
1978  if (meta_prefix) {
1979  errcpy(err, "duplicate meta escape");
1980  return -1;
1981  }
1982  meta_prefix = 1;
1983  if (p+1 < end && *p++ == '-' && (*p & 0x80) == 0) {
1984  if (*p == '\\') {
1985  p++;
1986  goto again;
1987  }
1988  else {
1989  code = *p++;
1990  break;
1991  }
1992  }
1993  errcpy(err, "too short meta escape");
1994  return -1;
1995 
1996  case 'C': /* \C-X, \C-\M-X */
1997  if (p == end || *p++ != '-') {
1998  errcpy(err, "too short control escape");
1999  return -1;
2000  }
2001  case 'c': /* \cX, \c\M-X */
2002  if (ctrl_prefix) {
2003  errcpy(err, "duplicate control escape");
2004  return -1;
2005  }
2006  ctrl_prefix = 1;
2007  if (p < end && (*p & 0x80) == 0) {
2008  if (*p == '\\') {
2009  p++;
2010  goto again;
2011  }
2012  else {
2013  code = *p++;
2014  break;
2015  }
2016  }
2017  errcpy(err, "too short control escape");
2018  return -1;
2019 
2020  default:
2021  errcpy(err, "unexpected escape sequence");
2022  return -1;
2023  }
2024  if (code < 0 || 0xff < code) {
2025  errcpy(err, "invalid escape code");
2026  return -1;
2027  }
2028 
2029  if (ctrl_prefix)
2030  code &= 0x1f;
2031  if (meta_prefix)
2032  code |= 0x80;
2033 
2034  *pp = p;
2035  return code;
2036 }
2037 
2038 static int
2039 unescape_escaped_nonascii(const char **pp, const char *end, rb_encoding *enc,
2041 {
2042  const char *p = *pp;
2043  int chmaxlen = rb_enc_mbmaxlen(enc);
2044  char *chbuf = ALLOCA_N(char, chmaxlen);
2045  int chlen = 0;
2046  int byte;
2047  int l;
2048 
2049  memset(chbuf, 0, chmaxlen);
2050 
2051  byte = read_escaped_byte(&p, end, err);
2052  if (byte == -1) {
2053  return -1;
2054  }
2055 
2056  chbuf[chlen++] = byte;
2057  while (chlen < chmaxlen &&
2058  MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
2059  byte = read_escaped_byte(&p, end, err);
2060  if (byte == -1) {
2061  return -1;
2062  }
2063  chbuf[chlen++] = byte;
2064  }
2065 
2066  l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
2067  if (MBCLEN_INVALID_P(l)) {
2068  errcpy(err, "invalid multibyte escape");
2069  return -1;
2070  }
2071  if (1 < chlen || (chbuf[0] & 0x80)) {
2072  rb_str_buf_cat(buf, chbuf, chlen);
2073 
2074  if (*encp == 0)
2075  *encp = enc;
2076  else if (*encp != enc) {
2077  errcpy(err, "escaped non ASCII character in UTF-8 regexp");
2078  return -1;
2079  }
2080  }
2081  else {
2082  char escbuf[5];
2083  snprintf(escbuf, sizeof(escbuf), "\\x%02X", chbuf[0]&0xff);
2084  rb_str_buf_cat(buf, escbuf, 4);
2085  }
2086  *pp = p;
2087  return 0;
2088 }
2089 
2090 static int
2092 {
2093  if ((0xd800 <= code && code <= 0xdfff) || /* Surrogates */
2094  0x10ffff < code) {
2095  errcpy(err, "invalid Unicode range");
2096  return -1;
2097  }
2098  return 0;
2099 }
2100 
2101 static int
2102 append_utf8(unsigned long uv,
2104 {
2105  if (check_unicode_range(uv, err) != 0)
2106  return -1;
2107  if (uv < 0x80) {
2108  char escbuf[5];
2109  snprintf(escbuf, sizeof(escbuf), "\\x%02X", (int)uv);
2110  rb_str_buf_cat(buf, escbuf, 4);
2111  }
2112  else {
2113  int len;
2114  char utf8buf[6];
2115  len = rb_uv_to_utf8(utf8buf, uv);
2116  rb_str_buf_cat(buf, utf8buf, len);
2117 
2118  if (*encp == 0)
2119  *encp = rb_utf8_encoding();
2120  else if (*encp != rb_utf8_encoding()) {
2121  errcpy(err, "UTF-8 character in non UTF-8 regexp");
2122  return -1;
2123  }
2124  }
2125  return 0;
2126 }
2127 
2128 static int
2129 unescape_unicode_list(const char **pp, const char *end,
2131 {
2132  const char *p = *pp;
2133  int has_unicode = 0;
2134  unsigned long code;
2135  size_t len;
2136 
2137  while (p < end && ISSPACE(*p)) p++;
2138 
2139  while (1) {
2140  code = ruby_scan_hex(p, end-p, &len);
2141  if (len == 0)
2142  break;
2143  if (6 < len) { /* max 10FFFF */
2144  errcpy(err, "invalid Unicode range");
2145  return -1;
2146  }
2147  p += len;
2148  if (append_utf8(code, buf, encp, err) != 0)
2149  return -1;
2150  has_unicode = 1;
2151 
2152  while (p < end && ISSPACE(*p)) p++;
2153  }
2154 
2155  if (has_unicode == 0) {
2156  errcpy(err, "invalid Unicode list");
2157  return -1;
2158  }
2159 
2160  *pp = p;
2161 
2162  return 0;
2163 }
2164 
2165 static int
2166 unescape_unicode_bmp(const char **pp, const char *end,
2168 {
2169  const char *p = *pp;
2170  size_t len;
2171  unsigned long code;
2172 
2173  if (end < p+4) {
2174  errcpy(err, "invalid Unicode escape");
2175  return -1;
2176  }
2177  code = ruby_scan_hex(p, 4, &len);
2178  if (len != 4) {
2179  errcpy(err, "invalid Unicode escape");
2180  return -1;
2181  }
2182  if (append_utf8(code, buf, encp, err) != 0)
2183  return -1;
2184  *pp = p + 4;
2185  return 0;
2186 }
2187 
2188 static int
2189 unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
2190  VALUE buf, rb_encoding **encp, int *has_property,
2192 {
2193  char c;
2194  char smallbuf[2];
2195 
2196  while (p < end) {
2197  int chlen = rb_enc_precise_mbclen(p, end, enc);
2198  if (!MBCLEN_CHARFOUND_P(chlen)) {
2199  errcpy(err, "invalid multibyte character");
2200  return -1;
2201  }
2202  chlen = MBCLEN_CHARFOUND_LEN(chlen);
2203  if (1 < chlen || (*p & 0x80)) {
2204  rb_str_buf_cat(buf, p, chlen);
2205  p += chlen;
2206  if (*encp == 0)
2207  *encp = enc;
2208  else if (*encp != enc) {
2209  errcpy(err, "non ASCII character in UTF-8 regexp");
2210  return -1;
2211  }
2212  continue;
2213  }
2214 
2215  switch (c = *p++) {
2216  case '\\':
2217  if (p == end) {
2218  errcpy(err, "too short escape sequence");
2219  return -1;
2220  }
2221  switch (c = *p++) {
2222  case '1': case '2': case '3':
2223  case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */
2224  {
2225  size_t octlen;
2226  if (ruby_scan_oct(p-1, end-(p-1), &octlen) <= 0177) {
2227  /* backref or 7bit octal.
2228  no need to unescape anyway.
2229  re-escaping may break backref */
2230  goto escape_asis;
2231  }
2232  }
2233  /* xxx: How about more than 199 subexpressions? */
2234 
2235  case '0': /* \0, \0O, \0OO */
2236 
2237  case 'x': /* \xHH */
2238  case 'c': /* \cX, \c\M-X */
2239  case 'C': /* \C-X, \C-\M-X */
2240  case 'M': /* \M-X, \M-\C-X, \M-\cX */
2241  p = p-2;
2242  if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
2243  return -1;
2244  break;
2245 
2246  case 'u':
2247  if (p == end) {
2248  errcpy(err, "too short escape sequence");
2249  return -1;
2250  }
2251  if (*p == '{') {
2252  /* \u{H HH HHH HHHH HHHHH HHHHHH ...} */
2253  p++;
2254  if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
2255  return -1;
2256  if (p == end || *p++ != '}') {
2257  errcpy(err, "invalid Unicode list");
2258  return -1;
2259  }
2260  break;
2261  }
2262  else {
2263  /* \uHHHH */
2264  if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
2265  return -1;
2266  break;
2267  }
2268 
2269  case 'p': /* \p{Hiragana} */
2270  case 'P':
2271  if (!*encp) {
2272  *has_property = 1;
2273  }
2274  goto escape_asis;
2275 
2276  default: /* \n, \\, \d, \9, etc. */
2277 escape_asis:
2278  smallbuf[0] = '\\';
2279  smallbuf[1] = c;
2280  rb_str_buf_cat(buf, smallbuf, 2);
2281  break;
2282  }
2283  break;
2284 
2285  default:
2286  rb_str_buf_cat(buf, &c, 1);
2287  break;
2288  }
2289  }
2290 
2291  return 0;
2292 }
2293 
2294 static VALUE
2295 rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
2296  rb_encoding **fixed_enc, onig_errmsg_buffer err)
2297 {
2298  VALUE buf;
2299  int has_property = 0;
2300 
2301  buf = rb_str_buf_new(0);
2302 
2303  if (rb_enc_asciicompat(enc))
2304  *fixed_enc = 0;
2305  else {
2306  *fixed_enc = enc;
2307  rb_enc_associate(buf, enc);
2308  }
2309 
2310  if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err) != 0)
2311  return Qnil;
2312 
2313  if (has_property && !*fixed_enc) {
2314  *fixed_enc = enc;
2315  }
2316 
2317  if (*fixed_enc) {
2318  rb_enc_associate(buf, *fixed_enc);
2319  }
2320 
2321  return buf;
2322 }
2323 
2324 VALUE
2326 {
2327  rb_encoding *fixed_enc = 0;
2328  onig_errmsg_buffer err = "";
2329  VALUE buf;
2330  char *p, *end;
2331  rb_encoding *enc;
2332 
2333  StringValue(str);
2334  p = RSTRING_PTR(str);
2335  end = p + RSTRING_LEN(str);
2336  enc = rb_enc_get(str);
2337 
2338  buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err);
2339  RB_GC_GUARD(str);
2340 
2341  if (buf == Qnil) {
2342  return rb_reg_error_desc(str, 0, err);
2343  }
2344  return Qnil;
2345 }
2346 
2347 static VALUE
2349 {
2350  rb_encoding *fixed_enc = 0;
2351  rb_encoding *regexp_enc = 0;
2352  onig_errmsg_buffer err = "";
2353  int i;
2354  VALUE result = 0;
2355  rb_encoding *ascii8bit = rb_ascii8bit_encoding();
2356 
2357  if (RARRAY_LEN(ary) == 0) {
2358  rb_raise(rb_eArgError, "no arguments given");
2359  }
2360 
2361  for (i = 0; i < RARRAY_LEN(ary); i++) {
2362  VALUE str = RARRAY_AREF(ary, i);
2363  VALUE buf;
2364  char *p, *end;
2365  rb_encoding *src_enc;
2366 
2367  src_enc = rb_enc_get(str);
2368  if (options & ARG_ENCODING_NONE &&
2369  src_enc != ascii8bit) {
2371  rb_raise(rb_eRegexpError, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
2372  else
2373  src_enc = ascii8bit;
2374  }
2375 
2376  StringValue(str);
2377  p = RSTRING_PTR(str);
2378  end = p + RSTRING_LEN(str);
2379 
2380  buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err);
2381 
2382  if (buf == Qnil)
2383  rb_raise(rb_eArgError, "%s", err);
2384 
2385  if (fixed_enc != 0) {
2386  if (regexp_enc != 0 && regexp_enc != fixed_enc) {
2387  rb_raise(rb_eRegexpError, "encoding mismatch in dynamic regexp : %s and %s",
2388  rb_enc_name(regexp_enc), rb_enc_name(fixed_enc));
2389  }
2390  regexp_enc = fixed_enc;
2391  }
2392 
2393  if (!result)
2394  result = rb_str_new3(str);
2395  else
2396  rb_str_buf_append(result, str);
2397  }
2398  if (regexp_enc) {
2399  rb_enc_associate(result, regexp_enc);
2400  }
2401 
2402  return result;
2403 }
2404 
2405 static int
2406 rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc,
2408  const char *sourcefile, int sourceline)
2409 {
2410  struct RRegexp *re = RREGEXP(obj);
2411  VALUE unescaped;
2412  rb_encoding *fixed_enc = 0;
2414 
2415  rb_check_frozen(obj);
2416  if (FL_TEST(obj, REG_LITERAL))
2417  rb_raise(rb_eSecurityError, "can't modify literal regexp");
2418  if (re->ptr)
2419  rb_raise(rb_eTypeError, "already initialized regexp");
2420  re->ptr = 0;
2421 
2422  if (rb_enc_dummy_p(enc)) {
2423  errcpy(err, "can't make regexp with dummy encoding");
2424  return -1;
2425  }
2426 
2427  unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err);
2428  if (unescaped == Qnil)
2429  return -1;
2430 
2431  if (fixed_enc) {
2432  if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
2433  (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
2434  errcpy(err, "incompatible character encoding");
2435  return -1;
2436  }
2437  if (fixed_enc != a_enc) {
2438  options |= ARG_ENCODING_FIXED;
2439  enc = fixed_enc;
2440  }
2441  }
2442  else if (!(options & ARG_ENCODING_FIXED)) {
2443  enc = rb_usascii_encoding();
2444  }
2445 
2446  rb_enc_associate((VALUE)re, enc);
2447  if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
2448  re->basic.flags |= KCODE_FIXED;
2449  }
2450  if (options & ARG_ENCODING_NONE) {
2451  re->basic.flags |= REG_ENCODING_NONE;
2452  }
2453 
2454  re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
2455  options & ARG_REG_OPTION_MASK, err,
2456  sourcefile, sourceline);
2457  if (!re->ptr) return -1;
2458  RB_OBJ_WRITE(obj, &re->src, rb_fstring(rb_enc_str_new(s, len, enc)));
2459  RB_GC_GUARD(unescaped);
2460  return 0;
2461 }
2462 
2463 static int
2465  const char *sourcefile, int sourceline)
2466 {
2467  int ret;
2468  rb_encoding *enc = rb_enc_get(str);
2469  if (options & ARG_ENCODING_NONE) {
2470  rb_encoding *ascii8bit = rb_ascii8bit_encoding();
2471  if (enc != ascii8bit) {
2473  errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
2474  return -1;
2475  }
2476  enc = ascii8bit;
2477  }
2478  }
2479  ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
2480  options, err, sourcefile, sourceline);
2481  OBJ_INFECT(obj, str);
2482  RB_GC_GUARD(str);
2483  return ret;
2484 }
2485 
2486 static VALUE
2488 {
2490 
2491  re->ptr = 0;
2492  RB_OBJ_WRITE(re, &re->src, 0);
2493  re->usecnt = 0;
2494 
2495  return (VALUE)re;
2496 }
2497 
2498 VALUE
2500 {
2501  return rb_reg_s_alloc(rb_cRegexp);
2502 }
2503 
2504 VALUE
2506 {
2507  return rb_reg_init_str(rb_reg_alloc(), s, options);
2508 }
2509 
2510 VALUE
2512 {
2513  onig_errmsg_buffer err = "";
2514 
2515  if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
2516  rb_reg_raise_str(s, options, err);
2517  }
2518 
2519  return re;
2520 }
2521 
2522 VALUE
2523 rb_reg_new_ary(VALUE ary, int opt)
2524 {
2525  return rb_reg_new_str(rb_reg_preprocess_dregexp(ary, opt), opt);
2526 }
2527 
2528 VALUE
2529 rb_enc_reg_new(const char *s, long len, rb_encoding *enc, int options)
2530 {
2531  VALUE re = rb_reg_alloc();
2532  onig_errmsg_buffer err = "";
2533 
2534  if (rb_reg_initialize(re, s, len, enc, options, err, NULL, 0) != 0) {
2535  rb_enc_reg_raise(s, len, enc, options, err);
2536  }
2537 
2538  return re;
2539 }
2540 
2541 VALUE
2542 rb_reg_new(const char *s, long len, int options)
2543 {
2544  return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options);
2545 }
2546 
2547 VALUE
2548 rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
2549 {
2550  VALUE re = rb_reg_alloc();
2551  onig_errmsg_buffer err = "";
2552 
2553  if (!str) str = rb_str_new(0,0);
2554  if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
2555  rb_set_errinfo(rb_reg_error_desc(str, options, err));
2556  return Qnil;
2557  }
2558  FL_SET(re, REG_LITERAL);
2559  return re;
2560 }
2561 
2563 
2564 VALUE
2566 {
2567  volatile VALUE save_str = str;
2568  if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
2569  && ENCODING_GET(reg_cache) == ENCODING_GET(str)
2570  && memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
2571  return reg_cache;
2572 
2573  return reg_cache = rb_reg_new_str(save_str, 0);
2574 }
2575 
2576 static st_index_t reg_hash(VALUE re);
2577 /*
2578  * call-seq:
2579  * rxp.hash -> fixnum
2580  *
2581  * Produce a hash based on the text and options of this regular expression.
2582  */
2583 
2584 static VALUE
2586 {
2587  st_index_t hashval = reg_hash(re);
2588  return LONG2FIX(hashval);
2589 }
2590 
2591 static st_index_t
2593 {
2594  st_index_t hashval;
2595 
2596  rb_reg_check(re);
2597  hashval = RREGEXP(re)->ptr->options;
2598  hashval = rb_hash_uint(hashval, rb_memhash(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re)));
2599  return rb_hash_end(hashval);
2600 }
2601 
2602 
2603 /*
2604  * call-seq:
2605  * rxp == other_rxp -> true or false
2606  * rxp.eql?(other_rxp) -> true or false
2607  *
2608  * Equality---Two regexps are equal if their patterns are identical, they have
2609  * the same character set code, and their <code>casefold?</code> values are the
2610  * same.
2611  *
2612  * /abc/ == /abc/x #=> false
2613  * /abc/ == /abc/i #=> false
2614  * /abc/ == /abc/u #=> false
2615  * /abc/u == /abc/n #=> false
2616  */
2617 
/*
 * Method body registered as Regexp#== and Regexp#eql? in Init_Regexp.
 * Returns Qtrue when re1 and re2 are the same object, or when re2 is a
 * T_REGEXP whose KCODE_FIXED flag, compiled options, source length,
 * ENCODING_GET value and source bytes all match those of re1; otherwise
 * Qfalse.
 *
 * NOTE(review): the declarator line is absent from this listing; the
 * cross-reference index gives it as rb_reg_equal(VALUE re1, VALUE re2).
 */
2618 static VALUE
2620 {
2621  if (re1 == re2) return Qtrue;
2622  if (!RB_TYPE_P(re2, T_REGEXP)) return Qfalse;
2623  rb_reg_check(re1); rb_reg_check(re2);
2624  if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
2625  if (RREGEXP(re1)->ptr->options != RREGEXP(re2)->ptr->options) return Qfalse;
2626  if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return Qfalse;
2627  if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
       /* lengths are already known to be equal, so one length covers both buffers */
2628  if (memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0) {
2629  return Qtrue;
2630  }
2631  return Qfalse;
2632 }
2633 
2634 /*
2635  * call-seq:
2636  * mtch.hash -> integer
2637  *
2638  * Produce a hash based on the target string, regexp and matched
2639  * positions of this matchdata.
2640  */
2641 
2642 static VALUE
2644 {
2645  const struct re_registers *regs;
2646  st_index_t hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));
2647 
2648  rb_hash_uint(hashval, reg_hash(RMATCH(match)->regexp));
2649  regs = RMATCH_REGS(match);
2650  hashval = rb_hash_uint(hashval, regs->num_regs);
2651  hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
2652  hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
2653  hashval = rb_hash_end(hashval);
2654  return LONG2FIX(hashval);
2655 }
2656 
2657 /*
2658  * call-seq:
2659  * mtch == mtch2 -> true or false
2660  * mtch.eql?(mtch2) -> true or false
2661  *
2662  * Equality---Two matchdata are equal if their target strings,
2663  * patterns, and matched positions are identical.
2664  */
2665 
2666 static VALUE
2667 match_equal(VALUE match1, VALUE match2)
2668 {
2669  const struct re_registers *regs1, *regs2;
2670  if (match1 == match2) return Qtrue;
2671  if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
2672  if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
2673  if (!rb_reg_equal(RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return Qfalse;
2674  regs1 = RMATCH_REGS(match1);
2675  regs2 = RMATCH_REGS(match2);
2676  if (regs1->num_regs != regs2->num_regs) return Qfalse;
2677  if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
2678  if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
2679  return Qtrue;
2680 }
2681 
2682 static VALUE
2683 reg_operand(VALUE s, int check)
2684 {
2685  if (SYMBOL_P(s)) {
2686  return rb_sym_to_s(s);
2687  }
2688  else {
2689  return (check ? rb_str_to_str : rb_check_string_type)(s);
2690  }
2691 }
2692 
/*
 * Normalizes the operand and the start position, then runs the search.
 *
 * re   - regexp handed to rb_reg_search.
 * strp - in/out operand; overwritten with reg_operand(str, TRUE).
 * pos  - start position; when negative, the string length reported by
 *        rb_str_length is added to it first.
 *
 * Returns -1 when *strp is nil, the still-negative position when a negative
 * pos reaches before the start of the string, and otherwise the result of
 * rb_reg_search(re, str, pos, 0).
 */
2693 static long
2694 reg_match_pos(VALUE re, VALUE *strp, long pos)
2695 {
2696  VALUE str = *strp;
2697 
2698  if (NIL_P(str)) {
       /* NOTE(review): source line 2699 is absent from this listing; only the return is visible here */
2700  return -1;
2701  }
2702  *strp = str = reg_operand(str, TRUE);
2703  if (pos != 0) {
2704  if (pos < 0) {
2705  VALUE l = rb_str_length(str);
       /* NOTE(review): pos is a long but the length is narrowed with NUM2INT; NUM2LONG looks intended -- confirm */
2706  pos += NUM2INT(l);
2707  if (pos < 0) {
2708  return pos;
2709  }
2710  }
       /* presumably maps the character index to a byte offset -- confirm against rb_str_offset */
2711  pos = rb_str_offset(str, pos);
2712  }
2713  return rb_reg_search(re, str, pos, 0);
2714 }
2715 
2716 /*
2717  * call-seq:
2718  * rxp =~ str -> integer or nil
2719  *
2720  * Match---Matches <i>rxp</i> against <i>str</i>.
2721  *
2722  * /at/ =~ "input data" #=> 7
2723  * /ax/ =~ "input data" #=> nil
2724  *
2725  * If <code>=~</code> is used with a regexp literal with named captures,
2726  * captured strings (or nil) are assigned to local variables named by
2727  * the capture names.
2728  *
2729  * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = y "
2730  * p lhs #=> "x"
2731  * p rhs #=> "y"
2732  *
2733  * If it is not matched, nil is assigned for the variables.
2734  *
2735  * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = "
2736  * p lhs #=> nil
2737  * p rhs #=> nil
2738  *
2739  * This assignment is implemented in the Ruby parser.
2740  * The parser detects 'regexp-literal =~ expression' for the assignment.
2741  * The regexp must be a literal without interpolation and placed at left hand side.
2742  *
2743  * The assignment does not occur if the regexp is not a literal.
2744  *
2745  * re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
2746  * re =~ " x = y "
2747  * p lhs # undefined local variable
2748  * p rhs # undefined local variable
2749  *
2750  * A regexp interpolation, <code>#{}</code>, also disables
2751  * the assignment.
2752  *
2753  * rhs_pat = /(?<rhs>\w+)/
2754  * /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y"
2755  * p lhs # undefined local variable
2756  *
2757  * The assignment does not occur if the regexp is placed at the right hand side.
2758  *
2759  * " x = y " =~ /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
2760  * p lhs, rhs # undefined local variable
2761  *
2762  */
2763 
/*
 * Method body registered as Regexp#=~ in Init_Regexp.  Searches from
 * position 0 through reg_match_pos; returns Qnil when that yields a
 * negative value, otherwise the position passed through rb_str_sublen and
 * converted with LONG2FIX.
 *
 * NOTE(review): the declarator line is absent from this listing; the
 * cross-reference index gives it as rb_reg_match(VALUE re, VALUE str).
 */
2764 VALUE
2766 {
2767  long pos = reg_match_pos(re, &str, 0);
2768  if (pos < 0) return Qnil;
       /* presumably converts the byte offset into a character index -- confirm against rb_str_sublen */
2769  pos = rb_str_sublen(str, pos);
2770  return LONG2FIX(pos);
2771 }
2772 
2773 /*
2774  * call-seq:
2775  * rxp === str -> true or false
2776  *
2777  * Case Equality---Used in case statements.
2778  *
2779  * a = "HELLO"
2780  * case a
2781  * when /^[a-z]*$/; print "Lower case\n"
2782  * when /^[A-Z]*$/; print "Upper case\n"
2783  * else; print "Mixed case\n"
2784  * end
2785  * #=> "Upper case"
2786  *
2787  * Following a regular expression literal with the #=== operator allows you to
2788  * compare against a String.
2789  *
2790  * /^[a-z]*$/ === "HELLO" #=> false
2791  * /^[A-Z]*$/ === "HELLO" #=> true
2792  */
2793 
/*
 * Case-equality test; presumably rb_reg_eqq, which Init_Regexp registers
 * as Regexp#===.  The operand is converted with reg_operand(str, FALSE);
 * a nil result yields Qfalse.  Otherwise Qtrue is returned exactly when
 * rb_reg_search(re, str, 0, 0) reports a non-negative start.
 *
 * NOTE(review): the declarator line is absent from this listing; the body
 * reads the names re and str.
 */
2794 VALUE
2796 {
2797  long start;
2798 
2799  str = reg_operand(str, FALSE);
2800  if (NIL_P(str)) {
       /* NOTE(review): source line 2801 is absent from this listing */
2802  return Qfalse;
2803  }
2804  start = rb_reg_search(re, str, 0, 0);
2805  if (start < 0) {
2806  return Qfalse;
2807  }
2808  return Qtrue;
2809 }
2810 
2811 
2812 /*
2813  * call-seq:
2814  * ~ rxp -> integer or nil
2815  *
2816  * Match---Matches <i>rxp</i> against the contents of <code>$_</code>.
2817  * Equivalent to <code><i>rxp</i> =~ $_</code>.
2818  *
2819  * $_ = "input data"
2820  * ~ /at/ #=> 7
2821  */
2822 
/*
 * Matches re against the value returned by rb_lastline_get(); presumably
 * rb_reg_match2, which Init_Regexp registers as Regexp#~.  Returns Qnil
 * when that value is not a T_STRING or when rb_reg_search reports a
 * negative start; otherwise the start passed through rb_str_sublen and
 * converted with LONG2FIX.
 *
 * NOTE(review): the declarator line is absent from this listing; the body
 * reads the name re.
 */
2823 VALUE
2825 {
2826  long start;
2827  VALUE line = rb_lastline_get();
2828 
2829  if (!RB_TYPE_P(line, T_STRING)) {
       /* NOTE(review): source line 2830 is absent from this listing */
2831  return Qnil;
2832  }
2833 
2834  start = rb_reg_search(re, line, 0, 0);
2835  if (start < 0) {
2836  return Qnil;
2837  }
2838  start = rb_str_sublen(line, start);
2839  return LONG2FIX(start);
2840 }
2841 
2842 
2843 /*
2844  * call-seq:
2845  * rxp.match(str) -> matchdata or nil
2846  * rxp.match(str,pos) -> matchdata or nil
2847  *
2848  * Returns a <code>MatchData</code> object describing the match, or
2849  * <code>nil</code> if there was no match. This is equivalent to retrieving the
2850  * value of the special variable <code>$~</code> following a normal match.
2851  * If the second parameter is present, it specifies the position in the string
2852  * to begin the search.
2853  *
2854  * /(.)(.)(.)/.match("abc")[2] #=> "b"
2855  * /(.)(.)/.match("abc", 1)[2] #=> "c"
2856  *
2857  * If a block is given, it is invoked with the MatchData when the match succeeds, so
2858  * that you can write
2859  *
2860  * pat.match(str) {|m| ...}
2861  *
2862  * instead of
2863  *
2864  * if m = pat.match(str)
2865  * ...
2866  * end
2867  *
2868  * The return value is a value from block execution in this case.
2869  */
2870 
/*
 * Method body registered as Regexp#match in Init_Regexp.
 *
 * Accepts one required argument (the operand) and one optional argument
 * (the start position, converted with NUM2LONG; 0 when omitted).  Returns
 * Qnil when reg_match_pos reports a negative position.  Otherwise the
 * value from rb_backref_get() is marked with rb_match_busy and either
 * yielded to the block (returning the block's value) or returned directly.
 *
 * NOTE(review): the declarator line is absent from this listing; the
 * cross-reference index gives it as
 * rb_reg_match_m(int argc, VALUE *argv, VALUE re).
 */
2871 static VALUE
2873 {
2874  VALUE result, str, initpos;
2875  long pos;
2876 
2877  if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
2878  pos = NUM2LONG(initpos);
2879  }
2880  else {
2881  pos = 0;
2882  }
2883 
2884  pos = reg_match_pos(re, &str, pos);
2885  if (pos < 0) {
       /* NOTE(review): source line 2886 is absent from this listing */
2887  return Qnil;
2888  }
2889  result = rb_backref_get();
2890  rb_match_busy(result);
2891  if (!NIL_P(result) && rb_block_given_p()) {
2892  return rb_yield(result);
2893  }
2894  return result;
2895 }
2896 
2897 /*
2898  * Document-method: compile
2899  *
2900  * Synonym for <code>Regexp.new</code>
2901  */
2902 
2903 
2904 /*
2905  * call-seq:
2906  * Regexp.new(string, [options [, kcode]]) -> regexp
2907  * Regexp.new(regexp) -> regexp
2908  * Regexp.compile(string, [options [, kcode]]) -> regexp
2909  * Regexp.compile(regexp) -> regexp
2910  *
2911  * Constructs a new regular expression from +pattern+, which can be either a
2912  * String or a Regexp (in which case that regexp's options are propagated),
2913  * and new options may not be specified (a change as of Ruby 1.8).
2914  *
2915  * If +options+ is a Fixnum, it should be one or more of the constants
2916  * Regexp::EXTENDED, Regexp::IGNORECASE, and Regexp::MULTILINE,
2917  * <em>or</em>-ed together. Otherwise, if +options+ is not
2918  * +nil+ or +false+, the regexp will be case insensitive.
2919  *
2920  * When the +kcode+ parameter is `n' or `N', the regexp is created with no encoding.
2921  * It means that the regexp is for binary strings.
2922  *
2923  * r1 = Regexp.new('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
2924  * r2 = Regexp.new('cat', true) #=> /cat/i
2925  * r3 = Regexp.new(r2) #=> /cat/i
2926  * r4 = Regexp.new('dog', Regexp::EXTENDED | Regexp::IGNORECASE) #=> /dog/ix
2927  */
2928 
/*
 * Method body registered as Regexp#initialize in Init_Regexp.  Takes one
 * to three arguments (enforced by rb_check_arity) and returns self.
 *
 * When argv[0] is a Regexp, its options, source bytes and encoding are
 * reused and any further arguments only trigger a "flags ignored" warning.
 * Otherwise argv[0] is used as a string, argv[1] supplies the flags and
 * argv[2] the kcode.  A failed initialization is reported through
 * rb_reg_raise_str.
 *
 * NOTE(review): the declarator line is absent from this listing; the
 * cross-reference index gives it as
 * rb_reg_initialize_m(int argc, VALUE *argv, VALUE self).
 */
2929 static VALUE
2931 {
2932  onig_errmsg_buffer err = "";
2933  int flags = 0;
2934  VALUE str;
2935  rb_encoding *enc;
2936  const char *ptr;
2937  long len;
2938 
2939  rb_check_arity(argc, 1, 3);
2940  if (RB_TYPE_P(argv[0], T_REGEXP)) {
2941  VALUE re = argv[0];
2942 
2943  if (argc > 1) {
2944  rb_warn("flags ignored");
2945  }
2946  rb_reg_check(re);
2947  flags = rb_reg_options(re);
2948  ptr = RREGEXP_SRC_PTR(re);
2949  len = RREGEXP_SRC_LEN(re);
2950  enc = rb_enc_get(re);
2951  if (rb_reg_initialize(self, ptr, len, enc, flags, err, NULL, 0)) {
       /* build a string from the source only to report the failure */
2952  str = rb_enc_str_new(ptr, len, enc);
2953  rb_reg_raise_str(str, flags, err);
2954  }
2955  }
2956  else {
2957  if (argc >= 2) {
       /* a Fixnum is taken as the option bits; any other truthy value means case-insensitive */
2958  if (FIXNUM_P(argv[1])) flags = FIX2INT(argv[1]);
2959  else if (RTEST(argv[1])) flags = ONIG_OPTION_IGNORECASE;
2960  }
       /* enc stays 0 unless the kcode argument selects "no encoding" */
2961  enc = 0;
2962  if (argc == 3 && !NIL_P(argv[2])) {
2963  char *kcode = StringValuePtr(argv[2]);
2964  if (kcode[0] == 'n' || kcode[0] == 'N') {
2965  enc = rb_ascii8bit_encoding();
2966  flags |= ARG_ENCODING_NONE;
2967  }
2968  else {
2969  rb_warn("encoding option is ignored - %s", kcode);
2970  }
2971  }
2972  str = argv[0];
2973  ptr = StringValuePtr(str);
       /* with an explicit encoding use the raw-bytes initializer, otherwise the string-based one */
2974  if (enc
2975  ? rb_reg_initialize(self, ptr, RSTRING_LEN(str), enc, flags, err, NULL, 0)
2976  : rb_reg_initialize_str(self, str, flags, err, NULL, 0)) {
2977  rb_reg_raise_str(str, flags, err);
2978  }
2979  }
2980  return self;
2981 }
2982 
/*
 * Returns a copy of str with regexp metacharacters backslash-escaped;
 * presumably rb_reg_quote, which is called with the converted operand a
 * few definitions below.
 *
 * Works in two passes.  The first scans for any character listed in the
 * first switch; if none is found the result is rb_str_new3(str).  The
 * second pass, entered at meta_found, writes into a buffer of twice the
 * input length: the prefix before the first metacharacter is copied
 * verbatim, then each remaining character is copied, escaped with a
 * backslash, or (for space, tab, newline, CR, FF, VT) replaced by its
 * two-character escape.  The result is shrunk to the written length and
 * passed to OBJ_INFECT(tmp, str).
 *
 * NOTE(review): the declarator line is absent from this listing; the body
 * reads the name str.
 */
2983 VALUE
2985 {
2986  rb_encoding *enc = rb_enc_get(str);
2987  char *s, *send, *t;
2988  VALUE tmp;
2989  int c, clen;
2990  int ascii_only = rb_enc_str_asciionly_p(str);
2991 
2992  s = RSTRING_PTR(str);
2993  send = s + RSTRING_LEN(str);
2994  while (s < send) {
2995  c = rb_enc_ascget(s, send, &clen, enc);
       /* -1 is treated as "not an ASCII character": skip the whole multibyte character */
2996  if (c == -1) {
2997  s += mbclen(s, send, enc);
2998  continue;
2999  }
3000  switch (c) {
3001  case '[': case ']': case '{': case '}':
3002  case '(': case ')': case '|': case '-':
3003  case '*': case '.': case '\\':
3004  case '?': case '+': case '^': case '$':
3005  case ' ': case '#':
3006  case '\t': case '\f': case '\v': case '\n': case '\r':
3007  goto meta_found;
3008  }
3009  s += clen;
3010  }
       /* nothing to escape */
3011  tmp = rb_str_new3(str);
3012  if (ascii_only) {
       /* NOTE(review): source line 3013 is absent from this listing */
3014  }
3015  return tmp;
3016 
3017  meta_found:
       /* every input character expands to at most two output characters */
3018  tmp = rb_str_new(0, RSTRING_LEN(str)*2);
3019  if (ascii_only) {
       /* NOTE(review): source line 3020 is absent from this listing */
3021  }
3022  else {
3023  rb_enc_copy(tmp, str);
3024  }
3025  t = RSTRING_PTR(tmp);
3026  /* copy everything up to the first metacharacter */
3027  memcpy(t, RSTRING_PTR(str), s - RSTRING_PTR(str));
3028  t += s - RSTRING_PTR(str);
3029 
3030  while (s < send) {
3031  c = rb_enc_ascget(s, send, &clen, enc);
3032  if (c == -1) {
3033  int n = mbclen(s, send, enc);
3034 
       /* copy the multibyte character byte for byte */
3035  while (n--)
3036  *t++ = *s++;
3037  continue;
3038  }
3039  s += clen;
3040  switch (c) {
3041  case '[': case ']': case '{': case '}':
3042  case '(': case ')': case '|': case '-':
3043  case '*': case '.': case '\\':
3044  case '?': case '+': case '^': case '$':
3045  case '#':
       /* emit the backslash here; the character itself is written after the switch */
3046  t += rb_enc_mbcput('\\', t, enc);
3047  break;
3048  case ' ':
3049  t += rb_enc_mbcput('\\', t, enc);
3050  t += rb_enc_mbcput(' ', t, enc);
3051  continue;
3052  case '\t':
3053  t += rb_enc_mbcput('\\', t, enc);
3054  t += rb_enc_mbcput('t', t, enc);
3055  continue;
3056  case '\n':
3057  t += rb_enc_mbcput('\\', t, enc);
3058  t += rb_enc_mbcput('n', t, enc);
3059  continue;
3060  case '\r':
3061  t += rb_enc_mbcput('\\', t, enc);
3062  t += rb_enc_mbcput('r', t, enc);
3063  continue;
3064  case '\f':
3065  t += rb_enc_mbcput('\\', t, enc);
3066  t += rb_enc_mbcput('f', t, enc);
3067  continue;
3068  case '\v':
3069  t += rb_enc_mbcput('\\', t, enc);
3070  t += rb_enc_mbcput('v', t, enc);
3071  continue;
3072  }
3073  t += rb_enc_mbcput(c, t, enc);
3074  }
3075  rb_str_resize(tmp, t - RSTRING_PTR(tmp));
3076  OBJ_INFECT(tmp, str);
3077  return tmp;
3078 }
3079 
3080 
3081 /*
3082  * call-seq:
3083  * Regexp.escape(str) -> string
3084  * Regexp.quote(str) -> string
3085  *
3086  * Escapes any characters that would have special meaning in a regular
3087  * expression. Returns a new escaped string, or self if no characters are
3088  * escaped. For any string,
3089  * <code>Regexp.new(Regexp.escape(<i>str</i>))=~<i>str</i></code> will be true.
3090  *
3091  * Regexp.escape('\*?{}.') #=> \\\*\?\{\}\.
3092  *
3093  */
3094 
/*
 * Singleton-method body registered as Regexp.quote and Regexp.escape in
 * Init_Regexp: converts str with reg_operand(str, TRUE) and returns the
 * result of rb_reg_quote on it.
 *
 * NOTE(review): the declarator line is absent from this listing; callers
 * in this file pass two arguments, e.g. rb_reg_s_quote(Qnil, arg).
 */
3095 static VALUE
3097 {
3098  return rb_reg_quote(reg_operand(str, TRUE));
3099 }
3100 
/*
 * Returns the option bits of a regexp; presumably rb_reg_options, which
 * other functions in this file call with a single regexp argument.
 *
 * After rb_reg_check(re), the compiled pattern's options are masked with
 * ARG_REG_OPTION_MASK.  ARG_ENCODING_FIXED is added when the object has the
 * KCODE_FIXED flag, and ARG_ENCODING_NONE when it has REG_ENCODING_NONE.
 *
 * NOTE(review): the declarator line is absent from this listing; the body
 * reads the name re.
 */
3101 int
3103 {
3104  int options;
3105 
3106  rb_reg_check(re);
3107  options = RREGEXP(re)->ptr->options & ARG_REG_OPTION_MASK;
3108  if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
3109  if (RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
3110  return options;
3111 }
3112 
/*
 * Attempts to convert re to a Regexp by delegating to
 * rb_check_convert_type with T_REGEXP, "Regexp" and the "to_regexp"
 * method name; presumably rb_check_regexp_type, which callers in this file
 * test with NIL_P to detect a failed conversion.
 *
 * NOTE(review): the declarator line is absent from this listing; the body
 * reads the name re.
 */
3113 VALUE
3115 {
3116  return rb_check_convert_type(re, T_REGEXP, "Regexp", "to_regexp");
3117 }
3118 
3119 /*
3120  * call-seq:
3121  * Regexp.try_convert(obj) -> re or nil
3122  *
3123  * Try to convert <i>obj</i> into a Regexp, using to_regexp method.
3124  * Returns converted regexp or nil if <i>obj</i> cannot be converted
3125  * for any reason.
3126  *
3127  * Regexp.try_convert(/re/) #=> /re/
3128  * Regexp.try_convert("re") #=> nil
3129  *
3130  * o = Object.new
3131  * Regexp.try_convert(o) #=> nil
3132  * def o.to_regexp() /foo/ end
3133  * Regexp.try_convert(o) #=> /foo/
3134  *
3135  */
/*
 * Singleton-method body registered as Regexp.try_convert in Init_Regexp:
 * returns rb_check_regexp_type(re) unchanged.
 *
 * NOTE(review): the declarator line is absent from this listing; the body
 * reads the name re.
 */
3136 static VALUE
3138 {
3139  return rb_check_regexp_type(re);
3140 }
3141 
/*
 * Builds the union regexp for the array args0; presumably rb_reg_s_union,
 * which the Regexp.union entry point below calls.
 *
 *  - No elements: a new Regexp instance built from the pattern "(?!)".
 *  - One element: the element converted by rb_check_regexp_type if that
 *    succeeds, otherwise a regexp made from its quoted string form.
 *  - Several elements: each is turned into source text (rb_reg_to_s for
 *    regexps, rb_reg_s_quote for strings), the pieces are joined with "|",
 *    and a new Regexp instance is created from the joined source.
 *
 * While joining, three trackers record the encodings seen: an
 * ASCII-incompatible encoding, a fixed ASCII-compatible encoding, and
 * whether any ASCII-only piece occurred.  ArgumentError is raised as soon
 * as two pieces cannot be combined.  The joined source is associated with
 * the ASCII-incompatible encoding if any, else the fixed ASCII-compatible
 * one, else the encoding from rb_ascii8bit_encoding().
 *
 * NOTE(review): the declarator line is absent from this listing; the body
 * reads the name args0, and the caller passes (self, array).
 */
3142 static VALUE
3144 {
3145  long argc = RARRAY_LEN(args0);
3146 
3147  if (argc == 0) {
3148  VALUE args[1];
3149  args[0] = rb_str_new2("(?!)");
3150  return rb_class_new_instance(1, args, rb_cRegexp);
3151  }
3152  else if (argc == 1) {
3153  VALUE arg = rb_ary_entry(args0, 0);
3154  VALUE re = rb_check_regexp_type(arg);
3155  if (!NIL_P(re))
3156  return re;
3157  else {
3158  VALUE quoted;
3159  quoted = rb_reg_s_quote(Qnil, arg);
3160  return rb_reg_new_str(quoted, 0);
3161  }
3162  }
3163  else {
       /* NOTE(review): i is an int while argc is a long -- confirm the element count cannot exceed INT_MAX */
3164  int i;
3165  VALUE source = rb_str_buf_new(0);
3166  rb_encoding *result_enc;
3167 
3168  int has_asciionly = 0;
3169  rb_encoding *has_ascii_compat_fixed = 0;
3170  rb_encoding *has_ascii_incompat = 0;
3171 
3172  for (i = 0; i < argc; i++) {
3173  volatile VALUE v;
3174  VALUE e = rb_ary_entry(args0, i);
3175 
3176  if (0 < i)
3177  rb_str_buf_cat_ascii(source, "|");
3178 
3179  v = rb_check_regexp_type(e);
3180  if (!NIL_P(v)) {
       /* element is (convertible to) a Regexp: classify by its encoding */
3181  rb_encoding *enc = rb_enc_get(v);
3182  if (!rb_enc_asciicompat(enc)) {
3183  if (!has_ascii_incompat)
3184  has_ascii_incompat = enc;
3185  else if (has_ascii_incompat != enc)
3186  rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
3187  rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
3188  }
3189  else if (rb_reg_fixed_encoding_p(v)) {
3190  if (!has_ascii_compat_fixed)
3191  has_ascii_compat_fixed = enc;
3192  else if (has_ascii_compat_fixed != enc)
3193  rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
3194  rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
3195  }
3196  else {
3197  has_asciionly = 1;
3198  }
3199  v = rb_reg_to_s(v);
3200  }
3201  else {
       /* element must be a string: classify by its encoding and content, then quote it */
3202  rb_encoding *enc;
3203  StringValue(e);
3204  enc = rb_enc_get(e);
3205  if (!rb_enc_str_asciicompat_p(e)) {
3206  if (!has_ascii_incompat)
3207  has_ascii_incompat = enc;
3208  else if (has_ascii_incompat != enc)
3209  rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
3210  rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
3211  }
3212  else if (rb_enc_str_asciionly_p(e)) {
3213  has_asciionly = 1;
3214  }
3215  else {
3216  if (!has_ascii_compat_fixed)
3217  has_ascii_compat_fixed = enc;
3218  else if (has_ascii_compat_fixed != enc)
3219  rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
3220  rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
3221  }
3222  v = rb_reg_s_quote(Qnil, e);
3223  }
       /* an ASCII-incompatible piece cannot be mixed with any ASCII-compatible piece */
3224  if (has_ascii_incompat) {
3225  if (has_asciionly) {
3226  rb_raise(rb_eArgError, "ASCII incompatible encoding: %s",
3227  rb_enc_name(has_ascii_incompat));
3228  }
3229  if (has_ascii_compat_fixed) {
3230  rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
3231  rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed));
3232  }
3233  }
3234 
       /* the buffer takes the first piece's encoding before anything is appended */
3235  if (i == 0) {
3236  rb_enc_copy(source, v);
3237  }
3238  rb_str_append(source, v);
3239  }
3240 
3241  if (has_ascii_incompat) {
3242  result_enc = has_ascii_incompat;
3243  }
3244  else if (has_ascii_compat_fixed) {
3245  result_enc = has_ascii_compat_fixed;
3246  }
3247  else {
3248  result_enc = rb_ascii8bit_encoding();
3249  }
3250 
3251  rb_enc_associate(source, result_enc);
3252  return rb_class_new_instance(1, &source, rb_cRegexp);
3253  }
3254 }
3255 
3256 /*
3257  * call-seq:
3258  * Regexp.union(pat1, pat2, ...) -> new_regexp
3259  * Regexp.union(pats_ary) -> new_regexp
3260  *
3261  * Return a <code>Regexp</code> object that is the union of the given
3262  * <em>pattern</em>s, i.e., will match any of its parts. The <em>pattern</em>s
3263  * can be Regexp objects, in which case their options will be preserved, or
3264  * Strings. If no patterns are given, returns <code>/(?!)/</code>.
3265  * The behavior is unspecified if any given <em>pattern</em> contains capture.
3266  *
3267  * Regexp.union #=> /(?!)/
3268  * Regexp.union("penzance") #=> /penzance/
3269  * Regexp.union("a+b*c") #=> /a\+b\*c/
3270  * Regexp.union("skiing", "sledding") #=> /skiing|sledding/
3271  * Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/
3272  * Regexp.union(/dogs/, /cats/i) #=> /(?-mix:dogs)|(?i-mx:cats)/
3273  *
3274  * Note: ::union tries to convert each argument into a Regexp via
3275  * #to_regexp.
3276  */
/*
 * Singleton-method body registered as Regexp.union in Init_Regexp (with
 * argc -2, so args arrives as one array).  When exactly one argument was
 * given and rb_check_array_type can convert it, that array is used as the
 * pattern list; otherwise args itself is.  Delegates to rb_reg_s_union.
 *
 * NOTE(review): the declarator line is absent from this listing; the body
 * reads the names self and args.
 */
3277 static VALUE
3279 {
3280  VALUE v;
3281  if (RARRAY_LEN(args) == 1 &&
3282  !NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
3283  return rb_reg_s_union(self, v);
3284  }
3285  return rb_reg_s_union(self, args);
3286 }
3287 
3288 /* :nodoc: */
/*
 * Method body registered as Regexp#initialize_copy in Init_Regexp.
 * Returns copy immediately when OBJ_INIT_COPY(copy, re) is false.
 * Otherwise copy is re-initialized from re's source bytes, encoding
 * (rb_enc_get) and options (rb_reg_options); a failure is reported through
 * rb_reg_raise.
 *
 * NOTE(review): the declarator line is absent from this listing; the body
 * reads the names copy and re.
 */
3289 static VALUE
3291 {
3292  onig_errmsg_buffer err = "";
3293  const char *s;
3294  long len;
3295 
3296  if (!OBJ_INIT_COPY(copy, re)) return copy;
3297  rb_reg_check(re);
3298  s = RREGEXP_SRC_PTR(re);
3299  len = RREGEXP_SRC_LEN(re);
3300  if (rb_reg_initialize(copy, s, len, rb_enc_get(re), rb_reg_options(re),
3301  err, NULL, 0) != 0) {
3302  rb_reg_raise(s, len, err, re);
3303  }
3304  return copy;
3305 }
3306 
/*
 * Expands the backslash sequences of a replacement template.
 *
 * str    - the template to scan.
 * src    - the string the match was made against; captured text is copied
 *          out of it.
 * regs   - match registers (accessed through the BEG/END macros).
 * regexp - the regexp that produced the match; consulted for numbered and
 *          named groups.
 *
 * Recognized sequences: \1-\9 (only when onig_noname_group_capture_is_active
 * says so), \k<name>, \0 and \& (register 0), \` (src before BEG(0)),
 * \' (src after END(0)), \+ (highest register whose BEG is not -1) and
 * \\ (a single backslash).  Any other sequence is copied through as
 * written.
 *
 * Returns str itself when no backslash sequence was encountered (val was
 * never allocated); otherwise the newly built string.  Raises RuntimeError
 * for a \k< that has no closing '>'.
 */
3307 VALUE
3308 rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
3309 {
3310  VALUE val = 0;
3311  char *p, *s, *e;
3312  int no, clen;
3313  rb_encoding *str_enc = rb_enc_get(str);
3314  rb_encoding *src_enc = rb_enc_get(src);
3315  int acompat = rb_enc_asciicompat(str_enc);
       /* ASCII-compatible encodings take a one-byte shortcut; others go through rb_enc_ascget */
3316 #define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
3317 
       /* p marks the start of template text not yet copied to val; s is the scan cursor */
3318  p = s = RSTRING_PTR(str);
3319  e = s + RSTRING_LEN(str);
3320 
3321  while (s < e) {
3322  int c = ASCGET(s, e, &clen);
3323  char *ss;
3324 
3325  if (c == -1) {
3326  s += mbclen(s, e, str_enc);
3327  continue;
3328  }
       /* ss remembers where the potential backslash sequence starts */
3329  ss = s;
3330  s += clen;
3331 
       /* a backslash at the very end of the template is left as literal text */
3332  if (c != '\\' || s == e) continue;
3333 
3334  if (!val) {
3335  val = rb_str_buf_new(ss-p);
3336  }
       /* flush the literal text that precedes the backslash */
3337  rb_enc_str_buf_cat(val, p, ss-p, str_enc);
3338 
3339  c = ASCGET(s, e, &clen);
3340  if (c == -1) {
       /* backslash followed by a non-ASCII character: keep both as written */
3341  s += mbclen(s, e, str_enc);
3342  rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
3343  p = s;
3344  continue;
3345  }
3346  s += clen;
3347 
3348  p = s;
3349  switch (c) {
3350  case '1': case '2': case '3': case '4':
3351  case '5': case '6': case '7': case '8': case '9':
3352  if (onig_noname_group_capture_is_active(RREGEXP(regexp)->ptr)) {
3353  no = c - '0';
3354  }
3355  else {
       /* the sequence is dropped from the output in this case */
3356  continue;
3357  }
3358  break;
3359 
3360  case 'k':
3361  if (s < e && ASCGET(s, e, &clen) == '<') {
3362  char *name, *name_end;
3363 
3364  name_end = name = s + clen;
       /* scan forward to the closing '>' */
3365  while (name_end < e) {
3366  c = ASCGET(name_end, e, &clen);
3367  if (c == '>') break;
3368  name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
3369  }
3370  if (name_end < e) {
3371  VALUE n = rb_str_subseq(str, (long)(name - RSTRING_PTR(str)),
3372  (long)(name_end - name));
3373  if (!rb_enc_compatible(RREGEXP(regexp)->src, n) ||
3374  (no = name_to_backref_number(regs, regexp, name, name_end)) < 1) {
       /* NOTE(review): source line 3375 is absent from this listing; the failure handling is not visible */
3376  }
       /* resume after the '>' (clen still holds its length) */
3377  p = s = name_end + clen;
3378  break;
3379  }
3380  else {
3381  rb_raise(rb_eRuntimeError, "invalid group name reference format");
3382  }
3383  }
3384 
       /* \k not followed by '<': keep the sequence as written */
3385  rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
3386  continue;
3387 
3388  case '0':
3389  case '&':
3390  no = 0;
3391  break;
3392 
3393  case '`':
3394  rb_enc_str_buf_cat(val, RSTRING_PTR(src), BEG(0), src_enc);
3395  continue;
3396 
3397  case '\'':
3398  rb_enc_str_buf_cat(val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
3399  continue;
3400 
3401  case '+':
       /* walk down from the last register to the first one that took part in the match */
3402  no = regs->num_regs-1;
3403  while (BEG(no) == -1 && no > 0) no--;
3404  if (no == 0) continue;
3405  break;
3406 
3407  case '\\':
3408  rb_enc_str_buf_cat(val, s-clen, clen, str_enc);
3409  continue;
3410 
3411  default:
3412  rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
3413  continue;
3414  }
3415 
       /* append the text of register no, if it exists and took part in the match */
3416  if (no >= 0) {
3417  if (no >= regs->num_regs) continue;
3418  if (BEG(no) == -1) continue;
3419  rb_enc_str_buf_cat(val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
3420  }
3421  }
3422 
3423  if (!val) return str;
       /* flush the literal tail after the last sequence */
3424  if (p < e) {
3425  rb_enc_str_buf_cat(val, p, e-p, str_enc);
3426  }
3427 
3428  return val;
3429 }
3430 
/*
 * Read hook that warns "variable $KCODE is no longer effective" and
 * returns Qnil.
 *
 * NOTE(review): the declarator line is absent from this listing; the
 * cross-reference index gives it as kcode_getter(void).
 */
3431 static VALUE
3433 {
3434  rb_warn("variable $KCODE is no longer effective");
3435  return Qnil;
3436 }
3437 
/*
 * Write hook that only warns "variable $KCODE is no longer effective;
 * ignored"; the assigned value is not used.
 *
 * NOTE(review): the declarator line is absent from this listing; the
 * cross-reference index gives it as kcode_setter(VALUE val, ID id).
 */
3438 static void
3440 {
3441  rb_warn("variable $KCODE is no longer effective; ignored");
3442 }
3443 
/*
 * Read hook that warns "variable $= is no longer effective" and returns
 * Qfalse.
 *
 * NOTE(review): the declarator line (name and parameters) is absent from
 * this listing.
 */
3444 static VALUE
3446 {
3447  rb_warn("variable $= is no longer effective");
3448  return Qfalse;
3449 }
3450 
/*
 * Write hook that only warns "variable $= is no longer effective;
 * ignored".
 *
 * NOTE(review): the declarator line (name and parameters) is absent from
 * this listing.
 */
3451 static void
3453 {
3454  rb_warn("variable $= is no longer effective; ignored");
3455 }
3456 
/*
 * Returns the current match object, or Qnil when there is none; a non-nil
 * match is first marked with rb_match_busy.
 *
 * NOTE(review): the declarator line is absent from this listing; the
 * cross-reference index gives it as match_getter(void).
 */
3457 static VALUE
3459 {
       /* NOTE(review): source line 3460, which must declare and set match, is absent from this listing */
3461 
3462  if (NIL_P(match)) return Qnil;
3463  rb_match_busy(match);
3464  return match;
3465 }
3466 
/*
 * Stores val with rb_backref_set.  A non-nil val must be a T_MATCH
 * (enforced by Check_Type); nil is passed through unchecked.
 *
 * NOTE(review): the declarator line (name and parameters) is absent from
 * this listing; the body reads the name val.
 */
3467 static void
3469 {
3470  if (!NIL_P(val)) {
3471  Check_Type(val, T_MATCH);
3472  }
3473  rb_backref_set(val);
3474 }
3475 
3476 /*
3477  * call-seq:
3478  * Regexp.last_match -> matchdata
3479  * Regexp.last_match(n) -> str
3480  *
3481  * The first form returns the MatchData object generated by the
3482  * last successful pattern match. Equivalent to reading the special global
3483  * variable <code>$~</code> (see Special global variables in Regexp for
3484  * details).
3485  *
3486  * The second form returns the <i>n</i>th field in this MatchData object.
3487  * _n_ can be a string or symbol to reference a named capture.
3488  *
3489  * Note that the last_match is local to the thread and method scope of the
3490  * method that did the pattern match.
3491  *
3492  * /c(.)t/ =~ 'cat' #=> 0
3493  * Regexp.last_match #=> #<MatchData "cat" 1:"a">
3494  * Regexp.last_match(0) #=> "cat"
3495  * Regexp.last_match(1) #=> "a"
3496  * Regexp.last_match(2) #=> nil
3497  *
3498  * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
3499  * Regexp.last_match #=> #<MatchData "var = val" lhs:"var" rhs:"val">
3500  * Regexp.last_match(:lhs) #=> "var"
3501  * Regexp.last_match(:rhs) #=> "val"
3502  */
3503 
/*
 * Singleton-method body registered as Regexp.last_match in Init_Regexp.
 * With one argument, that argument is resolved to a group number by
 * match_backref_number and the result of rb_reg_nth_match for that group
 * is returned (Qnil when there is no match).  With no argument, the result
 * of match_getter() is returned.
 *
 * NOTE(review): the declarator line is absent from this listing; the
 * cross-reference index gives it as
 * rb_reg_s_last_match(int argc, VALUE *argv).
 */
3504 static VALUE
3506 {
3507  VALUE nth;
3508 
3509  if (argc > 0 && rb_scan_args(argc, argv, "01", &nth) == 1) {
       /* NOTE(review): source line 3510, which must declare and set match, is absent from this listing */
3511  int n;
3512  if (NIL_P(match)) return Qnil;
3513  n = match_backref_number(match, nth);
3514  return rb_reg_nth_match(n, match);
3515  }
3516  return match_getter();
3517 }
3518 
/*
 * Emits s as a warning through rb_warn.
 *
 * s - the message text; it is passed as the argument of a "%s" format, so
 *     any '%' characters in it are printed literally.
 */
static void
re_warn(const char *s)
{
    const char *message = s;

    rb_warn("%s", message);
}
3524 
3525 /*
3526  * Document-class: RegexpError
3527  *
3528  * Raised when given an invalid regexp expression.
3529  *
3530  * Regexp.new("?")
3531  *
3532  * <em>raises the exception:</em>
3533  *
3534  * RegexpError: target of repeat operator is not specified: /?/
3535  */
3536 
3537 /*
3538  * Document-class: Regexp
3539  *
3540  * A <code>Regexp</code> holds a regular expression, used to match a pattern
3541  * against strings. Regexps are created using the <code>/.../</code> and
3542  * <code>%r{...}</code> literals, and by the <code>Regexp::new</code>
3543  * constructor.
3544  *
3545  * :include: doc/regexp.rdoc
3546  */
3547 
/*
 * Initialization routine for this file: defines the Regexp class with its
 * allocator, singleton methods, instance methods and option constants,
 * registers reg_cache with rb_global_variable, then defines the MatchData
 * class, removes its "new" singleton method and defines its instance
 * methods.
 *
 * NOTE(review): the declarator line and most of source lines 3551-3566 are
 * absent from this listing, so whatever setup precedes the Regexp class
 * definition is not visible here.
 */
3548 void
3550 {
3552 
3557 
3563 
3567 
3568  rb_cRegexp = rb_define_class("Regexp", rb_cObject);
3569  rb_define_alloc_func(rb_cRegexp, rb_reg_s_alloc);
       /* Regexp.compile goes straight to rb_class_new_instance */
3570  rb_define_singleton_method(rb_cRegexp, "compile", rb_class_new_instance, -1);
3571  rb_define_singleton_method(rb_cRegexp, "quote", rb_reg_s_quote, 1);
3572  rb_define_singleton_method(rb_cRegexp, "escape", rb_reg_s_quote, 1);
3573  rb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2);
3574  rb_define_singleton_method(rb_cRegexp, "last_match", rb_reg_s_last_match, -1);
3575  rb_define_singleton_method(rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1);
3576 
3577  rb_define_method(rb_cRegexp, "initialize", rb_reg_initialize_m, -1);
3578  rb_define_method(rb_cRegexp, "initialize_copy", rb_reg_init_copy, 1);
3579  rb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
       /* eql? and == share one implementation */
3580  rb_define_method(rb_cRegexp, "eql?", rb_reg_equal, 1);
3581  rb_define_method(rb_cRegexp, "==", rb_reg_equal, 1);
3582  rb_define_method(rb_cRegexp, "=~", rb_reg_match, 1);
3583  rb_define_method(rb_cRegexp, "===", rb_reg_eqq, 1);
3584  rb_define_method(rb_cRegexp, "~", rb_reg_match2, 0);
3585  rb_define_method(rb_cRegexp, "match", rb_reg_match_m, -1);
3586  rb_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0);
3587  rb_define_method(rb_cRegexp, "inspect", rb_reg_inspect, 0);
3588  rb_define_method(rb_cRegexp, "source", rb_reg_source, 0);
3589  rb_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
3590  rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
3591  rb_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
3592  rb_define_method(rb_cRegexp, "fixed_encoding?", rb_reg_fixed_encoding_p, 0);
3593  rb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
3594  rb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
3595 
3596  /* see Regexp.options and Regexp.new */
3597  rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
3598  /* see Regexp.options and Regexp.new */
3599  rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
3600  /* see Regexp.options and Regexp.new */
3601  rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
3602  /* see Regexp.options and Regexp.new */
3603  rb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED));
3604  /* see Regexp.options and Regexp.new */
3605  rb_define_const(rb_cRegexp, "NOENCODING", INT2FIX(ARG_ENCODING_NONE));
3606 
3607  rb_global_variable(&reg_cache);
3608 
3609  rb_cMatch = rb_define_class("MatchData", rb_cObject);
3610  rb_define_alloc_func(rb_cMatch, match_alloc);
       /* MatchData.new is removed from the class */
3611  rb_undef_method(CLASS_OF(rb_cMatch), "new");
3612 
3613  rb_define_method(rb_cMatch, "initialize_copy", match_init_copy, 1);
3614  rb_define_method(rb_cMatch, "regexp", match_regexp, 0);
3615  rb_define_method(rb_cMatch, "names", match_names, 0);
       /* size and length share one implementation */
3616  rb_define_method(rb_cMatch, "size", match_size, 0);
3617  rb_define_method(rb_cMatch, "length", match_size, 0);
3618  rb_define_method(rb_cMatch, "offset", match_offset, 1);
3619  rb_define_method(rb_cMatch, "begin", match_begin, 1);
3620  rb_define_method(rb_cMatch, "end", match_end, 1);
3621  rb_define_method(rb_cMatch, "to_a", match_to_a, 0);
3622  rb_define_method(rb_cMatch, "[]", match_aref, -1);
3623  rb_define_method(rb_cMatch, "captures", match_captures, 0);
3624  rb_define_method(rb_cMatch, "values_at", match_values_at, -1);
3625  rb_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0);
3626  rb_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0);
3627  rb_define_method(rb_cMatch, "to_s", match_to_s, 0);
3628  rb_define_method(rb_cMatch, "inspect", match_inspect, 0);
3629  rb_define_method(rb_cMatch, "string", match_string, 0);
3630  rb_define_method(rb_cMatch, "hash", match_hash, 0);
3631  rb_define_method(rb_cMatch, "eql?", match_equal, 1);
3632  rb_define_method(rb_cMatch, "==", match_equal, 1);
3633 }
VALUE rb_reg_match(VALUE re, VALUE str)
Definition: re.c:2765
static VALUE rb_reg_source(VALUE re)
Definition: re.c:468
static VALUE match_hash(VALUE match)
Definition: re.c:2643
static VALUE rb_reg_hash(VALUE re)
Definition: re.c:2585
#define T_SYMBOL
Definition: ruby.h:494
VALUE rb_eStandardError
Definition: error.c:546
#define ASCGET(s, e, cl)
void onig_set_warn_func(OnigWarnFunc f)
Definition: regparse.c:96
Definition: re.h:44
#define IS_NULL(p)
Definition: regint.h:276
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:139
#define ONIGENC_CASE_FOLD_DEFAULT
Definition: oniguruma.h:129
int onig_new(regex_t **reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5958
#define rb_str_new4
Definition: intern.h:842
VALUE rb_str_length(VALUE)
Definition: string.c:1298
int onig_foreach_name(regex_t *reg, int(*func)(const UChar *, const UChar *, int, int *, regex_t *, void *), void *arg)
Definition: regparse.c:537
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:1171
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:140
static VALUE rb_reg_options_m(VALUE re)
Definition: re.c:719
static void reg_enc_error(VALUE re, VALUE str)
Definition: re.c:1263
#define RARRAY_LEN(a)
Definition: ruby.h:878
void rb_bug(const char *fmt,...)
Definition: error.c:327
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:156
#define FALSE
Definition: nkf.h:174
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:916
VALUE rb_str_equal(VALUE str1, VALUE str2)
Definition: string.c:2543
long rb_str_coderange_scan_restartable(const char *, const char *, rb_encoding *, int *)
Definition: string.c:340
static VALUE rb_reg_s_last_match(int argc, VALUE *argv)
Definition: re.c:3505
static unsigned int rb_memsearch_qs_utf8_hash(const unsigned char *x)
Definition: re.c:173
size_t strlen(const char *)
#define INT2NUM(x)
Definition: ruby.h:1288
#define scan_oct(s, l, e)
Definition: util.h:50
void rb_backref_set(VALUE)
Definition: vm.c:953
static VALUE rb_reg_match_m(int argc, VALUE *argv, VALUE re)
Definition: re.c:2872
static int unescape_nonascii(const char *p, const char *end, rb_encoding *enc, VALUE buf, rb_encoding **encp, int *has_property, onig_errmsg_buffer err)
Definition: re.c:2189
VALUE rb_id2str(ID id)
Definition: ripper.c:17160
static VALUE rb_enc_reg_error_desc(const char *s, long len, rb_encoding *enc, int options, const char *err)
Definition: re.c:639
#define T_MATCH
Definition: ruby.h:493
void rb_define_virtual_variable(const char *, VALUE(*)(ANYARGS), void(*)(ANYARGS))
Definition: variable.c:616
#define ONIG_OPTION_NONE
Definition: oniguruma.h:352
#define NUM2INT(x)
Definition: ruby.h:630
static void kcode_setter(VALUE val, ID id)
Definition: re.c:3439
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1655
static long rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
Definition: re.c:205
static VALUE match_inspect(VALUE match)
Definition: re.c:1885
#define FL_TAINT
Definition: ruby.h:1137
UChar * onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar *start, const UChar *s, const UChar *end)
Definition: regenc.c:66
#define CLASS_OF(v)
Definition: ruby.h:440
int onigenc_set_default_encoding(OnigEncoding enc)
Definition: regenc.c:48
#define Qtrue
Definition: ruby.h:426
st_index_t rb_hash_end(st_index_t)
#define ARG_ENCODING_FIXED
Definition: re.c:264
static char * option_to_str(char str[4], int options)
Definition: re.c:290
#define OBJ_INIT_COPY(obj, orig)
Definition: intern.h:287
OnigPosition * end
Definition: oniguruma.h:616
static VALUE match_equal(VALUE match1, VALUE match2)
Definition: re.c:2667
void onig_region_copy(OnigRegion *to, OnigRegion *from)
Definition: regexec.c:331
static VALUE match_getter(void)
Definition: re.c:3458
static int append_utf8(unsigned long uv, VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
Definition: re.c:2102
#define RGENGC_WB_PROTECTED_REGEXP
Definition: ruby.h:726
#define REG_ENCODING_NONE
Definition: re.c:258
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:102
static VALUE kcode_getter(void)
Definition: re.c:3432
VALUE rb_eTypeError
Definition: error.c:548
static VALUE rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
Definition: re.c:2930
static VALUE rb_reg_equal(VALUE re1, VALUE re2)
Definition: re.c:2619
static int unescape_unicode_bmp(const char **pp, const char *end, VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
Definition: re.c:2166
#define rb_check_arity
Definition: intern.h:296
static VALUE rb_reg_s_alloc(VALUE klass)
Definition: re.c:2487
static VALUE rb_reg_desc(const char *s, long len, VALUE re)
Definition: re.c:425
st_table * names
Definition: encoding.c:50
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1436
static int pair_byte_cmp(const void *pair1, const void *pair2)
Definition: re.c:887
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:896
VALUE rb_str_buf_new2(const char *)
static VALUE match_string(VALUE match)
Definition: re.c:1839
static void update_char_offset(VALUE match)
Definition: re.c:898
#define SYM2ID(x)
Definition: ruby.h:356
static void ignorecase_setter(VALUE val, ID id)
Definition: re.c:3452
st_index_t rb_memhash(const void *ptr, long len)
Definition: random.c:1302
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:849
int onig_error_code_to_str(UChar *s, OnigPosition code, va_alist)
Definition: regerror.c:258
static VALUE match_to_s(VALUE match)
Definition: re.c:1816
VALUE rb_backref_get(void)
Definition: vm.c:947
static int onig_new_with_source(regex_t **reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
Definition: re.c:807
int rb_enc_str_coderange(VALUE)
Definition: string.c:435
#define Check_Type(v, t)
Definition: ruby.h:532
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1857
static int name_to_backref_number(struct re_registers *regs, VALUE regexp, const char *name, const char *name_end)
Definition: re.c:1692
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:826
struct re_registers regs
Definition: re.h:37
long byte_pos
Definition: re.c:882
VALUE rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
Definition: re.c:3308
#define rb_utf8_encindex()
Definition: internal.h:403
#define RB_GC_GUARD(v)
Definition: ruby.h:523
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
static VALUE reg_cache
Definition: re.c:2562
NORETURN(static void name_to_backref_error(VALUE name))
int rb_reg_backref_number(VALUE match, VALUE backref)
Definition: re.c:1100
static void rb_reg_expr_str(VALUE str, const char *s, long len, rb_encoding *enc, rb_encoding *resenc)
Definition: re.c:335
st_index_t rb_str_hash(VALUE)
Definition: string.c:2422
static long rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
Definition: re.c:113
VALUE rb_eSecurityError
Definition: error.c:557
#define ONIG_ENCODING_ASCII
Definition: oniguruma.h:183
st_data_t st_index_t
Definition: st.h:48
static VALUE match_aref(int argc, VALUE *argv, VALUE match)
Definition: re.c:1734
Definition: re.c:881
static VALUE rb_reg_casefold_p(VALUE re)
Definition: re.c:687
#define rb_enc_mbmaxlen(enc)
Definition: encoding.h:129
static int match_backref_number(VALUE match, VALUE backref)
Definition: re.c:1065
int char_offset_num_allocated
Definition: re.h:40
#define FIXNUM_P(f)
Definition: ruby.h:347
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1242
unsigned int OnigOptionType
Definition: oniguruma.h:347
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1506
VALUE rb_str_buf_append(VALUE, VALUE)
Definition: string.c:2282
static VALUE match_captures(VALUE match)
Definition: re.c:1686
static VALUE last_paren_match_getter(void)
Definition: re.c:1607
static int unescape_escaped_nonascii(const char **pp, const char *end, rb_encoding *enc, VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
Definition: re.c:2039
#define OBJ_TAINTED(x)
Definition: ruby.h:1176
#define ENC_CODERANGE_7BIT
Definition: encoding.h:49
const char * rb_obj_classname(VALUE)
Definition: variable.c:406
#define rb_ary_new2
Definition: intern.h:90
unsigned char OnigUChar
Definition: oniguruma.h:111
static void rb_reg_raise_str(VALUE str, int options, const char *err)
Definition: re.c:669
const UChar * name
Definition: re.c:1846
VALUE rb_reg_init_str(VALUE re, VALUE s, int options)
Definition: re.c:2511
VALUE rb_str_buf_cat(VALUE, const char *, long)
Definition: string.c:2124
#define NEWOBJ_OF(obj, type, klass, flags)
Definition: ruby.h:694
long rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
Definition: re.c:1378
void rb_global_variable(VALUE *var)
Definition: gc.c:4962
VALUE rb_reg_new_str(VALUE s, int options)
Definition: re.c:2505
long beg
Definition: re.h:32
int onig_compile(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
Definition: regcomp.c:5675
void rb_exc_raise(VALUE mesg)
Definition: eval.c:567
static int rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err, const char *sourcefile, int sourceline)
Definition: re.c:2464
static VALUE rb_reg_s_union(VALUE self, VALUE args0)
Definition: re.c:3143
static VALUE match_alloc(VALUE klass)
Definition: re.c:868
#define RB_TYPE_P(obj, type)
Definition: ruby.h:1664
VALUE rb_reg_last_match(VALUE match)
Definition: re.c:1509
#define MEMZERO(p, type, n)
Definition: ruby.h:1351
VALUE rb_lastline_get(void)
Definition: vm.c:959
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1351
#define FL_TEST(x, f)
Definition: ruby.h:1169
VALUE rb_reg_match_post(VALUE match)
Definition: re.c:1554
static st_index_t reg_hash(VALUE re)
Definition: re.c:2592
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Definition: re.c:1307
#define scan_hex(s, l, e)
Definition: util.h:52
void onigenc_set_default_caseconv_table(const UChar *table ARG_UNUSED)
Definition: regenc.c:368
VALUE rb_class_new_instance(int, VALUE *, VALUE)
Definition: object.c:1855
static VALUE ignorecase_getter(void)
Definition: re.c:3445
static int read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err)
Definition: re.c:1933
static VALUE rb_reg_preprocess_dregexp(VALUE ary, int options)
Definition: re.c:2348
int rb_block_given_p(void)
Definition: eval.c:712
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Definition: hash.c:1393
static int reg_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end, int back_num, int *back_refs, OnigRegex regex, void *arg)
Definition: re.c:760
static int check_unicode_range(unsigned long code, onig_errmsg_buffer err)
Definition: re.c:2091
static void match_check(VALUE match)
Definition: re.c:969
#define val
#define RREGEXP_SRC_PTR(r)
Definition: ruby.h:916
RUBY_EXTERN VALUE rb_cObject
Definition: ruby.h:1553
VALUE rb_eRuntimeError
Definition: error.c:547
VALUE rb_reg_new_ary(VALUE ary, int opt)
Definition: re.c:2523
#define MBCLEN_NEEDMORE_P(ret)
Definition: encoding.h:142
#define RSTRING_END(str)
Definition: ruby.h:849
void Init_Regexp(void)
Definition: re.c:3549
static VALUE rb_reg_error_desc(VALUE str, int options, const char *err)
Definition: re.c:662
#define mbclen(p, e, enc)
Definition: regex.h:33
VALUE rb_ary_new(void)
Definition: array.c:495
VALUE rb_str_buf_cat2(VALUE, const char *)
Definition: string.c:2134
#define ONIG_MAX_ERROR_MESSAGE_LEN
Definition: oniguruma.h:345
#define snprintf
Definition: subst.h:6
#define NIL_P(v)
Definition: ruby.h:438
static VALUE rb_reg_fixed_encoding_p(VALUE re)
Definition: re.c:1249
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:630
VALUE rb_reg_nth_match(int nth, VALUE match)
Definition: re.c:1483
static int reg_names_iter(const OnigUChar *name, const OnigUChar *name_end, int back_num, int *back_refs, OnigRegex regex, void *arg)
Definition: re.c:726
int onig_name_to_backref_number(regex_t *reg, const UChar *name, const UChar *name_end, OnigRegion *region)
Definition: regparse.c:870
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2225
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:790
static Regexp * make_regexp(const char *s, long len, rb_encoding *enc, int flags, onig_errmsg_buffer err, const char *sourcefile, int sourceline)
Definition: re.c:829
static VALUE match_values_at(int argc, VALUE *argv, VALUE match)
Definition: re.c:1795
void onig_region_free(OnigRegion *r, int free_self)
Definition: regexec.c:315
int onig_reg_init(regex_t *reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType *syntax)
Definition: regcomp.c:5898
void onig_set_verb_warn_func(OnigWarnFunc f)
Definition: regparse.c:101
#define TYPE(x)
Definition: ruby.h:505
int argc
Definition: ruby.c:131
#define Qfalse
Definition: ruby.h:425
static VALUE match_regexp(VALUE match)
Definition: re.c:1018
#define ALLOCA_N(type, n)
Definition: ruby.h:1337
int rb_uv_to_utf8(char[6], unsigned long)
Definition: pack.c:1900
#define range(low, item, hi)
Definition: date_strftime.c:21
#define ENC_CODERANGE_UNKNOWN
Definition: encoding.h:48
#define rb_enc_isprint(c, enc)
Definition: encoding.h:184
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1352
#define ENC_CODERANGE_BROKEN
Definition: encoding.h:51
VALUE rb_eEncCompatError
Definition: error.c:555
#define rb_str_new2
Definition: intern.h:840
int err
Definition: win32.c:114
static void match_setter(VALUE val)
Definition: re.c:3468
VALUE rb_cMatch
Definition: re.c:865
void rb_match_busy(VALUE match)
Definition: re.c:1214
VALUE rb_check_regexp_type(VALUE re)
Definition: re.c:3114
VALUE rb_sym_to_s(VALUE)
Definition: string.c:8478
static VALUE rb_reg_inspect(VALUE re)
Definition: re.c:491
VALUE rb_eIndexError
Definition: error.c:550
#define ENC_CODERANGE_VALID
Definition: encoding.h:50
VALUE rb_reg_match2(VALUE re)
Definition: re.c:2824
#define ALLOC(type)
Definition: ruby.h:1334
#define END(no)
Definition: re.c:26
static VALUE reg_operand(VALUE s, int check)
Definition: re.c:2683
VALUE rb_str_resize(VALUE, long)
Definition: string.c:2025
long rb_str_offset(VALUE, long)
Definition: string.c:1781
int rb_reg_options(VALUE re)
Definition: re.c:3102
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:970
long rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
Definition: re.c:227
VALUE rb_str_subseq(VALUE, long, long)
Definition: string.c:1839
struct re_pattern_buffer * ptr
Definition: ruby.h:911
#define RSTRING_LEN(str)
Definition: ruby.h:841
VALUE rb_yield(VALUE)
Definition: vm_eval.c:942
static void rb_reg_raise(const char *s, long len, const char *err, VALUE re)
Definition: re.c:631
static void rb_enc_reg_raise(const char *s, long len, rb_encoding *enc, int options, const char *err)
Definition: re.c:656
#define REALLOC_N(var, type, n)
Definition: ruby.h:1335
#define TRUE
Definition: nkf.h:175
static VALUE rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc, rb_encoding **fixed_enc, onig_errmsg_buffer err)
Definition: re.c:2295
ONIG_EXTERN const OnigSyntaxType * OnigDefaultSyntax
Definition: oniguruma.h:416
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1250
#define rb_enc_isspace(c, enc)
Definition: encoding.h:185
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:958
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:496
#define rb_enc_name(enc)
Definition: encoding.h:125
#define malloc
Definition: ripper.c:96
static VALUE match_to_a(VALUE match)
Definition: re.c:1667
VALUE rb_hash_new(void)
Definition: hash.c:298
#define MATCH_BUSY
Definition: re.c:1211
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1728
#define REG_LITERAL
Definition: re.c:257
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
VALUE rb_assoc_new(VALUE car, VALUE cdr)
Definition: array.c:616
#define PRIsVALUE
Definition: ruby.h:137
VALUE rb_get_values_at(VALUE obj, long olen, int argc, VALUE *argv, VALUE(*func)(VALUE, long))
Definition: array.c:2718
unsigned long ID
Definition: ruby.h:89
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1257
static void re_warn(const char *s)
Definition: re.c:3520
VALUE rb_reg_eqq(VALUE re, VALUE str)
Definition: re.c:2795
#define Qnil
Definition: ruby.h:427
static VALUE postmatch_getter(void)
Definition: re.c:1601
static int options(unsigned char *cp)
Definition: nkf.c:6357
#define OBJ_TAINT(x)
Definition: ruby.h:1177
unsigned long VALUE
Definition: ruby.h:88
static VALUE result
Definition: nkf.c:40
#define RBASIC(obj)
Definition: ruby.h:1116
#define rb_enc_str_asciicompat_p(str)
Definition: encoding.h:200
static VALUE rb_reg_s_quote(VALUE c, VALUE str)
Definition: re.c:3096
VALUE rb_enc_reg_new(const char *s, long len, rb_encoding *enc, int options)
Definition: re.c:2529
#define FIX2INT(x)
Definition: ruby.h:632
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:930
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: encoding.h:245
#define rb_enc_asciicompat(enc)
Definition: encoding.h:188
OnigPosition * beg
Definition: oniguruma.h:615
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
static VALUE match_end(VALUE match, VALUE n)
Definition: re.c:1195
static VALUE rb_reg_s_union_m(VALUE self, VALUE args)
Definition: re.c:3278
VALUE rb_fstring(VALUE)
Definition: string.c:201
static VALUE last_match_getter(void)
Definition: re.c:1589
static VALUE match_names(VALUE match)
Definition: re.c:1039
static long reg_match_pos(VALUE re, VALUE *strp, long pos)
Definition: re.c:2694
static void name_to_backref_error(VALUE name)
Definition: re.c:1700
#define CHAR_BIT
Definition: ruby.h:198
Definition: re.h:36
#define FL_UNSET(x, f)
Definition: ruby.h:1173
int rb_memcicmp(const void *x, const void *y, long len)
Definition: re.c:80
#define UChar
Definition: oniguruma.h:108
#define StringValueCStr(v)
Definition: ruby.h:541
static VALUE match_begin(VALUE match, VALUE n)
Definition: re.c:1160
#define RMATCH_REGS(obj)
Definition: re.h:52
static VALUE rb_reg_to_s(VALUE re)
Definition: re.c:521
#define RSTRING_PTR(str)
Definition: ruby.h:845
#define KCODE_FIXED
Definition: re.c:260
#define rb_exc_new3
Definition: intern.h:248
#define ENCODING_GET(obj)
Definition: encoding.h:38
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:832
int onig_noname_group_capture_is_active(regex_t *reg)
Definition: regparse.c:924
static VALUE prematch_getter(void)
Definition: re.c:1595
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end)
Definition: oniguruma.h:236
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
Definition: string.c:4747
char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN]
Definition: re.c:22
VALUE rb_reg_quote(VALUE str)
Definition: re.c:2984
#define INT2FIX(i)
Definition: ruby.h:231
long rb_enc_strlen(const char *, const char *, rb_encoding *)
Definition: string.c:1141
OnigPosition onig_search(regex_t *reg, const UChar *str, const UChar *end, const UChar *start, const UChar *range, OnigRegion *region, OnigOptionType option)
Definition: regexec.c:3902
#define MBCLEN_INVALID_P(ret)
Definition: encoding.h:141
#define RARRAY_AREF(a, i)
Definition: ruby.h:901
VALUE rb_check_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2631
static VALUE match_init_copy(VALUE obj, VALUE orig)
Definition: re.c:978
VALUE rb_cRegexp
Definition: re.c:1930
VALUE rb_str_buf_cat_ascii(VALUE, const char *)
Definition: string.c:2258
void rb_set_errinfo(VALUE err)
Definition: eval.c:1504
static int unescape_unicode_list(const char **pp, const char *end, VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
Definition: re.c:2129
VALUE rb_check_array_type(VALUE ary)
Definition: array.c:628
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse)
Definition: re.c:1346
typedef RUBY_SYMBOL_EXPORT_BEGIN struct re_pattern_buffer Regexp
Definition: re.h:29
#define ONIG_OPTION_MULTILINE
Definition: oniguruma.h:355
VALUE rb_str_catf(VALUE str, const char *format,...)
Definition: sprintf.c:1290
#define FL_WB_PROTECTED
Definition: ruby.h:1134
VALUE rb_check_string_type(VALUE)
Definition: string.c:1679
VALUE rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
Definition: re.c:2548
uint8_t key[16]
Definition: random.c:1250
VALUE rb_any_to_s(VALUE)
Definition: object.c:452
#define ONIGERR_MEMORY
Definition: oniguruma.h:528
#define ONIG_OPTION_EXTEND
Definition: oniguruma.h:354
static int char_to_option(int c)
Definition: re.c:268
#define LONG2FIX(i)
Definition: ruby.h:232
#define SIZEOF_VALUE
Definition: ruby.h:91
#define RTEST(v)
Definition: ruby.h:437
#define T_STRING
Definition: ruby.h:482
#define ONIG_MISMATCH
Definition: oniguruma.h:524
VALUE rb_reg_alloc(void)
Definition: re.c:2499
static VALUE match_size(VALUE match)
Definition: re.c:1058
static VALUE match_entry(VALUE match, long n)
Definition: re.c:1774
#define OBJ_INFECT(x, s)
Definition: ruby.h:1180
#define RREGEXP(obj)
Definition: ruby.h:1122
static Bigint * diff(Bigint *a, Bigint *b)
Definition: util.c:1466
st_index_t rb_hash_uint(st_index_t, st_index_t)
VALUE rb_reg_match_last(VALUE match)
Definition: re.c:1572
#define ARG_REG_OPTION_MASK
Definition: re.c:262
static rb_encoding * rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
Definition: re.c:1272
long rb_str_sublen(VALUE, long)
Definition: string.c:1828
#define rb_enc_left_char_head(s, p, e, enc)
Definition: encoding.h:170
VALUE rb_str_inspect(VALUE)
Definition: string.c:4792
#define BEG(no)
Definition: re.c:25
static unsigned int hash(const char *str, unsigned int len)
Definition: lex.c:56
VALUE rb_eRegexpError
Definition: re.c:20
int rb_char_to_option_kcode(int c, int *option, int *kcode)
Definition: re.c:301
static int match_inspect_name_iter(const OnigUChar *name, const OnigUChar *name_end, int back_num, int *back_refs, OnigRegex regex, void *arg0)
Definition: re.c:1851
VALUE rb_enc_str_new(const char *, long, rb_encoding *)
Definition: string.c:548
struct rmatch_offset * char_offset
Definition: re.h:41
#define rb_safe_level()
Definition: tcltklib.c:95
unsigned long ruby_scan_hex(const char *, size_t, size_t *)
Definition: util.c:42
#define RREGEXP_SRC_LEN(r)
Definition: ruby.h:917
const char * name
Definition: nkf.c:208
#define FL_SET(x, f)
Definition: ruby.h:1172
#define ONIG_OPTION_DEFAULT
Definition: oniguruma.h:349
static VALUE rb_reg_named_captures(VALUE re)
Definition: re.c:798
const char * rb_id2name(ID id)
Definition: ripper.c:17230
#define StringValuePtr(v)
Definition: ruby.h:540
long end
Definition: re.h:33
#define ONIG_OPTION_IGNORECASE
Definition: oniguruma.h:353
OnigEncoding enc
Definition: oniguruma.h:677
#define RMATCH(obj)
Definition: re.h:51
long char_pos
Definition: re.c:883
static VALUE rb_reg_s_try_convert(VALUE dummy, VALUE re)
Definition: re.c:3137
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1227
#define rb_check_frozen(obj)
Definition: intern.h:277
static VALUE rb_reg_names(VALUE re)
Definition: re.c:751
static void rb_reg_check(VALUE re)
Definition: re.c:327
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Definition: transcode.c:2885
long len
Definition: re.c:1847
VALUE rb_reg_nth_defined(int nth, VALUE match)
Definition: re.c:1465
#define ARG_ENCODING_NONE
Definition: re.c:265
static VALUE rb_reg_init_copy(VALUE copy, VALUE re)
Definition: re.c:3290
#define rb_enc_mbcput(c, buf, enc)
Definition: encoding.h:165
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:448
static long rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
Definition: re.c:153
VALUE rb_reg_new(const char *s, long len, int options)
Definition: re.c:2542
VALUE rb_str_buf_new(long)
Definition: string.c:891
#define SYMBOL_P(x)
Definition: ruby.h:354
#define rb_str_new3
Definition: intern.h:841
OnigOptionType options
Definition: oniguruma.h:678
VALUE rb_reg_regcomp(VALUE str)
Definition: re.c:2565
#define NULL
Definition: _sdbm.c:103
#define RREGEXP_SRC(r)
Definition: ruby.h:915
void onig_free(regex_t *reg)
Definition: regcomp.c:5587
static int match(VALUE str, VALUE pat, VALUE hash, int(*cb)(VALUE, VALUE))
Definition: date_parse.c:273
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1488
#define ruby_verbose
Definition: ruby.h:1475
VALUE rb_str_append(VALUE, VALUE)
Definition: string.c:2298
#define rb_ascii8bit_encindex()
Definition: internal.h:402
VALUE rb_reg_match_pre(VALUE match)
Definition: re.c:1527
void rb_warn(const char *fmt,...)
Definition: error.c:223
VALUE rb_str_to_str(VALUE)
Definition: string.c:964
static VALUE match_array(VALUE match, int start)
Definition: re.c:1613
VALUE rb_eArgError
Definition: error.c:549
#define T_REGEXP
Definition: ruby.h:483
#define NUM2LONG(x)
Definition: ruby.h:600
st_index_t rb_hash_start(st_index_t)
Definition: random.c:1296
static VALUE match_offset(VALUE match, VALUE n)
Definition: re.c:1124
unsigned long ruby_scan_oct(const char *, size_t, size_t *)
Definition: util.c:28
int char_offset_updated
Definition: re.h:39
#define RB_OBJ_WRITE(a, slot, b)
Definition: ruby.h:1213
VALUE rb_ary_aref(int argc, VALUE *argv, VALUE ary)
Definition: array.c:1234
VALUE rb_reg_check_preprocess(VALUE str)
Definition: re.c:2325
char ** argv
Definition: ruby.c:132
Definition: ruby.h:909
#define ISSPACE(c)
Definition: ruby.h:1770
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Definition: string.c:2251
#define StringValue(v)
Definition: ruby.h:539
static int rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc, int options, onig_errmsg_buffer err, const char *sourcefile, int sourceline)
Definition: re.c:2406
int rb_memcmp(const void *p1, const void *p2, long len)
Definition: re.c:95
VALUE rb_str_new(const char *, long)
Definition: string.c:534
#define errcpy(err, msg)
Definition: re.c:23
#define ONIGENC_MBC_MAXLEN(enc)
Definition: oniguruma.h:262