Ruby 2.1.3p242 (2014-09-19 revision 47630)
string.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  string.c -
4 
5  $Author: nagachika $
6  created at: Mon Aug 9 17:12:58 JST 1993
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9  Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
10  Copyright (C) 2000 Information-technology Promotion Agency, Japan
11 
12 **********************************************************************/
13 
14 #include "ruby/ruby.h"
15 #include "ruby/re.h"
16 #include "ruby/encoding.h"
17 #include "vm_core.h"
18 #include "internal.h"
19 #include "probes.h"
20 #include <assert.h>
21 
/* Shorthand for element `no` of the `beg` / `end` arrays reached through a
 * variable named `regs`; that variable must be in scope wherever these macros
 * are used. */
22 #define BEG(no) (regs->beg[(no)])
23 #define END(no) (regs->end[(no)])
24 
25 #include <math.h>
26 #include <ctype.h>
27 
28 #ifdef HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 
32 #define STRING_ENUMERATORS_WANTARRAY 0 /* next major */
33 
34 #undef rb_str_new_cstr
35 #undef rb_tainted_str_new_cstr
36 #undef rb_usascii_str_new_cstr
37 #undef rb_enc_str_new_cstr
38 #undef rb_external_str_new_cstr
39 #undef rb_locale_str_new_cstr
40 #undef rb_str_dup_frozen
41 #undef rb_str_buf_new_cstr
42 #undef rb_str_buf_cat2
43 #undef rb_str_cat2
44 
45 static VALUE rb_str_clear(VALUE str);
46 
49 
50 #define RUBY_MAX_CHAR_LEN 16
/* Flag bit tested before modification; a string carrying it is reported as
 * "temporarily locked". */
51 #define STR_TMPLOCK FL_USER7
/* For a string that has STR_NOEMBED set, clear both ELTS_SHARED and STR_ASSOC.
 * Embedded strings are left untouched. */
52 #define STR_UNSET_NOCAPA(s) do {\
53  if (FL_TEST((s),STR_NOEMBED)) FL_UNSET((s),(ELTS_SHARED|STR_ASSOC));\
54 } while (0)
55 
/* Switch a string to heap (non-embedded) representation: set STR_NOEMBED and
 * zero the embedded-length bits kept in the flags word. */
56 #define STR_SET_NOEMBED(str) do {\
57  FL_SET((str), STR_NOEMBED);\
58  STR_SET_EMBED_LEN((str), 0);\
59 } while (0)
/* Switch a string to embedded representation by clearing STR_NOEMBED. */
60 #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED)
/* Store `n` as the embedded length: the bits selected by
 * RSTRING_EMBED_LEN_MASK are cleared and `n`, shifted left by
 * RSTRING_EMBED_LEN_SHIFT, is OR-ed in.  `n` is evaluated once. */
61 #define STR_SET_EMBED_LEN(str, n) do { \
62  long tmp_n = (n);\
63  RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
64  RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
65 } while (0)
66 
/* Set the byte length of `str` to `n`, writing it to the flag bits for an
 * embedded string and to as.heap.len otherwise. */
67 #define STR_SET_LEN(str, n) do { \
68  if (STR_EMBED_P(str)) {\
69  STR_SET_EMBED_LEN((str), (n));\
70  }\
71  else {\
72  RSTRING(str)->as.heap.len = (n);\
73  }\
74 } while (0)
75 
/* Decrease the byte length of `str` by one, in whichever place the length is
 * stored for its current representation. */
76 #define STR_DEC_LEN(str) do {\
77  if (STR_EMBED_P(str)) {\
78  long n = RSTRING_LEN(str);\
79  n--;\
80  STR_SET_EMBED_LEN((str), n);\
81  }\
82  else {\
83  RSTRING(str)->as.heap.len--;\
84  }\
85 } while (0)
86 
/* Terminator width for `str`: the minimum character length of its encoding. */
87 #define TERM_LEN(str) rb_enc_mbminlen(rb_enc_get(str))
/* Write a terminator of `termlen` zero bytes at `ptr`.  The first byte is
 * always stored directly; memset() is only used for terminators wider than
 * one byte.  Both arguments are evaluated once. */
88 #define TERM_FILL(ptr, termlen) do {\
89  char *const term_fill_ptr = (ptr);\
90  const int term_fill_len = (termlen);\
91  *term_fill_ptr = '\0';\
92  if (UNLIKELY(term_fill_len > 1))\
93  memset(term_fill_ptr, 0, term_fill_len);\
94 } while (0)
95 
/* Change the capacity of `str` to `capacity` bytes plus room for the
 * terminator.
 *  - Embedded string: nothing happens unless `capacity` exceeds
 *    RSTRING_EMBED_LEN_MAX; then a heap buffer is allocated, the current bytes
 *    are copied into it and the string becomes non-embedded.
 *  - Heap string: the buffer is reallocated; aux.capa is only updated when
 *    STR_NOCAPA_P(str) is false.
 * Caution: `capacity` is expanded (and therefore evaluated) more than once,
 * and the macro declares a local named `termlen`. */
96 #define RESIZE_CAPA(str,capacity) do {\
97  const int termlen = TERM_LEN(str);\
98  if (STR_EMBED_P(str)) {\
99  if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
100  char *const tmp = ALLOC_N(char, (capacity)+termlen);\
101  const long tlen = RSTRING_LEN(str);\
102  memcpy(tmp, RSTRING_PTR(str), tlen);\
103  RSTRING(str)->as.heap.ptr = tmp;\
104  RSTRING(str)->as.heap.len = tlen;\
105  STR_SET_NOEMBED(str);\
106  RSTRING(str)->as.heap.aux.capa = (capacity);\
107  }\
108  }\
109  else {\
110  REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+termlen);\
111  if (!STR_NOCAPA_P(str))\
112  RSTRING(str)->as.heap.aux.capa = (capacity);\
113  }\
114 } while (0)
115 
/* Record `shared_str` in as.heap.aux.shared of `str` (through RB_OBJ_WRITE)
 * and set the ELTS_SHARED flag on `str`. */
116 #define STR_SET_SHARED(str, shared_str) do { \
117  RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
118  FL_SET((str), ELTS_SHARED); \
119 } while (0)
120 
/* Heap buffer pointer of `str`. */
121 #define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Capacity stored in aux.capa plus the terminator width. */
122 #define STR_HEAP_SIZE(str) (RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
123 
/* Encoding lookup used throughout this file; forwards to get_encoding(). */
124 #define STR_ENC_GET(str) get_encoding(str)
125 
127 
128 static rb_encoding *
129 get_actual_encoding(const int encidx, VALUE str)
130 {
131  const unsigned char *q;
132 
133  switch (encidx) {
134  case ENCINDEX_UTF_16:
135  if (RSTRING_LEN(str) < 2) break;
136  q = (const unsigned char *)RSTRING_PTR(str);
137  if (q[0] == 0xFE && q[1] == 0xFF) {
138  return rb_enc_get_from_index(ENCINDEX_UTF_16BE);
139  }
140  if (q[0] == 0xFF && q[1] == 0xFE) {
141  return rb_enc_get_from_index(ENCINDEX_UTF_16LE);
142  }
143  return rb_ascii8bit_encoding();
144  case ENCINDEX_UTF_32:
145  if (RSTRING_LEN(str) < 4) break;
146  q = (const unsigned char *)RSTRING_PTR(str);
147  if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF) {
148  return rb_enc_get_from_index(ENCINDEX_UTF_32BE);
149  }
150  if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF) {
151  return rb_enc_get_from_index(ENCINDEX_UTF_32LE);
152  }
153  return rb_ascii8bit_encoding();
154  }
155  return rb_enc_from_index(encidx);
156 }
157 
/* Returns get_actual_encoding() applied to the encoding index stored in `str`.
 * NOTE(review): the line with this function's name and parameters is missing
 * from this listing; STR_ENC_GET() expands to get_encoding(str), so this is
 * presumably get_encoding(VALUE str) -- confirm against the real file. */
158 static rb_encoding *
160 {
161  return get_actual_encoding(ENCODING_GET(str), str);
162 }
163 
164 static int fstring_cmp(VALUE a, VALUE b);
165 
167 
/* Hash-type descriptor handed to st_init_table() when the frozen_strings
 * table is created; it lists fstring_cmp and rb_str_hash as its callbacks. */
168 static const struct st_hash_type fstring_hash_type = {
169  fstring_cmp,
170  rb_str_hash,
171 };
172 
/* Callback given to st_update() on the frozen_strings table (it is passed
 * there under the name fstr_update_callback).
 * NOTE(review): the name/parameter line is missing from this listing; the
 * body uses `key`, `value`, `arg` and `existing`.
 *
 * When an entry already exists, the stored key is resurrected, reported
 * through *arg, and ST_STOP is returned.  Otherwise a frozen string is
 * produced (a fresh copy if `str` is shared, rb_str_new_frozen() if not),
 * flagged RSTRING_FSTR, and stored as key, value and result before returning
 * ST_CONTINUE. */
173 static int
175 {
176  VALUE *fstr = (VALUE *)arg;
177  VALUE str = (VALUE)*key;
178 
179  if (existing) {
180  /* because of lazy sweep, str may be unmarked already and swept
181  * at next time */
182  rb_gc_resurrect(*fstr = *key);
183  return ST_STOP;
184  }
185 
186  if (STR_SHARED_P(str)) {
187  /* str should not be shared */
188  str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), STR_ENC_GET(str));
189  OBJ_FREEZE(str);
190  }
191  else {
192  str = rb_str_new_frozen(str);
193  }
194  RBASIC(str)->flags |= RSTRING_FSTR;
195 
196  *key = *value = *fstr = str;
197  return ST_CONTINUE;
198 }
199 
/* Look up (or register) `str` in the frozen_strings table and return the
 * table's string.
 * NOTE(review): the name/parameter line is missing from this listing
 * (presumably rb_fstring(VALUE str) -- confirm).
 *
 * Raises through Check_Type() unless `str` is a T_STRING.  The table is
 * created on first use.  A string already flagged RSTRING_FSTR is returned
 * unchanged; otherwise the result comes from fstr_update_callback. */
200 VALUE
202 {
203  VALUE fstr = Qnil;
204  Check_Type(str, T_STRING);
205 
206  if (!frozen_strings)
207  frozen_strings = st_init_table(&fstring_hash_type);
208 
209  if (FL_TEST(str, RSTRING_FSTR))
210  return str;
211 
212  st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&fstr);
213  return fstr;
214 }
215 
/* Iteration callback: sets the class of the object stored as `key` to `arg`
 * and asks the iteration to continue.
 * NOTE(review): the name/parameter line is missing from this listing. */
216 static int
218 {
219  RBASIC_SET_CLASS((VALUE)key, (VALUE)arg);
220  return ST_CONTINUE;
221 }
222 
/* Comparison callback for the frozen-string table.  Returns the result of
 * rb_str_hash_cmp() when that is non-zero; otherwise the difference of the
 * two encoding indexes, so the result is 0 only when rb_str_hash_cmp()
 * reports 0 and both strings carry the same encoding index.
 * NOTE(review): the name/parameter line is missing from this listing; the
 * forward declaration above reads fstring_cmp(VALUE a, VALUE b). */
223 static int
225 {
226  int cmp = rb_str_hash_cmp(a, b);
227  if (cmp != 0) {
228  return cmp;
229  }
230  return ENCODING_GET(b) - ENCODING_GET(a);
231 }
232 
/* Returns 1 when every character of `str` is known to occupy one byte:
 * either the cached code range is 7BIT or the encoding's maximum character
 * length is 1.  Returns 0 otherwise, which does not prove the opposite.
 * NOTE(review): the name/parameter line is missing from this listing; a later
 * caller spells it single_byte_optimizable(str). */
233 static inline int
235 {
236  rb_encoding *enc;
237 
238  /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
239  if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
240  return 1;
241 
242  enc = STR_ENC_GET(str);
243  if (rb_enc_mbmaxlen(enc) == 1)
244  return 1;
245 
246  /* Conservative. Possibly single byte.
247  * "\xa1" in Shift_JIS for example. */
248  return 0;
249 }
250 
252 
/* Find the first byte in [p, e) for which ISASCII() is false.
 * Returns a pointer to that byte, or NULL when there is none.
 *
 * When NONASCII_MASK is available (VALUE is 4 or 8 bytes wide) and the range
 * is longer than two VALUE words, the middle of the range is scanned one
 * word at a time.  NONASCII_MASK stays defined after this function. */
253 static inline const char *
254 search_nonascii(const char *p, const char *e)
255 {
256 #if SIZEOF_VALUE == 8
257 # define NONASCII_MASK 0x8080808080808080ULL
258 #elif SIZEOF_VALUE == 4
259 # define NONASCII_MASK 0x80808080UL
260 #endif
261 #ifdef NONASCII_MASK
262  if ((int)sizeof(VALUE) * 2 < e - p) {
263  const VALUE *s, *t;
264  const VALUE lowbits = sizeof(VALUE) - 1;
/* s: p rounded up to the next VALUE-aligned address. */
265  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
/* Check the unaligned head byte by byte. */
266  while (p < (const char *)s) {
267  if (!ISASCII(*p))
268  return p;
269  p++;
270  }
/* t: e rounded down to a VALUE-aligned address. */
271  t = (const VALUE*)(~lowbits & (VALUE)e);
/* Stop at the first word that has the high bit set in any byte. */
272  while (s < t) {
273  if (*s & NONASCII_MASK) {
274  t = s;
275  break;
276  }
277  s++;
278  }
/* Resume the byte scan at that word, or at the aligned end if none hit. */
279  p = (const char *)t;
280  }
281 #endif
282  while (p < e) {
283  if (!ISASCII(*p))
284  return p;
285  p++;
286  }
287  return NULL;
288 }
289 
/* Classify the `len` bytes at `p` under `enc` and return one of
 * ENC_CODERANGE_7BIT, ENC_CODERANGE_VALID or ENC_CODERANGE_BROKEN.
 *
 * For ASCII-compatible encodings, runs of ASCII bytes are skipped with
 * search_nonascii() and only the remaining characters are validated with
 * rb_enc_precise_mbclen().  Other encodings validate every character.  A
 * character that is not found, or a scan that ends past `e`, yields BROKEN. */
290 static int
291 coderange_scan(const char *p, long len, rb_encoding *enc)
292 {
293  const char *e = p + len;
294 
295  if (rb_enc_to_index(enc) == 0) {
296  /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
297  p = search_nonascii(p, e);
/* NOTE(review): listing line 298 is missing here; as shown, this branch falls
 * through with the scanned `p` -- confirm the statement against the real file. */
299  }
300 
301  if (rb_enc_asciicompat(enc)) {
302  p = search_nonascii(p, e);
303  if (!p) {
304  return ENC_CODERANGE_7BIT;
305  }
306  while (p < e) {
307  int ret = rb_enc_precise_mbclen(p, e, enc);
308  if (!MBCLEN_CHARFOUND_P(ret)) {
309  return ENC_CODERANGE_BROKEN;
310  }
311  p += MBCLEN_CHARFOUND_LEN(ret);
312  if (p < e) {
313  p = search_nonascii(p, e);
314  if (!p) {
315  return ENC_CODERANGE_VALID;
316  }
317  }
318  }
319  if (e < p) {
320  return ENC_CODERANGE_BROKEN;
321  }
322  return ENC_CODERANGE_VALID;
323  }
324 
325  while (p < e) {
326  int ret = rb_enc_precise_mbclen(p, e, enc);
327 
328  if (!MBCLEN_CHARFOUND_P(ret)) {
329  return ENC_CODERANGE_BROKEN;
330  }
331  p += MBCLEN_CHARFOUND_LEN(ret);
332  }
333  if (e < p) {
334  return ENC_CODERANGE_BROKEN;
335  }
336  return ENC_CODERANGE_VALID;
337 }
338 
/* Scan [s, e) under `enc`, updating the running code range in *cr, and return
 * a byte count measured from `s`: `e - s` when the whole range was consumed,
 * `p - s` when the scan stopped at position `p`.
 *
 * A *cr that is already ENC_CODERANGE_BROKEN short-circuits to `e - s`.
 * NOTE(review): listing lines 350, 362, 374, 381 and 386 are missing, so the
 * statements that would assign *cr on those paths are not visible here --
 * confirm against the real file before relying on this description. */
339 long
340 rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
341 {
342  const char *p = s;
343 
344  if (*cr == ENC_CODERANGE_BROKEN)
345  return e - s;
346 
347  if (rb_enc_to_index(enc) == 0) {
348  /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
349  p = search_nonascii(p, e);
351  return e - s;
352  }
353  else if (rb_enc_asciicompat(enc)) {
354  p = search_nonascii(p, e);
355  if (!p) {
/* An all-ASCII chunk must not downgrade an earlier VALID result. */
356  if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
357  return e - s;
358  }
359  while (p < e) {
360  int ret = rb_enc_precise_mbclen(p, e, enc);
361  if (!MBCLEN_CHARFOUND_P(ret)) {
363  return p - s;
364  }
365  p += MBCLEN_CHARFOUND_LEN(ret);
366  if (p < e) {
367  p = search_nonascii(p, e);
368  if (!p) {
369  *cr = ENC_CODERANGE_VALID;
370  return e - s;
371  }
372  }
373  }
375  return p - s;
376  }
377  else {
378  while (p < e) {
379  int ret = rb_enc_precise_mbclen(p, e, enc);
380  if (!MBCLEN_CHARFOUND_P(ret)) {
382  return p - s;
383  }
384  p += MBCLEN_CHARFOUND_LEN(ret);
385  }
387  return p - s;
388  }
389 }
390 
/* Copy the encoding index of `str2` onto `str1`.
 * NOTE(review): the name/parameter line is missing from this listing; callers
 * below spell it str_enc_copy(dest, src). */
391 static inline void
393 {
394  rb_enc_set_index(str1, ENCODING_GET(str2));
395 }
396 
/* Copy the encoding from `src` to `dest` and then branch on the length of
 * `dest` and on the code range of `src`.
 * NOTE(review): the name/parameter line and every statement that would set
 * the code range of `dest` (listing lines 398, 406, 408, 413, 417-418, 420)
 * are missing, so what each branch stores cannot be read from this listing. */
397 static void
399 {
400  /* this function is designed for copying encoding and coderange
401  * from src to new string "dest" which is made from the part of src.
402  */
403  str_enc_copy(dest, src);
404  if (RSTRING_LEN(dest) == 0) {
405  if (!rb_enc_asciicompat(STR_ENC_GET(src)))
407  else
409  return;
410  }
411  switch (ENC_CODERANGE(src)) {
412  case ENC_CODERANGE_7BIT:
414  break;
415  case ENC_CODERANGE_VALID:
416  if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
419  else
421  break;
422  default:
423  break;
424  }
425 }
426 
/* Copy both the encoding index and the cached code range from `src` to
 * `dest` without re-examining the bytes.
 * NOTE(review): the name/parameter line is missing from this listing; callers
 * below spell it rb_enc_cr_str_exact_copy(dest, src). */
427 static void
429 {
430  str_enc_copy(dest, src);
431  ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
432 }
433 
/* Return the code range of `str`.  If the cached value is
 * ENC_CODERANGE_UNKNOWN the bytes are scanned with coderange_scan() and the
 * result is cached on the string before being returned.
 * NOTE(review): the name/parameter line is missing from this listing; a later
 * caller spells it rb_enc_str_coderange(str). */
434 int
436 {
437  int cr = ENC_CODERANGE(str);
438 
439  if (cr == ENC_CODERANGE_UNKNOWN) {
440  rb_encoding *enc = STR_ENC_GET(str);
441  cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
442  ENC_CODERANGE_SET(str, cr);
443  }
444  return cr;
445 }
446 
/* Returns TRUE only when the encoding of `str` is ASCII-compatible and its
 * code range (computed on demand) is ENC_CODERANGE_7BIT; FALSE otherwise.
 * NOTE(review): the name/parameter line is missing from this listing. */
447 int
449 {
450  rb_encoding *enc = STR_ENC_GET(str);
451 
452  if (!rb_enc_asciicompat(enc))
453  return FALSE;
454  else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
455  return TRUE;
456  return FALSE;
457 }
458 
459 static inline void
460 str_mod_check(VALUE s, const char *p, long len)
461 {
462  if (RSTRING_PTR(s) != p || RSTRING_LEN(s) != len){
463  rb_raise(rb_eRuntimeError, "string modified");
464  }
465 }
466 
/* Report the capacity of `str`: RSTRING_EMBED_LEN_MAX for an embedded string,
 * the current heap length when STR_NOCAPA_P(str) holds, and aux.capa
 * otherwise.
 * NOTE(review): the name/parameter line is missing from this listing. */
467 size_t
469 {
470  if (STR_EMBED_P(str)) {
471  return RSTRING_EMBED_LEN_MAX;
472  }
473  else if (STR_NOCAPA_P(str)) {
474  return RSTRING(str)->as.heap.len;
475  }
476  else {
477  return RSTRING(str)->as.heap.aux.capa;
478  }
479 }
480 
481 static inline VALUE
483 {
485  return (VALUE)str;
486 }
487 
488 static inline VALUE
490 {
493  }
494  return str_alloc(klass);
495 }
496 
/* Allocate a string of class `klass` holding `len` bytes and a terminator of
 * `termlen` zero bytes.
 *
 * klass   - class for the new object.
 * ptr     - bytes to copy in, or NULL to leave the contents uninitialised.
 * len     - byte length; a negative value raises ArgumentError.
 * termlen - width of the terminator written after the contents.
 *
 * Lengths above RSTRING_EMBED_LEN_MAX get a heap buffer of len + termlen
 * bytes; shorter strings stay embedded.
 * NOTE(review): listing lines 506-507 and 517 are missing, so the guard that
 * closes at line 508 and the body of the `len == 0` branch are not visible. */
497 static VALUE
498 str_new0(VALUE klass, const char *ptr, long len, int termlen)
499 {
500  VALUE str;
501 
502  if (len < 0) {
503  rb_raise(rb_eArgError, "negative string size (or size too big)");
504  }
505 
508  }
509 
510  str = str_alloc(klass);
511  if (len > RSTRING_EMBED_LEN_MAX) {
512  RSTRING(str)->as.heap.aux.capa = len;
513  RSTRING(str)->as.heap.ptr = ALLOC_N(char, len + termlen);
514  STR_SET_NOEMBED(str);
515  }
516  else if (len == 0) {
518  }
519  if (ptr) {
520  memcpy(RSTRING_PTR(str), ptr, len);
521  }
522  STR_SET_LEN(str, len);
523  TERM_FILL(RSTRING_PTR(str) + len, termlen);
524  return str;
525 }
526 
527 static VALUE
528 str_new(VALUE klass, const char *ptr, long len)
529 {
530  return str_new0(klass, ptr, len, 1);
531 }
532 
533 VALUE
534 rb_str_new(const char *ptr, long len)
535 {
536  return str_new(rb_cString, ptr, len);
537 }
538 
/* Build a string from `len` bytes at `ptr` via rb_str_new() and return it.
 * NOTE(review): listing line 543 is missing; given the function name it
 * presumably tags the result as US-ASCII -- confirm against the real file. */
539 VALUE
540 rb_usascii_str_new(const char *ptr, long len)
541 {
542  VALUE str = rb_str_new(ptr, len);
544  return str;
545 }
546 
547 VALUE
548 rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
549 {
550  VALUE str;
551 
552  if (!enc) return rb_str_new(ptr, len);
553 
554  str = str_new0(rb_cString, ptr, len, rb_enc_mbminlen(enc));
555  rb_enc_associate(str, enc);
556  return str;
557 }
558 
559 VALUE
560 rb_str_new_cstr(const char *ptr)
561 {
562  if (!ptr) {
563  rb_raise(rb_eArgError, "NULL pointer given");
564  }
565  return rb_str_new(ptr, strlen(ptr));
566 }
567 
/* Build a string from the NUL-terminated `ptr` via rb_str_new2() and return
 * it.
 * NOTE(review): listing line 572 is missing; given the function name it
 * presumably tags the result as US-ASCII -- confirm against the real file. */
568 VALUE
569 rb_usascii_str_new_cstr(const char *ptr)
570 {
571  VALUE str = rb_str_new2(ptr);
573  return str;
574 }
575 
576 VALUE
577 rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
578 {
579  if (!ptr) {
580  rb_raise(rb_eArgError, "NULL pointer given");
581  }
582  if (rb_enc_mbminlen(enc) != 1) {
583  rb_raise(rb_eArgError, "wchar encoding given");
584  }
585  return rb_enc_str_new(ptr, strlen(ptr), enc);
586 }
587 
588 VALUE
589 rb_tainted_str_new(const char *ptr, long len)
590 {
591  VALUE str = rb_str_new(ptr, len);
592 
593  OBJ_TAINT(str);
594  return str;
595 }
596 
597 VALUE
598 rb_tainted_str_new_cstr(const char *ptr)
599 {
600  VALUE str = rb_str_new2(ptr);
601 
602  OBJ_TAINT(str);
603  return str;
604 }
605 
/* Convert `str` from encoding `from` to encoding `to`.
 *
 * str     - source string.
 * from    - source encoding; NULL means "the encoding of str".
 * to      - target encoding; NULL returns `str` unchanged.
 * ecflags - flags forwarded to rb_econv_open_opts().
 * ecopts  - options forwarded to rb_econv_open_opts().
 *
 * Returns `str` itself when no conversion is needed or when conversion does
 * not finish cleanly; a duplicate re-tagged with `to` when only the label has
 * to change (ASCII-only content into an ASCII-compatible target, or target
 * ASCII-8BIT); otherwise a new string holding the converted bytes.
 * NOTE(review): listing lines 609 and 647 are missing; line 647 would carry
 * the end of the `while` condition below. */
606 VALUE
607 rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
608 {
610  rb_econv_t *ec;
611  rb_econv_result_t ret;
612  long len, olen;
613  VALUE econv_wrapper;
614  VALUE newstr;
615  const unsigned char *start, *sp;
616  unsigned char *dest, *dp;
617  size_t converted_output = 0;
618 
619  if (!to) return str;
620  if (!from) from = rb_enc_get(str);
621  if (from == to) return str;
622  if ((rb_enc_asciicompat(to) && is_ascii_string(str)) ||
623  to == rb_ascii8bit_encoding()) {
624  if (STR_ENC_GET(str) != to) {
625  str = rb_str_dup(str);
626  rb_enc_associate(str, to);
627  }
628  return str;
629  }
630 
631  len = RSTRING_LEN(str);
632  newstr = rb_str_new(0, len);
633  OBJ_INFECT(newstr, str);
634  olen = len;
635 
/* The converter is parked in a class-less wrapper object while it is in use;
 * the wrapper is detached and recycled once conversion is over. */
636  econv_wrapper = rb_obj_alloc(rb_cEncodingConverter);
637  RBASIC_CLEAR_CLASS(econv_wrapper);
638  ec = rb_econv_open_opts(from->name, to->name, ecflags, ecopts);
639  if (!ec) return str;
640  DATA_PTR(econv_wrapper) = ec;
641 
642  sp = (unsigned char*)RSTRING_PTR(str);
643  start = sp;
/* Each pass re-reads the output buffer pointer (rb_str_resize() below may
 * have changed it) and continues writing after the bytes already produced. */
644  while ((dest = (unsigned char*)RSTRING_PTR(newstr)),
645  (dp = dest + converted_output),
646  (ret = rb_econv_convert(ec, &sp, start + len, &dp, dest + olen, 0)),
648  /* destination buffer short */
649  size_t converted_input = sp - start;
650  size_t rest = len - converted_input;
651  converted_output = dp - dest;
652  rb_str_set_len(newstr, converted_output);
/* Estimate the extra room from the output/input ratio seen so far; fall back
 * to doubling when there is no ratio yet or the product might overflow. */
653  if (converted_input && converted_output &&
654  rest < (LONG_MAX / converted_output)) {
655  rest = (rest * converted_output) / converted_input;
656  }
657  else {
658  rest = olen;
659  }
660  olen += rest < 2 ? 2 : rest;
661  rb_str_resize(newstr, olen);
662  }
663  DATA_PTR(econv_wrapper) = 0;
664  rb_econv_close(ec);
665  rb_gc_force_recycle(econv_wrapper);
666  switch (ret) {
667  case econv_finished:
668  len = dp - (unsigned char*)RSTRING_PTR(newstr);
669  rb_str_set_len(newstr, len);
670  rb_enc_associate(newstr, to);
671  return newstr;
672 
673  default:
674  /* some error, return original */
675  return str;
676  }
677 }
678 
/* Convenience form of rb_str_conv_enc_opts() with flags 0 and options Qnil.
 * NOTE(review): the name/parameter line is missing from this listing; a later
 * caller spells it rb_str_conv_enc(str, from, to). */
679 VALUE
681 {
682  return rb_str_conv_enc_opts(str, from, to, 0, Qnil);
683 }
684 
685 VALUE
686 rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
687 {
688  VALUE str;
689 
690  str = rb_tainted_str_new(ptr, len);
691  return rb_external_str_with_enc(str, eenc);
692 }
693 
/* Tag `str` with the external encoding `eenc` and convert it to the default
 * internal encoding via rb_str_conv_enc().  A US-ASCII `eenc` has an
 * early-return path that hands back `str`.
 * NOTE(review): the name/parameter line and listing lines 698-699 (the rest
 * of the early-return condition and any statement in its body) are missing;
 * a caller above spells it rb_external_str_with_enc(str, eenc). */
694 VALUE
696 {
697  if (eenc == rb_usascii_encoding() &&
700  return str;
701  }
702  rb_enc_associate(str, eenc);
703  return rb_str_conv_enc(str, eenc, rb_default_internal_encoding());
704 }
705 
706 VALUE
707 rb_external_str_new(const char *ptr, long len)
708 {
710 }
711 
712 VALUE
713 rb_external_str_new_cstr(const char *ptr)
714 {
716 }
717 
718 VALUE
719 rb_locale_str_new(const char *ptr, long len)
720 {
722 }
723 
724 VALUE
725 rb_locale_str_new_cstr(const char *ptr)
726 {
728 }
729 
730 VALUE
731 rb_filesystem_str_new(const char *ptr, long len)
732 {
734 }
735 
736 VALUE
738 {
740 }
741 
742 VALUE
744 {
746 }
747 
/* Convert `str` from its own encoding to the locale encoding.
 * NOTE(review): the name/parameter line is missing from this listing. */
748 VALUE
750 {
751  return rb_str_conv_enc(str, STR_ENC_GET(str), rb_locale_encoding());
752 }
753 
/* Convert `str` from its own encoding to `enc`.
 * NOTE(review): the name/parameter line is missing from this listing. */
754 VALUE
756 {
757  return rb_str_conv_enc(str, STR_ENC_GET(str), enc);
758 }
759 
/* Make `str2` hold the contents of `str` and return `str2`; encoding and code
 * range are not touched here.
 *  - Short contents (<= RSTRING_EMBED_LEN_MAX) are copied into the embedded
 *    buffer, including one byte past the end.
 *  - Longer contents are not copied: `str` is replaced by a frozen version
 *    and `str2` points into that buffer, recorded as its shared source.
 * NOTE(review): the name/parameter line is missing from this listing. */
760 static VALUE
762 {
763  if (RSTRING_LEN(str) <= RSTRING_EMBED_LEN_MAX) {
764  STR_SET_EMBED(str2);
765  memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), RSTRING_LEN(str)+1);
766  STR_SET_EMBED_LEN(str2, RSTRING_LEN(str));
767  }
768  else {
769  str = rb_str_new_frozen(str);
770  FL_SET(str2, STR_NOEMBED);
771  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
772  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
773  STR_SET_SHARED(str2, str);
774  }
775  return str2;
776 }
777 
/* Copies encoding and code range from `str` to `str2` and returns `str2`.
 * NOTE(review): the name/parameter line and listing line 781 (the statement
 * before the encoding copy) are missing; callers below spell this function
 * str_replace_shared(str2, str). */
778 static VALUE
780 {
782  rb_enc_cr_str_exact_copy(str2, str);
783  return str2;
784 }
785 
/* Allocate a fresh string of class `klass` and fill it from `str` through
 * str_replace_shared().
 * NOTE(review): the name/parameter line is missing from this listing;
 * str_new3() below calls it as str_new_shared(klass, str). */
786 static VALUE
788 {
789  return str_replace_shared(str_alloc(klass), str);
790 }
791 
792 static VALUE
793 str_new3(VALUE klass, VALUE str)
794 {
795  return str_new_shared(klass, str);
796 }
797 
/* Create a string of the same class as `str` via str_new3() and propagate
 * taint from `str` to it (OBJ_INFECT).
 * NOTE(review): the name/parameter line is missing from this listing. */
798 VALUE
800 {
801  VALUE str2 = str_new3(rb_obj_class(str), str);
802 
803  OBJ_INFECT(str2, str);
804  return str2;
805 }
806 
/* Create a new heap string of class `klass` that points at the same buffer
 * as `str` (no bytes are copied) and return it.
 *
 * If `str` is already shared, the new string records the same shared source.
 * Otherwise the new string takes over the capacity value of `str` (unless
 * `str` has STR_ASSOC set) and `str` is turned into a sharer of the new
 * string.  Encoding, code range and taint are carried over from `str`. */
807 static VALUE
808 str_new4(VALUE klass, VALUE str)
809 {
810  VALUE str2;
811 
812  str2 = str_alloc(klass);
813  STR_SET_NOEMBED(str2);
814  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
815  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
816  if (STR_SHARED_P(str)) {
817  VALUE shared = RSTRING(str)->as.heap.aux.shared;
818  assert(OBJ_FROZEN(shared));
819  STR_SET_SHARED(str2, shared); /* TODO: WB is not needed because str2 is *new* object */
820  }
821  else {
822  if (!STR_ASSOC_P(str)) {
823  RSTRING(str2)->as.heap.aux.capa = RSTRING(str)->as.heap.aux.capa;
824  }
/* From here on `str` refers to the buffer through str2. */
825  STR_SET_SHARED(str, str2);
826  }
827  rb_enc_cr_str_exact_copy(str2, str);
828  OBJ_INFECT(str2, str);
829  return str2;
830 }
831 
/* Return a frozen string with the contents of `orig`.
 * NOTE(review): the name/parameter line is missing from this listing
 * (presumably rb_str_new_frozen(VALUE orig) -- confirm).
 *
 *  - Already frozen: `orig` itself is returned.
 *  - Shared with a non-zero source: the source is reused when it matches in
 *    length, class, taint and encoding; otherwise a new sharer is made and
 *    narrowed to the last RSTRING_LEN(orig) bytes of the source (pointer
 *    advanced and length reduced by `ofs`).
 *  - Embedded: the bytes are copied into a new string.
 *  - STR_ASSOC: the flag is cleared on `orig` before str_new4(), then the
 *    flag and the saved aux value are set on the new string.
 *  - Otherwise: str_new4().
 * The result is frozen before it is returned. */
832 VALUE
834 {
835  VALUE klass, str;
836 
837  if (OBJ_FROZEN(orig)) return orig;
838  klass = rb_obj_class(orig);
839  if (STR_SHARED_P(orig) && (str = RSTRING(orig)->as.heap.aux.shared)) {
840  long ofs;
841  assert(OBJ_FROZEN(str));
842  ofs = RSTRING_LEN(str) - RSTRING_LEN(orig);
843  if ((ofs > 0) || (klass != RBASIC(str)->klass) ||
844  ((RBASIC(str)->flags ^ RBASIC(orig)->flags) & FL_TAINT) ||
845  ENCODING_GET(str) != ENCODING_GET(orig)) {
846  str = str_new3(klass, str);
847  RSTRING(str)->as.heap.ptr += ofs;
848  RSTRING(str)->as.heap.len -= ofs;
849  rb_enc_cr_str_exact_copy(str, orig);
850  OBJ_INFECT(str, orig);
851  }
852  }
853  else if (STR_EMBED_P(orig)) {
854  str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
855  rb_enc_cr_str_exact_copy(str, orig);
856  OBJ_INFECT(str, orig);
857  }
858  else if (STR_ASSOC_P(orig)) {
859  VALUE assoc = RSTRING(orig)->as.heap.aux.shared;
860  FL_UNSET(orig, STR_ASSOC);
861  str = str_new4(klass, orig);
862  FL_SET(str, STR_ASSOC);
863  RB_OBJ_WRITE(str, &RSTRING(str)->as.heap.aux.shared, assoc);
864  /* TODO: WB is not needed because str is new object */
865  }
866  else {
867  str = str_new4(klass, orig);
868  }
869  OBJ_FREEZE(str);
870  return str;
871 }
872 
873 VALUE
874 rb_str_new_with_class(VALUE obj, const char *ptr, long len)
875 {
876  return str_new(rb_obj_class(obj), ptr, len);
877 }
878 
/* Create an empty string (NULL pointer, length 0, via rb_str_new5()) based on
 * `str`, then copy the encoding and propagate taint from `str`.
 * NOTE(review): the name/parameter line is missing from this listing. */
879 static VALUE
881 {
882  VALUE v = rb_str_new5(str, 0, 0);
883  rb_enc_copy(v, str);
884  OBJ_INFECT(v, str);
885  return v;
886 }
887 
888 #define STR_BUF_MIN_SIZE 128
889 
890 VALUE
891 rb_str_buf_new(long capa)
892 {
893  VALUE str = str_alloc(rb_cString);
894 
895  if (capa < STR_BUF_MIN_SIZE) {
896  capa = STR_BUF_MIN_SIZE;
897  }
898  FL_SET(str, STR_NOEMBED);
899  RSTRING(str)->as.heap.aux.capa = capa;
900  RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1);
901  RSTRING(str)->as.heap.ptr[0] = '\0';
902 
903  return str;
904 }
905 
906 VALUE
907 rb_str_buf_new_cstr(const char *ptr)
908 {
909  VALUE str;
910  long len = strlen(ptr);
911 
912  str = rb_str_buf_new(len);
913  rb_str_buf_cat(str, ptr, len);
914 
915  return str;
916 }
917 
918 VALUE
919 rb_str_tmp_new(long len)
920 {
921  return str_new(0, 0, len);
922 }
923 
924 void *
925 rb_alloc_tmp_buffer(volatile VALUE *store, long len)
926 {
927  VALUE s = rb_str_tmp_new(len);
928  *store = s;
929  return RSTRING_PTR(s);
930 }
931 
932 void
933 rb_free_tmp_buffer(volatile VALUE *store)
934 {
935  VALUE s = *store;
936  *store = 0;
937  if (s) rb_str_clear(s);
938 }
939 
/* Tear-down for a string object: a string flagged RSTRING_FSTR is first
 * removed from the frozen_strings table.
 * NOTE(review): the name/parameter line and listing line 948 are missing;
 * line 948 is the body of the "heap, not shared" branch and presumably
 * releases the heap buffer -- confirm against the real file. */
940 void
942 {
943  if (FL_TEST(str, RSTRING_FSTR)) {
944  st_data_t fstr = (st_data_t)str;
945  st_delete(frozen_strings, &fstr, NULL);
946  }
947  if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
949  }
950 }
951 
/* Report a memory size for `str`: STR_HEAP_SIZE(str) in one branch, 0 in the
 * other.
 * NOTE(review): the name/parameter line and listing line 955 (the condition
 * choosing between the branches) are missing from this listing. */
952 RUBY_FUNC_EXPORTED size_t
954 {
956  return STR_HEAP_SIZE(str);
957  }
958  else {
959  return 0;
960  }
961 }
962 
/* Convert `str` to a String by way of its `to_str` method, using
 * rb_convert_type().
 * NOTE(review): the name/parameter line is missing from this listing. */
963 VALUE
965 {
966  return rb_convert_type(str, T_STRING, "String", "to_str");
967 }
968 
969 static inline void str_discard(VALUE str);
970 
/* Move the contents of `str2` into `str`, including encoding, code range and
 * taint.  Does nothing when both are the same object.
 * NOTE(review): the name/parameter line is missing from this listing.
 *
 * Short contents are copied into the embedded buffer of `str` and `str2` is
 * left as it was.  Longer contents are handed over by pointer: `str` takes
 * the buffer (and either the shared/assoc reference or the capacity) of
 * `str2`, after which `str2` is reset to an empty embedded string. */
971 void
973 {
974  rb_encoding *enc;
975  int cr;
976  if (str == str2) return;
977  enc = STR_ENC_GET(str2);
978  cr = ENC_CODERANGE(str2);
979  str_discard(str);
980  OBJ_INFECT(str, str2);
981  if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) {
982  STR_SET_EMBED(str);
983  memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
984  STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
985  rb_enc_associate(str, enc);
986  ENC_CODERANGE_SET(str, cr);
987  return;
988  }
989  STR_SET_NOEMBED(str);
990  STR_UNSET_NOCAPA(str);
991  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
992  RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
993  if (STR_NOCAPA_P(str2)) {
994  VALUE shared = RSTRING(str2)->as.heap.aux.shared;
995  FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
996  RB_OBJ_WRITE(str, &RSTRING(str)->as.heap.aux.shared, shared);
997  }
998  else {
999  RSTRING(str)->as.heap.aux.capa = RSTRING(str2)->as.heap.aux.capa;
1000  }
1001  STR_SET_EMBED(str2); /* abandon str2 */
1002  RSTRING_PTR(str2)[0] = 0;
1003  STR_SET_EMBED_LEN(str2, 0);
1004  rb_enc_associate(str, enc);
1005  ENC_CODERANGE_SET(str, cr);
1006 }
1007 
1008 static ID id_to_s;
1009 
/* Return a String for `obj`.  A String is returned as is.  Otherwise the
 * method identified by id_to_s is called; if that does not produce a String
 * the result of rb_any_to_s(obj) is used instead.  The result of the call is
 * tainted when `obj` is tainted.
 * NOTE(review): the name/parameter line is missing from this listing. */
1010 VALUE
1012 {
1013  VALUE str;
1014 
1015  if (RB_TYPE_P(obj, T_STRING)) {
1016  return obj;
1017  }
1018  str = rb_funcall(obj, id_to_s, 0);
1019  if (!RB_TYPE_P(str, T_STRING))
1020  return rb_any_to_s(obj);
1021  if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
1022  return str;
1023 }
1024 
/* Make `str` hold the contents of `str2`, sharing the buffer where possible,
 * and return `str`.  Taint, encoding and code range are carried over.
 * NOTE(review): the name/parameter line is missing from this listing;
 * str_duplicate() below calls it as str_replace(dup, str).
 *
 * A `str2` with STR_ASSOC set is first replaced by rb_str_new4(str2).  If
 * `str2` is shared, `str` points at the same bytes and records the same
 * shared source; otherwise the work is delegated to str_replace_shared(). */
1025 static VALUE
1027 {
1028  long len;
1029 
1030  len = RSTRING_LEN(str2);
1031  if (STR_ASSOC_P(str2)) {
1032  str2 = rb_str_new4(str2);
1033  }
1034  if (STR_SHARED_P(str2)) {
1035  VALUE shared = RSTRING(str2)->as.heap.aux.shared;
1036  assert(OBJ_FROZEN(shared));
1037  STR_SET_NOEMBED(str);
1038  RSTRING(str)->as.heap.len = len;
1039  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1040  FL_SET(str, ELTS_SHARED);
1041  FL_UNSET(str, STR_ASSOC);
1042  STR_SET_SHARED(str, shared);
1043  }
1044  else {
1045  str_replace_shared(str, str2);
1046  }
1047 
1048  OBJ_INFECT(str, str2);
1049  rb_enc_cr_str_exact_copy(str, str2);
1050  return str;
1051 }
1052 
/* Allocate a new string of class `klass` and fill it from `str` with
 * str_replace(); returns the new string.
 * NOTE(review): the name/parameter line is missing from this listing; callers
 * below spell it str_duplicate(klass, str). */
1053 static VALUE
1055 {
1056  VALUE dup = str_alloc(klass);
1057  str_replace(dup, str);
1058  return dup;
1059 }
1060 
/* Duplicate `str` into a new string of the same class.
 * NOTE(review): the name/parameter line is missing from this listing. */
1061 VALUE
1063 {
1064  return str_duplicate(rb_obj_class(str), str);
1065 }
1066 
/* Duplicate `str` into a new string whose class is always rb_cString.
 * NOTE(review): the name/parameter line and listing lines 1070-1072 (a
 * conditional block that closes at line 1073) are missing from this listing. */
1067 VALUE
1069 {
1073  }
1074  return str_duplicate(rb_cString, str);
1075 }
1076 
1077 /*
1078  * call-seq:
1079  * String.new(str="") -> new_str
1080  *
1081  * Returns a new string object containing a copy of <i>str</i>.
1082  */
1083 
/* Implementation behind String.new: when exactly one argument is supplied,
 * the receiver's contents are replaced with it; returns the receiver.
 * NOTE(review): the name/parameter line is missing from this listing; the
 * body uses `argc`, `argv` and `str`. */
1084 static VALUE
1086 {
1087  VALUE orig;
1088 
1089  if (argc > 0 && rb_scan_args(argc, argv, "01", &orig) == 1)
1090  rb_str_replace(str, orig);
1091  return str;
1092 }
1093 
1094 static inline long
1095 enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
1096 {
1097  long c;
1098  const char *q;
1099 
1100  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1101  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
1102  }
1103  else if (rb_enc_asciicompat(enc)) {
1104  c = 0;
1105  if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) {
1106  while (p < e) {
1107  if (ISASCII(*p)) {
1108  q = search_nonascii(p, e);
1109  if (!q)
1110  return c + (e - p);
1111  c += q - p;
1112  p = q;
1113  }
1114  p += rb_enc_fast_mbclen(p, e, enc);
1115  c++;
1116  }
1117  }
1118  else {
1119  while (p < e) {
1120  if (ISASCII(*p)) {
1121  q = search_nonascii(p, e);
1122  if (!q)
1123  return c + (e - p);
1124  c += q - p;
1125  p = q;
1126  }
1127  p += rb_enc_mbclen(p, e, enc);
1128  c++;
1129  }
1130  }
1131  return c;
1132  }
1133 
1134  for (c=0; p<e; c++) {
1135  p += rb_enc_mbclen(p, e, enc);
1136  }
1137  return c;
1138 }
1139 
1140 long
1141 rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
1142 {
1143  return enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN);
1144 }
1145 
/* Count the characters in [p, e) under `enc` and report the code range that
 * was observed through *cr.
 *
 * *cr is reset to 0 on entry.  Fixed-width encodings return the rounded-up
 * quotient immediately and leave *cr at 0.  Otherwise each character is
 * validated with rb_enc_precise_mbclen(): a valid one ORs
 * ENC_CODERANGE_VALID into *cr, an invalid one sets ENC_CODERANGE_BROKEN and
 * is counted as one character.  If nothing set *cr, it ends up as
 * ENC_CODERANGE_7BIT. */
1146 long
1147 rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
1148 {
1149  long c;
1150  const char *q;
1151  int ret;
1152 
1153  *cr = 0;
1154  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1155  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
1156  }
1157  else if (rb_enc_asciicompat(enc)) {
1158  c = 0;
1159  while (p < e) {
1160  if (ISASCII(*p)) {
/* Count a whole run of ASCII bytes in one step. */
1161  q = search_nonascii(p, e);
1162  if (!q) {
1163  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1164  return c + (e - p);
1165  }
1166  c += q - p;
1167  p = q;
1168  }
1169  ret = rb_enc_precise_mbclen(p, e, enc);
1170  if (MBCLEN_CHARFOUND_P(ret)) {
1171  *cr |= ENC_CODERANGE_VALID;
1172  p += MBCLEN_CHARFOUND_LEN(ret);
1173  }
1174  else {
/* Invalid byte: skip exactly one byte. */
1175  *cr = ENC_CODERANGE_BROKEN;
1176  p++;
1177  }
1178  c++;
1179  }
1180  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1181  return c;
1182  }
1183 
1184  for (c=0; p<e; c++) {
1185  ret = rb_enc_precise_mbclen(p, e, enc);
1186  if (MBCLEN_CHARFOUND_P(ret)) {
1187  *cr |= ENC_CODERANGE_VALID;
1188  p += MBCLEN_CHARFOUND_LEN(ret);
1189  }
1190  else {
/* Invalid data: skip one minimum-width unit, without running past `e`. */
1191  *cr = ENC_CODERANGE_BROKEN;
1192  if (p + rb_enc_mbminlen(enc) <= e)
1193  p += rb_enc_mbminlen(enc);
1194  else
1195  p = e;
1196  }
1197  }
1198  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1199  return c;
1200 }
1201 
1202 #ifdef NONASCII_MASK
1203 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1204 
1205 /*
1206  * A UTF-8 leading byte has the bit pattern 0xxxxxxx or 11xxxxxx
1207  * (see http://en.wikipedia.org/wiki/UTF-8).
1208  * Therefore the following pseudo code detects a UTF-8 leading byte:
1209  *
1210  * if (!(byte & 0x80))
1211  * byte |= 0x40; // turn on bit6
1212  * return ((byte>>6) & 1); // bit6 tells whether it is a leading byte.
1213  *
1214  * This function applies the above logic to every byte of the word `*s'
1215  * in parallel and then adds up the per-byte results.
1216  */
1217 static inline VALUE
1218 count_utf8_lead_bytes_with_word(const VALUE *s)
1219 {
1220  VALUE d = *s;
1221 
1222  /* Make bit 0 of each byte say whether that byte is a UTF-8 leading byte. */
1223  d |= ~(d>>1);
1224  d >>= 6;
1225  d &= NONASCII_MASK >> 7;
1226 
1227  /* Add the per-byte flags together into the lowest byte. */
1228  d += (d>>8);
1229  d += (d>>16);
1230 #if SIZEOF_VALUE == 8
1231  d += (d>>32);
1232 #endif
1233  return (d&0xF);
1234 }
1235 #endif
1236 
/* Return the number of characters in `str`, counted under `enc` (the
 * string's own encoding when `enc` is NULL).
 * NOTE(review): the name/parameter line is missing from this listing; callers
 * below spell it str_strlen(str, enc).
 *
 * Strings that single_byte_optimizable() accepts are answered with their
 * byte length.  UTF-8 strings whose code range is VALID are counted by
 * counting leading bytes, a word at a time where possible.  Everything else
 * goes through rb_enc_strlen_cr(), and a non-zero code range reported by it
 * is cached on the string. */
1237 static long
1239 {
1240  const char *p, *e;
1241  long n;
1242  int cr;
1243 
1244  if (single_byte_optimizable(str)) return RSTRING_LEN(str);
1245  if (!enc) enc = STR_ENC_GET(str);
1246  p = RSTRING_PTR(str);
1247  e = RSTRING_END(str);
1248  cr = ENC_CODERANGE(str);
1249 #ifdef NONASCII_MASK
1250  if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1251  enc == rb_utf8_encoding()) {
1252 
1253  VALUE len = 0;
1254  if ((int)sizeof(VALUE) * 2 < e - p) {
1255  const VALUE *s, *t;
1256  const VALUE lowbits = sizeof(VALUE) - 1;
/* s: p rounded up, t: e rounded down, both to VALUE alignment. */
1257  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1258  t = (const VALUE*)(~lowbits & (VALUE)e);
1259  while (p < (const char *)s) {
1260  if (is_utf8_lead_byte(*p)) len++;
1261  p++;
1262  }
1263  while (s < t) {
1264  len += count_utf8_lead_bytes_with_word(s);
1265  s++;
1266  }
1267  p = (const char *)s;
1268  }
/* Remaining unaligned tail (or the whole string when it is short). */
1269  while (p < e) {
1270  if (is_utf8_lead_byte(*p)) len++;
1271  p++;
1272  }
1273  return (long)len;
1274  }
1275 #endif
1276  n = rb_enc_strlen_cr(p, e, enc, &cr);
1277  if (cr) {
1278  ENC_CODERANGE_SET(str, cr);
1279  }
1280  return n;
1281 }
1282 
/* Character length of `str` under its own encoding, as a C long.
 * NOTE(review): the name/parameter line is missing from this listing. */
1283 long
1285 {
1286  return str_strlen(str, STR_ENC_GET(str));
1287 }
1288 
1289 /*
1290  * call-seq:
1291  * str.length -> integer
1292  * str.size -> integer
1293  *
1294  * Returns the character length of <i>str</i>.
1295  */
1296 
/* Character length of `str` under its own encoding, boxed with LONG2NUM().
 * NOTE(review): the name/parameter line is missing from this listing. */
1297 VALUE
1299 {
1300  long len;
1301 
1302  len = str_strlen(str, STR_ENC_GET(str));
1303  return LONG2NUM(len);
1304 }
1305 
1306 /*
1307  * call-seq:
1308  * str.bytesize -> integer
1309  *
1310  * Returns the length of +str+ in bytes.
1311  *
1312  * "\x80\u3042".bytesize #=> 4
1313  * "hello".bytesize #=> 5
1314  */
1315 
/* Byte length of `str`, boxed with LONG2NUM().
 * NOTE(review): the name/parameter line is missing from this listing. */
1316 static VALUE
1318 {
1319  return LONG2NUM(RSTRING_LEN(str));
1320 }
1321 
1322 /*
1323  * call-seq:
1324  * str.empty? -> true or false
1325  *
1326  * Returns <code>true</code> if <i>str</i> has a length of zero.
1327  *
1328  * "hello".empty? #=> false
1329  * " ".empty? #=> false
1330  * "".empty? #=> true
1331  */
1332 
/* Qtrue when the byte length of `str` is zero, Qfalse otherwise.
 * NOTE(review): the name/parameter line is missing from this listing. */
1333 static VALUE
1335 {
1336  if (RSTRING_LEN(str) == 0)
1337  return Qtrue;
1338  return Qfalse;
1339 }
1340 
1341 /*
1342  * call-seq:
1343  * str + other_str -> new_str
1344  *
1345  * Concatenation---Returns a new <code>String</code> containing
1346  * <i>other_str</i> concatenated to <i>str</i>.
1347  *
1348  * "Hello from " + self.to_s #=> "Hello from main"
1349  */
1350 
/* Build a new string holding the bytes of `str1` followed by those of `str2`.
 * `str2` is coerced with StringValue(), rb_enc_check() is applied to the pair
 * to obtain `enc`, and the result is tainted when either operand is.
 * NOTE(review): the name/parameter line and listing lines 1367-1368 are
 * missing; `enc` is not used on any visible line, so those lines presumably
 * apply it to the result -- confirm against the real file. */
1351 VALUE
1353 {
1354  VALUE str3;
1355  rb_encoding *enc;
1356 
1357  StringValue(str2);
1358  enc = rb_enc_check(str1, str2);
1359  str3 = rb_str_new(0, RSTRING_LEN(str1)+RSTRING_LEN(str2));
1360  memcpy(RSTRING_PTR(str3), RSTRING_PTR(str1), RSTRING_LEN(str1));
1361  memcpy(RSTRING_PTR(str3) + RSTRING_LEN(str1),
1362  RSTRING_PTR(str2), RSTRING_LEN(str2));
1363  RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0';
1364 
1365  if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
1366  OBJ_TAINT(str3);
1369  return str3;
1370 }
1371 
1372 /*
1373  * call-seq:
1374  * str * integer -> new_str
1375  *
1376  * Copy --- Returns a new String containing +integer+ copies of the receiver.
1377  * +integer+ must be greater than or equal to 0.
1378  *
1379  * "Ho! " * 3 #=> "Ho! Ho! Ho! "
1380  * "Ho! " * 0 #=> ""
1381  */
1382 
/* Build a new string made of `times` copies of `str`.
 * NOTE(review): the name/parameter line is missing from this listing.
 *
 * Raises ArgumentError for a negative count ("negative argument") or when
 * count * length would exceed LONG_MAX ("argument too big").  Taint is
 * propagated from `str`, and encoding/code range are carried over with
 * rb_enc_cr_str_copy_for_substr(). */
1383 VALUE
1385 {
1386  VALUE str2;
1387  long n, len;
1388  char *ptr2;
1389 
1390  len = NUM2LONG(times);
1391  if (len < 0) {
1392  rb_raise(rb_eArgError, "negative argument");
1393  }
1394  if (len && LONG_MAX/len < RSTRING_LEN(str)) {
1395  rb_raise(rb_eArgError, "argument too big");
1396  }
1397 
/* From here on `len` is the total byte length of the result. */
1398  str2 = rb_str_new5(str, 0, len *= RSTRING_LEN(str));
1399  ptr2 = RSTRING_PTR(str2);
1400  if (len) {
1401  n = RSTRING_LEN(str);
1402  memcpy(ptr2, RSTRING_PTR(str), n);
/* Double the filled prefix while a full doubling still fits, then copy
 * whatever remains. */
1403  while (n <= len/2) {
1404  memcpy(ptr2 + n, ptr2, n);
1405  n *= 2;
1406  }
1407  memcpy(ptr2 + n, ptr2, len-n);
1408  }
1409  ptr2[RSTRING_LEN(str2)] = '\0';
1410  OBJ_INFECT(str2, str);
1411  rb_enc_cr_str_copy_for_substr(str2, str);
1412 
1413  return str2;
1414 }
1415 
1416 /*
1417  * call-seq:
1418  * str % arg -> new_str
1419  *
1420  * Format---Uses <i>str</i> as a format specification, and returns the result
1421  * of applying it to <i>arg</i>. If the format specification contains more than
1422  * one substitution, then <i>arg</i> must be an <code>Array</code> or <code>Hash</code>
1423  * containing the values to be substituted. See <code>Kernel::sprintf</code> for
1424  * details of the format string.
1425  *
1426  * "%05d" % 123 #=> "00123"
1427  * "%-5s: %08x" % [ "ID", self.object_id ] #=> "ID : 200e14d6"
1428  * "foo = %{foo}" % { :foo => 'bar' } #=> "foo = bar"
1429  */
1430 
1431 static VALUE
1433 {
 /* NOTE(review): the declarator line (listing line 1432) is missing from
  * this listing; the body uses str (the format) and arg. Upstream name is
  * presumably rb_str_format_m -- confirm. */
1434  volatile VALUE tmp = rb_check_array_type(arg);
1435 
 /* An array-like arg is expanded into the argument list of rb_str_format;
  * anything else is passed as the single argument. */
1436  if (!NIL_P(tmp)) {
1437  return rb_str_format(RARRAY_LENINT(tmp), RARRAY_CONST_PTR(tmp), str);
1438  }
1439  return rb_str_format(1, &arg, str);
1440 }
1441 
/*
 * Raises RuntimeError when str carries the STR_TMPLOCK flag, then runs
 * rb_check_frozen(str).
 *
 * NOTE(review): the declarator line (listing line 1443) is missing from this
 * listing; call sites in this file invoke this as str_modifiable(str).
 */
1442 static inline void
1444 {
1445  if (FL_TEST(str, STR_TMPLOCK)) {
1446  rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
1447  }
1448  rb_check_frozen(str);
1449 }
1450 
/*
 * Runs the str_modifiable() checks on str, then returns 1 when str is not
 * shared or is embedded, and 0 otherwise.
 *
 * NOTE(review): the declarator line (listing line 1452) is missing from this
 * listing; call sites in this file invoke this as str_independent(str).
 */
1451 static inline int
1453 {
1454  str_modifiable(str);
1455  if (!STR_SHARED_P(str)) return 1;
1456  if (STR_EMBED_P(str)) return 1;
1457  return 0;
1458 }
1459 
/*
 * Gives str a freshly allocated heap buffer with capacity len + expand
 * (plus room for the terminator) and copies the current contents into it.
 *
 * NOTE(review): the declarator line (listing line 1461) is missing from this
 * listing; call sites in this file invoke this as
 * str_make_independent_expand(str, expand) with a long expand.
 */
1460 static void
1462 {
1463  char *ptr;
1464  long len = RSTRING_LEN(str);
1465  const int termlen = TERM_LEN(str);
1466  long capa = len + expand;
1467 
 /* A negative expand shrinks the string: keep only what fits in capa. */
1468  if (len > capa) len = capa;
1469  ptr = ALLOC_N(char, capa + termlen);
1470  if (RSTRING_PTR(str)) {
1471  memcpy(ptr, RSTRING_PTR(str), len);
1472  }
 /* Switch str to the heap representation before storing the new buffer. */
1473  STR_SET_NOEMBED(str);
1474  STR_UNSET_NOCAPA(str);
1475  TERM_FILL(ptr + len, termlen);
1476  RSTRING(str)->as.heap.ptr = ptr;
1477  RSTRING(str)->as.heap.len = len;
1478  RSTRING(str)->as.heap.aux.capa = capa;
1479 }
1480 
/* Detach str into its own buffer without growing it (expand is 0). */
1481 #define str_make_independent(str) str_make_independent_expand((str), 0L)
1482 
/*
 * Prepares str for in-place modification: gives it its own buffer when
 * str_independent() reports 0, then clears the cached code range.
 *
 * NOTE(review): the declarator line (listing line 1484) is missing from this
 * listing; call sites in this file invoke this as rb_str_modify(str).
 */
1483 void
1485 {
1486  if (!str_independent(str))
1487  str_make_independent(str);
1488  ENC_CODERANGE_CLEAR(str);
1489 }
1490 
1491 void
1492 rb_str_modify_expand(VALUE str, long expand)
1493 {
1494  if (expand < 0) {
1495  rb_raise(rb_eArgError, "negative expanding string size");
1496  }
1497  if (!str_independent(str)) {
1498  str_make_independent_expand(str, expand);
1499  }
1500  else if (expand > 0) {
1501  long len = RSTRING_LEN(str);
1502  long capa = len + expand;
1503  int termlen = TERM_LEN(str);
1504  if (!STR_EMBED_P(str)) {
1505  REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa + termlen);
1506  STR_UNSET_NOCAPA(str);
1507  RSTRING(str)->as.heap.aux.capa = capa;
1508  }
1509  else if (capa + termlen > RSTRING_EMBED_LEN_MAX + 1) {
1510  str_make_independent_expand(str, expand);
1511  }
1512  }
1513  ENC_CODERANGE_CLEAR(str);
1514 }
1515 
1516 /* As rb_str_modify(), but don't clear coderange */
/*
 * The code range is still cleared when it is ENC_CODERANGE_BROKEN, so that
 * it gets re-scanned later.
 *
 * NOTE(review): the declarator line (listing line 1518) is missing from this
 * listing; upstream name is presumably str_modify_keep_cr -- confirm.
 */
1517 static void
1519 {
1520  if (!str_independent(str))
1521  str_make_independent(str);
1522  if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN)
1523  /* Force re-scan later */
1524  ENC_CODERANGE_CLEAR(str);
1525 }
1526 
/*
 * After the str_modifiable() checks, resets the heap pointer and length of
 * an unshared, non-embedded str to 0.
 *
 * NOTE(review): the declarator line (listing line 1528) and listing line
 * 1532 are missing from this listing. Line 1532 sat just before the pointer
 * is zeroed and presumably released the old buffer; upstream name is
 * presumably str_discard -- confirm both.
 */
1527 static inline void
1529 {
1530  str_modifiable(str);
1531  if (!STR_SHARED_P(str) && !STR_EMBED_P(str)) {
1533  RSTRING(str)->as.heap.ptr = 0;
1534  RSTRING(str)->as.heap.len = 0;
1535  }
1536 }
1537 
/*
 * Attaches the array add to str by storing it in the heap aux.shared slot
 * and setting STR_ASSOC. When str is already associated, add is
 * concatenated onto the existing array instead.
 *
 * NOTE(review): the declarator line (listing line 1539) is missing from this
 * listing; the body uses the operands str and add. Upstream name is
 * presumably rb_str_associate -- confirm.
 */
1538 void
1540 {
1541  /* sanity check */
1542  rb_check_frozen(str);
1543  if (STR_ASSOC_P(str)) {
1544  /* already associated */
1545  rb_ary_concat(RSTRING(str)->as.heap.aux.shared, add);
1546  }
1547  else {
1548  if (STR_SHARED_P(str)) {
 /* Remember the shared root before detaching; if that root already had an
  * association array, append to it and keep using that array. */
1549  VALUE assoc = RSTRING(str)->as.heap.aux.shared;
1550  str_make_independent(str);
1551  if (STR_ASSOC_P(assoc)) {
1552  assoc = RSTRING(assoc)->as.heap.aux.shared;
1553  rb_ary_concat(assoc, add);
1554  add = assoc;
1555  }
1556  }
1557  else if (STR_EMBED_P(str)) {
1558  str_make_independent(str);
1559  }
 /* The aux slot holds either capa or the association, so shrink the
  * capacity to the exact length before the slot is overwritten below. */
1560  else if (RSTRING(str)->as.heap.aux.capa != RSTRING_LEN(str)) {
1561  RESIZE_CAPA(str, RSTRING_LEN(str));
1562  }
1563  FL_SET(str, STR_ASSOC);
1564  RBASIC_CLEAR_CLASS(add);
1565  RB_OBJ_WRITE(str, &RSTRING(str)->as.heap.aux.shared, add);
1566  }
1567 }
1568 
/*
 * Returns the value stored in the aux.shared slot of an STR_ASSOC string,
 * looking through one level of sharing first; returns Qfalse when there is
 * no association.
 *
 * NOTE(review): the declarator line (listing line 1570) is missing from this
 * listing; upstream name is presumably rb_str_associated -- confirm.
 */
1569 VALUE
1571 {
1572  if (STR_SHARED_P(str)) str = RSTRING(str)->as.heap.aux.shared;
1573  if (STR_ASSOC_P(str)) {
1574  return RSTRING(str)->as.heap.aux.shared;
1575  }
1576  return Qfalse;
1577 }
1578 
/*
 * Raises EncCompatError, naming the encoding, when the encoding of str is
 * not ASCII compatible.
 *
 * NOTE(review): the declarator line (listing line 1580) is missing from this
 * listing; upstream name is presumably rb_must_asciicompat -- confirm.
 */
1579 void
1581 {
1582  rb_encoding *enc = rb_enc_get(str);
1583  if (!rb_enc_asciicompat(enc)) {
1584  rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
1585  }
1586 }
1587 
1588 VALUE
1589 rb_string_value(volatile VALUE *ptr)
1590 {
1591  VALUE s = *ptr;
1592  if (!RB_TYPE_P(s, T_STRING)) {
1593  s = rb_str_to_str(s);
1594  *ptr = s;
1595  }
1596  return s;
1597 }
1598 
/*
 * Converts *ptr with rb_string_value() and returns the resulting string's
 * byte pointer.
 *
 * NOTE(review): the declarator line (listing line 1600) is missing from this
 * listing; upstream name is presumably rb_string_value_ptr -- confirm.
 */
1599 char *
1601 {
1602  VALUE str = rb_string_value(ptr);
1603  return RSTRING_PTR(str);
1604 }
1605 
/*
 * Returns 1 when the first n bytes at s are all zero, 0 as soon as a
 * non-zero byte is seen.  A non-positive n inspects nothing and yields 1.
 */
static int
zero_filled(const char *s, int n)
{
    int i;

    for (i = 0; i < n; i++) {
        if (s[i] != '\0') {
            return 0;
        }
    }
    return 1;
}
1614 
1615 static const char *
1616 str_null_char(const char *s, long len, const int minlen, rb_encoding *enc)
1617 {
1618  const char *e = s + len;
1619 
1620  for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
1621  if (zero_filled(s, minlen)) return s;
1622  }
1623  return 0;
1624 }
1625 
1626 static char *
1627 str_fill_term(VALUE str, char *s, long len, int oldtermlen, int termlen)
1628 {
1629  long capa = rb_str_capacity(str) + 1;
1630 
1631  if (capa < len + termlen) {
1632  rb_str_modify_expand(str, termlen);
1633  }
1634  else if (!str_independent(str)) {
1635  if (zero_filled(s + len, termlen)) return s;
1636  str_make_independent(str);
1637  }
1638  s = RSTRING_PTR(str);
1639  TERM_FILL(s + len, termlen);
1640  return s;
1641 }
1642 
/*
 * Converts *ptr with rb_string_value() and returns its buffer after making
 * sure it is usable as a terminated C string: raises ArgumentError when the
 * content contains a null character/byte, and writes the terminator when it
 * is not already there.
 *
 * NOTE(review): the declarator line (listing line 1644) is missing from this
 * listing; upstream name is presumably rb_string_value_cstr -- confirm.
 */
1643 char *
1645 {
1646  VALUE str = rb_string_value(ptr);
1647  char *s = RSTRING_PTR(str);
1648  long len = RSTRING_LEN(str);
1649  rb_encoding *enc = rb_enc_get(str);
1650  const int minlen = rb_enc_mbminlen(enc);
1651 
 /* Encodings whose minimum character width exceeds one byte need a wide
  * terminator and a per-character null search. */
1652  if (minlen > 1) {
1653  if (str_null_char(s, len, minlen, enc)) {
1654  rb_raise(rb_eArgError, "string contains null char");
1655  }
1656  return str_fill_term(str, s, len, minlen, minlen);
1657  }
1658  if (!s || memchr(s, 0, len)) {
1659  rb_raise(rb_eArgError, "string contains null byte");
1660  }
 /* The byte after the content is not NUL: make the string modifiable and
  * write the terminator through the re-read pointer. */
1661  if (s[len]) {
1662  rb_str_modify(str);
1663  s = RSTRING_PTR(str);
1664  s[RSTRING_LEN(str)] = 0;
1665  }
1666  return s;
1667 }
1668 
1669 void
1670 rb_str_fill_terminator(VALUE str, const int newminlen)
1671 {
1672  char *s = RSTRING_PTR(str);
1673  long len = RSTRING_LEN(str);
1674  rb_encoding *enc = rb_enc_get(str);
1675  str_fill_term(str, s, len, rb_enc_mbminlen(enc), newminlen);
1676 }
1677 
/*
 * Returns the result of rb_check_convert_type() asked to convert str to a
 * T_STRING via "to_str".
 *
 * NOTE(review): the declarator line (listing line 1679) is missing from this
 * listing; a call site in this file invokes this as
 * rb_check_string_type(str).
 */
1678 VALUE
1680 {
1681  str = rb_check_convert_type(str, T_STRING, "String", "to_str");
1682  return str;
1683 }
1684 
1685 /*
1686  * call-seq:
1687  * String.try_convert(obj) -> string or nil
1688  *
1689  * Try to convert <i>obj</i> into a String, using to_str method.
1690  * Returns converted string or nil if <i>obj</i> cannot be converted
1691  * for any reason.
1692  *
1693  * String.try_convert("str") #=> "str"
1694  * String.try_convert(/re/) #=> nil
1695  */
1696 static VALUE
1698 {
 /* NOTE(review): the declarator line (listing line 1697) is missing from
  * this listing; upstream name is presumably rb_str_s_try_convert --
  * confirm. The body simply forwards str to rb_check_string_type(). */
1699  return rb_check_string_type(str);
1700 }
1701 
/*
 * Skips up to *nthp characters of encoding enc starting at p and returns
 * the position reached; the returned pointer is never past e.
 *
 * *nthp is overwritten with the local counter, whose meaning depends on the
 * branch taken:
 *   - single-byte and fixed-width encodings: the pointer is advanced
 *     arithmetically and the counter is written back unchanged;
 *   - ASCII-compatible variable-width encodings: the number of characters
 *     that were still left to skip;
 *   - anything else: the loop counter as left by the post-decrement in the
 *     loop condition.
 */
1702 static char*
1703 str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
1704 {
1705  long nth = *nthp;
1706  if (rb_enc_mbmaxlen(enc) == 1) {
1707  p += nth;
1708  }
1709  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1710  p += nth * rb_enc_mbmaxlen(enc);
1711  }
1712  else if (rb_enc_asciicompat(enc)) {
1713  const char *p2, *e2;
1714  int n;
1715 
1716  while (p < e && 0 < nth) {
 /* Each character takes at least one byte, so nth characters cannot fit
  * when fewer than nth bytes remain. */
1717  e2 = p + nth;
1718  if (e < e2) {
1719  *nthp = nth;
1720  return (char *)e;
1721  }
 /* Skip a run of ASCII bytes in one step: one byte is one character. */
1722  if (ISASCII(*p)) {
1723  p2 = search_nonascii(p, e2);
1724  if (!p2) {
1725  nth -= e2 - p;
1726  *nthp = nth;
1727  return (char *)e2;
1728  }
1729  nth -= p2 - p;
1730  p = p2;
1731  }
1732  n = rb_enc_mbclen(p, e, enc);
1733  p += n;
1734  nth--;
1735  }
1736  *nthp = nth;
1737  if (nth != 0) {
1738  return (char *)e;
1739  }
1740  return (char *)p;
1741  }
1742  else {
1743  while (p < e && nth--) {
1744  p += rb_enc_mbclen(p, e, enc);
1745  }
1746  }
 /* The arithmetic shortcuts above may have stepped beyond the end. */
1747  if (p > e) p = e;
1748  *nthp = nth;
1749  return (char*)p;
1750 }
1751 
1752 char*
1753 rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
1754 {
1755  return str_nth_len(p, e, &nth, enc);
1756 }
1757 
1758 static char*
1759 str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1760 {
1761  if (singlebyte)
1762  p += nth;
1763  else {
1764  p = str_nth_len(p, e, &nth, enc);
1765  }
1766  if (!p) return 0;
1767  if (p > e) p = e;
1768  return (char *)p;
1769 }
1770 
1771 /* char offset to byte offset */
1772 static long
1773 str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1774 {
1775  const char *pp = str_nth(p, e, nth, enc, singlebyte);
1776  if (!pp) return e - p;
1777  return pp - p;
1778 }
1779 
/*
 * Converts pos for str through str_offset(), whose comment in this file
 * describes it as "char offset to byte offset".
 *
 * NOTE(review): listing line 1784, which carried the remaining arguments of
 * the str_offset() call, is missing from this listing -- restore it from
 * upstream before building.
 */
1780 long
1781 rb_str_offset(VALUE str, long pos)
1782 {
1783  return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
1785 }
1786 
1787 #ifdef NONASCII_MASK
/*
 * Variant of the nth-character search that counts characters by counting
 * bytes accepted by is_utf8_lead_byte(). Returns the position reached
 * (at most e) and writes the remaining count back through nthp.
 *
 * When both the buffer and the count are larger than two VALUE words, the
 * bulk of the scan is done one VALUE word at a time.
 */
1788 static char *
1789 str_utf8_nth(const char *p, const char *e, long *nthp)
1790 {
1791  long nth = *nthp;
1792  if ((int)SIZEOF_VALUE * 2 < e - p && (int)SIZEOF_VALUE * 2 < nth) {
1793  const VALUE *s, *t;
1794  const VALUE lowbits = sizeof(VALUE) - 1;
 /* s: p rounded up to a VALUE boundary; t: e rounded down to one. */
1795  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1796  t = (const VALUE*)(~lowbits & (VALUE)e);
 /* Byte-wise until the aligned start is reached. */
1797  while (p < (const char *)s) {
1798  if (is_utf8_lead_byte(*p)) nth--;
1799  p++;
1800  }
 /* Word-wise while a whole word cannot contain more lead bytes than are
  * still wanted. */
1801  do {
1802  nth -= count_utf8_lead_bytes_with_word(s);
1803  s++;
1804  } while (s < t && (int)sizeof(VALUE) <= nth);
1805  p = (char *)s;
1806  }
 /* Byte-wise tail: stop on the lead byte found once the count is zero. */
1807  while (p < e) {
1808  if (is_utf8_lead_byte(*p)) {
1809  if (nth == 0) break;
1810  nth--;
1811  }
1812  p++;
1813  }
1814  *nthp = nth;
1815  return (char *)p;
1816 }
1817 
/*
 * Returns the number of bytes between p and the position that
 * str_utf8_nth() reports for the nth character; the leftover count is
 * discarded.
 */
static long
str_utf8_offset(const char *p, const char *e, long nth)
{
    long remaining = nth;
    const char *reached = str_utf8_nth(p, e, &remaining);

    return reached - p;
}
1824 #endif
1825 
1826 /* byte offset to char offset */
1827 long
1828 rb_str_sublen(VALUE str, long pos)
1829 {
1830  if (single_byte_optimizable(str) || pos < 0)
1831  return pos;
1832  else {
1833  char *p = RSTRING_PTR(str);
1834  return enc_strlen(p, p + pos, STR_ENC_GET(str), ENC_CODERANGE(str));
1835  }
1836 }
1837 
1838 VALUE
1839 rb_str_subseq(VALUE str, long beg, long len)
1840 {
1841  VALUE str2;
1842 
1843  if (RSTRING_LEN(str) == beg + len &&
1844  RSTRING_EMBED_LEN_MAX < len) {
1845  str2 = rb_str_new_shared(rb_str_new_frozen(str));
1846  rb_str_drop_bytes(str2, beg);
1847  }
1848  else {
1849  str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len);
1850  RB_GC_GUARD(str);
1851  }
1852 
1853  rb_enc_cr_str_copy_for_substr(str2, str);
1854  OBJ_INFECT(str2, str);
1855 
1856  return str2;
1857 }
1858 
/*
 * Locates a substring of str given a start beg and a length *lenp.
 *
 * Returns a pointer to the first byte of the substring and stores its byte
 * length in *lenp, or returns 0 when *lenp is negative or the start is out
 * of range. A negative beg counts back from the end.
 *
 * For single-byte optimizable strings beg and len are used directly as byte
 * counts; otherwise they are converted to byte positions by one of the
 * encoding-specific paths below.
 */
1859 char *
1860 rb_str_subpos(VALUE str, long beg, long *lenp)
1861 {
1862  long len = *lenp;
1863  long slen = -1L;
1864  long blen = RSTRING_LEN(str);
1865  rb_encoding *enc = STR_ENC_GET(str);
1866  char *p, *s = RSTRING_PTR(str), *e = s + blen;
1867 
1868  if (len < 0) return 0;
1869  if (!blen) {
1870  len = 0;
1871  }
1872  if (single_byte_optimizable(str)) {
1873  if (beg > blen) return 0;
1874  if (beg < 0) {
1875  beg += blen;
1876  if (beg < 0) return 0;
1877  }
 /* Clip the length to what is available after beg. */
1878  if (beg + len > blen)
1879  len = blen - beg;
1880  if (len < 0) return 0;
1881  p = s + beg;
1882  goto end;
1883  }
1884  if (beg < 0) {
1885  if (len > -beg) len = -beg;
 /* When the start is close to the end (bound estimated from the maximum
  * character width), walk backwards from the end instead of measuring the
  * whole string. */
1886  if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
1887  beg = -beg;
 /* First step e back to the end of the substring, then step p back len
  * more characters to its start. */
1888  while (beg-- > len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
1889  p = e;
1890  if (!p) return 0;
1891  while (len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
1892  if (!p) return 0;
1893  len = e - p;
1894  goto end;
1895  }
1896  else {
 /* Turn the negative start into a non-negative character index. */
1897  slen = str_strlen(str, enc);
1898  beg += slen;
1899  if (beg < 0) return 0;
1900  p = s + beg;
1901  if (len == 0) goto end;
1902  }
1903  }
 /* A start beyond the byte length is certainly beyond the last character. */
1904  else if (beg > 0 && beg > RSTRING_LEN(str)) {
1905  return 0;
1906  }
 /* Empty result: only the validity of beg matters. */
1907  if (len == 0) {
1908  if (beg > str_strlen(str, enc)) return 0;
1909  p = s + beg;
1910  }
1911 #ifdef NONASCII_MASK
1912  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1913  enc == rb_utf8_encoding()) {
1914  p = str_utf8_nth(s, e, &beg);
1915  if (beg > 0) return 0;
1916  len = str_utf8_offset(p, e, len);
1917  }
1918 #endif
 /* Fixed-width encoding: positions are plain multiples of the width. */
1919  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1920  int char_sz = rb_enc_mbmaxlen(enc);
1921 
1922  p = s + beg * char_sz;
1923  if (p > e) {
1924  return 0;
1925  }
1926  else if (len * char_sz > e - p)
1927  len = e - p;
1928  else
1929  len *= char_sz;
1930  }
 /* General case: reaching e means the start is at or past the end. */
1931  else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
1932  if (beg > 0) return 0;
1933  len = 0;
1934  }
1935  else {
1936  len = str_offset(p, e, len, enc, 0);
1937  }
1938  end:
1939  *lenp = len;
1940  RB_GC_GUARD(str);
1941  return p;
1942 }
1943 
1944 VALUE
1945 rb_str_substr(VALUE str, long beg, long len)
1946 {
1947  VALUE str2;
1948  char *p = rb_str_subpos(str, beg, &len);
1949 
1950  if (!p) return Qnil;
1951  if (len > RSTRING_EMBED_LEN_MAX && p + len == RSTRING_END(str)) {
1952  str2 = rb_str_new4(str);
1953  str2 = str_new3(rb_obj_class(str2), str2);
1954  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
1955  RSTRING(str2)->as.heap.len = len;
1956  }
1957  else {
1958  str2 = rb_str_new5(str, p, len);
1959  OBJ_INFECT(str2, str);
1960  RB_GC_GUARD(str);
1961  }
1962  rb_enc_cr_str_copy_for_substr(str2, str);
1963 
1964  return str2;
1965 }
1966 
/*
 * Freezes str with rb_obj_freeze() and returns its result; when str is an
 * STR_ASSOC string, the array held in its aux.shared slot is frozen first.
 *
 * NOTE(review): the declarator line (listing line 1968) is missing from this
 * listing; upstream name is presumably rb_str_freeze -- confirm.
 */
1967 VALUE
1969 {
1970  if (STR_ASSOC_P(str)) {
1971  VALUE ary = RSTRING(str)->as.heap.aux.shared;
1972  OBJ_FREEZE(ary);
1973  }
1974  return rb_obj_freeze(str);
1975 }
1976 
/* From here on, rb_str_dup_frozen in this file expands to rb_str_new_frozen.
 * NOTE(review): listing lines 1976-1977 just above are missing from this
 * listing; check upstream for what preceded this define. */
1978 #define rb_str_dup_frozen rb_str_new_frozen
1979 
1980 VALUE
1981 rb_str_locktmp(VALUE str)
1982 {
1983  if (FL_TEST(str, STR_TMPLOCK)) {
1984  rb_raise(rb_eRuntimeError, "temporal locking already locked string");
1985  }
1986  FL_SET(str, STR_TMPLOCK);
1987  return str;
1988 }
1989 
/*
 * Clears the STR_TMPLOCK flag of str and returns str. Raises RuntimeError
 * when the flag is not set.
 *
 * NOTE(review): the declarator line (listing line 1991) is missing from this
 * listing; this file passes rb_str_unlocktmp to rb_ensure() with str as its
 * argument, which matches this body.
 */
1990 VALUE
1992 {
1993  if (!FL_TEST(str, STR_TMPLOCK)) {
1994  rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
1995  }
1996  FL_UNSET(str, STR_TMPLOCK);
1997  return str;
1998 }
1999 
/*
 * Locks str with rb_str_locktmp(), then calls func(arg) through rb_ensure()
 * with rb_str_unlocktmp(str) registered as the ensure handler, and returns
 * rb_ensure()'s result.
 *
 * NOTE(review): the declarator line (listing line 2001) is missing from this
 * listing; the body uses str, func and arg. Upstream name is presumably
 * rb_str_locktmp_ensure -- confirm.
 */
2000 VALUE
2002 {
2003  rb_str_locktmp(str);
2004  return rb_ensure(func, arg, rb_str_unlocktmp, str);
2005 }
2006 
2007 void
2008 rb_str_set_len(VALUE str, long len)
2009 {
2010  long capa;
2011  const int termlen = TERM_LEN(str);
2012 
2013  str_modifiable(str);
2014  if (STR_SHARED_P(str)) {
2015  rb_raise(rb_eRuntimeError, "can't set length of shared string");
2016  }
2017  if (len + termlen - 1 > (capa = (long)rb_str_capacity(str))) {
2018  rb_bug("probable buffer overflow: %ld for %ld", len, capa);
2019  }
2020  STR_SET_LEN(str, len);
2021  TERM_FILL(&RSTRING_PTR(str)[len], termlen);
2022 }
2023 
/*
 * Changes the byte length of str to len and returns str, switching between
 * the embedded and the heap representation as required. Raises
 * ArgumentError when len is negative. The cached code range is cleared.
 */
2024 VALUE
2025 rb_str_resize(VALUE str, long len)
2026 {
2027  long slen;
2028  int independent;
2029 
2030  if (len < 0) {
2031  rb_raise(rb_eArgError, "negative string size (or size too big)");
2032  }
2033 
2034  independent = str_independent(str);
2035  ENC_CODERANGE_CLEAR(str);
2036  slen = RSTRING_LEN(str);
2037  {
2038  long capa;
2039  const int termlen = TERM_LEN(str);
 /* Embedded string: stay embedded when the new size still fits, otherwise
  * move to a heap buffer. */
2040  if (STR_EMBED_P(str)) {
2041  if (len == slen) return str;
2042  if (len + termlen <= RSTRING_EMBED_LEN_MAX + 1) {
2043  STR_SET_EMBED_LEN(str, len);
2044  TERM_FILL(RSTRING(str)->as.ary + len, termlen);
2045  return str;
2046  }
2047  str_make_independent_expand(str, len - slen);
2048  STR_SET_NOEMBED(str);
2049  }
 /* Heap string whose new size fits the embedded buffer: copy the kept
  * bytes in and release the heap buffer if this string owned it. */
2050  else if (len + termlen <= RSTRING_EMBED_LEN_MAX + 1) {
2051  char *ptr = STR_HEAP_PTR(str);
2052  STR_SET_EMBED(str);
2053  if (slen > len) slen = len;
2054  if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, slen);
2055  TERM_FILL(RSTRING(str)->as.ary + len, termlen);
2056  STR_SET_EMBED_LEN(str, len);
2057  if (independent) ruby_xfree(ptr);
2058  return str;
2059  }
2060  else if (!independent) {
2061  if (len == slen) return str;
2062  str_make_independent_expand(str, len - slen);
2063  }
 /* Owned heap buffer: reallocate when it is too small, or when the slack
  * would exceed min(len, 1024) bytes. */
2064  else if ((capa = RSTRING(str)->as.heap.aux.capa) < len ||
2065  (capa - len) > (len < 1024 ? len : 1024)) {
2066  REALLOC_N(RSTRING(str)->as.heap.ptr, char, len + termlen);
2067  RSTRING(str)->as.heap.aux.capa = len;
2068  }
2069  else if (len == slen) return str;
2070  RSTRING(str)->as.heap.len = len;
2071  TERM_FILL(RSTRING(str)->as.heap.ptr + len, termlen); /* sentinel */
2072  }
2073  return str;
2074 }
2075 
/*
 * Appends the len bytes at ptr to str, growing the buffer as needed, and
 * returns str. Raises ArgumentError when the combined length would reach
 * LONG_MAX.
 *
 * ptr may point into str's own buffer: its offset is remembered before the
 * buffer can be replaced and the pointer is rebuilt afterwards.
 *
 * NOTE(review): for len == 0 this returns 0 rather than str (after
 * rb_str_modify() has already run). The callers visible in this part of the
 * file either filter out len == 0 first or ignore the return value.
 */
2076 static VALUE
2077 str_buf_cat(VALUE str, const char *ptr, long len)
2078 {
2079  long capa, total, off = -1;
2080  const int termlen = TERM_LEN(str);
2081 
2082  if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) {
2083  off = ptr - RSTRING_PTR(str);
2084  }
2085  rb_str_modify(str);
2086  if (len == 0) return 0;
 /* An associated string keeps its array where capa normally lives, so drop
  * the association and treat the buffer as exactly full. */
2087  if (STR_ASSOC_P(str)) {
2088  FL_UNSET(str, STR_ASSOC);
2089  capa = RSTRING(str)->as.heap.aux.capa = RSTRING_LEN(str);
2090  }
2091  else if (STR_EMBED_P(str)) {
2092  capa = RSTRING_EMBED_LEN_MAX;
2093  }
2094  else {
2095  capa = RSTRING(str)->as.heap.aux.capa;
2096  }
2097  if (RSTRING_LEN(str) >= LONG_MAX - len) {
2098  rb_raise(rb_eArgError, "string sizes too big");
2099  }
2100  total = RSTRING_LEN(str)+len;
2101  if (capa <= total) {
 /* Grow by doubling; once doubling could overflow, round the required
  * size up to a multiple of 4096 instead. */
2102  while (total > capa) {
2103  if (capa + termlen >= LONG_MAX / 2) {
2104  capa = (total + 4095) / 4096 * 4096;
2105  break;
2106  }
2107  capa = (capa + termlen) * 2;
2108  }
2109  RESIZE_CAPA(str, capa);
2110  }
 /* Rebuild a source pointer that referred to str's own buffer. */
2111  if (off != -1) {
2112  ptr = RSTRING_PTR(str) + off;
2113  }
2114  memcpy(RSTRING_PTR(str) + RSTRING_LEN(str), ptr, len);
2115  STR_SET_LEN(str, total);
2116  RSTRING_PTR(str)[total] = '\0'; /* sentinel */
2117 
2118  return str;
2119 }
2120 
/* Append the NUL-terminated C string ptr to str via str_buf_cat(); note that
 * ptr is evaluated twice. */
2121 #define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
2122 
2123 VALUE
2124 rb_str_buf_cat(VALUE str, const char *ptr, long len)
2125 {
2126  if (len == 0) return str;
2127  if (len < 0) {
2128  rb_raise(rb_eArgError, "negative string size (or size too big)");
2129  }
2130  return str_buf_cat(str, ptr, len);
2131 }
2132 
2133 VALUE
2134 rb_str_buf_cat2(VALUE str, const char *ptr)
2135 {
2136  return rb_str_buf_cat(str, ptr, strlen(ptr));
2137 }
2138 
2139 VALUE
2140 rb_str_cat(VALUE str, const char *ptr, long len)
2141 {
2142  if (len < 0) {
2143  rb_raise(rb_eArgError, "negative string size (or size too big)");
2144  }
2145  if (STR_ASSOC_P(str)) {
2146  char *p;
2147  rb_str_modify_expand(str, len);
2148  p = RSTRING(str)->as.heap.ptr;
2149  memcpy(p + RSTRING(str)->as.heap.len, ptr, len);
2150  len = RSTRING(str)->as.heap.len += len;
2151  TERM_FILL(p, TERM_LEN(str)); /* sentinel */
2152  return str;
2153  }
2154 
2155  return rb_str_buf_cat(str, ptr, len);
2156 }
2157 
2158 VALUE
2159 rb_str_cat2(VALUE str, const char *ptr)
2160 {
2161  return rb_str_cat(str, ptr, strlen(ptr));
2162 }
2163 
/*
 * Appends the len bytes at ptr, whose encoding index is ptr_encindex and
 * whose code range is ptr_cr, to str, and returns str with its encoding
 * index and code range updated for the combined contents.
 *
 * ptr_cr may be ENC_CODERANGE_UNKNOWN, in which case the bytes are scanned
 * when the decision needs it. When ptr_cr_ret is non-null, the code range
 * finally used for ptr is stored there.
 *
 * Raises EncCompatError when the two encodings cannot be combined and
 * ArgumentError when len is negative.
 */
2164 static VALUE
2165 rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
2166  int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
2167 {
2168  int str_encindex = ENCODING_GET(str);
2169  int res_encindex;
2170  int str_cr, res_cr;
2171 
 /* An empty receiver is treated as 7-bit. */
2172  str_cr = RSTRING_LEN(str) ? ENC_CODERANGE(str) : ENC_CODERANGE_7BIT;
2173 
2174  if (str_encindex == ptr_encindex) {
2175  if (str_cr == ENC_CODERANGE_UNKNOWN)
2176  ptr_cr = ENC_CODERANGE_UNKNOWN;
2177  else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2178  ptr_cr = coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
2179  }
2180  }
2181  else {
2182  rb_encoding *str_enc = rb_enc_from_index(str_encindex);
2183  rb_encoding *ptr_enc = rb_enc_from_index(ptr_encindex);
 /* With an ASCII-incompatible side, only appending nothing or appending to
  * an empty receiver is accepted. */
2184  if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
2185  if (len == 0)
2186  return str;
2187  if (RSTRING_LEN(str) == 0) {
2188  rb_str_buf_cat(str, ptr, len);
2189  ENCODING_CODERANGE_SET(str, ptr_encindex, ptr_cr);
2190  return str;
2191  }
2192  goto incompatible;
2193  }
2194  if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2195  ptr_cr = coderange_scan(ptr, len, ptr_enc);
2196  }
2197  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2198  if (ENCODING_IS_ASCII8BIT(str) || ptr_cr != ENC_CODERANGE_7BIT) {
2199  str_cr = rb_enc_str_coderange(str);
2200  }
2201  }
2202  }
2203  if (ptr_cr_ret)
2204  *ptr_cr_ret = ptr_cr;
2205 
 /* Different encodings can only be mixed when at least one side is 7-bit. */
2206  if (str_encindex != ptr_encindex &&
2207  str_cr != ENC_CODERANGE_7BIT &&
2208  ptr_cr != ENC_CODERANGE_7BIT) {
2209  incompatible:
2210  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
2211  rb_enc_name(rb_enc_from_index(str_encindex)),
2212  rb_enc_name(rb_enc_from_index(ptr_encindex)));
2213  }
2214 
 /* Choose the encoding index and code range of the result. */
2215  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2216  res_encindex = str_encindex;
2217  res_cr = ENC_CODERANGE_UNKNOWN;
2218  }
2219  else if (str_cr == ENC_CODERANGE_7BIT) {
2220  if (ptr_cr == ENC_CODERANGE_7BIT) {
2221  res_encindex = str_encindex;
2222  res_cr = ENC_CODERANGE_7BIT;
2223  }
2224  else {
2225  res_encindex = ptr_encindex;
2226  res_cr = ptr_cr;
2227  }
2228  }
2229  else if (str_cr == ENC_CODERANGE_VALID) {
2230  res_encindex = str_encindex;
2231  if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
2232  res_cr = str_cr;
2233  else
2234  res_cr = ptr_cr;
2235  }
2236  else { /* str_cr == ENC_CODERANGE_BROKEN */
2237  res_encindex = str_encindex;
2238  res_cr = str_cr;
2239  if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN;
2240  }
2241 
 /* NOTE(review): this check runs only after len may already have been
  * passed to coderange_scan() above. */
2242  if (len < 0) {
2243  rb_raise(rb_eArgError, "negative string size (or size too big)");
2244  }
2245  str_buf_cat(str, ptr, len);
2246  ENCODING_CODERANGE_SET(str, res_encindex, res_cr);
2247  return str;
2248 }
2249 
/*
 * Appends the len bytes at ptr, encoded in ptr_enc, to str by delegating to
 * rb_enc_cr_str_buf_cat().
 *
 * NOTE(review): listing line 2254, which carried the remaining arguments of
 * that call, is missing from this listing -- restore it from upstream before
 * building.
 */
2250 VALUE
2251 rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
2252 {
2253  return rb_enc_cr_str_buf_cat(str, ptr, len,
2255 }
2256 
2257 VALUE
2258 rb_str_buf_cat_ascii(VALUE str, const char *ptr)
2259 {
2260  /* ptr must reference NUL terminated ASCII string. */
2261  int encindex = ENCODING_GET(str);
2262  rb_encoding *enc = rb_enc_from_index(encindex);
2263  if (rb_enc_asciicompat(enc)) {
2264  return rb_enc_cr_str_buf_cat(str, ptr, strlen(ptr),
2265  encindex, ENC_CODERANGE_7BIT, 0);
2266  }
2267  else {
2268  char *buf = ALLOCA_N(char, rb_enc_mbmaxlen(enc));
2269  while (*ptr) {
2270  unsigned int c = (unsigned char)*ptr;
2271  int len = rb_enc_codelen(c, enc);
2272  rb_enc_mbcput(c, buf, enc);
2273  rb_enc_cr_str_buf_cat(str, buf, len,
2274  encindex, ENC_CODERANGE_VALID, 0);
2275  ptr++;
2276  }
2277  return str;
2278  }
2279 }
2280 
/*
 * Appends the contents of str2 to str through rb_enc_cr_str_buf_cat(),
 * infects str from str2 and returns str. The code range determined for str2
 * during the append is stored back on str2.
 *
 * NOTE(review): the declarator line (listing line 2282) is missing from this
 * listing; a call site in this file invokes this as
 * rb_str_buf_append(str, str2).
 */
2281 VALUE
2283 {
2284  int str2_cr;
2285 
2286  str2_cr = ENC_CODERANGE(str2);
2287 
2288  rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
2289  ENCODING_GET(str2), str2_cr, &str2_cr);
2290 
2291  OBJ_INFECT(str, str2);
2292  ENC_CODERANGE_SET(str2, str2_cr);
2293 
2294  return str;
2295 }
2296 
/*
 * Appends str2 (converted with StringValue) to str and returns str.
 *
 * A non-empty str2 appended to an STR_ASSOC string is handled inline;
 * everything else goes through rb_str_buf_append().
 *
 * NOTE(review): the declarator line (listing line 2298) is missing from this
 * listing; a call site in this file invokes this as
 * rb_str_append(str1, str2).
 */
2297 VALUE
2299 {
2300  rb_encoding *enc;
2301  int cr, cr2;
2302  long len2;
2303 
2304  StringValue(str2);
2305  if ((len2 = RSTRING_LEN(str2)) > 0 && STR_ASSOC_P(str)) {
2306  long len1 = RSTRING(str)->as.heap.len, len = len1 + len2;
2307  enc = rb_enc_check(str, str2);
 /* The result takes the numerically larger code range value of the two,
  * or str2's when the receiver is empty. */
2308  cr = ENC_CODERANGE(str);
2309  if ((cr2 = ENC_CODERANGE(str2)) > cr || RSTRING_LEN(str) == 0)
2310  cr = cr2;
2311  rb_str_modify_expand(str, len2);
2312  memcpy(RSTRING(str)->as.heap.ptr + len1, RSTRING_PTR(str2), len2);
2313  TERM_FILL(RSTRING(str)->as.heap.ptr + len, rb_enc_mbminlen(enc));
2314  RSTRING(str)->as.heap.len = len;
2315  rb_enc_associate(str, enc);
2316  ENC_CODERANGE_SET(str, cr);
2317  OBJ_INFECT(str, str2);
2318  return str;
2319  }
2320  return rb_str_buf_append(str, str2);
2321 }
2322 
2323 /*
2324  * call-seq:
2325  * str << integer -> str
2326  * str.concat(integer) -> str
2327  * str << obj -> str
2328  * str.concat(obj) -> str
2329  *
2330  * Append---Concatenates the given object to <i>str</i>. If the object is a
2331  * <code>Integer</code>, it is considered as a codepoint, and is converted
2332  * to a character before concatenation.
2333  *
2334  * a = "hello "
2335  * a << "world" #=> "hello world"
2336  * a.concat(33) #=> "hello world!"
2337  */
2338 
2339 VALUE
2341 {
 /* NOTE(review): the declarator line (listing line 2340) is missing from
  * this listing; the body uses the operands str1 and str2. Upstream name is
  * presumably rb_str_concat -- confirm. */
2342  unsigned int code;
2343  rb_encoding *enc = STR_ENC_GET(str1);
2344 
 /* Integers are taken as a codepoint; any other object is appended as a
  * string. */
2345  if (FIXNUM_P(str2) || RB_TYPE_P(str2, T_BIGNUM)) {
2346  if (rb_num_to_uint(str2, &code) == 0) {
2347  }
2348  else if (FIXNUM_P(str2)) {
2349  rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
2350  }
2351  else {
2352  rb_raise(rb_eRangeError, "bignum out of char range");
2353  }
2354  }
2355  else {
2356  return rb_str_append(str1, str2);
2357  }
2358 
2359  if (enc == rb_usascii_encoding()) {
2360  /* US-ASCII automatically extended to ASCII-8BIT */
2361  char buf[1];
2362  buf[0] = (char)code;
2363  if (code > 0xFF) {
2364  rb_raise(rb_eRangeError, "%u out of char range", code);
2365  }
2366  rb_str_cat(str1, buf, 1);
2367  if (code > 127) {
 /* NOTE(review): listing lines 2368-2369 (the body of this branch) are
  * missing from this listing. */
2370  }
2371  }
2372  else {
2373  long pos = RSTRING_LEN(str1);
2374  int cr = ENC_CODERANGE(str1);
2375  int len;
2376  char *buf;
2377 
 /* NOTE(review): listing lines 2379 and 2382 are missing from this listing;
  * presumably they were further case labels of this switch -- confirm. */
2378  switch (len = rb_enc_codelen(code, enc)) {
2380  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2381  break;
2383  case 0:
2384  rb_raise(rb_eRangeError, "%u out of char range", code);
2385  break;
2386  }
 /* Encode the codepoint into a scratch buffer and verify that it decodes
  * back to exactly len bytes before growing the receiver. */
2387  buf = ALLOCA_N(char, len + 1);
2388  rb_enc_mbcput(code, buf, enc);
2389  if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
2390  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2391  }
2392  rb_str_resize(str1, pos+len);
2393  memcpy(RSTRING_PTR(str1) + pos, buf, len);
 /* A 7-bit receiver becomes merely valid once a non-ASCII codepoint is
  * appended. */
2394  if (cr == ENC_CODERANGE_7BIT && code > 127)
2395  cr = ENC_CODERANGE_VALID;
2396  ENC_CODERANGE_SET(str1, cr);
2397  }
2398  return str1;
2399 }
2400 
2401 /*
2402  * call-seq:
2403  * str.prepend(other_str) -> str
2404  *
2405  * Prepend---Prepend the given string to <i>str</i>.
2406  *
2407  * a = "world"
2408  * a.prepend("hello ") #=> "hello world"
2409  * a #=> "hello world"
2410  */
2411 
2412 static VALUE
2414 {
 /* NOTE(review): the declarator line (listing line 2413) is missing from
  * this listing; the body uses the operands str and str2. Upstream name is
  * presumably rb_str_prepend -- confirm. */
2415  StringValue(str2);
2416  StringValue(str);
 /* Replace the empty range at offset 0 with str2, i.e. insert it in front. */
2417  rb_str_update(str, 0L, 0L, str2);
2418  return str;
2419 }
2420 
/*
 * Returns rb_memhash() of the bytes of str XORed with its encoding index.
 * The index is replaced by 0 when the string's code range is 7-bit, so the
 * encoding does not affect the hash of such strings.
 *
 * NOTE(review): the declarator line (listing line 2422) is missing from this
 * listing; a call site in this file invokes this as rb_str_hash(str).
 */
2421 st_index_t
2423 {
2424  int e = ENCODING_GET(str);
2425  if (e && rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
2426  e = 0;
2427  }
2428  return rb_memhash((const void *)RSTRING_PTR(str), RSTRING_LEN(str)) ^ e;
2429 }
2430 
/*
 * Returns 0 when str1 and str2 are comparable, have the same byte length and
 * the same bytes; returns 1 otherwise.
 *
 * NOTE(review): the declarator line (listing line 2432) is missing from this
 * listing; upstream name is presumably rb_str_hash_cmp -- confirm.
 */
2431 int
2433 {
2434  long len;
2435 
2436  if (!rb_str_comparable(str1, str2)) return 1;
2437  if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) &&
2438  memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) {
2439  return 0;
2440  }
2441  return 1;
2442 }
2443 
2444 /*
2445  * call-seq:
2446  * str.hash -> fixnum
2447  *
2448  * Return a hash based on the string's length and content.
2449  */
2450 
2451 static VALUE
2453 {
 /* NOTE(review): the declarator line (listing line 2452) is missing from
  * this listing; upstream name is presumably rb_str_hash_m -- confirm.
  * The body wraps the rb_str_hash() value with INT2FIX. */
2454  st_index_t hval = rb_str_hash(str);
2455  return INT2FIX(hval);
2456 }
2457 
/* Smaller of a and b; each argument may be evaluated twice. */
2458 #define lesser(a,b) (((a)>(b))?(b):(a))
2459 
/*
 * Returns TRUE when either string is empty, when both have the same
 * encoding index, or when both are 7-bit; the remaining cases depend on
 * which side is 7-bit.
 *
 * NOTE(review): the declarator line (listing line 2461) and listing lines
 * 2475 and 2479 are missing from this listing. As shown, each "return TRUE"
 * that followed a missing line is unconditional, which is presumably not
 * what upstream does -- restore those lines before relying on this logic.
 * Call sites in this file invoke this as rb_str_comparable(str1, str2).
 */
2460 int
2462 {
2463  int idx1, idx2;
2464  int rc1, rc2;
2465 
2466  if (RSTRING_LEN(str1) == 0) return TRUE;
2467  if (RSTRING_LEN(str2) == 0) return TRUE;
2468  idx1 = ENCODING_GET(str1);
2469  idx2 = ENCODING_GET(str2);
2470  if (idx1 == idx2) return TRUE;
2471  rc1 = rb_enc_str_coderange(str1);
2472  rc2 = rb_enc_str_coderange(str2);
2473  if (rc1 == ENC_CODERANGE_7BIT) {
2474  if (rc2 == ENC_CODERANGE_7BIT) return TRUE;
2476  return TRUE;
2477  }
2478  if (rc2 == ENC_CODERANGE_7BIT) {
2480  return TRUE;
2481  }
2482  return FALSE;
2483 }
2484 
/*
 * Three-way byte comparison of str1 and str2: returns -1, 0 or 1.
 *
 * The common prefix is compared with memcmp(); when it is equal the longer
 * string is greater. Strings with identical bytes that are not comparable
 * are ordered by their encoding index.
 *
 * NOTE(review): the declarator line (listing line 2486) is missing from this
 * listing; call sites in this file invoke this as rb_str_cmp(str1, str2).
 */
2485 int
2487 {
2488  long len1, len2;
2489  const char *ptr1, *ptr2;
2490  int retval;
2491 
2492  if (str1 == str2) return 0;
2493  RSTRING_GETMEM(str1, ptr1, len1);
2494  RSTRING_GETMEM(str2, ptr2, len2);
 /* Identical buffer pointers skip the memcmp altogether. */
2495  if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
2496  if (len1 == len2) {
2497  if (!rb_str_comparable(str1, str2)) {
2498  if (ENCODING_GET(str1) > ENCODING_GET(str2))
2499  return 1;
2500  return -1;
2501  }
2502  return 0;
2503  }
2504  if (len1 > len2) return 1;
2505  return -1;
2506  }
2507  if (retval > 0) return 1;
2508  return -1;
2509 }
2510 
2511 /* expect tail call optimization */
2512 static VALUE
2513 str_eql(const VALUE str1, const VALUE str2)
2514 {
2515  const long len = RSTRING_LEN(str1);
2516  const char *ptr1, *ptr2;
2517 
2518  if (len != RSTRING_LEN(str2)) return Qfalse;
2519  if (!rb_str_comparable(str1, str2)) return Qfalse;
2520  if ((ptr1 = RSTRING_PTR(str1)) == (ptr2 = RSTRING_PTR(str2)))
2521  return Qtrue;
2522  if (memcmp(ptr1, ptr2, len) == 0)
2523  return Qtrue;
2524  return Qfalse;
2525 }
2526 
2527 /*
2528  * call-seq:
2529  * str == obj -> true or false
2530  * str === obj -> true or false
2531  *
2532  * === Equality
2533  *
2534  * Returns whether +str+ == +obj+, similar to Object#==.
2535  *
2536  * If +obj+ is not an instance of String but responds to +to_str+, then the
2537  * two strings are compared using case equality Object#===.
2538  *
2539  * Otherwise, returns similarly to String#eql?, comparing length and content.
2540  */
2541 
2542 VALUE
2544 {
 /* NOTE(review): the declarator line (listing line 2543) is missing from
  * this listing; the body uses the operands str1 and str2. Upstream name is
  * presumably rb_str_equal -- confirm. */
2545  if (str1 == str2) return Qtrue;
 /* A non-String that responds to to_str decides equality itself, with the
  * operands swapped; any other non-String is never equal. */
2546  if (!RB_TYPE_P(str2, T_STRING)) {
2547  if (!rb_respond_to(str2, rb_intern("to_str"))) {
2548  return Qfalse;
2549  }
2550  return rb_equal(str2, str1);
2551  }
2552  return str_eql(str1, str2);
2553 }
2554 
2555 /*
2556  * call-seq:
2557  * str.eql?(other) -> true or false
2558  *
2559  * Two strings are equal if they have the same length and content.
2560  */
2561 
2562 static VALUE
2564 {
 /* NOTE(review): the declarator line (listing line 2563) is missing from
  * this listing; the body uses the operands str1 and str2. Upstream name is
  * presumably rb_str_eql -- confirm. Unlike the to_str-aware comparison,
  * a non-String operand is simply not equal here. */
2565  if (str1 == str2) return Qtrue;
2566  if (!RB_TYPE_P(str2, T_STRING)) return Qfalse;
2567  return str_eql(str1, str2);
2568 }
2569 
2570 /*
2571  * call-seq:
2572  * string <=> other_string -> -1, 0, +1 or nil
2573  *
2574  *
2575  * Comparison---Returns -1, 0, +1 or nil depending on whether +string+ is less
2576  * than, equal to, or greater than +other_string+.
2577  *
2578  * +nil+ is returned if the two values are incomparable.
2579  *
2580  * If the strings are of different lengths, and the strings are equal when
2581  * compared up to the shortest length, then the longer string is considered
2582  * greater than the shorter one.
2583  *
2584  * <code><=></code> is the basis for the methods <code><</code>,
2585  * <code><=</code>, <code>></code>, <code>>=</code>, and
2586  * <code>between?</code>, included from module Comparable. The method
2587  * String#== does not use Comparable#==.
2588  *
2589  * "abcdef" <=> "abcde" #=> 1
2590  * "abcdef" <=> "abcdef" #=> 0
2591  * "abcdef" <=> "abcdefg" #=> -1
2592  * "abcdef" <=> "ABCDEF" #=> 1
2593  */
2594 
2595 static VALUE
2597 {
 /* NOTE(review): the declarator line (listing line 2596) is missing from
  * this listing; the body uses the operands str1 and str2. Upstream name is
  * presumably rb_str_cmp_m -- confirm. */
2598  int result;
2599 
 /* A non-String operand is asked for to_str; when that does not yield a
  * String, the comparison is delegated to rb_invcmp(). */
2600  if (!RB_TYPE_P(str2, T_STRING)) {
2601  VALUE tmp = rb_check_funcall(str2, rb_intern("to_str"), 0, 0);
2602  if (RB_TYPE_P(tmp, T_STRING)) {
2603  result = rb_str_cmp(str1, tmp);
2604  }
2605  else {
2606  return rb_invcmp(str1, str2);
2607  }
2608  }
2609  else {
2610  result = rb_str_cmp(str1, str2);
2611  }
2612  return INT2FIX(result);
2613 }
2614 
2615 /*
2616  * call-seq:
2617  * str.casecmp(other_str) -> -1, 0, +1 or nil
2618  *
2619  * Case-insensitive version of <code>String#<=></code>.
2620  *
2621  * "abcdef".casecmp("abcde") #=> 1
2622  * "aBcDeF".casecmp("abcdef") #=> 0
2623  * "abcdef".casecmp("abcdefg") #=> -1
2624  * "abcdef".casecmp("ABCDEF") #=> 0
2625  */
2626 
2627 static VALUE
2629 {
 /* NOTE(review): the declarator line (listing line 2628) is missing from
  * this listing; the body uses the operands str1 and str2. Upstream name is
  * presumably rb_str_casecmp -- confirm. */
2630  long len;
2631  rb_encoding *enc;
2632  char *p1, *p1end, *p2, *p2end;
2633 
2634  StringValue(str2);
2635  enc = rb_enc_compatible(str1, str2);
2636  if (!enc) {
2637  return Qnil;
2638  }
2639 
2640  p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
2641  p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
 /* Single-byte path: compare byte by byte, upcasing only on a mismatch. */
2642  if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
2643  while (p1 < p1end && p2 < p2end) {
2644  if (*p1 != *p2) {
2645  unsigned int c1 = TOUPPER(*p1 & 0xff);
2646  unsigned int c2 = TOUPPER(*p2 & 0xff);
2647  if (c1 != c2)
2648  return INT2FIX(c1 < c2 ? -1 : 1);
2649  }
2650  p1++;
2651  p2++;
2652  }
2653  }
2654  else {
2655  while (p1 < p1end && p2 < p2end) {
2656  int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
2657  int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
2658 
 /* Both sides yielded an ASCII character: compare them upcased. */
2659  if (0 <= c1 && 0 <= c2) {
2660  c1 = TOUPPER(c1);
2661  c2 = TOUPPER(c2);
2662  if (c1 != c2)
2663  return INT2FIX(c1 < c2 ? -1 : 1);
2664  }
 /* Otherwise compare the raw bytes of the two characters; on a tie the
  * shorter character sorts first. */
2665  else {
2666  int r;
2667  l1 = rb_enc_mbclen(p1, p1end, enc);
2668  l2 = rb_enc_mbclen(p2, p2end, enc);
2669  len = l1 < l2 ? l1 : l2;
2670  r = memcmp(p1, p2, len);
2671  if (r != 0)
2672  return INT2FIX(r < 0 ? -1 : 1);
2673  if (l1 != l2)
2674  return INT2FIX(l1 < l2 ? -1 : 1);
2675  }
2676  p1 += l1;
2677  p2 += l2;
2678  }
2679  }
 /* One string ran out without a difference: order by byte length. */
2680  if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0);
2681  if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1);
2682  return INT2FIX(-1);
2683 }
2684 
2685 static long
2686 rb_str_index(VALUE str, VALUE sub, long offset)
2687 {
2688  char *s, *sptr, *e;
2689  long pos, len, slen;
2690  int single_byte = single_byte_optimizable(str);
2691  rb_encoding *enc;
2692 
2693  enc = rb_enc_check(str, sub);
2694  if (is_broken_string(sub)) return -1;
2695 
2696  len = single_byte ? RSTRING_LEN(str) : str_strlen(str, enc);
2697  slen = str_strlen(sub, enc);
2698  if (offset < 0) {
2699  offset += len;
2700  if (offset < 0) return -1;
2701  }
2702  if (len - offset < slen) return -1;
2703 
2704  s = RSTRING_PTR(str);
2705  e = RSTRING_END(str);
2706  if (offset) {
2707  offset = str_offset(s, e, offset, enc, single_byte);
2708  s += offset;
2709  }
2710  if (slen == 0) return offset;
2711  /* need proceed one character at a time */
2712  sptr = RSTRING_PTR(sub);
2713  slen = RSTRING_LEN(sub);
2714  len = RSTRING_LEN(str) - offset;
2715  for (;;) {
2716  char *t;
2717  pos = rb_memsearch(sptr, slen, s, len, enc);
2718  if (pos < 0) return pos;
2719  t = rb_enc_right_char_head(s, s+pos, e, enc);
2720  if (t == s + pos) break;
2721  len -= t - s;
2722  if (len <= 0) return -1;
2723  offset += t - s;
2724  s = t;
2725  }
2726  return pos + offset;
2727 }
2728 
2729 
2730 /*
2731  * call-seq:
2732  * str.index(substring [, offset]) -> fixnum or nil
2733  * str.index(regexp [, offset]) -> fixnum or nil
2734  *
2735  * Returns the index of the first occurrence of the given <i>substring</i> or
2736  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2737  * found. If the second parameter is present, it specifies the position in the
2738  * string to begin the search.
2739  *
2740  * "hello".index('e') #=> 1
2741  * "hello".index('lo') #=> 3
2742  * "hello".index('a') #=> nil
2743  * "hello".index(?e) #=> 1
2744  * "hello".index(/[aeiou]/, -3) #=> 4
2745  */
2746 
/*
 * NOTE(review): listing line 2748 (the declarator with the function name and
 * parameters) and listing line 2764 (the only statement inside the
 * "sub is a Regexp" branch below) are absent from this listing.  The body
 * reads argc, argv and str.
 *
 * Accepts one required argument (the pattern, sub) and one optional argument
 * (the start position).  Returns Qnil when the search result is -1,
 * otherwise the position converted with LONG2NUM().
 */
2747 static VALUE
2749 {
2750  VALUE sub;
2751  VALUE initpos;
2752  long pos;
2753 
2754  if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
2755  pos = NUM2LONG(initpos);
2756  }
2757  else {
2758  pos = 0;
2759  }
2760  if (pos < 0) {
  /* negative start: count from the end, measured with str_strlen() */
2761  pos += str_strlen(str, STR_ENC_GET(str));
2762  if (pos < 0) {
2763  if (RB_TYPE_P(sub, T_REGEXP)) {
  /* NOTE(review): body of this branch (listing line 2764) is missing here */
2765  }
2766  return Qnil;
2767  }
2768  }
2769 
  /* special constants cannot be passed to BUILTIN_TYPE(); treat them as "other" */
2770  if (SPECIAL_CONST_P(sub)) goto generic;
2771  switch (BUILTIN_TYPE(sub)) {
2772  case T_REGEXP:
2773  if (pos > str_strlen(str, STR_ENC_GET(str)))
2774  return Qnil;
  /* convert pos with str_offset() before the regexp search, and back with
     rb_str_sublen() afterwards */
2775  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2776  rb_enc_check(str, sub), single_byte_optimizable(str));
2777 
2778  pos = rb_reg_search(sub, str, pos, 0);
2779  pos = rb_str_sublen(str, pos);
2780  break;
2781 
2782  generic:
2783  default: {
2784  VALUE tmp;
2785 
  /* anything else must be convertible to a String, or TypeError is raised */
2786  tmp = rb_check_string_type(sub);
2787  if (NIL_P(tmp)) {
2788  rb_raise(rb_eTypeError, "type mismatch: %s given",
2789  rb_obj_classname(sub));
2790  }
2791  sub = tmp;
2792  }
2793  /* fall through */
2794  case T_STRING:
2795  pos = rb_str_index(str, sub, pos);
2796  pos = rb_str_sublen(str, pos);
2797  break;
2798  }
2799 
2800  if (pos == -1) return Qnil;
2801  return LONG2NUM(pos);
2802 }
2803 
2804 #ifdef HAVE_MEMRCHR
2805 static long
2806 str_rindex(VALUE str, VALUE sub, const char *s, long pos, rb_encoding *enc)
2807 {
2808  char *hit, *adjusted;
2809  int c;
2810  long slen, searchlen;
2811  char *sbeg, *e, *t;
2812 
2813  slen = RSTRING_LEN(sub);
2814  if (slen == 0) return pos;
2815  sbeg = RSTRING_PTR(str);
2816  e = RSTRING_END(str);
2817  t = RSTRING_PTR(sub);
2818  c = *t & 0xff;
2819  searchlen = s - sbeg + 1;
2820 
2821  do {
2822  hit = memrchr(sbeg, c, searchlen);
2823  if (!hit) break;
2824  adjusted = rb_enc_left_char_head(sbeg, hit, e, enc);
2825  if (hit != adjusted) {
2826  searchlen = adjusted - sbeg;
2827  continue;
2828  }
2829  if (memcmp(hit, t, slen) == 0)
2830  return rb_str_sublen(str, hit - sbeg);
2831  searchlen = adjusted - sbeg;
2832  } while (searchlen > 0);
2833 
2834  return -1;
2835 }
2836 #else
2837 static long
2838 str_rindex(VALUE str, VALUE sub, const char *s, long pos, rb_encoding *enc)
2839 {
2840  long slen;
2841  char *sbeg, *e, *t;
2842 
2843  sbeg = RSTRING_PTR(str);
2844  e = RSTRING_END(str);
2845  t = RSTRING_PTR(sub);
2846  slen = RSTRING_LEN(sub);
2847 
2848  while (s) {
2849  if (memcmp(s, t, slen) == 0) {
2850  return pos;
2851  }
2852  if (pos == 0) break;
2853  pos--;
2854  s = rb_enc_prev_char(sbeg, s, e, enc);
2855  }
2856 
2857  return -1;
2858 }
2859 #endif
2860 
2861 static long
2862 rb_str_rindex(VALUE str, VALUE sub, long pos)
2863 {
2864  long len, slen;
2865  char *sbeg, *s;
2866  rb_encoding *enc;
2867  int singlebyte;
2868 
2869  enc = rb_enc_check(str, sub);
2870  if (is_broken_string(sub)) return -1;
2871  singlebyte = single_byte_optimizable(str);
2872  len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
2873  slen = str_strlen(sub, enc);
2874 
2875  /* substring longer than string */
2876  if (len < slen) return -1;
2877  if (len - pos < slen) pos = len - slen;
2878  if (len == 0) return pos;
2879 
2880  sbeg = RSTRING_PTR(str);
2881 
2882  if (pos == 0) {
2883  if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
2884  return 0;
2885  else
2886  return -1;
2887  }
2888 
2889  s = str_nth(sbeg, RSTRING_END(str), pos, enc, singlebyte);
2890  return str_rindex(str, sub, s, pos, enc);
2891 }
2892 
2893 
2894 /*
2895  * call-seq:
2896  * str.rindex(substring [, fixnum]) -> fixnum or nil
2897  * str.rindex(regexp [, fixnum]) -> fixnum or nil
2898  *
2899  * Returns the index of the last occurrence of the given <i>substring</i> or
2900  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2901  * found. If the second parameter is present, it specifies the position in the
2902  * string to end the search---characters beyond this point will not be
2903  * considered.
2904  *
2905  * "hello".rindex('e') #=> 1
2906  * "hello".rindex('l') #=> 3
2907  * "hello".rindex('a') #=> nil
2908  * "hello".rindex(?e) #=> 1
2909  * "hello".rindex(/[aeiou]/, -2) #=> 1
2910  */
2911 
/*
 * NOTE(review): three lines are absent from this listing: 2913 (the
 * declarator), 2926 (the only statement inside the "sub is a Regexp" branch
 * of the negative-position check) and 2942 (the remaining arguments of the
 * str_offset() call that starts on 2941).  The body reads argc, argv and str.
 *
 * Accepts one required argument (the pattern, sub) and one optional argument
 * (the position).  Returns the match position via LONG2NUM(), or Qnil.
 */
2912 static VALUE
2914 {
2915  VALUE sub;
2916  VALUE vpos;
2917  rb_encoding *enc = STR_ENC_GET(str);
2918  long pos, len = str_strlen(str, enc);
2919 
2920  if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
2921  pos = NUM2LONG(vpos);
2922  if (pos < 0) {
  /* negative position: count from the end */
2923  pos += len;
2924  if (pos < 0) {
2925  if (RB_TYPE_P(sub, T_REGEXP)) {
  /* NOTE(review): body of this branch (listing line 2926) is missing here */
2927  }
2928  return Qnil;
2929  }
2930  }
  /* a position past the end is clamped to the length */
2931  if (pos > len) pos = len;
2932  }
2933  else {
  /* no position given: start from the end */
2934  pos = len;
2935  }
2936 
  /* special constants cannot be passed to BUILTIN_TYPE(); treat them as "other" */
2937  if (SPECIAL_CONST_P(sub)) goto generic;
2938  switch (BUILTIN_TYPE(sub)) {
2939  case T_REGEXP:
2940  /* enc = rb_get_check(str, sub); */
  /* NOTE(review): the call below is truncated in this listing (line 2942) */
2941  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2943 
  /* rb_reg_search() is called with 1 as its last argument here */
2944  if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
2945  pos = rb_reg_search(sub, str, pos, 1);
2946  pos = rb_str_sublen(str, pos);
2947  }
2948  if (pos >= 0) return LONG2NUM(pos);
2949  break;
2950 
2951  generic:
2952  default: {
2953  VALUE tmp;
2954 
  /* anything else must be convertible to a String, or TypeError is raised */
2955  tmp = rb_check_string_type(sub);
2956  if (NIL_P(tmp)) {
2957  rb_raise(rb_eTypeError, "type mismatch: %s given",
2958  rb_obj_classname(sub));
2959  }
2960  sub = tmp;
2961  }
2962  /* fall through */
2963  case T_STRING:
2964  pos = rb_str_rindex(str, sub, pos);
2965  if (pos >= 0) return LONG2NUM(pos);
2966  break;
2967  }
2968  return Qnil;
2969 }
2970 
2971 /*
2972  * call-seq:
2973  * str =~ obj -> fixnum or nil
2974  *
2975  * Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match
2976  * against <i>str</i>, and return the position the match starts, or
2977  * <code>nil</code> if there is no match. Otherwise, invokes
2978  * <i>obj.=~</i>, passing <i>str</i> as an argument. The default
2979  * <code>=~</code> in <code>Object</code> returns <code>nil</code>.
2980  *
2981  * Note: <code>str =~ regexp</code> is not the same as
2982  * <code>regexp =~ str</code>. Strings captured from named capture groups
2983  * are assigned to local variables only in the second case.
2984  *
2985  * "cat o' 9 tails" =~ /\d/ #=> 7
2986  * "cat o' 9 tails" =~ 9 #=> nil
2987  */
2988 
/*
 * NOTE(review): the declarator line (listing line 2990) is absent from this
 * listing.  The body reads two values named x and y.
 *
 * Dispatches on the type of y: a T_STRING raises TypeError, a T_REGEXP is
 * matched against x with rb_reg_match(y, x), and every other value
 * (including special constants) receives the "=~" message with x as its
 * argument.
 */
2989 static VALUE
2991 {
  /* special constants cannot be passed to BUILTIN_TYPE(); treat them as "other" */
2992  if (SPECIAL_CONST_P(y)) goto generic;
2993  switch (BUILTIN_TYPE(y)) {
2994  case T_STRING:
2995  rb_raise(rb_eTypeError, "type mismatch: String given");
2996 
2997  case T_REGEXP:
2998  return rb_reg_match(y, x);
2999 
3000  generic:
3001  default:
3002  return rb_funcall(y, rb_intern("=~"), 1, x);
3003  }
3004 }
3005 
3006 
3007 static VALUE get_pat(VALUE, int);
3008 
3009 
3010 /*
3011  * call-seq:
3012  * str.match(pattern) -> matchdata or nil
3013  * str.match(pattern, pos) -> matchdata or nil
3014  *
3015  * Converts <i>pattern</i> to a <code>Regexp</code> (if it isn't already one),
3016  * then invokes its <code>match</code> method on <i>str</i>. If the second
3017  * parameter is present, it specifies the position in the string to begin the
3018  * search.
3019  *
3020  * 'hello'.match('(.)\1') #=> #<MatchData "ll" 1:"l">
3021  * 'hello'.match('(.)\1')[0] #=> "ll"
3022  * 'hello'.match(/(.)\1/)[0] #=> "ll"
3023  * 'hello'.match('xx') #=> nil
3024  *
3025  * If a block is given, invokes the block with the MatchData if the match succeeds, so
3026  * that you can write
3027  *
3028  * str.match(pat) {|m| ...}
3029  *
3030  * instead of
3031  *
3032  * if m = str.match(pat)
3033  * ...
3034  * end
3035  *
3036  * The return value is a value from block execution in this case.
3037  */
3038 
/*
 * NOTE(review): the declarator line (listing line 3040) is absent from this
 * listing.  The body reads argc, argv and str.
 *
 * Turns argv[0] into a pattern with get_pat(), then calls "match" on that
 * pattern with the same argument vector in which slot 0 has been overwritten
 * by str.  A non-nil result is yielded to the block when one is given and the
 * block's value is returned; otherwise the result itself is returned.
 */
3039 static VALUE
3041 {
3042  VALUE re, result;
  /* only the "too few" case is checked here, before argv[0] is read */
3043  if (argc < 1)
3044  rb_check_arity(argc, 1, 2);
3045  re = argv[0];
  /* reuse argv in place: the receiver takes the pattern's slot */
3046  argv[0] = str;
3047  result = rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv);
3048  if (!NIL_P(result) && rb_block_given_p()) {
3049  return rb_yield(result);
3050  }
3051  return result;
3052 }
3053 
3058 };
3059 
/*
 * Overwrites the len bytes at p with the next character in enc.
 *
 * Returns NEIGHBOR_FOUND when p now holds a character of exactly len bytes,
 * NEIGHBOR_WRAPPED when the increment ran out of room (every byte was 0xff,
 * or, for encodings whose minimum character length exceeds one byte, the
 * incremented code point no longer encodes to len bytes), and
 * NEIGHBOR_NOT_CHAR when the bytes do not form a character.
 */
3060 static enum neighbor_char
3061 enc_succ_char(char *p, long len, rb_encoding *enc)
3062 {
3063  long i;
3064  int l;
3065 
3066  if (rb_enc_mbminlen(enc) > 1) {
3067  /* wchar, trivial case */
3068  int r = rb_enc_precise_mbclen(p, p + len, enc), c;
3069  if (!MBCLEN_CHARFOUND_P(r)) {
3070  return NEIGHBOR_NOT_CHAR;
3071  }
  /* decode, add one, and write back only if the encoded size is unchanged */
3072  c = rb_enc_mbc_to_codepoint(p, p + len, enc) + 1;
3073  l = rb_enc_code_to_mbclen(c, enc);
3074  if (!l) return NEIGHBOR_NOT_CHAR;
3075  if (l != len) return NEIGHBOR_WRAPPED;
3076  rb_enc_mbcput(c, p, enc);
  /* re-validate what was just written */
3077  r = rb_enc_precise_mbclen(p, p + len, enc);
3078  if (!MBCLEN_CHARFOUND_P(r)) {
3079  return NEIGHBOR_NOT_CHAR;
3080  }
3081  return NEIGHBOR_FOUND;
3082  }
  /* otherwise treat the bytes as a counter and step it until a character of
     exactly len bytes appears */
3083  while (1) {
  /* carry: trailing 0xff bytes roll over to 0x00 */
3084  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--)
3085  p[i] = '\0';
3086  if (i < 0)
3087  return NEIGHBOR_WRAPPED;
3088  ++((unsigned char*)p)[i];
3089  l = rb_enc_precise_mbclen(p, p+len, enc);
3090  if (MBCLEN_CHARFOUND_P(l)) {
3091  l = MBCLEN_CHARFOUND_LEN(l);
3092  if (l == len) {
3093  return NEIGHBOR_FOUND;
3094  }
3095  else {
  /* a shorter character was found: set the tail to 0xff so the next pass
     carries into byte l-1 */
3096  memset(p+l, 0xff, len-l);
3097  }
3098  }
3099  if (MBCLEN_INVALID_P(l) && i < len-1) {
  /* look for the longest prefix that is not reported invalid and set
     everything after it to 0xff so the next pass carries past it */
3100  long len2;
3101  int l2;
3102  for (len2 = len-1; 0 < len2; len2--) {
3103  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
3104  if (!MBCLEN_INVALID_P(l2))
3105  break;
3106  }
3107  memset(p+len2+1, 0xff, len-(len2+1));
3108  }
3109  }
3110 }
3111 
/*
 * Overwrites the len bytes at p with the previous character in enc; the
 * mirror image of enc_succ_char().
 *
 * Returns NEIGHBOR_FOUND when p now holds a character of exactly len bytes,
 * NEIGHBOR_WRAPPED when the decrement ran out of room (every byte was 0x00,
 * or, for encodings whose minimum character length exceeds one byte, the
 * decremented code point no longer encodes to len bytes), and
 * NEIGHBOR_NOT_CHAR when the bytes do not form a character or the code point
 * is already zero.
 */
3112 static enum neighbor_char
3113 enc_pred_char(char *p, long len, rb_encoding *enc)
3114 {
3115  long i;
3116  int l;
3117  if (rb_enc_mbminlen(enc) > 1) {
3118  /* wchar, trivial case */
3119  int r = rb_enc_precise_mbclen(p, p + len, enc), c;
3120  if (!MBCLEN_CHARFOUND_P(r)) {
3121  return NEIGHBOR_NOT_CHAR;
3122  }
3123  c = rb_enc_mbc_to_codepoint(p, p + len, enc);
  /* code point 0 has no predecessor */
3124  if (!c) return NEIGHBOR_NOT_CHAR;
3125  --c;
3126  l = rb_enc_code_to_mbclen(c, enc);
3127  if (!l) return NEIGHBOR_NOT_CHAR;
3128  if (l != len) return NEIGHBOR_WRAPPED;
3129  rb_enc_mbcput(c, p, enc);
  /* re-validate what was just written */
3130  r = rb_enc_precise_mbclen(p, p + len, enc);
3131  if (!MBCLEN_CHARFOUND_P(r)) {
3132  return NEIGHBOR_NOT_CHAR;
3133  }
3134  return NEIGHBOR_FOUND;
3135  }
  /* otherwise treat the bytes as a counter and step it down until a character
     of exactly len bytes appears */
3136  while (1) {
  /* borrow: trailing 0x00 bytes roll over to 0xff */
3137  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--)
3138  p[i] = '\xff';
3139  if (i < 0)
3140  return NEIGHBOR_WRAPPED;
3141  --((unsigned char*)p)[i];
3142  l = rb_enc_precise_mbclen(p, p+len, enc);
3143  if (MBCLEN_CHARFOUND_P(l)) {
3144  l = MBCLEN_CHARFOUND_LEN(l);
3145  if (l == len) {
3146  return NEIGHBOR_FOUND;
3147  }
3148  else {
  /* a shorter character was found: zero the tail so the next pass borrows
     from byte l-1 */
3149  memset(p+l, 0, len-l);
3150  }
3151  }
3152  if (MBCLEN_INVALID_P(l) && i < len-1) {
  /* look for the longest prefix that is not reported invalid and zero
     everything after it so the next pass borrows past it */
3153  long len2;
3154  int l2;
3155  for (len2 = len-1; 0 < len2; len2--) {
3156  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
3157  if (!MBCLEN_INVALID_P(l2))
3158  break;
3159  }
3160  memset(p+len2+1, 0, len-(len2+1));
3161  }
3162  }
3163 }
3164 
3165 /*
3166  overwrite +p+ by succeeding letter in +enc+ and returns
3167  NEIGHBOR_FOUND or NEIGHBOR_WRAPPED.
3168  When NEIGHBOR_WRAPPED, carried-out letter is stored into carry.
3169  assuming each ranges are successive, and mbclen
3170  never change in each ranges.
3171  NEIGHBOR_NOT_CHAR is returned if invalid character or the range has only one
3172  character.
3173  */
3174 static enum neighbor_char
3175 enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
3176 {
3177  enum neighbor_char ret;
3178  unsigned int c;
3179  int ctype;
3180  int range;
3181  char save[ONIGENC_CODE_TO_MBC_MAXLEN];
3182 
3183  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
3184  if (rb_enc_isctype(c, ONIGENC_CTYPE_DIGIT, enc))
3185  ctype = ONIGENC_CTYPE_DIGIT;
3186  else if (rb_enc_isctype(c, ONIGENC_CTYPE_ALPHA, enc))
3187  ctype = ONIGENC_CTYPE_ALPHA;
3188  else
3189  return NEIGHBOR_NOT_CHAR;
3190 
3191  MEMCPY(save, p, char, len);
3192  ret = enc_succ_char(p, len, enc);
3193  if (ret == NEIGHBOR_FOUND) {
3194  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
3195  if (rb_enc_isctype(c, ctype, enc))
3196  return NEIGHBOR_FOUND;
3197  }
3198  MEMCPY(p, save, char, len);
3199  range = 1;
3200  while (1) {
3201  MEMCPY(save, p, char, len);
3202  ret = enc_pred_char(p, len, enc);
3203  if (ret == NEIGHBOR_FOUND) {
3204  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
3205  if (!rb_enc_isctype(c, ctype, enc)) {
3206  MEMCPY(p, save, char, len);
3207  break;
3208  }
3209  }
3210  else {
3211  MEMCPY(p, save, char, len);
3212  break;
3213  }
3214  range++;
3215  }
3216  if (range == 1) {
3217  return NEIGHBOR_NOT_CHAR;
3218  }
3219 
3220  if (ctype != ONIGENC_CTYPE_DIGIT) {
3221  MEMCPY(carry, p, char, len);
3222  return NEIGHBOR_WRAPPED;
3223  }
3224 
3225  MEMCPY(carry, p, char, len);
3226  enc_succ_char(carry, len, enc);
3227  return NEIGHBOR_WRAPPED;
3228 }
3229 
3230 
3231 /*
3232  * call-seq:
3233  * str.succ -> new_str
3234  * str.next -> new_str
3235  *
3236  * Returns the successor to <i>str</i>. The successor is calculated by
3237  * incrementing characters starting from the rightmost alphanumeric (or
3238  * the rightmost character if there are no alphanumerics) in the
3239  * string. Incrementing a digit always results in another digit, and
3240  * incrementing a letter results in another letter of the same case.
3241  * Incrementing nonalphanumerics uses the underlying character set's
3242  * collating sequence.
3243  *
3244  * If the increment generates a ``carry,'' the character to the left of
3245  * it is incremented. This process repeats until there is no carry,
3246  * adding an additional character if necessary.
3247  *
3248  * "abcd".succ #=> "abce"
3249  * "THX1138".succ #=> "THX1139"
3250  * "<<koala>>".succ #=> "<<koalb>>"
3251  * "1999zzz".succ #=> "2000aaa"
3252  * "ZZZ9999".succ #=> "AAAA0000"
3253  * "***".succ #=> "**+"
3254  */
3255 
/*
 * NOTE(review): three lines are absent from this listing: 3257 (the
 * declarator) and 3287 / 3309 (one statement in each scan loop, directly
 * after the ONIGENC_MBCLEN_CHARFOUND_P() check).  The body reads a value
 * named orig.
 *
 * Builds a copy of orig and increments it in place, returning the copy.  An
 * empty string is returned unchanged.
 */
3256 VALUE
3258 {
3259  rb_encoding *enc;
3260  VALUE str;
3261  char *sbeg, *s, *e, *last_alnum = 0;
3262  int c = -1;
3263  long l;
  /* default carry is the single byte "\1", inserted at offset 0 */
3264  char carry[ONIGENC_CODE_TO_MBC_MAXLEN] = "\1";
3265  long carry_pos = 0, carry_len = 1;
3266  enum neighbor_char neighbor = NEIGHBOR_FOUND;
3267 
3268  str = rb_str_new5(orig, RSTRING_PTR(orig), RSTRING_LEN(orig));
3269  rb_enc_cr_str_copy_for_substr(str, orig);
3270  OBJ_INFECT(str, orig);
3271  if (RSTRING_LEN(str) == 0) return str;
3272 
3273  enc = STR_ENC_GET(orig);
3274  sbeg = RSTRING_PTR(str);
3275  s = e = sbeg + RSTRING_LEN(str);
3276 
  /* first pass: walk backwards character by character, incrementing with
     enc_succ_alnum_char() */
3277  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
  /* after skipping a non-alnum: stop at the last wrapped character when the
     current byte is a digit and that one was a letter, or vice versa */
3278  if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
3279  if (ISALPHA(*last_alnum) ? ISDIGIT(*s) :
3280  ISDIGIT(*last_alnum) ? ISALPHA(*s) : 0) {
3281  s = last_alnum;
3282  break;
3283  }
3284  }
3285  l = rb_enc_precise_mbclen(s, e, enc);
3286  if (!ONIGENC_MBCLEN_CHARFOUND_P(l)) continue;
  /* NOTE(review): listing line 3287 is missing here */
3288  neighbor = enc_succ_alnum_char(s, l, enc, carry);
3289  switch (neighbor) {
3290  case NEIGHBOR_NOT_CHAR:
3291  continue;
3292  case NEIGHBOR_FOUND:
  /* incremented without carry: done */
3293  return str;
3294  case NEIGHBOR_WRAPPED:
3295  last_alnum = s;
3296  break;
3297  }
  /* reached only after a wrap: record where a carry would be inserted */
3298  c = 1;
3299  carry_pos = s - sbeg;
3300  carry_len = l;
3301  }
3302  if (c == -1) { /* str contains no alnum */
  /* second pass: increment with enc_succ_char() from the right */
3303  s = e;
3304  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
3305  enum neighbor_char neighbor;
3306  char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
3307  l = rb_enc_precise_mbclen(s, e, enc);
3308  if (!ONIGENC_MBCLEN_CHARFOUND_P(l)) continue;
  /* NOTE(review): listing line 3309 is missing here */
  /* work on a scratch copy so a NEIGHBOR_NOT_CHAR result leaves s untouched */
3310  MEMCPY(tmp, s, char, l);
3311  neighbor = enc_succ_char(tmp, l, enc);
3312  switch (neighbor) {
3313  case NEIGHBOR_FOUND:
3314  MEMCPY(s, tmp, char, l);
3315  return str;
3316  break;
3317  case NEIGHBOR_WRAPPED:
3318  MEMCPY(s, tmp, char, l);
3319  break;
3320  case NEIGHBOR_NOT_CHAR:
3321  break;
3322  }
3323  if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
3324  /* wrapped to \0...\0. search next valid char. */
3325  enc_succ_char(s, l, enc);
3326  }
  /* for non-ASCII-compatible encodings the carry is the character just written */
3327  if (!rb_enc_asciicompat(enc)) {
3328  MEMCPY(carry, s, char, l);
3329  carry_len = l;
3330  }
3331  carry_pos = s - sbeg;
3332  }
3333  }
  /* every candidate wrapped: grow the string and insert the carry at carry_pos */
3334  RESIZE_CAPA(str, RSTRING_LEN(str) + carry_len);
3335  s = RSTRING_PTR(str) + carry_pos;
3336  memmove(s + carry_len, s, RSTRING_LEN(str) - carry_pos);
3337  memmove(s, carry, carry_len);
3338  STR_SET_LEN(str, RSTRING_LEN(str) + carry_len);
3339  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3340  rb_enc_str_coderange(str);
3341  return str;
3342 }
3343 
3344 
3345 /*
3346  * call-seq:
3347  * str.succ! -> str
3348  * str.next! -> str
3349  *
3350  * Equivalent to <code>String#succ</code>, but modifies the receiver in
3351  * place.
3352  */
3353 
/*
 * NOTE(review): listing lines 3355 (the declarator) and 3357 (the only
 * statement before the return) are absent from this listing, so the in-place
 * update described by the comment above cannot be seen here.  What remains
 * returns the value named str.
 */
3354 static VALUE
3356 {
3358 
3359  return str;
3360 }
3361 
3362 
3363 /*
3364  * call-seq:
3365  * str.upto(other_str, exclusive=false) {|s| block } -> str
3366  * str.upto(other_str, exclusive=false) -> an_enumerator
3367  *
3368  * Iterates through successive values, starting at <i>str</i> and
3369  * ending at <i>other_str</i> inclusive, passing each value in turn to
3370  * the block. The <code>String#succ</code> method is used to generate
3371  * each value. If optional second argument exclusive is omitted or is false,
3372  * the last value will be included; otherwise it will be excluded.
3373  *
3374  * If no block is given, an enumerator is returned instead.
3375  *
3376  * "a8".upto("b6") {|s| print s, ' ' }
3377  * for s in "a8".."b6"
3378  * print s, ' '
3379  * end
3380  *
3381  * <em>produces:</em>
3382  *
3383  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3384  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3385  *
3386  * If <i>str</i> and <i>other_str</i> contain only ASCII numeric characters,
3387  * both are recognized as decimal numbers. In addition, the width of
3388  * string (e.g. leading zeros) is handled appropriately.
3389  *
3390  * "9".upto("11").to_a #=> ["9", "10", "11"]
3391  * "25".upto("5").to_a #=> []
3392  * "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"]
3393  */
3394 
/*
 * NOTE(review): the declarator line (listing line 3396) is absent from this
 * listing.  The body reads argc, argv and beg.
 *
 * Yields successive strings from beg up to the first argument (end); the
 * optional second argument makes the upper bound exclusive.  Returns beg, or
 * an enumerator when RETURN_ENUMERATOR() takes effect.  Three strategies are
 * used, tried in this order: single ASCII characters, all-digit strings, and
 * the general "succ" walk.
 */
3395 static VALUE
3397 {
3398  VALUE end, exclusive;
3399  VALUE current, after_end;
3400  ID succ;
3401  int n, excl, ascii;
3402  rb_encoding *enc;
3403 
3404  rb_scan_args(argc, argv, "11", &end, &exclusive);
3405  RETURN_ENUMERATOR(beg, argc, argv);
3406  excl = RTEST(exclusive);
3407  CONST_ID(succ, "succ");
3408  StringValue(end);
3409  enc = rb_enc_check(beg, end);
3410  ascii = (is_ascii_string(beg) && is_ascii_string(end));
3411  /* single character */
3412  if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
3413  char c = RSTRING_PTR(beg)[0];
3414  char e = RSTRING_PTR(end)[0];
3415 
  /* empty range: nothing is yielded */
3416  if (c > e || (excl && c == e)) return beg;
3417  for (;;) {
3418  rb_yield(rb_enc_str_new(&c, 1, enc));
  /* inclusive: stop after yielding e; exclusive: stop before reaching e */
3419  if (!excl && c == e) break;
3420  c++;
3421  if (excl && c == e) break;
3422  }
3423  return beg;
3424  }
3425  /* both edges are all digits */
3426  if (ascii && ISDIGIT(RSTRING_PTR(beg)[0]) && ISDIGIT(RSTRING_PTR(end)[0])) {
3427  char *s, *send;
3428  VALUE b, e;
3429  int width;
3430 
3431  s = RSTRING_PTR(beg); send = RSTRING_END(beg);
  /* the byte length of beg becomes the precision of the formats used below */
3432  width = rb_long2int(send - s);
  /* any non-digit in either string sends control to the general case */
3433  while (s < send) {
3434  if (!ISDIGIT(*s)) goto no_digits;
3435  s++;
3436  }
3437  s = RSTRING_PTR(end); send = RSTRING_END(end);
3438  while (s < send) {
3439  if (!ISDIGIT(*s)) goto no_digits;
3440  s++;
3441  }
3442  b = rb_str_to_inum(beg, 10, FALSE);
3443  e = rb_str_to_inum(end, 10, FALSE);
3444  if (FIXNUM_P(b) && FIXNUM_P(e)) {
  /* both bounds are Fixnums: count with C longs */
3445  long bi = FIX2LONG(b);
3446  long ei = FIX2LONG(e);
3447  rb_encoding *usascii = rb_usascii_encoding();
3448 
3449  while (bi <= ei) {
3450  if (excl && bi == ei) break;
3451  rb_yield(rb_enc_sprintf(usascii, "%.*ld", width, bi));
3452  bi++;
3453  }
3454  }
3455  else {
  /* otherwise compare and advance through method calls on the numbers */
3456  ID op = excl ? '<' : rb_intern("<=");
3457  VALUE args[2], fmt = rb_obj_freeze(rb_usascii_str_new_cstr("%.*d"));
3458 
3459  args[0] = INT2FIX(width);
3460  while (rb_funcall(b, op, 1, e)) {
3461  args[1] = b;
3462  rb_yield(rb_str_format(numberof(args), args, fmt));
3463  b = rb_funcall(b, succ, 0, 0);
3464  }
3465  }
3466  return beg;
3467  }
3468  /* normal case */
3469  no_digits:
3470  n = rb_str_cmp(beg, end);
3471  if (n > 0 || (excl && n == 0)) return beg;
3472 
  /* after_end is the value that follows end; reaching it ends the loop */
3473  after_end = rb_funcall(end, succ, 0, 0);
3474  current = rb_str_dup(beg);
3475  while (!rb_str_equal(current, after_end)) {
3476  VALUE next = Qnil;
  /* the successor is computed before yielding; it stays nil once current
     equals end in inclusive mode, which ends the loop after the yield */
3477  if (excl || !rb_str_equal(current, end))
3478  next = rb_funcall(current, succ, 0, 0);
3479  rb_yield(current);
3480  if (NIL_P(next)) break;
3481  current = next;
3482  StringValue(current);
3483  if (excl && rb_str_equal(current, end)) break;
  /* stop once the value is longer than end or has become empty */
3484  if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
3485  break;
3486  }
3487 
3488  return beg;
3489 }
3490 
3491 static VALUE
3492 rb_str_subpat(VALUE str, VALUE re, VALUE backref)
3493 {
3494  if (rb_reg_search(re, str, 0, 0) >= 0) {
3496  int nth = rb_reg_backref_number(match, backref);
3497  return rb_reg_nth_match(nth, match);
3498  }
3499  return Qnil;
3500 }
3501 
/*
 * NOTE(review): the declarator line (listing line 3503) is absent from this
 * listing.  The body reads two values named str and indx.
 *
 * Single-argument element reference.  Dispatches on indx:
 *   Fixnum  - the one-character substring at that index, or Qnil when the
 *             substring comes back empty;
 *   Regexp  - the whole match (group 0) via rb_str_subpat();
 *   String  - a copy of indx when it occurs in str, else Qnil;
 *   other   - tried as a range first, then converted with NUM2LONG() and
 *             handled like a Fixnum.
 */
3502 static VALUE
3504 {
3505  long idx;
3506 
3507  if (FIXNUM_P(indx)) {
3508  idx = FIX2LONG(indx);
3509 
3510  num_index:
3511  str = rb_str_substr(str, idx, 1);
  /* an empty (non-nil) substring is reported as nil */
3512  if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil;
3513  return str;
3514  }
3515 
  /* special constants cannot be passed to BUILTIN_TYPE(); treat them as "other" */
3516  if (SPECIAL_CONST_P(indx)) goto generic;
3517  switch (BUILTIN_TYPE(indx)) {
3518  case T_REGEXP:
3519  return rb_str_subpat(str, indx, INT2FIX(0));
3520 
3521  case T_STRING:
3522  if (rb_str_index(str, indx, 0) != -1)
3523  return rb_str_dup(indx);
3524  return Qnil;
3525 
3526  generic:
3527  default:
3528  /* check if indx is Range */
3529  {
3530  long beg, len;
3531  VALUE tmp;
3532 
3533  len = str_strlen(str, STR_ENC_GET(str));
  /* Qfalse: fall out to the numeric conversion below; Qnil: give nil */
3534  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
3535  case Qfalse:
3536  break;
3537  case Qnil:
3538  return Qnil;
3539  default:
3540  tmp = rb_str_substr(str, beg, len);
3541  return tmp;
3542  }
3543  }
3544  idx = NUM2LONG(indx);
3545  goto num_index;
3546  }
3547 
3548  UNREACHABLE;
3549 }
3550 
3551 
3552 /*
3553  * call-seq:
3554  * str[index] -> new_str or nil
3555  * str[start, length] -> new_str or nil
3556  * str[range] -> new_str or nil
3557  * str[regexp] -> new_str or nil
3558  * str[regexp, capture] -> new_str or nil
3559  * str[match_str] -> new_str or nil
3560  * str.slice(index) -> new_str or nil
3561  * str.slice(start, length) -> new_str or nil
3562  * str.slice(range) -> new_str or nil
3563  * str.slice(regexp) -> new_str or nil
3564  * str.slice(regexp, capture) -> new_str or nil
3565  * str.slice(match_str) -> new_str or nil
3566  *
3567  * Element Reference --- If passed a single +index+, returns a substring of
3568  * one character at that index. If passed a +start+ index and a +length+,
3569  * returns a substring containing +length+ characters starting at the
3570  * +index+. If passed a +range+, its beginning and end are interpreted as
3571  * offsets delimiting the substring to be returned.
3572  *
3573  * In these three cases, if an index is negative, it is counted from the end
3574  * of the string. For the +start+ and +range+ cases the starting index
3575  * is just before a character and an index matching the string's size.
3576  * Additionally, an empty string is returned when the starting index for a
3577  * character range is at the end of the string.
3578  *
3579  * Returns +nil+ if the initial index falls outside the string or the length
3580  * is negative.
3581  *
3582  * If a +Regexp+ is supplied, the matching portion of the string is
3583  * returned. If a +capture+, which may be a capture group index or name,
3584  * follows the regular expression, that component of the MatchData is
3585  * returned instead.
3586  *
3587  * If a +match_str+ is given, that string is returned if it occurs in
3588  * the string.
3589  *
3590  * Returns +nil+ if the regular expression does not match or the match string
3591  * cannot be found.
3592  *
3593  * a = "hello there"
3594  *
3595  * a[1] #=> "e"
3596  * a[2, 3] #=> "llo"
3597  * a[2..3] #=> "ll"
3598  *
3599  * a[-3, 2] #=> "er"
3600  * a[7..-2] #=> "her"
3601  * a[-4..-2] #=> "her"
3602  * a[-2..-4] #=> ""
3603  *
3604  * a[11, 0] #=> ""
3605  * a[11] #=> nil
3606  * a[12, 0] #=> nil
3607  * a[12..-1] #=> nil
3608  *
3609  * a[/[aeiou](.)\1/] #=> "ell"
3610  * a[/[aeiou](.)\1/, 0] #=> "ell"
3611  * a[/[aeiou](.)\1/, 1] #=> "l"
3612  * a[/[aeiou](.)\1/, 2] #=> nil
3613  *
3614  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "non_vowel"] #=> "l"
3615  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "vowel"] #=> "e"
3616  *
3617  * a["lo"] #=> "lo"
3618  * a["bye"] #=> nil
3619  */
3620 
/*
 * NOTE(review): the declarator line (listing line 3622) is absent from this
 * listing.  The body reads argc, argv and str.
 *
 * With two arguments: a Regexp first argument selects a capture through
 * rb_str_subpat(); otherwise both arguments are converted with NUM2LONG()
 * and passed to rb_str_substr().  With one argument the work is done by
 * rb_str_aref().  Any other count is rejected by rb_check_arity().
 */
3621 static VALUE
3623 {
3624  if (argc == 2) {
3625  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3626  return rb_str_subpat(str, argv[0], argv[1]);
3627  }
3628  return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
3629  }
  /* argc == 2 was handled above, so only argc == 1 continues past this check */
3630  rb_check_arity(argc, 1, 2);
3631  return rb_str_aref(str, argv[0]);
3632 }
3633 
3634 VALUE
3635 rb_str_drop_bytes(VALUE str, long len)
3636 {
3637  char *ptr = RSTRING_PTR(str);
3638  long olen = RSTRING_LEN(str), nlen;
3639 
3640  str_modifiable(str);
3641  if (len > olen) len = olen;
3642  nlen = olen - len;
3643  if (nlen <= RSTRING_EMBED_LEN_MAX) {
3644  char *oldptr = ptr;
3645  int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|ELTS_SHARED));
3646  STR_SET_EMBED(str);
3647  STR_SET_EMBED_LEN(str, nlen);
3648  ptr = RSTRING(str)->as.ary;
3649  memmove(ptr, oldptr + len, nlen);
3650  if (fl == STR_NOEMBED) xfree(oldptr);
3651  }
3652  else {
3653  if (!STR_SHARED_P(str)) rb_str_new4(str);
3654  ptr = RSTRING(str)->as.heap.ptr += len;
3655  RSTRING(str)->as.heap.len = nlen;
3656  }
3657  ptr[nlen] = 0;
3658  ENC_CODERANGE_CLEAR(str);
3659  return str;
3660 }
3661 
3662 static void
3663 rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
3664 {
3665  if (beg == 0 && RSTRING_LEN(val) == 0) {
3666  rb_str_drop_bytes(str, len);
3667  OBJ_INFECT(str, val);
3668  return;
3669  }
3670 
3671  rb_str_modify(str);
3672  if (len < RSTRING_LEN(val)) {
3673  /* expand string */
3674  RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + TERM_LEN(str));
3675  }
3676 
3677  if (RSTRING_LEN(val) != len) {
3678  memmove(RSTRING_PTR(str) + beg + RSTRING_LEN(val),
3679  RSTRING_PTR(str) + beg + len,
3680  RSTRING_LEN(str) - (beg + len));
3681  }
3682  if (RSTRING_LEN(val) < beg && len < 0) {
3683  MEMZERO(RSTRING_PTR(str) + RSTRING_LEN(str), char, -len);
3684  }
3685  if (RSTRING_LEN(val) > 0) {
3686  memmove(RSTRING_PTR(str)+beg, RSTRING_PTR(val), RSTRING_LEN(val));
3687  }
3688  STR_SET_LEN(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len);
3689  if (RSTRING_PTR(str)) {
3690  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3691  }
3692  OBJ_INFECT(str, val);
3693 }
3694 
/*
 * Character-level splice: replaces len characters of str starting at
 * character index beg with val.
 *
 * Raises IndexError for a negative len and for a beg outside the string (a
 * negative beg counts from the end).  A span that would run past the end is
 * shortened.  The character positions are then converted to a byte offset
 * and byte length with str_nth() and the work is done by rb_str_splice_0().
 *
 * NOTE(review): listing line 3733 is absent from this listing; cr is declared
 * on 3702 and read on 3734 with no assignment visible in between.
 */
3695 static void
3696 rb_str_splice(VALUE str, long beg, long len, VALUE val)
3697 {
3698  long slen;
3699  char *p, *e;
3700  rb_encoding *enc;
3701  int singlebyte = single_byte_optimizable(str);
3702  int cr;
3703 
3704  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
3705 
3706  StringValue(val);
3707  enc = rb_enc_check(str, val);
3708  slen = str_strlen(str, enc);
3709 
3710  if (slen < beg) {
3711  out_of_range:
3712  rb_raise(rb_eIndexError, "index %ld out of string", beg);
3713  }
3714  if (beg < 0) {
3715  if (-beg > slen) {
3716  goto out_of_range;
3717  }
3718  beg += slen;
3719  }
  /* shorten a span that would reach past the end of the string */
3720  if (slen < len || slen < beg + len) {
3721  len = slen - beg;
3722  }
3723  str_modify_keep_cr(str);
  /* p / e: start and end of the span; a null result from str_nth() is
     replaced by the end of the string */
3724  p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
3725  if (!p) p = RSTRING_END(str);
3726  e = str_nth(p, RSTRING_END(str), len, enc, singlebyte);
3727  if (!e) e = RSTRING_END(str);
3728  /* error check */
3729  beg = p - RSTRING_PTR(str); /* physical position */
3730  len = e - p; /* physical length */
3731  rb_str_splice_0(str, beg, len, val);
3732  rb_enc_associate(str, enc);
  /* NOTE(review): the statement that sets cr (listing line 3733) is missing here */
3734  if (cr != ENC_CODERANGE_BROKEN)
3735  ENC_CODERANGE_SET(str, cr);
3736 }
3737 
/*
 * Non-static entry point that forwards its four arguments, unchanged, to
 * rb_str_splice().
 */
3738 void
3739 rb_str_update(VALUE str, long beg, long len, VALUE val)
3740 {
3741  rb_str_splice(str, beg, len, val);
3742 }
3743 
/*
 * NOTE(review): the declarator line (listing line 3745) is absent from this
 * listing.  The body reads values named str, re, backref and val.
 *
 * Matches re against str from offset 0 and replaces the text of the capture
 * selected by backref with val.  Raises IndexError when the regexp does not
 * match, when the group number is out of range, or when the group took no
 * part in the match.
 */
3744 static void
3746 {
3747  int nth;
3748  VALUE match;
3749  long start, end, len;
3750  rb_encoding *enc;
3751  struct re_registers *regs;
3752 
3753  if (rb_reg_search(re, str, 0, 0) < 0) {
3754  rb_raise(rb_eIndexError, "regexp not matched");
3755  }
3756  match = rb_backref_get();
3757  nth = rb_reg_backref_number(match, backref);
3758  regs = RMATCH_REGS(match);
3759  if (nth >= regs->num_regs) {
3760  out_of_range:
3761  rb_raise(rb_eIndexError, "index %d out of regexp", nth);
3762  }
3763  if (nth < 0) {
  /* negative group numbers count back from the number of registers */
3764  if (-nth >= regs->num_regs) {
3765  goto out_of_range;
3766  }
3767  nth += regs->num_regs;
3768  }
3769 
  /* BEG()/END() read regs->beg[] / regs->end[]; -1 marks an unmatched group */
3770  start = BEG(nth);
3771  if (start == -1) {
3772  rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
3773  }
3774  end = END(nth);
3775  len = end - start;
3776  StringValue(val);
3777  enc = rb_enc_check(str, val);
  /* the register values are handed straight to the byte-level splice */
3778  rb_str_splice_0(str, start, len, val);
3779  rb_enc_associate(str, enc);
3780 }
3781 
/*
 * NOTE(review): the declarator line (listing line 3783) is absent from this
 * listing.  The body reads values named str, indx and val.
 *
 * Single-index element assignment; always returns val.  Dispatches on indx:
 *   Fixnum  - replaces the one character at that index;
 *   Regexp  - replaces the whole match (group 0) via rb_str_subpat_set();
 *   String  - replaces the first occurrence of indx, raising IndexError when
 *             it does not occur;
 *   other   - tried as a range first, then converted with NUM2LONG() and
 *             handled like a Fixnum.
 */
3782 static VALUE
3784 {
3785  long idx, beg;
3786 
3787  if (FIXNUM_P(indx)) {
3788  idx = FIX2LONG(indx);
3789  num_index:
3790  rb_str_splice(str, idx, 1, val);
3791  return val;
3792  }
3793 
  /* special constants skip the type switch and go straight to "other" */
3794  if (SPECIAL_CONST_P(indx)) goto generic;
3795  switch (TYPE(indx)) {
3796  case T_REGEXP:
3797  rb_str_subpat_set(str, indx, INT2FIX(0), val);
3798  return val;
3799 
3800  case T_STRING:
3801  beg = rb_str_index(str, indx, 0);
3802  if (beg < 0) {
3803  rb_raise(rb_eIndexError, "string not matched");
3804  }
  /* convert the search result with rb_str_sublen() before splicing */
3805  beg = rb_str_sublen(str, beg);
3806  rb_str_splice(str, beg, str_strlen(indx, 0), val);
3807  return val;
3808 
3809  generic:
3810  default:
3811  /* check if indx is Range */
3812  {
  /* these locals shadow the outer beg */
3813  long beg, len;
3814  if (rb_range_beg_len(indx, &beg, &len, str_strlen(str, 0), 2)) {
3815  rb_str_splice(str, beg, len, val);
3816  return val;
3817  }
3818  }
3819  idx = NUM2LONG(indx);
3820  goto num_index;
3821  }
3822 }
3823 
3824 /*
3825  * call-seq:
3826  * str[fixnum] = new_str
3827  * str[fixnum, fixnum] = new_str
3828  * str[range] = aString
3829  * str[regexp] = new_str
3830  * str[regexp, fixnum] = new_str
3831  * str[regexp, name] = new_str
3832  * str[other_str] = new_str
3833  *
3834  * Element Assignment---Replaces some or all of the content of <i>str</i>. The
3835  * portion of the string affected is determined using the same criteria as
3836  * <code>String#[]</code>. If the replacement string is not the same length as
3837  * the text it is replacing, the string will be adjusted accordingly. If the
3838  * regular expression or string used as the index doesn't match a position
3839  * in the string, <code>IndexError</code> is raised. If the regular expression
3840  * form is used, the optional second <code>Fixnum</code> allows you to specify
3841  * which portion of the match to replace (effectively using the
3842  * <code>MatchData</code> indexing rules). The forms that take a
3843  * <code>Fixnum</code> will raise an <code>IndexError</code> if the value is
3844  * out of range; the <code>Range</code> form will raise a
3845  * <code>RangeError</code>, and the <code>Regexp</code> and <code>String</code>
3846  * will raise an <code>IndexError</code> on negative match.
3847  */
3848 
/*
 * NOTE(review): the signature line (listing line 3850) is absent from this
 * listing.  The body reads argc, argv and str, and another function in this
 * file calls rb_str_aset_m(argc+1, buf, str) -- confirm against the upstream
 * source.
 *
 * Method entry point for element assignment.
 * With three arguments: a Regexp first argument selects the
 * (regexp, backref, value) form; otherwise the first two arguments are
 * converted with NUM2LONG() and used as (start, length).  Returns argv[2].
 * With any other count rb_check_arity(argc, 2, 3) is applied and the call is
 * forwarded to rb_str_aset().
 */
3849 static VALUE
3851 {
3852  if (argc == 3) {
3853  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3854  rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
3855  }
3856  else {
3857  rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
3858  }
3859  return argv[2];
3860  }
 /* argc == 3 was handled above, so only argc == 2 gets past this check */
3861  rb_check_arity(argc, 2, 3);
3862  return rb_str_aset(str, argv[0], argv[1]);
3863 }
3864 
3865 /*
3866  * call-seq:
3867  * str.insert(index, other_str) -> str
3868  *
3869  * Inserts <i>other_str</i> before the character at the given
3870  * <i>index</i>, modifying <i>str</i>. Negative indices count from the
3871  * end of the string, and insert <em>after</em> the given character.
3872  * The intent is to insert <i>other_str</i> so that it starts at the given
3873  * <i>index</i>.
3874  *
3875  * "abcd".insert(0, 'X') #=> "Xabcd"
3876  * "abcd".insert(3, 'X') #=> "abcXd"
3877  * "abcd".insert(4, 'X') #=> "abcdX"
3878  * "abcd".insert(-3, 'X') #=> "abXcd"
3879  * "abcd".insert(-1, 'X') #=> "abcdX"
3880  */
3881 
/*
 * NOTE(review): the signature line (listing line 3883) is absent from this
 * listing.  The body reads str, idx and str2; the name rb_str_insert is
 * inferred from the preceding rdoc comment -- confirm against the upstream
 * source.
 *
 * Inserts str2 into str at position idx (converted with NUM2LONG()) and
 * returns str.
 */
3882 static VALUE
3884 {
3885  long pos = NUM2LONG(idx);
3886 
 /* -1 means "after the last character", i.e. a plain append */
3887  if (pos == -1) {
3888  return rb_str_append(str, str2);
3889  }
 /* other negative positions are shifted by one before the zero-length splice */
3890  else if (pos < 0) {
3891  pos++;
3892  }
3893  rb_str_splice(str, pos, 0, str2);
3894  return str;
3895 }
3896 
3897 
3898 /*
3899  * call-seq:
3900  * str.slice!(fixnum) -> new_str or nil
3901  * str.slice!(fixnum, fixnum) -> new_str or nil
3902  * str.slice!(range) -> new_str or nil
3903  * str.slice!(regexp) -> new_str or nil
3904  * str.slice!(other_str) -> new_str or nil
3905  *
3906  * Deletes the specified portion from <i>str</i>, and returns the portion
3907  * deleted.
3908  *
3909  * string = "this is a string"
3910  * string.slice!(2) #=> "i"
3911  * string.slice!(3..6) #=> " is "
3912  * string.slice!(/s.*t/) #=> "sa st"
3913  * string.slice!("r") #=> "r"
3914  * string #=> "thing"
3915  */
3916 
/*
 * NOTE(review): the signature line (listing line 3918) is absent from this
 * listing.  The body reads argc, argv and str; the name rb_str_slice_bang is
 * inferred from the preceding rdoc comment -- confirm against the upstream
 * source.
 *
 * Fetches the selected portion with rb_str_aref_m() and, when that is not
 * nil, overwrites the same selection with an empty string through
 * rb_str_aset_m().  Returns whatever rb_str_aref_m() returned.
 * Accepts one or two arguments (rb_check_arity).
 */
3917 static VALUE
3919 {
3920  VALUE result;
 /* room for the (at most two) caller arguments plus one appended value */
3921  VALUE buf[3];
3922  int i;
3923 
3924  rb_check_arity(argc, 1, 2);
3925  for (i=0; i<argc; i++) {
3926  buf[i] = argv[i];
3927  }
3928  str_modify_keep_cr(str);
3929  result = rb_str_aref_m(argc, buf, str);
3930  if (!NIL_P(result)) {
 /* i == argc here: append an empty replacement and reuse the assignment path */
3931  buf[i] = rb_str_new(0,0);
3932  rb_str_aset_m(argc+1, buf, str);
3933  }
3934  return result;
3935 }
3936 
/*
 * Normalizes a pattern argument.
 *
 * pat   - a Regexp (returned unchanged), a String, or an object that
 *         rb_check_string_type() can convert to a String.
 * quote - when non-zero the string is passed through rb_reg_quote() before
 *         compilation.
 *
 * Returns pat itself for a Regexp, otherwise the result of rb_reg_regcomp()
 * on the (optionally quoted) string.  When the conversion to String yields
 * nil, Check_Type(pat, T_REGEXP) is invoked -- presumably raising a
 * TypeError; confirm.
 */
3937 static VALUE
3938 get_pat(VALUE pat, int quote)
3939 {
3940  VALUE val;
3941 
3942  switch (TYPE(pat)) {
3943  case T_REGEXP:
3944  return pat;
3945 
3946  case T_STRING:
3947  break;
3948 
3949  default:
3950  val = rb_check_string_type(pat);
3951  if (NIL_P(val)) {
3952  Check_Type(pat, T_REGEXP);
3953  }
3954  pat = val;
3955  }
3956 
3957  if (quote) {
3958  pat = rb_reg_quote(pat);
3959  }
3960 
3961  return rb_reg_regcomp(pat);
3962 }
3963 
3964 
3965 /*
3966  * call-seq:
3967  * str.sub!(pattern, replacement) -> str or nil
3968  * str.sub!(pattern) {|match| block } -> str or nil
3969  *
3970  * Performs the same substitution as String#sub in-place.
3971  *
3972  * Returns +str+ if a substitution was performed or +nil+ if no substitution
3973  * was performed.
3974  */
3975 
/*
 * NOTE(review): two lines are absent from this listing: the signature
 * (listing line 3977) and listing line 4003, which presumably declares the
 * `match` variable used below.  The body reads argc, argv and str, and
 * another function in this file calls rb_str_sub_bang(argc, argv, str) --
 * confirm against the upstream source.
 *
 * In-place single substitution.  Replaces the first match of the pattern
 * (argv[0]) with a replacement taken from a block, a Hash lookup or a
 * replacement string.  Returns str when a substitution happened and Qnil
 * when the pattern did not match.
 * Raises EncCompatError when the replacement's encoding is incompatible and
 * the text kept around the match is not 7-bit.
 */
3976 static VALUE
3978 {
3979  VALUE pat, repl, hash = Qnil;
3980  int iter = 0;
3981  int tainted = 0;
3982  long plen;
 /* with a block the replacement argument is optional */
3983  int min_arity = rb_block_given_p() ? 1 : 2;
3984 
3985  rb_check_arity(argc, min_arity, 2);
3986  if (argc == 1) {
3987  iter = 1;
3988  }
3989  else {
3990  repl = argv[1];
 /* the replacement may be a Hash; otherwise it has to be string-like */
3991  hash = rb_check_hash_type(argv[1]);
3992  if (NIL_P(hash)) {
3993  StringValue(repl);
3994  }
3995  if (OBJ_TAINTED(repl)) tainted = 1;
3996  }
3997 
3998  pat = get_pat(argv[0], 1);
3999  str_modifiable(str);
4000  if (rb_reg_search(pat, str, 0, 0) >= 0) {
4001  rb_encoding *enc;
4002  int cr = ENC_CODERANGE(str);
4004  struct re_registers *regs = RMATCH_REGS(match);
4005  long beg0 = BEG(0);
4006  long end0 = END(0);
4007  char *p, *rp;
4008  long len, rlen;
4009 
4010  if (iter || !NIL_P(hash)) {
 /* remember pointer and length so str_mod_check() can be applied after user code ran */
4011  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
4012 
4013  if (iter) {
4014  repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
4015  }
4016  else {
4017  repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
4018  repl = rb_obj_as_string(repl);
4019  }
4020  str_mod_check(str, p, len);
4021  rb_check_frozen(str);
4022  }
4023  else {
4024  repl = rb_reg_regsub(repl, str, regs, pat);
4025  }
4026  enc = rb_enc_compatible(str, repl);
4027  if (!enc) {
 /* incompatible encodings are tolerated only when the text before and after the match scans as 7-bit */
4028  rb_encoding *str_enc = STR_ENC_GET(str);
4029  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
4030  if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
4031  coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
4032  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
4033  rb_enc_name(str_enc),
4034  rb_enc_name(STR_ENC_GET(repl)));
4035  }
4036  enc = STR_ENC_GET(repl);
4037  }
4038  rb_str_modify(str);
4039  rb_enc_associate(str, enc);
4040  if (OBJ_TAINTED(repl)) tainted = 1;
 /* combine the code range saved before the edit with the replacement's code range */
4041  if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
4042  int cr2 = ENC_CODERANGE(repl);
4043  if (cr2 == ENC_CODERANGE_BROKEN ||
4044  (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
4045  cr = ENC_CODERANGE_UNKNOWN;
4046  else
4047  cr = cr2;
4048  }
4049  plen = end0 - beg0;
4050  rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl);
4051  len = RSTRING_LEN(str);
 /* the buffer is resized only when the replacement is longer than the match */
4052  if (rlen > plen) {
4053  RESIZE_CAPA(str, len + rlen - plen);
4054  }
 /* re-read the pointer after the possible resize */
4055  p = RSTRING_PTR(str);
4056  if (rlen != plen) {
 /* shift the tail that follows the match to its new position */
4057  memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
4058  }
4059  memcpy(p + beg0, rp, rlen);
4060  len += rlen - plen;
4061  STR_SET_LEN(str, len);
4062  RSTRING_PTR(str)[len] = '\0';
4063  ENC_CODERANGE_SET(str, cr);
4064  if (tainted) OBJ_TAINT(str);
4065 
4066  return str;
4067  }
4068  return Qnil;
4069 }
4070 
4071 
4072 /*
4073  * call-seq:
4074  * str.sub(pattern, replacement) -> new_str
4075  * str.sub(pattern, hash) -> new_str
4076  * str.sub(pattern) {|match| block } -> new_str
4077  *
4078  * Returns a copy of +str+ with the _first_ occurrence of +pattern+
4079  * replaced by the second argument. The +pattern+ is typically a Regexp; if
4080  * given as a String, any regular expression metacharacters it contains will
4081  * be interpreted literally, e.g. <code>'\\\d'</code> will match a backslash
4082  * followed by 'd', instead of a digit.
4083  *
4084  * If +replacement+ is a String it will be substituted for the matched text.
4085  * It may contain back-references to the pattern's capture groups of the form
4086  * <code>"\\d"</code>, where <i>d</i> is a group number, or
4087  * <code>"\\k<n>"</code>, where <i>n</i> is a group name. If it is a
4088  * double-quoted string, both back-references must be preceded by an
4089  * additional backslash. However, within +replacement+ the special match
4090  * variables, such as <code>$&</code>, will not refer to the current match.
4091  *
4092  * If the second argument is a Hash, and the matched text is one of its keys,
4093  * the corresponding value is the replacement string.
4094  *
4095  * In the block form, the current match string is passed in as a parameter,
4096  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
4097  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
4098  * returned by the block will be substituted for the match on each call.
4099  *
4100  * The result inherits any tainting in the original string or any supplied
4101  * replacement string.
4102  *
4103  * "hello".sub(/[aeiou]/, '*') #=> "h*llo"
4104  * "hello".sub(/([aeiou])/, '<\1>') #=> "h<e>llo"
4105  * "hello".sub(/./) {|s| s.ord.to_s + ' ' } #=> "104 ello"
4106  * "hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*') #=> "h*e*llo"
4107  * 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV)
4108  * #=> "Is /bin/bash your preferred shell?"
4109  */
4110 
/*
 * NOTE(review): the signature line (listing line 4112) is absent from this
 * listing.  The body reads argc, argv and str; the name rb_str_sub is
 * inferred from the preceding rdoc comment -- confirm against the upstream
 * source.
 *
 * Non-destructive substitution: duplicates str, applies rb_str_sub_bang() to
 * the duplicate and returns the duplicate whether or not anything matched
 * (the return value of rb_str_sub_bang() is ignored).
 */
4111 static VALUE
4113 {
4114  str = rb_str_dup(str);
4115  rb_str_sub_bang(argc, argv, str);
4116  return str;
4117 }
4118 
/*
 * Shared implementation of global substitution.
 *
 * argc/argv - pattern, plus an optional replacement (String or Hash); with
 *             only a pattern and no block, RETURN_ENUMERATOR takes over.
 * str       - the string to search.
 * bang      - non-zero for the in-place variant.
 *
 * The result is built in a separate buffer (dest).  With bang the content of
 * dest is moved into str via rb_str_shared_replace() and str is returned, or
 * Qnil when the pattern never matched.  Without bang, dest (or a plain
 * duplicate of str when nothing matched) is returned.
 *
 * NOTE(review): listing lines 4164-4165 (between the rb_enc_associate() call
 * and the do-loop) are absent from this listing; whatever statement stood
 * there is not visible here.
 */
4119 static VALUE
4120 str_gsub(int argc, VALUE *argv, VALUE str, int bang)
4121 {
4122  VALUE pat, val, repl, match, dest, hash = Qnil;
4123  struct re_registers *regs;
4124  long beg, n;
4125  long beg0, end0;
4126  long offset, blen, slen, len, last;
4127  int iter = 0;
4128  char *sp, *cp;
4129  int tainted = 0;
4130  rb_encoding *str_enc;
4131 
4132  switch (argc) {
4133  case 1:
4134  RETURN_ENUMERATOR(str, argc, argv);
4135  iter = 1;
4136  break;
4137  case 2:
4138  repl = argv[1];
 /* the replacement may be a Hash; otherwise it has to be string-like */
4139  hash = rb_check_hash_type(argv[1]);
4140  if (NIL_P(hash)) {
4141  StringValue(repl);
4142  }
4143  if (OBJ_TAINTED(repl)) tainted = 1;
4144  break;
4145  default:
4146  rb_check_arity(argc, 1, 2);
4147  }
4148 
4149  pat = get_pat(argv[0], 1);
4150  beg = rb_reg_search(pat, str, 0, 0);
4151  if (beg < 0) {
4152  if (bang) return Qnil; /* no match, no substitution */
4153  return rb_str_dup(str);
4154  }
4155 
4156  offset = 0;
4157  n = 0;
4158  blen = RSTRING_LEN(str) + 30; /* len + margin */
4159  dest = rb_str_buf_new(blen);
 /* sp/slen snapshot the source buffer for the str_mod_check() calls in the loop */
4160  sp = RSTRING_PTR(str);
4161  slen = RSTRING_LEN(str);
4162  cp = sp;
4163  str_enc = STR_ENC_GET(str);
4164  rb_enc_associate(dest, str_enc);
4166 
4167  do {
4168  n++;
4169  match = rb_backref_get();
4170  regs = RMATCH_REGS(match);
4171  beg0 = BEG(0);
4172  end0 = END(0);
4173  if (iter || !NIL_P(hash)) {
4174  if (iter) {
4175  val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
4176  }
4177  else {
4178  val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
4179  val = rb_obj_as_string(val);
4180  }
 /* user code just ran (block or Hash lookup): re-validate str against the snapshot */
4181  str_mod_check(str, sp, slen);
4182  if (val == dest) { /* paranoid check [ruby-dev:24827] */
4183  rb_raise(rb_eRuntimeError, "block should not cheat");
4184  }
4185  }
4186  else {
4187  val = rb_reg_regsub(repl, str, regs, pat);
4188  }
4189 
4190  if (OBJ_TAINTED(val)) tainted = 1;
4191 
4192  len = beg0 - offset; /* copy pre-match substr */
4193  if (len) {
4194  rb_enc_str_buf_cat(dest, cp, len, str_enc);
4195  }
4196 
4197  rb_str_buf_append(dest, val);
4198 
 /* last keeps the offset this iteration's search started from */
4199  last = offset;
4200  offset = end0;
4201  if (beg0 == end0) {
4202  /*
4203  * Always consume at least one character of the input string
4204  * in order to prevent infinite loops.
4205  */
4206  if (RSTRING_LEN(str) <= end0) break;
4207  len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
4208  rb_enc_str_buf_cat(dest, RSTRING_PTR(str)+end0, len, str_enc);
4209  offset = end0 + len;
4210  }
4211  cp = RSTRING_PTR(str) + offset;
4212  if (offset > RSTRING_LEN(str)) break;
4213  beg = rb_reg_search(pat, str, offset, 0);
4214  } while (beg >= 0);
 /* copy whatever follows the last match */
4215  if (RSTRING_LEN(str) > offset) {
4216  rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
4217  }
 /* repeats the search from the start offset of the final iteration; presumably to leave the last successful match as the current match data -- TODO confirm */
4218  rb_reg_search(pat, str, last, 0);
4219  if (bang) {
4220  rb_str_shared_replace(str, dest);
4221  }
4222  else {
 /* the new string takes the class and taint state of the receiver */
4223  RBASIC_SET_CLASS(dest, rb_obj_class(str));
4224  OBJ_INFECT(dest, str);
4225  str = dest;
4226  }
4227 
4228  if (tainted) OBJ_TAINT(str);
4229  return str;
4230 }
4231 
4232 
4233 /*
4234  * call-seq:
4235  * str.gsub!(pattern, replacement) -> str or nil
4236  * str.gsub!(pattern) {|match| block } -> str or nil
4237  * str.gsub!(pattern) -> an_enumerator
4238  *
4239  * Performs the substitutions of <code>String#gsub</code> in place, returning
4240  * <i>str</i>, or <code>nil</code> if no substitutions were performed.
4241  * If no block and no <i>replacement</i> is given, an enumerator is returned instead.
4242  */
4243 
/*
 * NOTE(review): the signature line (listing line 4245) is absent from this
 * listing.  The body reads argc, argv and str; the name rb_str_gsub_bang is
 * inferred from the preceding rdoc comment -- confirm against the upstream
 * source.
 *
 * In-place global substitution: calls str_modify_keep_cr(str) and then
 * delegates to str_gsub() with bang = 1, returning its result.
 */
4244 static VALUE
4246 {
4247  str_modify_keep_cr(str);
4248  return str_gsub(argc, argv, str, 1);
4249 }
4250 
4251 
4252 /*
4253  * call-seq:
4254  * str.gsub(pattern, replacement) -> new_str
4255  * str.gsub(pattern, hash) -> new_str
4256  * str.gsub(pattern) {|match| block } -> new_str
4257  * str.gsub(pattern) -> enumerator
4258  *
4259  * Returns a copy of <i>str</i> with <em>all</em> occurrences of
4260  * <i>pattern</i> replaced by the second argument. The <i>pattern</i> is
4261  * typically a <code>Regexp</code>; if given as a <code>String</code>, any
4262  * regular expression metacharacters it contains will be interpreted
4263  * literally, e.g. <code>'\\\d'</code> will match a backslash followed by 'd',
4264  * instead of a digit.
4265  *
4266  * If <i>replacement</i> is a <code>String</code> it will be substituted for
4267  * the matched text. It may contain back-references to the pattern's capture
4268  * groups of the form <code>\\\d</code>, where <i>d</i> is a group number, or
4269  * <code>\\\k<n></code>, where <i>n</i> is a group name. If it is a
4270  * double-quoted string, both back-references must be preceded by an
4271  * additional backslash. However, within <i>replacement</i> the special match
4272  * variables, such as <code>$&</code>, will not refer to the current match.
4273  *
4274  * If the second argument is a <code>Hash</code>, and the matched text is one
4275  * of its keys, the corresponding value is the replacement string.
4276  *
4277  * In the block form, the current match string is passed in as a parameter,
4278  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
4279  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
4280  * returned by the block will be substituted for the match on each call.
4281  *
4282  * The result inherits any tainting in the original string or any supplied
4283  * replacement string.
4284  *
4285  * When neither a block nor a second argument is supplied, an
4286  * <code>Enumerator</code> is returned.
4287  *
4288  * "hello".gsub(/[aeiou]/, '*') #=> "h*ll*"
4289  * "hello".gsub(/([aeiou])/, '<\1>') #=> "h<e>ll<o>"
4290  * "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 "
4291  * "hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}') #=> "h{e}ll{o}"
4292  * 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*"
4293  */
4294 
/*
 * NOTE(review): the signature line (listing line 4296) is absent from this
 * listing.  The body reads argc, argv and str; the name rb_str_gsub is
 * inferred from the preceding rdoc comment -- confirm against the upstream
 * source.
 *
 * Non-destructive global substitution: delegates to str_gsub() with
 * bang = 0 and returns its result.
 */
4295 static VALUE
4297 {
4298  return str_gsub(argc, argv, str, 0);
4299 }
4300 
4301 
4302 /*
4303  * call-seq:
4304  * str.replace(other_str) -> str
4305  *
4306  * Replaces the contents and taintedness of <i>str</i> with the corresponding
4307  * values in <i>other_str</i>.
4308  *
4309  * s = "hello" #=> "hello"
4310  * s.replace "world" #=> "world"
4311  */
4312 
/*
 * NOTE(review): the signature line (listing line 4314) is absent from this
 * listing.  The body reads str and str2; the name rb_str_replace is inferred
 * from the preceding rdoc comment -- confirm against the upstream source.
 *
 * Replaces the content of str with that of str2 and returns the result of
 * str_replace().  Replacing a string with itself returns str right after
 * the str_modifiable() check.
 */
4313 VALUE
4315 {
4316  str_modifiable(str);
 /* self-replacement: nothing to copy */
4317  if (str == str2) return str;
4318 
4319  StringValue(str2);
4320  str_discard(str);
4321  return str_replace(str, str2);
4322 }
4323 
4324 /*
4325  * call-seq:
4326  * string.clear -> string
4327  *
4328  * Makes string empty.
4329  *
4330  * a = "abcde"
4331  * a.clear #=> ""
4332  */
4333 
/*
 * NOTE(review): three lines are absent from this listing: the signature
 * (listing line 4335) and the bodies of the if/else below (listing lines
 * 4342 and 4344).  The body reads str, and the top of the file
 * forward-declares `static VALUE rb_str_clear(VALUE str)`.  What each branch
 * does is not visible here -- confirm against the upstream source.
 *
 * Empties str: discards the current buffer, switches the string to its
 * embedded representation with length 0, writes a terminating 0 byte and
 * returns str.
 */
4334 static VALUE
4336 {
4337  str_discard(str);
4338  STR_SET_EMBED(str);
4339  STR_SET_EMBED_LEN(str, 0);
4340  RSTRING_PTR(str)[0] = 0;
 /* branches on ASCII compatibility of the encoding; both branch bodies are missing from this listing */
4341  if (rb_enc_asciicompat(STR_ENC_GET(str)))
4343  else
4345  return str;
4346 }
4347 
4348 /*
4349  * call-seq:
4350  * string.chr -> string
4351  *
4352  * Returns a one-character string at the beginning of the string.
4353  *
4354  * a = "abcde"
4355  * a.chr #=> "a"
4356  */
4357 
/*
 * NOTE(review): the signature line (listing line 4359) is absent from this
 * listing.  The body reads str; the name rb_str_chr is inferred from the
 * preceding rdoc comment -- confirm against the upstream source.
 *
 * Returns rb_str_substr(str, 0, 1), i.e. the substring of length 1 at the
 * start of str.
 */
4358 static VALUE
4360 {
4361  return rb_str_substr(str, 0, 1);
4362 }
4363 
4364 /*
4365  * call-seq:
4366  * str.getbyte(index) -> 0 .. 255
4367  *
4368  * returns the <i>index</i>th byte as an integer.
4369  */
/*
 * NOTE(review): the signature line (listing line 4371) is absent from this
 * listing.  The body reads str and index; the name rb_str_getbyte is
 * inferred from the preceding rdoc comment -- confirm against the upstream
 * source.
 *
 * Returns the byte at position index as a Fixnum in 0..255, or Qnil when the
 * position lies outside the string.
 */
4370 static VALUE
4372 {
4373  long pos = NUM2LONG(index);
4374 
 /* a negative index counts back from the end of the string */
4375  if (pos < 0)
4376  pos += RSTRING_LEN(str);
4377  if (pos < 0 || RSTRING_LEN(str) <= pos)
4378  return Qnil;
4379 
 /* go through unsigned char so bytes >= 0x80 are not returned as negative values */
4380  return INT2FIX((unsigned char)RSTRING_PTR(str)[pos]);
4381 }
4382 
4383 /*
4384  * call-seq:
4385  * str.setbyte(index, integer) -> integer
4386  *
4387  * sets the <i>index</i>th byte to <i>integer</i>.
4388  */
/*
 * Overwrites one byte of str.
 *
 * index - byte position (converted with NUM2LONG); a negative value counts
 *         back from the end of the string.
 * value - new byte value (converted with NUM2INT and stored into a char).
 *
 * Returns value.  Raises IndexError when the position lies outside
 * -len .. len-1.
 */
4389 static VALUE
4390 rb_str_setbyte(VALUE str, VALUE index, VALUE value)
4391 {
 /* both conversions happen before the string is touched */
4392  long pos = NUM2LONG(index);
4393  int byte = NUM2INT(value);
4394 
 /* note: rb_str_modify() is called before the bounds check */
4395  rb_str_modify(str);
4396 
4397  if (pos < -RSTRING_LEN(str) || RSTRING_LEN(str) <= pos)
4398  rb_raise(rb_eIndexError, "index %ld out of string", pos);
4399  if (pos < 0)
4400  pos += RSTRING_LEN(str);
4401 
4402  RSTRING_PTR(str)[pos] = byte;
4403 
4404  return value;
4405 }
4406 
/*
 * Byte-oriented substring.
 *
 * str - source string.
 * beg - starting byte offset; a negative value counts back from the end.
 * len - number of bytes requested; clipped to the end of the string.
 *
 * Returns Qnil when beg is past the end, when len is negative, or when a
 * negative beg reaches before the start.  Otherwise returns a new string
 * carrying str's encoding (str_enc_copy) and taint (OBJ_INFECT).
 *
 * NOTE(review): listing lines 4442, 4444, 4449 and 4452 -- the statements
 * inside the empty-result if/else and inside both switch cases -- are absent
 * from this listing, so what is set in those branches is not visible here.
 */
4407 static VALUE
4408 str_byte_substr(VALUE str, long beg, long len)
4409 {
4410  char *p, *s = RSTRING_PTR(str);
4411  long n = RSTRING_LEN(str);
4412  VALUE str2;
4413 
4414  if (beg > n || len < 0) return Qnil;
4415  if (beg < 0) {
4416  beg += n;
4417  if (beg < 0) return Qnil;
4418  }
 /* clip the requested length to what is available after beg */
4419  if (beg + len > n)
4420  len = n - beg;
4421  if (len <= 0) {
4422  len = 0;
4423  p = 0;
4424  }
4425  else
4426  p = s + beg;
4427 
 /* a long slice that reaches the end of str reuses str's buffer: only pointer and length are adjusted */
4428  if (len > RSTRING_EMBED_LEN_MAX && beg + len == n) {
4429  str2 = rb_str_new4(str);
4430  str2 = str_new3(rb_obj_class(str2), str2);
4431  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
4432  RSTRING(str2)->as.heap.len = len;
4433  }
4434  else {
4435  str2 = rb_str_new5(str, p, len);
4436  }
4437 
4438  str_enc_copy(str2, str);
4439 
4440  if (RSTRING_LEN(str2) == 0) {
4441  if (!rb_enc_asciicompat(STR_ENC_GET(str)))
4443  else
4445  }
4446  else {
4447  switch (ENC_CODERANGE(str)) {
4448  case ENC_CODERANGE_7BIT:
4450  break;
4451  default:
4453  break;
4454  }
4455  }
4456 
4457  OBJ_INFECT(str2, str);
4458 
4459  return str2;
4460 }
4461 
/*
 * NOTE(review): the signature line (listing line 4463) is absent from this
 * listing.  The body reads str and indx, and another function in this file
 * calls str_byte_aref(str, argv[0]) -- confirm against the upstream source.
 *
 * Single-argument byte slice.  A Fixnum selects one byte; any other value is
 * first tried as a Range via rb_range_beg_len() and otherwise converted with
 * NUM2LONG() and treated like a Fixnum.  Returns Qnil when the selection is
 * out of range or, for the single-byte form, empty.
 */
4462 static VALUE
4464 {
4465  long idx;
4466  switch (TYPE(indx)) {
4467  case T_FIXNUM:
4468  idx = FIX2LONG(indx);
4469 
 /* also the landing point for the NUM2LONG() fallback below */
4470  num_index:
4471  str = str_byte_substr(str, idx, 1);
4472  if (NIL_P(str) || RSTRING_LEN(str) == 0) return Qnil;
4473  return str;
4474 
4475  default:
4476  /* check if indx is Range */
4477  {
4478  long beg, len = RSTRING_LEN(str);
4479 
 /* Qfalse: falls through to the numeric conversion below; Qnil: result is nil */
4480  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
4481  case Qfalse:
4482  break;
4483  case Qnil:
4484  return Qnil;
4485  default:
4486  return str_byte_substr(str, beg, len);
4487  }
4488  }
4489  idx = NUM2LONG(indx);
4490  goto num_index;
4491  }
4492 
4493  UNREACHABLE;
4494 }
4495 
4496 /*
4497  * call-seq:
4498  * str.byteslice(fixnum) -> new_str or nil
4499  * str.byteslice(fixnum, fixnum) -> new_str or nil
4500  * str.byteslice(range) -> new_str or nil
4501  *
4502  * Byte Reference---If passed a single <code>Fixnum</code>, returns a
4503  * substring of one byte at that position. If passed two <code>Fixnum</code>
4504  * objects, returns a substring starting at the offset given by the first, and
4505  * a length given by the second. If given a <code>Range</code>, a substring containing
4506  * bytes at offsets given by the range is returned. In all three cases, if
4507  * an offset is negative, it is counted from the end of <i>str</i>. Returns
4508  * <code>nil</code> if the initial offset falls outside the string, the length
4509  * is negative, or the beginning of the range is greater than the end.
4510  * The resulting string keeps the encoding of the original string.
4511  *
4512  * "hello".byteslice(1) #=> "e"
4513  * "hello".byteslice(-1) #=> "o"
4514  * "hello".byteslice(1, 2) #=> "el"
4515  * "\x80\u3042".byteslice(1, 3) #=> "\u3042"
4516  * "\x03\u3042\xff".byteslice(1..3) #=> "\u3042"
4517  */
4518 
/*
 * NOTE(review): the signature line (listing line 4520) is absent from this
 * listing.  The body reads argc, argv and str; the name rb_str_byteslice is
 * inferred from the preceding rdoc comment -- confirm against the upstream
 * source.
 *
 * Method entry point for byte slicing.  Two arguments are converted with
 * NUM2LONG() and passed to str_byte_substr() as (start, length); otherwise
 * rb_check_arity(argc, 1, 2) is applied and the single argument is handled
 * by str_byte_aref().
 */
4519 static VALUE
4521 {
4522  if (argc == 2) {
4523  return str_byte_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
4524  }
 /* argc == 2 was handled above, so only argc == 1 gets past this check */
4525  rb_check_arity(argc, 1, 2);
4526  return str_byte_aref(str, argv[0]);
4527 }
4528 
4529 /*
4530  * call-seq:
4531  * str.reverse -> new_str
4532  *
4533  * Returns a new string with the characters from <i>str</i> in reverse order.
4534  *
4535  * "stressed".reverse #=> "desserts"
4536  */
4537 
/*
 * NOTE(review): four lines are absent from this listing: the signature
 * (listing line 4539), the bodies of the `if (single)` / `else` branches
 * near the end (listing lines 4583 and 4586), and one statement just before
 * `return rev` (listing line 4589).  The body reads str; the name
 * rb_str_reverse is inferred from the preceding rdoc comment -- confirm
 * against the upstream source.
 *
 * Returns a new string holding the characters of str in reverse order.
 * Strings of length <= 1 are simply duplicated.  Otherwise a buffer of the
 * same length is filled from its end towards its start, one character at a
 * time, so multi-byte characters keep their internal byte order.
 */
4538 static VALUE
4540 {
4541  rb_encoding *enc;
4542  VALUE rev;
4543  char *s, *e, *p;
 /* stays 1 as long as every character seen is a single byte below 0x80 */
4544  int single = 1;
4545 
4546  if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
4547  enc = STR_ENC_GET(str);
4548  rev = rb_str_new5(str, 0, RSTRING_LEN(str));
4549  s = RSTRING_PTR(str); e = RSTRING_END(str);
 /* p walks backwards through the destination buffer */
4550  p = RSTRING_END(rev);
4551 
4552  if (RSTRING_LEN(str) > 1) {
4553  if (single_byte_optimizable(str)) {
4554  while (s < e) {
4555  *--p = *s++;
4556  }
4557  }
 /* known-valid contents use rb_enc_fast_mbclen(); anything else uses rb_enc_mbclen() */
4558  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID) {
4559  while (s < e) {
4560  int clen = rb_enc_fast_mbclen(s, e, enc);
4561 
4562  if (clen > 1 || (*s & 0x80)) single = 0;
4563  p -= clen;
4564  memcpy(p, s, clen);
4565  s += clen;
4566  }
4567  }
4568  else {
4569  while (s < e) {
4570  int clen = rb_enc_mbclen(s, e, enc);
4571 
4572  if (clen > 1 || (*s & 0x80)) single = 0;
4573  p -= clen;
4574  memcpy(p, s, clen);
4575  s += clen;
4576  }
4577  }
4578  }
4579  STR_SET_LEN(rev, RSTRING_LEN(str));
4580  OBJ_INFECT(rev, str);
4581  if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) {
4582  if (single) {
4584  }
4585  else {
4587  }
4588  }
4590 
4591  return rev;
4592 }
4593 
4594 
4595 /*
4596  * call-seq:
4597  * str.reverse! -> str
4598  *
4599  * Reverses <i>str</i> in place.
4600  */
4601 
/*
 * NOTE(review): two lines are absent from this listing: the signature
 * (listing line 4603) and the body of the inner `else` branch (listing line
 * 4619), i.e. how strings that are not single-byte optimizable are handled
 * is not visible here.  The body reads str; the name rb_str_reverse_bang is
 * inferred from the preceding rdoc comment -- confirm against the upstream
 * source.
 *
 * Reverses str in place and returns str.  Single-byte-optimizable strings
 * are reversed by swapping bytes from both ends towards the middle.
 */
4602 static VALUE
4604 {
4605  if (RSTRING_LEN(str) > 1) {
4606  if (single_byte_optimizable(str)) {
4607  char *s, *e, c;
4608 
4609  str_modify_keep_cr(str);
4610  s = RSTRING_PTR(str);
4611  e = RSTRING_END(str) - 1;
 /* classic two-pointer swap */
4612  while (s < e) {
4613  c = *s;
4614  *s++ = *e;
4615  *e-- = c;
4616  }
4617  }
4618  else {
4620  }
4621  }
4622  else {
 /* nothing to reverse, but str_modify_keep_cr() is still applied */
4623  str_modify_keep_cr(str);
4624  }
4625  return str;
4626 }
4627 
4628 
4629 /*
4630  * call-seq:
4631  * str.include? other_str -> true or false
4632  *
4633  * Returns <code>true</code> if <i>str</i> contains the given string or
4634  * character.
4635  *
4636  * "hello".include? "lo" #=> true
4637  * "hello".include? "ol" #=> false
4638  * "hello".include? ?h #=> true
4639  */
4640 
/*
 * NOTE(review): the signature line (listing line 4642) is absent from this
 * listing.  The body reads str and arg; the name rb_str_include is inferred
 * from the preceding rdoc comment -- confirm against the upstream source.
 *
 * Returns Qtrue when rb_str_index() finds arg inside str (searching from
 * offset 0) and Qfalse when it reports -1.  arg is coerced with
 * StringValue() first.
 */
4641 static VALUE
4643 {
4644  long i;
4645 
4646  StringValue(arg);
4647  i = rb_str_index(str, arg, 0);
4648 
4649  if (i == -1) return Qfalse;
4650  return Qtrue;
4651 }
4652 
4653 
4654 /*
4655  * call-seq:
4656  * str.to_i(base=10) -> integer
4657  *
4658  * Returns the result of interpreting leading characters in <i>str</i> as an
4659  * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
4660  * end of a valid number are ignored. If there is not a valid number at the
4661  * start of <i>str</i>, <code>0</code> is returned. This method never raises an
4662  * exception when <i>base</i> is valid.
4663  *
4664  * "12345".to_i #=> 12345
4665  * "99 red balloons".to_i #=> 99
4666  * "0a".to_i #=> 0
4667  * "0a".to_i(16) #=> 10
4668  * "hello".to_i #=> 0
4669  * "1100101".to_i(2) #=> 101
4670  * "1100101".to_i(8) #=> 294977
4671  * "1100101".to_i(10) #=> 1100101
4672  * "1100101".to_i(16) #=> 17826049
4673  */
4674 
/*
 * NOTE(review): the signature line (listing line 4676) is absent from this
 * listing.  The body reads argc, argv and str; the name rb_str_to_i is
 * inferred from the preceding rdoc comment -- confirm against the upstream
 * source.
 *
 * Converts str to an integer through rb_str_to_inum(str, base, FALSE).
 * The base defaults to 10 and may be given as one optional argument.
 * Raises ArgumentError ("invalid radix") for a negative base; any further
 * validation of the base is presumably left to rb_str_to_inum() -- confirm.
 */
4675 static VALUE
4677 {
4678  int base;
4679 
4680  if (argc == 0) base = 10;
4681  else {
4682  VALUE b;
4683 
4684  rb_scan_args(argc, argv, "01", &b);
4685  base = NUM2INT(b);
4686  }
4687  if (base < 0) {
4688  rb_raise(rb_eArgError, "invalid radix %d", base);
4689  }
4690  return rb_str_to_inum(str, base, FALSE);
4691 }
4692 
4693 
4694 /*
4695  * call-seq:
4696  * str.to_f -> float
4697  *
4698  * Returns the result of interpreting leading characters in <i>str</i> as a
4699  * floating point number. Extraneous characters past the end of a valid number
4700  * are ignored. If there is not a valid number at the start of <i>str</i>,
4701  * <code>0.0</code> is returned. This method never raises an exception.
4702  *
4703  * "123.45e1".to_f #=> 1234.5
4704  * "45.67 degrees".to_f #=> 45.67
4705  * "thx1138".to_f #=> 0.0
4706  */
4707 
/*
 * NOTE(review): the signature line (listing line 4709) is absent from this
 * listing.  The body reads str; the name rb_str_to_f is inferred from the
 * preceding rdoc comment -- confirm against the upstream source.
 *
 * Returns rb_str_to_dbl(str, FALSE) wrapped as a Ruby number with
 * DBL2NUM().
 */
4708 static VALUE
4710 {
4711  return DBL2NUM(rb_str_to_dbl(str, FALSE));
4712 }
4713 
4714 
4715 /*
4716  * call-seq:
4717  * str.to_s -> str
4718  * str.to_str -> str
4719  *
4720  * Returns the receiver.
4721  */
4722 
/*
 * NOTE(review): the signature line (listing line 4724) is absent from this
 * listing.  The body reads str; the name rb_str_to_s is inferred from the
 * preceding rdoc comment -- confirm against the upstream source.
 *
 * Returns str itself when its class is exactly rb_cString; for any other
 * class it returns str_duplicate(rb_cString, str) instead.
 */
4723 static VALUE
4725 {
4726  if (rb_obj_class(str) != rb_cString) {
4727  return str_duplicate(rb_cString, str);
4728  }
4729  return str;
4730 }
4731 
/*
 * Compiled out by `#if 0`.  Encodes code point c in encoding enc into a
 * local buffer of RUBY_MAX_CHAR_LEN bytes and appends the rb_enc_codelen()
 * bytes to str.
 */
4732 #if 0
4733 static void
4734 str_cat_char(VALUE str, unsigned int c, rb_encoding *enc)
4735 {
4736  char s[RUBY_MAX_CHAR_LEN];
4737  int n = rb_enc_codelen(c, enc);
4738 
4739  rb_enc_mbcput(c, s, enc);
4740  rb_enc_str_buf_cat(str, s, n, enc);
4741 }
4742 #endif
4743 
/* Size passed to snprintf() when formatting one escaped character. */
4744 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
4745 
/*
 * Appends a textual escape for code point c to result.
 *
 * result    - destination string buffer (appended to with rb_str_buf_cat).
 * c         - code point; masked to 32 bits when int is wider than 4 bytes.
 * unicode_p - non-zero selects the \u forms, zero the \x forms.
 *
 * Unicode mode: a printable value below 0x7F is emitted as the character
 * itself, a value below 0x10000 as \uXXXX, anything else as \u{X...}.
 * Otherwise: a value below 0x100 is emitted as \xXX, anything else as
 * \x{X...}.
 *
 * Returns the number of bytes appended.
 */
4746 int
4747 rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
4748 {
4749  char buf[CHAR_ESC_LEN + 1];
4750  int l;
4751 
4752 #if SIZEOF_INT > 4
4753  c &= 0xffffffff;
4754 #endif
4755  if (unicode_p) {
4756  if (c < 0x7F && ISPRINT(c)) {
4757  snprintf(buf, CHAR_ESC_LEN, "%c", c);
4758  }
4759  else if (c < 0x10000) {
4760  snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c);
4761  }
4762  else {
4763  snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c);
4764  }
4765  }
4766  else {
4767  if (c < 0x100) {
4768  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c);
4769  }
4770  else {
4771  snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c);
4772  }
4773  }
4774  l = (int)strlen(buf); /* CHAR_ESC_LEN cannot exceed INT_MAX */
4775  rb_str_buf_cat(result, buf, l);
4776  return l;
4777 }
4778 
4779 /*
4780  * call-seq:
4781  * str.inspect -> string
4782  *
4783  * Returns a printable version of _str_, surrounded by quote marks,
4784  * with special characters escaped.
4785  *
4786  * str = "hello"
4787  * str[3] = "\b"
4788  * str.inspect #=> "\"hel\\bo\""
4789  */
4790 
/*
 * NOTE(review): several lines are absent from this listing: the signature
 * (listing line 4792), listing lines 4798-4799 (presumably the declarations
 * of `result` and `resenc`, which are used below without a visible
 * declaration), and listing line 4839 (one operand of the `#` look-ahead
 * condition).  The body reads str; the name rb_str_inspect is inferred from
 * the preceding rdoc comment -- confirm against the upstream source.
 *
 * Builds a double-quoted, escaped rendering of str in `result` and returns
 * it.  The result's encoding is resenc, falling back to the default external
 * encoding when resenc is NULL and to US-ASCII when that encoding is not
 * ASCII compatible.  Runs of bytes that need no escaping are copied in bulk:
 * `prev` marks the start of the pending unescaped run and `p` the scan
 * position.
 */
4791 VALUE
4793 {
4794  int encidx = ENCODING_GET(str);
4795  rb_encoding *enc = rb_enc_from_index(encidx), *actenc;
4796  const char *p, *pend, *prev;
4797  char buf[CHAR_ESC_LEN + 1];
4800  int unicode_p = rb_enc_unicode_p(enc);
4801  int asciicompat = rb_enc_asciicompat(enc);
4802 
4803  if (resenc == NULL) resenc = rb_default_external_encoding();
4804  if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
4805  rb_enc_associate(result, resenc);
4806  str_buf_cat2(result, "\"");
4807 
4808  p = RSTRING_PTR(str); pend = RSTRING_END(str);
4809  prev = p;
 /* scan with the encoding reported by get_actual_encoding() when it differs from the declared one */
4810  actenc = get_actual_encoding(encidx, str);
4811  if (actenc != enc) {
4812  enc = actenc;
4813  if (unicode_p) unicode_p = rb_enc_unicode_p(enc);
4814  }
4815  while (p < pend) {
4816  unsigned int c, cc;
4817  int n;
4818 
4819  n = rb_enc_precise_mbclen(p, pend, enc);
4820  if (!MBCLEN_CHARFOUND_P(n)) {
 /* no complete character here: flush the pending run, then emit up to mbminlen raw bytes as \xNN */
4821  if (p > prev) str_buf_cat(result, prev, p - prev);
4822  n = rb_enc_mbminlen(enc);
4823  if (pend < p + n)
4824  n = (int)(pend - p);
4825  while (n--) {
4826  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
4827  str_buf_cat(result, buf, strlen(buf));
4828  prev = ++p;
4829  }
4830  continue;
4831  }
4832  n = MBCLEN_CHARFOUND_LEN(n);
4833  c = rb_enc_mbc_to_codepoint(p, pend, enc);
4834  p += n;
 /* `"` and `\` always get a backslash; `#` only when followed by `$`, `@` or `{` */
4835  if ((asciicompat || unicode_p) &&
4836  (c == '"'|| c == '\\' ||
4837  (c == '#' &&
4838  p < pend &&
4840  (cc = rb_enc_codepoint(p,pend,enc),
4841  (cc == '$' || cc == '@' || cc == '{'))))) {
4842  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4843  str_buf_cat2(result, "\\");
4844  if (asciicompat || enc == resenc) {
 /* the character itself stays in the pending run and is copied verbatim later */
4845  prev = p - n;
4846  continue;
4847  }
4848  }
 /* control characters that have a short backslash escape */
4849  switch (c) {
4850  case '\n': cc = 'n'; break;
4851  case '\r': cc = 'r'; break;
4852  case '\t': cc = 't'; break;
4853  case '\f': cc = 'f'; break;
4854  case '\013': cc = 'v'; break;
4855  case '\010': cc = 'b'; break;
4856  case '\007': cc = 'a'; break;
4857  case 033: cc = 'e'; break;
4858  default: cc = 0; break;
4859  }
4860  if (cc) {
4861  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4862  buf[0] = '\\';
4863  buf[1] = (char)cc;
4864  str_buf_cat(result, buf, 2);
4865  prev = p;
4866  continue;
4867  }
 /* printable characters stay in the pending run; everything else is escaped numerically */
4868  if ((enc == resenc && rb_enc_isprint(c, enc)) ||
4869  (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
4870  continue;
4871  }
4872  else {
4873  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4874  rb_str_buf_cat_escaped_char(result, c, unicode_p);
4875  prev = p;
4876  continue;
4877  }
4878  }
 /* flush the final pending run and close the quote */
4879  if (p > prev) str_buf_cat(result, prev, p - prev);
4880  str_buf_cat2(result, "\"");
4881 
4882  OBJ_INFECT(result, str);
4883  return result;
4884 }
4885 
/* True when p is still before e and points at `$`, `@` or `{`; p is evaluated more than once. */
4886 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
4887 
4888 /*
4889  * call-seq:
4890  * str.dump -> new_str
4891  *
4892  * Produces a version of +str+ with all non-printing characters replaced by
4893  * <code>\nnn</code> notation and all special characters escaped.
4894  *
4895  * "hello \n ''".dump #=> "\"hello \\n ''\""
4896  */
4897 
/*
 * NOTE(review): two lines are absent from this listing: the signature
 * (listing line 4899) and listing line 5025 (one statement between
 * rb_enc_associate() and `return result`).  The body reads str; the name
 * rb_str_dump is inferred from the preceding rdoc comment -- confirm against
 * the upstream source.
 *
 * Produces a double-quoted dump of str in which special and non-printing
 * bytes are escaped.  Works in two passes over the bytes of str: the first
 * computes the exact output length, the second writes into a buffer of that
 * length.  For an encoding that is not ASCII compatible a
 * `.force_encoding("<name>")` suffix is appended and the result is tagged
 * with the encoding returned by rb_ascii8bit_encoding().
 */
4898 VALUE
4900 {
4901  rb_encoding *enc = rb_enc_get(str);
4902  long len;
4903  const char *p, *pend;
4904  char *q, *qend;
4905  VALUE result;
4906  int u8 = (enc == rb_utf8_encoding());
4907 
 /* pass 1: compute the output length */
4908  len = 2; /* "" */
4909  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
4910  while (p < pend) {
4911  unsigned char c = *p++;
4912  switch (c) {
4913  case '"': case '\\':
4914  case '\n': case '\r':
4915  case '\t': case '\f':
4916  case '\013': case '\010': case '\007': case '\033':
4917  len += 2;
4918  break;
4919 
4920  case '#':
4921  len += IS_EVSTR(p, pend) ? 2 : 1;
4922  break;
4923 
4924  default:
4925  if (ISPRINT(c)) {
4926  len++;
4927  }
4928  else {
4929  if (u8 && c > 0x7F) { /* \u{NN} */
4930  int n = rb_enc_precise_mbclen(p-1, pend, enc);
4931  if (MBCLEN_CHARFOUND_P(n)) {
4932  unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
 /* one per hex digit beyond the first; the first digit and `\u{` `}` are the 5 below */
4933  while (cc >>= 4) len++;
4934  len += 5;
4935  p += MBCLEN_CHARFOUND_LEN(n)-1;
4936  break;
4937  }
4938  }
4939  len += 4; /* \xNN */
4940  }
4941  break;
4942  }
4943  }
4944  if (!rb_enc_asciicompat(enc)) {
4945  len += 19; /* ".force_encoding('')" */
4946  len += strlen(enc->name);
4947  }
4948 
 /* pass 2: write the escaped text */
4949  result = rb_str_new5(str, 0, len);
4950  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
 /* qend is one past len so snprintf() has room for its terminating NUL */
4951  q = RSTRING_PTR(result); qend = q + len + 1;
4952 
4953  *q++ = '"';
4954  while (p < pend) {
4955  unsigned char c = *p++;
4956 
4957  if (c == '"' || c == '\\') {
4958  *q++ = '\\';
4959  *q++ = c;
4960  }
4961  else if (c == '#') {
4962  if (IS_EVSTR(p, pend)) *q++ = '\\';
4963  *q++ = '#';
4964  }
4965  else if (c == '\n') {
4966  *q++ = '\\';
4967  *q++ = 'n';
4968  }
4969  else if (c == '\r') {
4970  *q++ = '\\';
4971  *q++ = 'r';
4972  }
4973  else if (c == '\t') {
4974  *q++ = '\\';
4975  *q++ = 't';
4976  }
4977  else if (c == '\f') {
4978  *q++ = '\\';
4979  *q++ = 'f';
4980  }
4981  else if (c == '\013') {
4982  *q++ = '\\';
4983  *q++ = 'v';
4984  }
4985  else if (c == '\010') {
4986  *q++ = '\\';
4987  *q++ = 'b';
4988  }
4989  else if (c == '\007') {
4990  *q++ = '\\';
4991  *q++ = 'a';
4992  }
4993  else if (c == '\033') {
4994  *q++ = '\\';
4995  *q++ = 'e';
4996  }
4997  else if (ISPRINT(c)) {
4998  *q++ = c;
4999  }
5000  else {
5001  *q++ = '\\';
5002  if (u8) {
 /* NOTE(review): MBCLEN_CHARFOUND_P() is applied after subtracting 1, unlike pass 1 which tests the raw result and also requires c > 0x7F -- confirm both passes always agree */
5003  int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
5004  if (MBCLEN_CHARFOUND_P(n)) {
5005  int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
5006  p += n;
5007  snprintf(q, qend-q, "u{%x}", cc);
5008  q += strlen(q);
5009  continue;
5010  }
5011  }
5012  snprintf(q, qend-q, "x%02X", c);
5013  q += 3;
5014  }
5015  }
5016  *q++ = '"';
5017  *q = '\0';
5018  if (!rb_enc_asciicompat(enc)) {
5019  snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name);
5020  enc = rb_ascii8bit_encoding();
5021  }
5022  OBJ_INFECT(result, str);
5023  /* result from dump is ASCII */
5024  rb_enc_associate(result, enc);
5026  return result;
5027 }
5028 
5029 
5030 static void
5032 {
5033  if (rb_enc_dummy_p(enc)) {
5034  rb_raise(rb_eEncCompatError, "incompatible encoding with this operation: %s",
5035  rb_enc_name(enc));
5036  }
5037 }
5038 
5039 /*
5040  * call-seq:
5041  * str.upcase! -> str or nil
5042  *
5043  * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
5044  * were made.
5045  * Note: case replacement is effective only in ASCII region.
5046  */
5047 
5048 static VALUE
5050 {
5051  rb_encoding *enc;
5052  char *s, *send;
5053  int modify = 0;
5054  int n;
5055 
5056  str_modify_keep_cr(str);
5057  enc = STR_ENC_GET(str);
5059  s = RSTRING_PTR(str); send = RSTRING_END(str);
5060  if (single_byte_optimizable(str)) {
5061  while (s < send) {
5062  unsigned int c = *(unsigned char*)s;
5063 
5064  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
5065  *s = 'A' + (c - 'a');
5066  modify = 1;
5067  }
5068  s++;
5069  }
5070  }
5071  else {
5072  int ascompat = rb_enc_asciicompat(enc);
5073 
5074  while (s < send) {
5075  unsigned int c;
5076 
5077  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5078  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
5079  *s = 'A' + (c - 'a');
5080  modify = 1;
5081  }
5082  s++;
5083  }
5084  else {
5085  c = rb_enc_codepoint_len(s, send, &n, enc);
5086  if (rb_enc_islower(c, enc)) {
5087  /* assuming toupper returns codepoint with same size */
5088  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
5089  modify = 1;
5090  }
5091  s += n;
5092  }
5093  }
5094  }
5095 
5096  if (modify) return str;
5097  return Qnil;
5098 }
5099 
5100 
5101 /*
5102  * call-seq:
5103  * str.upcase -> new_str
5104  *
5105  * Returns a copy of <i>str</i> with all lowercase letters replaced with their
5106  * uppercase counterparts. The operation is locale insensitive---only
5107  * characters ``a'' to ``z'' are affected.
5108  * Note: case replacement is effective only in ASCII region.
5109  *
5110  * "hEllO".upcase #=> "HELLO"
5111  */
5112 
5113 static VALUE
5115 {
5116  str = rb_str_dup(str);
5117  rb_str_upcase_bang(str);
5118  return str;
5119 }
5120 
5121 
5122 /*
5123  * call-seq:
5124  * str.downcase! -> str or nil
5125  *
5126  * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
5127  * changes were made.
5128  * Note: case replacement is effective only in ASCII region.
5129  */
5130 
5131 static VALUE
5133 {
5134  rb_encoding *enc;
5135  char *s, *send;
5136  int modify = 0;
5137 
5138  str_modify_keep_cr(str);
5139  enc = STR_ENC_GET(str);
5141  s = RSTRING_PTR(str); send = RSTRING_END(str);
5142  if (single_byte_optimizable(str)) {
5143  while (s < send) {
5144  unsigned int c = *(unsigned char*)s;
5145 
5146  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
5147  *s = 'a' + (c - 'A');
5148  modify = 1;
5149  }
5150  s++;
5151  }
5152  }
5153  else {
5154  int ascompat = rb_enc_asciicompat(enc);
5155 
5156  while (s < send) {
5157  unsigned int c;
5158  int n;
5159 
5160  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5161  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
5162  *s = 'a' + (c - 'A');
5163  modify = 1;
5164  }
5165  s++;
5166  }
5167  else {
5168  c = rb_enc_codepoint_len(s, send, &n, enc);
5169  if (rb_enc_isupper(c, enc)) {
5170  /* assuming toupper returns codepoint with same size */
5171  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
5172  modify = 1;
5173  }
5174  s += n;
5175  }
5176  }
5177  }
5178 
5179  if (modify) return str;
5180  return Qnil;
5181 }
5182 
5183 
5184 /*
5185  * call-seq:
5186  * str.downcase -> new_str
5187  *
5188  * Returns a copy of <i>str</i> with all uppercase letters replaced with their
5189  * lowercase counterparts. The operation is locale insensitive---only
5190  * characters ``A'' to ``Z'' are affected.
5191  * Note: case replacement is effective only in ASCII region.
5192  *
5193  * "hEllO".downcase #=> "hello"
5194  */
5195 
5196 static VALUE
5198 {
5199  str = rb_str_dup(str);
5200  rb_str_downcase_bang(str);
5201  return str;
5202 }
5203 
5204 
5205 /*
5206  * call-seq:
5207  * str.capitalize! -> str or nil
5208  *
5209  * Modifies <i>str</i> by converting the first character to uppercase and the
5210  * remainder to lowercase. Returns <code>nil</code> if no changes are made.
5211  * Note: case conversion is effective only in ASCII region.
5212  *
5213  * a = "hello"
5214  * a.capitalize! #=> "Hello"
5215  * a #=> "Hello"
5216  * a.capitalize! #=> nil
5217  */
5218 
5219 static VALUE
5221 {
5222  rb_encoding *enc;
5223  char *s, *send;
5224  int modify = 0;
5225  unsigned int c;
5226  int n;
5227 
5228  str_modify_keep_cr(str);
5229  enc = STR_ENC_GET(str);
5231  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5232  s = RSTRING_PTR(str); send = RSTRING_END(str);
5233 
5234  c = rb_enc_codepoint_len(s, send, &n, enc);
5235  if (rb_enc_islower(c, enc)) {
5236  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
5237  modify = 1;
5238  }
5239  s += n;
5240  while (s < send) {
5241  c = rb_enc_codepoint_len(s, send, &n, enc);
5242  if (rb_enc_isupper(c, enc)) {
5243  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
5244  modify = 1;
5245  }
5246  s += n;
5247  }
5248 
5249  if (modify) return str;
5250  return Qnil;
5251 }
5252 
5253 
5254 /*
5255  * call-seq:
5256  * str.capitalize -> new_str
5257  *
5258  * Returns a copy of <i>str</i> with the first character converted to uppercase
5259  * and the remainder to lowercase.
5260  * Note: case conversion is effective only in ASCII region.
5261  *
5262  * "hello".capitalize #=> "Hello"
5263  * "HELLO".capitalize #=> "Hello"
5264  * "123ABC".capitalize #=> "123abc"
5265  */
5266 
5267 static VALUE
5269 {
5270  str = rb_str_dup(str);
5272  return str;
5273 }
5274 
5275 
5276 /*
5277  * call-seq:
5278  * str.swapcase! -> str or nil
5279  *
5280  * Equivalent to <code>String#swapcase</code>, but modifies the receiver in
5281  * place, returning <i>str</i>, or <code>nil</code> if no changes were made.
5282  * Note: case conversion is effective only in ASCII region.
5283  */
5284 
5285 static VALUE
5287 {
5288  rb_encoding *enc;
5289  char *s, *send;
5290  int modify = 0;
5291  int n;
5292 
5293  str_modify_keep_cr(str);
5294  enc = STR_ENC_GET(str);
5296  s = RSTRING_PTR(str); send = RSTRING_END(str);
5297  while (s < send) {
5298  unsigned int c = rb_enc_codepoint_len(s, send, &n, enc);
5299 
5300  if (rb_enc_isupper(c, enc)) {
5301  /* assuming toupper returns codepoint with same size */
5302  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
5303  modify = 1;
5304  }
5305  else if (rb_enc_islower(c, enc)) {
5306  /* assuming tolower returns codepoint with same size */
5307  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
5308  modify = 1;
5309  }
5310  s += n;
5311  }
5312 
5313  if (modify) return str;
5314  return Qnil;
5315 }
5316 
5317 
5318 /*
5319  * call-seq:
5320  * str.swapcase -> new_str
5321  *
5322  * Returns a copy of <i>str</i> with uppercase alphabetic characters converted
5323  * to lowercase and lowercase characters converted to uppercase.
5324  * Note: case conversion is effective only in ASCII region.
5325  *
5326  * "Hello".swapcase #=> "hELLO"
5327  * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11"
5328  */
5329 
5330 static VALUE
5332 {
5333  str = rb_str_dup(str);
5334  rb_str_swapcase_bang(str);
5335  return str;
5336 }
5337 
typedef unsigned char *USTR;

/*
 * Cursor over a tr-style character specification ("a-z", "^abc", ...),
 * advanced one code point at a time by trnext().
 */
struct tr {
    int gen;                    /* non-zero while a "c1-c2" range is being expanded */
    unsigned int now, max;      /* current code point / last code point of the range */
    char *p, *pend;             /* read position and end of the specification string */
};
5345 
5346 static unsigned int
5347 trnext(struct tr *t, rb_encoding *enc)
5348 {
5349  int n;
5350 
5351  for (;;) {
5352  if (!t->gen) {
5353 nextpart:
5354  if (t->p == t->pend) return -1;
5355  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '\\' && t->p + n < t->pend) {
5356  t->p += n;
5357  }
5358  t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5359  t->p += n;
5360  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '-' && t->p + n < t->pend) {
5361  t->p += n;
5362  if (t->p < t->pend) {
5363  unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5364  t->p += n;
5365  if (t->now > c) {
5366  if (t->now < 0x80 && c < 0x80) {
5368  "invalid range \"%c-%c\" in string transliteration",
5369  t->now, c);
5370  }
5371  else {
5372  rb_raise(rb_eArgError, "invalid range in string transliteration");
5373  }
5374  continue; /* not reached */
5375  }
5376  t->gen = 1;
5377  t->max = c;
5378  }
5379  }
5380  return t->now;
5381  }
5382  else {
5383  while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
5384  if (t->now == t->max) {
5385  t->gen = 0;
5386  goto nextpart;
5387  }
5388  }
5389  if (t->now < t->max) {
5390  return t->now;
5391  }
5392  else {
5393  t->gen = 0;
5394  return t->max;
5395  }
5396  }
5397  }
5398 }
5399 
5400 static VALUE rb_str_delete_bang(int,VALUE*,VALUE);
5401 
5402 static VALUE
5403 tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
5404 {
5405  const unsigned int errc = -1;
5406  unsigned int trans[256];
5407  rb_encoding *enc, *e1, *e2;
5408  struct tr trsrc, trrepl;
5409  int cflag = 0;
5410  unsigned int c, c0, last = 0;
5411  int modify = 0, i, l;
5412  char *s, *send;
5413  VALUE hash = 0;
5414  int singlebyte = single_byte_optimizable(str);
5415  int cr;
5416 
5417 #define CHECK_IF_ASCII(c) \
5418  (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
5419  (cr = ENC_CODERANGE_VALID) : 0)
5420 
5421  StringValue(src);
5422  StringValue(repl);
5423  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5424  if (RSTRING_LEN(repl) == 0) {
5425  return rb_str_delete_bang(1, &src, str);
5426  }
5427 
5428  cr = ENC_CODERANGE(str);
5429  e1 = rb_enc_check(str, src);
5430  e2 = rb_enc_check(str, repl);
5431  if (e1 == e2) {
5432  enc = e1;
5433  }
5434  else {
5435  enc = rb_enc_check(src, repl);
5436  }
5437  trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
5438  if (RSTRING_LEN(src) > 1 &&
5439  rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) == '^' &&
5440  trsrc.p + l < trsrc.pend) {
5441  cflag = 1;
5442  trsrc.p += l;
5443  }
5444  trrepl.p = RSTRING_PTR(repl);
5445  trrepl.pend = trrepl.p + RSTRING_LEN(repl);
5446  trsrc.gen = trrepl.gen = 0;
5447  trsrc.now = trrepl.now = 0;
5448  trsrc.max = trrepl.max = 0;
5449 
5450  if (cflag) {
5451  for (i=0; i<256; i++) {
5452  trans[i] = 1;
5453  }
5454  while ((c = trnext(&trsrc, enc)) != errc) {
5455  if (c < 256) {
5456  trans[c] = errc;
5457  }
5458  else {
5459  if (!hash) hash = rb_hash_new();
5460  rb_hash_aset(hash, UINT2NUM(c), Qtrue);
5461  }
5462  }
5463  while ((c = trnext(&trrepl, enc)) != errc)
5464  /* retrieve last replacer */;
5465  last = trrepl.now;
5466  for (i=0; i<256; i++) {
5467  if (trans[i] != errc) {
5468  trans[i] = last;
5469  }
5470  }
5471  }
5472  else {
5473  unsigned int r;
5474 
5475  for (i=0; i<256; i++) {
5476  trans[i] = errc;
5477  }
5478  while ((c = trnext(&trsrc, enc)) != errc) {
5479  r = trnext(&trrepl, enc);
5480  if (r == errc) r = trrepl.now;
5481  if (c < 256) {
5482  trans[c] = r;
5483  if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
5484  }
5485  else {
5486  if (!hash) hash = rb_hash_new();
5487  rb_hash_aset(hash, UINT2NUM(c), UINT2NUM(r));
5488  }
5489  }
5490  }
5491 
5492  if (cr == ENC_CODERANGE_VALID)
5493  cr = ENC_CODERANGE_7BIT;
5494  str_modify_keep_cr(str);
5495  s = RSTRING_PTR(str); send = RSTRING_END(str);
5496  if (sflag) {
5497  int clen, tlen;
5498  long offset, max = RSTRING_LEN(str);
5499  unsigned int save = -1;
5500  char *buf = ALLOC_N(char, max), *t = buf;
5501 
5502  while (s < send) {
5503  int may_modify = 0;
5504 
5505  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5506  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5507 
5508  s += clen;
5509  if (c < 256) {
5510  c = trans[c];
5511  }
5512  else if (hash) {
5513  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5514  if (NIL_P(tmp)) {
5515  if (cflag) c = last;
5516  else c = errc;
5517  }
5518  else if (cflag) c = errc;
5519  else c = NUM2INT(tmp);
5520  }
5521  else {
5522  c = errc;
5523  }
5524  if (c != (unsigned int)-1) {
5525  if (save == c) {
5526  CHECK_IF_ASCII(c);
5527  continue;
5528  }
5529  save = c;
5530  tlen = rb_enc_codelen(c, enc);
5531  modify = 1;
5532  }
5533  else {
5534  save = -1;
5535  c = c0;
5536  if (enc != e1) may_modify = 1;
5537  }
5538  while (t - buf + tlen >= max) {
5539  offset = t - buf;
5540  max *= 2;
5541  REALLOC_N(buf, char, max);
5542  t = buf + offset;
5543  }
5544  rb_enc_mbcput(c, t, enc);
5545  if (may_modify && memcmp(s, t, tlen) != 0) {
5546  modify = 1;
5547  }
5548  CHECK_IF_ASCII(c);
5549  t += tlen;
5550  }
5551  if (!STR_EMBED_P(str)) {
5553  }
5554  *t = '\0';
5555  RSTRING(str)->as.heap.ptr = buf;
5556  RSTRING(str)->as.heap.len = t - buf;
5557  STR_SET_NOEMBED(str);
5558  RSTRING(str)->as.heap.aux.capa = max;
5559  }
5560  else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) {
5561  while (s < send) {
5562  c = (unsigned char)*s;
5563  if (trans[c] != errc) {
5564  if (!cflag) {
5565  c = trans[c];
5566  *s = c;
5567  modify = 1;
5568  }
5569  else {
5570  *s = last;
5571  modify = 1;
5572  }
5573  }
5574  CHECK_IF_ASCII(c);
5575  s++;
5576  }
5577  }
5578  else {
5579  int clen, tlen, max = (int)(RSTRING_LEN(str) * 1.2);
5580  long offset;
5581  char *buf = ALLOC_N(char, max), *t = buf;
5582 
5583  while (s < send) {
5584  int may_modify = 0;
5585  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5586  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5587 
5588  if (c < 256) {
5589  c = trans[c];
5590  }
5591  else if (hash) {
5592  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5593  if (NIL_P(tmp)) {
5594  if (cflag) c = last;
5595  else c = errc;
5596  }
5597  else if (cflag) c = errc;
5598  else c = NUM2INT(tmp);
5599  }
5600  else {
5601  c = cflag ? last : errc;
5602  }
5603  if (c != errc) {
5604  tlen = rb_enc_codelen(c, enc);
5605  modify = 1;
5606  }
5607  else {
5608  c = c0;
5609  if (enc != e1) may_modify = 1;
5610  }
5611  while (t - buf + tlen >= max) {
5612  offset = t - buf;
5613  max *= 2;
5614  REALLOC_N(buf, char, max);
5615  t = buf + offset;
5616  }
5617  if (s != t) {
5618  rb_enc_mbcput(c, t, enc);
5619  if (may_modify && memcmp(s, t, tlen) != 0) {
5620  modify = 1;
5621  }
5622  }
5623  CHECK_IF_ASCII(c);
5624  s += clen;
5625  t += tlen;
5626  }
5627  if (!STR_EMBED_P(str)) {
5629  }
5630  *t = '\0';
5631  RSTRING(str)->as.heap.ptr = buf;
5632  RSTRING(str)->as.heap.len = t - buf;
5633  STR_SET_NOEMBED(str);
5634  RSTRING(str)->as.heap.aux.capa = max;
5635  }
5636 
5637  if (modify) {
5638  if (cr != ENC_CODERANGE_BROKEN)
5639  ENC_CODERANGE_SET(str, cr);
5640  rb_enc_associate(str, enc);
5641  return str;
5642  }
5643  return Qnil;
5644 }
5645 
5646 
5647 /*
5648  * call-seq:
5649  * str.tr!(from_str, to_str) -> str or nil
5650  *
5651  * Translates <i>str</i> in place, using the same rules as
5652  * <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no
5653  * changes were made.
5654  */
5655 
5656 static VALUE
5658 {
5659  return tr_trans(str, src, repl, 0);
5660 }
5661 
5662 
5663 /*
5664  * call-seq:
5665  * str.tr(from_str, to_str) => new_str
5666  *
5667  * Returns a copy of +str+ with the characters in +from_str+ replaced by the
5668  * corresponding characters in +to_str+. If +to_str+ is shorter than
5669  * +from_str+, it is padded with its last character in order to maintain the
5670  * correspondence.
5671  *
5672  * "hello".tr('el', 'ip') #=> "hippo"
5673  * "hello".tr('aeiou', '*') #=> "h*ll*"
5674  * "hello".tr('aeiou', 'AA*') #=> "hAll*"
5675  *
5676  * Both strings may use the <code>c1-c2</code> notation to denote ranges of
5677  * characters, and +from_str+ may start with a <code>^</code>, which denotes
5678  * all characters except those listed.
5679  *
5680  * "hello".tr('a-y', 'b-z') #=> "ifmmp"
5681  * "hello".tr('^aeiou', '*') #=> "*e**o"
5682  *
5683  * The backslash character <code></code> can be used to escape
5684  * <code>^</code> or <code>-</code> and is otherwise ignored unless it
5685  * appears at the end of a range or the end of the +from_str+ or +to_str+:
5686  *
5687  * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
5688  * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld"
5689  *
5690  * "hello\r\nworld".tr("\r", "") #=> "hello\nworld"
5691  * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold"
5692  * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"
5693  *
5694  * "X['\\b']".tr("X\\", "") #=> "['b']"
5695  * "X['\\b']".tr("X-\\]", "") #=> "'b'"
5696  */
5697 
5698 static VALUE
5699 rb_str_tr(VALUE str, VALUE src, VALUE repl)
5700 {
5701  str = rb_str_dup(str);
5702  tr_trans(str, src, repl, 0);
5703  return str;
5704 }
5705 
5706 #define TR_TABLE_SIZE 257
5707 static void
5708 tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first,
5709  VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
5710 {
5711  const unsigned int errc = -1;
5712  char buf[256];
5713  struct tr tr;
5714  unsigned int c;
5715  VALUE table = 0, ptable = 0;
5716  int i, l, cflag = 0;
5717 
5718  tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
5719  tr.gen = tr.now = tr.max = 0;
5720 
5721  if (RSTRING_LEN(str) > 1 && rb_enc_ascget(tr.p, tr.pend, &l, enc) == '^') {
5722  cflag = 1;
5723  tr.p += l;
5724  }
5725  if (first) {
5726  for (i=0; i<256; i++) {
5727  stable[i] = 1;
5728  }
5729  stable[256] = cflag;
5730  }
5731  else if (stable[256] && !cflag) {
5732  stable[256] = 0;
5733  }
5734  for (i=0; i<256; i++) {
5735  buf[i] = cflag;
5736  }
5737 
5738  while ((c = trnext(&tr, enc)) != errc) {
5739  if (c < 256) {
5740  buf[c & 0xff] = !cflag;
5741  }
5742  else {
5743  VALUE key = UINT2NUM(c);
5744 
5745  if (!table && (first || *tablep || stable[256])) {
5746  if (cflag) {
5747  ptable = *ctablep;
5748  table = ptable ? ptable : rb_hash_new();
5749  *ctablep = table;
5750  }
5751  else {
5752  table = rb_hash_new();
5753  ptable = *tablep;
5754  *tablep = table;
5755  }
5756  }
5757  if (table && (!ptable || (cflag ^ !NIL_P(rb_hash_aref(ptable, key))))) {
5758  rb_hash_aset(table, key, Qtrue);
5759  }
5760  }
5761  }
5762  for (i=0; i<256; i++) {
5763  stable[i] = stable[i] && buf[i];
5764  }
5765  if (!table && !cflag) {
5766  *tablep = 0;
5767  }
5768 }
5769 
5770 
5771 static int
5772 tr_find(unsigned int c, const char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
5773 {
5774  if (c < 256) {
5775  return table[c] != 0;
5776  }
5777  else {
5778  VALUE v = UINT2NUM(c);
5779 
5780  if (del) {
5781  if (!NIL_P(rb_hash_lookup(del, v)) &&
5782  (!nodel || NIL_P(rb_hash_lookup(nodel, v)))) {
5783  return TRUE;
5784  }
5785  }
5786  else if (nodel && !NIL_P(rb_hash_lookup(nodel, v))) {
5787  return FALSE;
5788  }
5789  return table[256] ? TRUE : FALSE;
5790  }
5791 }
5792 
5793 /*
5794  * call-seq:
5795  * str.delete!([other_str]+) -> str or nil
5796  *
5797  * Performs a <code>delete</code> operation in place, returning <i>str</i>, or
5798  * <code>nil</code> if <i>str</i> was not modified.
5799  */
5800 
5801 static VALUE
5803 {
5804  char squeez[TR_TABLE_SIZE];
5805  rb_encoding *enc = 0;
5806  char *s, *send, *t;
5807  VALUE del = 0, nodel = 0;
5808  int modify = 0;
5809  int i, ascompat, cr;
5810 
5811  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5813  for (i=0; i<argc; i++) {
5814  VALUE s = argv[i];
5815 
5816  StringValue(s);
5817  enc = rb_enc_check(str, s);
5818  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5819  }
5820 
5821  str_modify_keep_cr(str);
5822  ascompat = rb_enc_asciicompat(enc);
5823  s = t = RSTRING_PTR(str);
5824  send = RSTRING_END(str);
5825  cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
5826  while (s < send) {
5827  unsigned int c;
5828  int clen;
5829 
5830  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5831  if (squeez[c]) {
5832  modify = 1;
5833  }
5834  else {
5835  if (t != s) *t = c;
5836  t++;
5837  }
5838  s++;
5839  }
5840  else {
5841  c = rb_enc_codepoint_len(s, send, &clen, enc);
5842 
5843  if (tr_find(c, squeez, del, nodel)) {
5844  modify = 1;
5845  }
5846  else {
5847  if (t != s) rb_enc_mbcput(c, t, enc);
5848  t += clen;
5849  if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
5850  }
5851  s += clen;
5852  }
5853  }
5854  *t = '\0';
5855  STR_SET_LEN(str, t - RSTRING_PTR(str));
5856  ENC_CODERANGE_SET(str, cr);
5857 
5858  if (modify) return str;
5859  return Qnil;
5860 }
5861 
5862 
5863 /*
5864  * call-seq:
5865  * str.delete([other_str]+) -> new_str
5866  *
5867  * Returns a copy of <i>str</i> with all characters in the intersection of its
5868  * arguments deleted. Uses the same rules for building the set of characters as
5869  * <code>String#count</code>.
5870  *
5871  * "hello".delete "l","lo" #=> "heo"
5872  * "hello".delete "lo" #=> "he"
5873  * "hello".delete "aeiou", "^e" #=> "hell"
5874  * "hello".delete "ej-m" #=> "ho"
5875  */
5876 
5877 static VALUE
5879 {
5880  str = rb_str_dup(str);
5881  rb_str_delete_bang(argc, argv, str);
5882  return str;
5883 }
5884 
5885 
5886 /*
5887  * call-seq:
5888  * str.squeeze!([other_str]*) -> str or nil
5889  *
5890  * Squeezes <i>str</i> in place, returning either <i>str</i>, or
5891  * <code>nil</code> if no changes were made.
5892  */
5893 
5894 static VALUE
5896 {
5897  char squeez[TR_TABLE_SIZE];
5898  rb_encoding *enc = 0;
5899  VALUE del = 0, nodel = 0;
5900  char *s, *send, *t;
5901  int i, modify = 0;
5902  int ascompat, singlebyte = single_byte_optimizable(str);
5903  unsigned int save;
5904 
5905  if (argc == 0) {
5906  enc = STR_ENC_GET(str);
5907  }
5908  else {
5909  for (i=0; i<argc; i++) {
5910  VALUE s = argv[i];
5911 
5912  StringValue(s);
5913  enc = rb_enc_check(str, s);
5914  if (singlebyte && !single_byte_optimizable(s))
5915  singlebyte = 0;
5916  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5917  }
5918  }
5919 
5920  str_modify_keep_cr(str);
5921  s = t = RSTRING_PTR(str);
5922  if (!s || RSTRING_LEN(str) == 0) return Qnil;
5923  send = RSTRING_END(str);
5924  save = -1;
5925  ascompat = rb_enc_asciicompat(enc);
5926 
5927  if (singlebyte) {
5928  while (s < send) {
5929  unsigned int c = *(unsigned char*)s++;
5930  if (c != save || (argc > 0 && !squeez[c])) {
5931  *t++ = save = c;
5932  }
5933  }
5934  } else {
5935  while (s < send) {
5936  unsigned int c;
5937  int clen;
5938 
5939  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5940  if (c != save || (argc > 0 && !squeez[c])) {
5941  *t++ = save = c;
5942  }
5943  s++;
5944  }
5945  else {
5946  c = rb_enc_codepoint_len(s, send, &clen, enc);
5947 
5948  if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
5949  if (t != s) rb_enc_mbcput(c, t, enc);
5950  save = c;
5951  t += clen;
5952  }
5953  s += clen;
5954  }
5955  }
5956  }
5957 
5958  *t = '\0';
5959  if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
5960  STR_SET_LEN(str, t - RSTRING_PTR(str));
5961  modify = 1;
5962  }
5963 
5964  if (modify) return str;
5965  return Qnil;
5966 }
5967 
5968 
5969 /*
5970  * call-seq:
5971  * str.squeeze([other_str]*) -> new_str
5972  *
5973  * Builds a set of characters from the <i>other_str</i> parameter(s) using the
5974  * procedure described for <code>String#count</code>. Returns a new string
5975  * where runs of the same character that occur in this set are replaced by a
5976  * single character. If no arguments are given, all runs of identical
5977  * characters are replaced by a single character.
5978  *
5979  * "yellow moon".squeeze #=> "yelow mon"
5980  * " now is the".squeeze(" ") #=> " now is the"
5981  * "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
5982  */
5983 
5984 static VALUE
5986 {
5987  str = rb_str_dup(str);
5988  rb_str_squeeze_bang(argc, argv, str);
5989  return str;
5990 }
5991 
5992 
5993 /*
5994  * call-seq:
5995  * str.tr_s!(from_str, to_str) -> str or nil
5996  *
5997  * Performs <code>String#tr_s</code> processing on <i>str</i> in place,
5998  * returning <i>str</i>, or <code>nil</code> if no changes were made.
5999  */
6000 
6001 static VALUE
6003 {
6004  return tr_trans(str, src, repl, 1);
6005 }
6006 
6007 
6008 /*
6009  * call-seq:
6010  * str.tr_s(from_str, to_str) -> new_str
6011  *
6012  * Processes a copy of <i>str</i> as described under <code>String#tr</code>,
6013  * then removes duplicate characters in regions that were affected by the
6014  * translation.
6015  *
6016  * "hello".tr_s('l', 'r') #=> "hero"
6017  * "hello".tr_s('el', '*') #=> "h*o"
6018  * "hello".tr_s('el', 'hx') #=> "hhxo"
6019  */
6020 
6021 static VALUE
6022 rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
6023 {
6024  str = rb_str_dup(str);
6025  tr_trans(str, src, repl, 1);
6026  return str;
6027 }
6028 
6029 
6030 /*
6031  * call-seq:
6032  * str.count([other_str]+) -> fixnum
6033  *
6034  * Each +other_str+ parameter defines a set of characters to count. The
6035  * intersection of these sets defines the characters to count in +str+. Any
6036  * +other_str+ that starts with a caret <code>^</code> is negated. The
6037  * sequence <code>c1-c2</code> means all characters between c1 and c2. The
6038  * backslash character <code></code> can be used to escape <code>^</code> or
6039  * <code>-</code> and is otherwise ignored unless it appears at the end of a
6040  * sequence or the end of a +other_str+.
6041  *
6042  * a = "hello world"
6043  * a.count "lo" #=> 5
6044  * a.count "lo", "o" #=> 2
6045  * a.count "hello", "^l" #=> 4
6046  * a.count "ej-m" #=> 4
6047  *
6048  * "hello^world".count "\\^aeiou" #=> 4
6049  * "hello-world".count "a\\-eo" #=> 4
6050  *
6051  * c = "hello world\\r\\n"
6052  * c.count "\\" #=> 2
6053  * c.count "\\A" #=> 0
6054  * c.count "X-\\w" #=> 3
6055  */
6056 
6057 static VALUE
6059 {
6060  char table[TR_TABLE_SIZE];
6061  rb_encoding *enc = 0;
6062  VALUE del = 0, nodel = 0, tstr;
6063  char *s, *send;
6064  int i;
6065  int ascompat;
6066 
6068 
6069  tstr = argv[0];
6070  StringValue(tstr);
6071  enc = rb_enc_check(str, tstr);
6072  if (argc == 1) {
6073  const char *ptstr;
6074  if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
6075  (ptstr = RSTRING_PTR(tstr),
6076  ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (const unsigned char *)ptstr, (const unsigned char *)ptstr+1)) &&
6077  !is_broken_string(str)) {
6078  int n = 0;
6079  int clen;
6080  unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
6081 
6082  s = RSTRING_PTR(str);
6083  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
6084  send = RSTRING_END(str);
6085  while (s < send) {
6086  if (*(unsigned char*)s++ == c) n++;
6087  }
6088  return INT2NUM(n);
6089  }
6090  }
6091 
6092  tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
6093  for (i=1; i<argc; i++) {
6094  tstr = argv[i];
6095  StringValue(tstr);
6096  enc = rb_enc_check(str, tstr);
6097  tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
6098  }
6099 
6100  s = RSTRING_PTR(str);
6101  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
6102  send = RSTRING_END(str);
6103  ascompat = rb_enc_asciicompat(enc);
6104  i = 0;
6105  while (s < send) {
6106  unsigned int c;
6107 
6108  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
6109  if (table[c]) {
6110  i++;
6111  }
6112  s++;
6113  }
6114  else {
6115  int clen;
6116  c = rb_enc_codepoint_len(s, send, &clen, enc);
6117  if (tr_find(c, table, del, nodel)) {
6118  i++;
6119  }
6120  s += clen;
6121  }
6122  }
6123 
6124  return INT2NUM(i);
6125 }
6126 
/*
 * Lookup table indexed by byte value: 1 for the ASCII whitespace bytes
 * 0x09-0x0D (\t \n \v \f \r) and 0x20 (space), 0 for every other byte.
 */
static const char isspacetable[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/* The cast to unsigned char keeps the index in 0..255 even where char is signed. */
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
6147 
6148 /*
6149  * call-seq:
6150  * str.split(pattern=$;, [limit]) -> anArray
6151  *
6152  * Divides <i>str</i> into substrings based on a delimiter, returning an array
6153  * of these substrings.
6154  *
6155  * If <i>pattern</i> is a <code>String</code>, then its contents are used as
6156  * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
6157  * space, <i>str</i> is split on whitespace, with leading whitespace and runs
6158  * of contiguous whitespace characters ignored.
6159  *
6160  * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
6161  * pattern matches. Whenever the pattern matches a zero-length string,
6162  * <i>str</i> is split into individual characters. If <i>pattern</i> contains
6163  * groups, the respective matches will be returned in the array as well.
6164  *
6165  * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
6166  * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
6167  * split on whitespace as if ` ' were specified.
6168  *
6169  * If the <i>limit</i> parameter is omitted, trailing null fields are
6170  * suppressed. If <i>limit</i> is a positive number, at most that number of
6171  * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
6172  * string is returned as the only entry in an array). If negative, there is no
6173  * limit to the number of fields returned, and trailing null fields are not
6174  * suppressed.
6175  *
6176  * When the input +str+ is empty an empty Array is returned as the string is
6177  * considered to have no fields to split.
6178  *
6179  * " now's the time".split #=> ["now's", "the", "time"]
6180  * " now's the time".split(' ') #=> ["now's", "the", "time"]
6181  * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
6182  * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
6183  * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
6184  * "hello".split(//, 3) #=> ["h", "e", "llo"]
6185  * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
6186  *
6187  * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
6188  * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
6189  * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
6190  * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
6191  *
6192  * "".split(',', -1) #=> []
6193  */
6194 
6195 static VALUE
6197 {
6198  rb_encoding *enc;
6199  VALUE spat;
6200  VALUE limit;
6201  enum {awk, string, regexp} split_type;
6202  long beg, end, i = 0;
6203  int lim = 0;
6204  VALUE result, tmp;
6205 
6206  if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
6207  lim = NUM2INT(limit);
6208  if (lim <= 0) limit = Qnil;
6209  else if (lim == 1) {
6210  if (RSTRING_LEN(str) == 0)
6211  return rb_ary_new2(0);
6212  return rb_ary_new3(1, str);
6213  }
6214  i = 1;
6215  }
6216 
6217  enc = STR_ENC_GET(str);
6218  if (NIL_P(spat)) {
6219  if (!NIL_P(rb_fs)) {
6220  spat = rb_fs;
6221  goto fs_set;
6222  }
6223  split_type = awk;
6224  }
6225  else {
6226  fs_set:
6227  if (RB_TYPE_P(spat, T_STRING)) {
6228  rb_encoding *enc2 = STR_ENC_GET(spat);
6229 
6230  split_type = string;
6231  if (RSTRING_LEN(spat) == 0) {
6232  /* Special case - split into chars */
6233  spat = rb_reg_regcomp(spat);
6234  split_type = regexp;
6235  }
6236  else if (rb_enc_asciicompat(enc2) == 1) {
6237  if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
6238  split_type = awk;
6239  }
6240  }
6241  else {
6242  int l;
6243  if (rb_enc_ascget(RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' &&
6244  RSTRING_LEN(spat) == l) {
6245  split_type = awk;
6246  }
6247  }
6248  }
6249  else {
6250  spat = get_pat(spat, 1);
6251  split_type = regexp;
6252  }
6253  }
6254 
6255  result = rb_ary_new();
6256  beg = 0;
6257  if (split_type == awk) {
6258  char *ptr = RSTRING_PTR(str);
6259  char *eptr = RSTRING_END(str);
6260  char *bptr = ptr;
6261  int skip = 1;
6262  unsigned int c;
6263 
6264  end = beg;
6265  if (is_ascii_string(str)) {
6266  while (ptr < eptr) {
6267  c = (unsigned char)*ptr++;
6268  if (skip) {
6269  if (ascii_isspace(c)) {
6270  beg = ptr - bptr;
6271  }
6272  else {
6273  end = ptr - bptr;
6274  skip = 0;
6275  if (!NIL_P(limit) && lim <= i) break;
6276  }
6277  }
6278  else if (ascii_isspace(c)) {
6279  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6280  skip = 1;
6281  beg = ptr - bptr;
6282  if (!NIL_P(limit)) ++i;
6283  }
6284  else {
6285  end = ptr - bptr;
6286  }
6287  }
6288  }
6289  else {
6290  while (ptr < eptr) {
6291  int n;
6292 
6293  c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
6294  ptr += n;
6295  if (skip) {
6296  if (rb_isspace(c)) {
6297  beg = ptr - bptr;
6298  }
6299  else {
6300  end = ptr - bptr;
6301  skip = 0;
6302  if (!NIL_P(limit) && lim <= i) break;
6303  }
6304  }
6305  else if (rb_isspace(c)) {
6306  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6307  skip = 1;
6308  beg = ptr - bptr;
6309  if (!NIL_P(limit)) ++i;
6310  }
6311  else {
6312  end = ptr - bptr;
6313  }
6314  }
6315  }
6316  }
6317  else if (split_type == string) {
6318  char *ptr = RSTRING_PTR(str);
6319  char *temp = ptr;
6320  char *eptr = RSTRING_END(str);
6321  char *sptr = RSTRING_PTR(spat);
6322  long slen = RSTRING_LEN(spat);
6323 
6324  if (is_broken_string(str)) {
6325  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
6326  }
6327  if (is_broken_string(spat)) {
6328  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(spat)));
6329  }
6330  enc = rb_enc_check(str, spat);
6331  while (ptr < eptr &&
6332  (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
6333  /* Check we are at the start of a char */
6334  char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
6335  if (t != ptr + end) {
6336  ptr = t;
6337  continue;
6338  }
6339  rb_ary_push(result, rb_str_subseq(str, ptr - temp, end));
6340  ptr += end + slen;
6341  if (!NIL_P(limit) && lim <= ++i) break;
6342  }
6343  beg = ptr - temp;
6344  }
6345  else {
6346  char *ptr = RSTRING_PTR(str);
6347  long len = RSTRING_LEN(str);
6348  long start = beg;
6349  long idx;
6350  int last_null = 0;
6351  struct re_registers *regs;
6352 
6353  while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
6354  regs = RMATCH_REGS(rb_backref_get());
6355  if (start == end && BEG(0) == END(0)) {
6356  if (!ptr) {
6357  rb_ary_push(result, str_new_empty(str));
6358  break;
6359  }
6360  else if (last_null == 1) {
6361  rb_ary_push(result, rb_str_subseq(str, beg,
6362  rb_enc_fast_mbclen(ptr+beg,
6363  ptr+len,
6364  enc)));
6365  beg = start;
6366  }
6367  else {
6368  if (ptr+start == ptr+len)
6369  start++;
6370  else
6371  start += rb_enc_fast_mbclen(ptr+start,ptr+len,enc);
6372  last_null = 1;
6373  continue;
6374  }
6375  }
6376  else {
6377  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6378  beg = start = END(0);
6379  }
6380  last_null = 0;
6381 
6382  for (idx=1; idx < regs->num_regs; idx++) {
6383  if (BEG(idx) == -1) continue;
6384  if (BEG(idx) == END(idx))
6385  tmp = str_new_empty(str);
6386  else
6387  tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx));
6388  rb_ary_push(result, tmp);
6389  }
6390  if (!NIL_P(limit) && lim <= ++i) break;
6391  }
6392  }
6393  if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
6394  if (RSTRING_LEN(str) == beg)
6395  tmp = str_new_empty(str);
6396  else
6397  tmp = rb_str_subseq(str, beg, RSTRING_LEN(str)-beg);
6398  rb_ary_push(result, tmp);
6399  }
6400  if (NIL_P(limit) && lim == 0) {
6401  long len;
6402  while ((len = RARRAY_LEN(result)) > 0 &&
6403  (tmp = RARRAY_AREF(result, len-1), RSTRING_LEN(tmp) == 0))
6404  rb_ary_pop(result);
6405  }
6406 
6407  return result;
6408 }
6409 
6410 VALUE
6411 rb_str_split(VALUE str, const char *sep0)
6412 {
6413  VALUE sep;
6414 
6415  StringValue(str);
6416  sep = rb_str_new2(sep0);
6417  return rb_str_split_m(1, &sep, str);
6418 }
6419 
6420 
6421 static VALUE
6422 rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
6423 {
6424  rb_encoding *enc;
6425  VALUE line, rs, orig = str;
6426  const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
6427  long pos, len, rslen;
6428  int paragraph_mode = 0;
6429 
6430  VALUE UNINITIALIZED_VAR(ary);
6431 
6432  if (argc == 0)
6433  rs = rb_rs;
6434  else
6435  rb_scan_args(argc, argv, "01", &rs);
6436 
6437  if (rb_block_given_p()) {
6438  if (wantarray) {
6439 #if STRING_ENUMERATORS_WANTARRAY
6440  rb_warn("given block not used");
6441  ary = rb_ary_new();
6442 #else
6443  rb_warning("passing a block to String#lines is deprecated");
6444  wantarray = 0;
6445 #endif
6446  }
6447  }
6448  else {
6449  if (wantarray)
6450  ary = rb_ary_new();
6451  else
6452  RETURN_ENUMERATOR(str, argc, argv);
6453  }
6454 
6455  if (NIL_P(rs)) {
6456  if (wantarray) {
6457  rb_ary_push(ary, str);
6458  return ary;
6459  }
6460  else {
6461  rb_yield(str);
6462  return orig;
6463  }
6464  }
6465 
6466  str = rb_str_new4(str);
6467  ptr = subptr = RSTRING_PTR(str);
6468  pend = RSTRING_END(str);
6469  len = RSTRING_LEN(str);
6470  StringValue(rs);
6471  rslen = RSTRING_LEN(rs);
6472 
6473  if (rs == rb_default_rs)
6474  enc = rb_enc_get(str);
6475  else
6476  enc = rb_enc_check(str, rs);
6477 
6478  if (rslen == 0) {
6479  rsptr = "\n\n";
6480  rslen = 2;
6481  paragraph_mode = 1;
6482  }
6483  else {
6484  rsptr = RSTRING_PTR(rs);
6485  }
6486 
6487  if ((rs == rb_default_rs || paragraph_mode) && !rb_enc_asciicompat(enc)) {
6488  rs = rb_str_new(rsptr, rslen);
6489  rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil);
6490  rsptr = RSTRING_PTR(rs);
6491  rslen = RSTRING_LEN(rs);
6492  }
6493 
6494  while (subptr < pend) {
6495  pos = rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
6496  if (pos < 0) break;
6497  hit = subptr + pos;
6498  adjusted = rb_enc_right_char_head(subptr, hit, pend, enc);
6499  if (hit != adjusted) {
6500  subptr = adjusted;
6501  continue;
6502  }
6503  subend = hit + rslen;
6504  if (paragraph_mode) {
6505  while (subend < pend && rb_enc_is_newline(subend, pend, enc)) {
6506  subend += rb_enc_mbclen(subend, pend, enc);
6507  }
6508  }
6509  line = rb_str_subseq(str, subptr - ptr, subend - subptr);
6510  if (wantarray) {
6511  rb_ary_push(ary, line);
6512  }
6513  else {
6514  rb_yield(line);
6515  str_mod_check(str, ptr, len);
6516  }
6517  subptr = subend;
6518  }
6519 
6520  if (subptr != pend) {
6521  line = rb_str_subseq(str, subptr - ptr, pend - subptr);
6522  if (wantarray)
6523  rb_ary_push(ary, line);
6524  else
6525  rb_yield(line);
6526  RB_GC_GUARD(str);
6527  }
6528 
6529  if (wantarray)
6530  return ary;
6531  else
6532  return orig;
6533 }
6534 
6535 /*
6536  * call-seq:
6537  * str.each_line(separator=$/) {|substr| block } -> str
6538  * str.each_line(separator=$/) -> an_enumerator
6539  *
6540  * Splits <i>str</i> using the supplied parameter as the record
6541  * separator (<code>$/</code> by default), passing each substring in
6542  * turn to the supplied block. If a zero-length record separator is
6543  * supplied, the string is split into paragraphs delimited by
6544  * multiple successive newlines.
6545  *
6546  * If no block is given, an enumerator is returned instead.
6547  *
6548  * print "Example one\n"
6549  * "hello\nworld".each_line {|s| p s}
6550  * print "Example two\n"
6551  * "hello\nworld".each_line('l') {|s| p s}
6552  * print "Example three\n"
6553  * "hello\n\n\nworld".each_line('') {|s| p s}
6554  *
6555  * <em>produces:</em>
6556  *
6557  * Example one
6558  * "hello\n"
6559  * "world"
6560  * Example two
6561  * "hel"
6562  * "l"
6563  * "o\nworl"
6564  * "d"
6565  * Example three
6566  * "hello\n\n\n"
6567  * "world"
6568  */
6569 
6570 static VALUE
6572 {
6573  return rb_str_enumerate_lines(argc, argv, str, 0);
6574 }
6575 
6576 /*
6577  * call-seq:
6578  * str.lines(separator=$/) -> an_array
6579  *
6580  * Returns an array of lines in <i>str</i> split using the supplied
6581  * record separator (<code>$/</code> by default). This is a
6582  * shorthand for <code>str.each_line(separator).to_a</code>.
6583  *
6584  * If a block is given, which is a deprecated form, works the same as
6585  * <code>each_line</code>.
6586  */
6587 
6588 static VALUE
6590 {
6591  return rb_str_enumerate_lines(argc, argv, str, 1);
6592 }
6593 
6594 static VALUE
6596 {
6597  return LONG2FIX(RSTRING_LEN(str));
6598 }
6599 
6600 static VALUE
6601 rb_str_enumerate_bytes(VALUE str, int wantarray)
6602 {
6603  long i;
6604  VALUE UNINITIALIZED_VAR(ary);
6605 
6606  if (rb_block_given_p()) {
6607  if (wantarray) {
6608 #if STRING_ENUMERATORS_WANTARRAY
6609  rb_warn("given block not used");
6610  ary = rb_ary_new();
6611 #else
6612  rb_warning("passing a block to String#bytes is deprecated");
6613  wantarray = 0;
6614 #endif
6615  }
6616  }
6617  else {
6618  if (wantarray)
6619  ary = rb_ary_new2(RSTRING_LEN(str));
6620  else
6622  }
6623 
6624  for (i=0; i<RSTRING_LEN(str); i++) {
6625  if (wantarray)
6626  rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6627  else
6628  rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6629  }
6630  if (wantarray)
6631  return ary;
6632  else
6633  return str;
6634 }
6635 
6636 /*
6637  * call-seq:
6638  * str.each_byte {|fixnum| block } -> str
6639  * str.each_byte -> an_enumerator
6640  *
6641  * Passes each byte in <i>str</i> to the given block, or returns an
6642  * enumerator if no block is given.
6643  *
6644  * "hello".each_byte {|c| print c, ' ' }
6645  *
6646  * <em>produces:</em>
6647  *
6648  * 104 101 108 108 111
6649  */
6650 
6651 static VALUE
6653 {
6654  return rb_str_enumerate_bytes(str, 0);
6655 }
6656 
6657 /*
6658  * call-seq:
6659  * str.bytes -> an_array
6660  *
6661  * Returns an array of bytes in <i>str</i>. This is a shorthand for
6662  * <code>str.each_byte.to_a</code>.
6663  *
6664  * If a block is given, which is a deprecated form, works the same as
6665  * <code>each_byte</code>.
6666  */
6667 
6668 static VALUE
6670 {
6671  return rb_str_enumerate_bytes(str, 1);
6672 }
6673 
6674 static VALUE
6676 {
6677  return rb_str_length(str);
6678 }
6679 
6680 static VALUE
6681 rb_str_enumerate_chars(VALUE str, int wantarray)
6682 {
6683  VALUE orig = str;
6684  VALUE substr;
6685  long i, len, n;
6686  const char *ptr;
6687  rb_encoding *enc;
6688  VALUE UNINITIALIZED_VAR(ary);
6689 
6690  str = rb_str_new4(str);
6691  ptr = RSTRING_PTR(str);
6692  len = RSTRING_LEN(str);
6693  enc = rb_enc_get(str);
6694 
6695  if (rb_block_given_p()) {
6696  if (wantarray) {
6697 #if STRING_ENUMERATORS_WANTARRAY
6698  rb_warn("given block not used");
6699  ary = rb_ary_new_capa(str_strlen(str, enc));
6700 #else
6701  rb_warning("passing a block to String#chars is deprecated");
6702  wantarray = 0;
6703 #endif
6704  }
6705  }
6706  else {
6707  if (wantarray)
6708  ary = rb_ary_new_capa(str_strlen(str, enc));
6709  else
6711  }
6712 
6713  switch (ENC_CODERANGE(str)) {
6714  case ENC_CODERANGE_VALID:
6715  case ENC_CODERANGE_7BIT:
6716  for (i = 0; i < len; i += n) {
6717  n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
6718  substr = rb_str_subseq(str, i, n);
6719  if (wantarray)
6720  rb_ary_push(ary, substr);
6721  else
6722  rb_yield(substr);
6723  }
6724  break;
6725  default:
6726  for (i = 0; i < len; i += n) {
6727  n = rb_enc_mbclen(ptr + i, ptr + len, enc);
6728  substr = rb_str_subseq(str, i, n);
6729  if (wantarray)
6730  rb_ary_push(ary, substr);
6731  else
6732  rb_yield(substr);
6733  }
6734  }
6735  RB_GC_GUARD(str);
6736  if (wantarray)
6737  return ary;
6738  else
6739  return orig;
6740 }
6741 
6742 /*
6743  * call-seq:
6744  * str.each_char {|cstr| block } -> str
6745  * str.each_char -> an_enumerator
6746  *
6747  * Passes each character in <i>str</i> to the given block, or returns
6748  * an enumerator if no block is given.
6749  *
6750  * "hello".each_char {|c| print c, ' ' }
6751  *
6752  * <em>produces:</em>
6753  *
6754  * h e l l o
6755  */
6756 
6757 static VALUE
6759 {
6760  return rb_str_enumerate_chars(str, 0);
6761 }
6762 
6763 /*
6764  * call-seq:
6765  * str.chars -> an_array
6766  *
6767  * Returns an array of characters in <i>str</i>. This is a shorthand
6768  * for <code>str.each_char.to_a</code>.
6769  *
6770  * If a block is given, which is a deprecated form, works the same as
6771  * <code>each_char</code>.
6772  */
6773 
6774 static VALUE
6776 {
6777  return rb_str_enumerate_chars(str, 1);
6778 }
6779 
6780 
6781 static VALUE
6783 {
6784  VALUE orig = str;
6785  int n;
6786  unsigned int c;
6787  const char *ptr, *end;
6788  rb_encoding *enc;
6789  VALUE UNINITIALIZED_VAR(ary);
6790 
6791  if (single_byte_optimizable(str))
6792  return rb_str_enumerate_bytes(str, wantarray);
6793 
6794  str = rb_str_new4(str);
6795  ptr = RSTRING_PTR(str);
6796  end = RSTRING_END(str);
6797  enc = STR_ENC_GET(str);
6798 
6799  if (rb_block_given_p()) {
6800  if (wantarray) {
6801 #if STRING_ENUMERATORS_WANTARRAY
6802  rb_warn("given block not used");
6803  ary = rb_ary_new_capa(str_strlen(str, enc));
6804 #else
6805  rb_warning("passing a block to String#codepoints is deprecated");
6806  wantarray = 0;
6807 #endif
6808  }
6809  }
6810  else {
6811  if (wantarray)
6812  ary = rb_ary_new_capa(str_strlen(str, enc));
6813  else
6815  }
6816 
6817  while (ptr < end) {
6818  c = rb_enc_codepoint_len(ptr, end, &n, enc);
6819  if (wantarray)
6820  rb_ary_push(ary, UINT2NUM(c));
6821  else
6822  rb_yield(UINT2NUM(c));
6823  ptr += n;
6824  }
6825  RB_GC_GUARD(str);
6826  if (wantarray)
6827  return ary;
6828  else
6829  return orig;
6830 }
6831 
6832 /*
6833  * call-seq:
6834  * str.each_codepoint {|integer| block } -> str
6835  * str.each_codepoint -> an_enumerator
6836  *
6837  * Passes the <code>Integer</code> ordinal of each character in <i>str</i>,
6838  * also known as a <i>codepoint</i> when applied to Unicode strings to the
6839  * given block.
6840  *
6841  * If no block is given, an enumerator is returned instead.
6842  *
6843  * "hello\u0639".each_codepoint {|c| print c, ' ' }
6844  *
6845  * <em>produces:</em>
6846  *
6847  * 104 101 108 108 111 1593
6848  */
6849 
6850 static VALUE
6852 {
6853  return rb_str_enumerate_codepoints(str, 0);
6854 }
6855 
6856 /*
6857  * call-seq:
6858  * str.codepoints -> an_array
6859  *
6860  * Returns an array of the <code>Integer</code> ordinals of the
6861  * characters in <i>str</i>. This is a shorthand for
6862  * <code>str.each_codepoint.to_a</code>.
6863  *
6864  * If a block is given, which is a deprecated form, works the same as
6865  * <code>each_codepoint</code>.
6866  */
6867 
6868 static VALUE
6870 {
6871  return rb_str_enumerate_codepoints(str, 1);
6872 }
6873 
6874 
6875 static long
6877 {
6878  rb_encoding *enc = STR_ENC_GET(str);
6879  const char *p, *p2, *beg, *end;
6880 
6881  beg = RSTRING_PTR(str);
6882  end = beg + RSTRING_LEN(str);
6883  if (beg > end) return 0;
6884  p = rb_enc_prev_char(beg, end, end, enc);
6885  if (!p) return 0;
6886  if (p > beg && rb_enc_ascget(p, end, 0, enc) == '\n') {
6887  p2 = rb_enc_prev_char(beg, p, end, enc);
6888  if (p2 && rb_enc_ascget(p2, end, 0, enc) == '\r') p = p2;
6889  }
6890  return p - beg;
6891 }
6892 
6893 /*
6894  * call-seq:
6895  * str.chop! -> str or nil
6896  *
6897  * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
6898  * or <code>nil</code> if <i>str</i> is the empty string. See also
6899  * <code>String#chomp!</code>.
6900  */
6901 
6902 static VALUE
6904 {
6905  str_modify_keep_cr(str);
6906  if (RSTRING_LEN(str) > 0) {
6907  long len;
6908  len = chopped_length(str);
6909  STR_SET_LEN(str, len);
6910  RSTRING_PTR(str)[len] = '\0';
6911  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
6912  ENC_CODERANGE_CLEAR(str);
6913  }
6914  return str;
6915  }
6916  return Qnil;
6917 }
6918 
6919 
6920 /*
6921  * call-seq:
6922  * str.chop -> new_str
6923  *
6924  * Returns a new <code>String</code> with the last character removed. If the
6925  * string ends with <code>\r\n</code>, both characters are removed. Applying
6926  * <code>chop</code> to an empty string returns an empty
6927  * string. <code>String#chomp</code> is often a safer alternative, as it leaves
6928  * the string unchanged if it doesn't end in a record separator.
6929  *
6930  * "string\r\n".chop #=> "string"
6931  * "string\n\r".chop #=> "string\n"
6932  * "string\n".chop #=> "string"
6933  * "string".chop #=> "strin"
6934  * "x".chop.chop #=> ""
6935  */
6936 
6937 static VALUE
6939 {
6940  return rb_str_subseq(str, 0, chopped_length(str));
6941 }
6942 
6943 
6944 /*
6945  * call-seq:
6946  * str.chomp!(separator=$/) -> str or nil
6947  *
6948  * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
6949  * returning <i>str</i>, or <code>nil</code> if no modifications were made.
6950  */
6951 
6952 static VALUE
6954 {
6955  rb_encoding *enc;
6956  VALUE rs;
6957  int newline;
6958  char *p, *pp, *e;
6959  long len, rslen;
6960 
6961  str_modify_keep_cr(str);
6962  len = RSTRING_LEN(str);
6963  if (len == 0) return Qnil;
6964  p = RSTRING_PTR(str);
6965  e = p + len;
6966  if (argc == 0) {
6967  rs = rb_rs;
6968  if (rs == rb_default_rs) {
6969  smart_chomp:
6970  enc = rb_enc_get(str);
6971  if (rb_enc_mbminlen(enc) > 1) {
6972  pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
6973  if (rb_enc_is_newline(pp, e, enc)) {
6974  e = pp;
6975  }
6976  pp = e - rb_enc_mbminlen(enc);
6977  if (pp >= p) {
6978  pp = rb_enc_left_char_head(p, pp, e, enc);
6979  if (rb_enc_ascget(pp, e, 0, enc) == '\r') {
6980  e = pp;
6981  }
6982  }
6983  if (e == RSTRING_END(str)) {
6984  return Qnil;
6985  }
6986  len = e - RSTRING_PTR(str);
6987  STR_SET_LEN(str, len);
6988  }
6989  else {
6990  if (RSTRING_PTR(str)[len-1] == '\n') {
6991  STR_DEC_LEN(str);
6992  if (RSTRING_LEN(str) > 0 &&
6993  RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
6994  STR_DEC_LEN(str);
6995  }
6996  }
6997  else if (RSTRING_PTR(str)[len-1] == '\r') {
6998  STR_DEC_LEN(str);
6999  }
7000  else {
7001  return Qnil;
7002  }
7003  }
7004  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
7005  return str;
7006  }
7007  }
7008  else {
7009  rb_scan_args(argc, argv, "01", &rs);
7010  }
7011  if (NIL_P(rs)) return Qnil;
7012  StringValue(rs);
7013  rslen = RSTRING_LEN(rs);
7014  if (rslen == 0) {
7015  while (len>0 && p[len-1] == '\n') {
7016  len--;
7017  if (len>0 && p[len-1] == '\r')
7018  len--;
7019  }
7020  if (len < RSTRING_LEN(str)) {
7021  STR_SET_LEN(str, len);
7022  RSTRING_PTR(str)[len] = '\0';
7023  return str;
7024  }
7025  return Qnil;
7026  }
7027  if (rslen > len) return Qnil;
7028  newline = RSTRING_PTR(rs)[rslen-1];
7029  if (rslen == 1 && newline == '\n')
7030  goto smart_chomp;
7031 
7032  enc = rb_enc_check(str, rs);
7033  if (is_broken_string(rs)) {
7034  return Qnil;
7035  }
7036  pp = e - rslen;
7037  if (p[len-1] == newline &&
7038  (rslen <= 1 ||
7039  memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
7040  if (rb_enc_left_char_head(p, pp, e, enc) != pp)
7041  return Qnil;
7042  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
7043  ENC_CODERANGE_CLEAR(str);
7044  }
7045  STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
7046  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
7047  return str;
7048  }
7049  return Qnil;
7050 }
7051 
7052 
7053 /*
7054  * call-seq:
7055  * str.chomp(separator=$/) -> new_str
7056  *
7057  * Returns a new <code>String</code> with the given record separator removed
7058  * from the end of <i>str</i> (if present). If <code>$/</code> has not been
7059  * changed from the default Ruby record separator, then <code>chomp</code> also
7060  * removes carriage return characters (that is it will remove <code>\n</code>,
7061  * <code>\r</code>, and <code>\r\n</code>).
7062  *
7063  * "hello".chomp #=> "hello"
7064  * "hello\n".chomp #=> "hello"
7065  * "hello\r\n".chomp #=> "hello"
7066  * "hello\n\r".chomp #=> "hello\n"
7067  * "hello\r".chomp #=> "hello"
7068  * "hello \n there".chomp #=> "hello \n there"
7069  * "hello".chomp("llo") #=> "he"
7070  */
7071 
7072 static VALUE
7074 {
7075  str = rb_str_dup(str);
7076  rb_str_chomp_bang(argc, argv, str);
7077  return str;
7078 }
7079 
7080 /*
7081  * call-seq:
7082  * str.lstrip! -> self or nil
7083  *
7084  * Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no
7085  * change was made. See also <code>String#rstrip!</code> and
7086  * <code>String#strip!</code>.
7087  *
7088  * " hello ".lstrip #=> "hello "
7089  * "hello".lstrip! #=> nil
7090  */
7091 
7092 static VALUE
7094 {
7095  rb_encoding *enc;
7096  char *s, *t, *e;
7097 
7098  str_modify_keep_cr(str);
7099  enc = STR_ENC_GET(str);
7100  s = RSTRING_PTR(str);
7101  if (!s || RSTRING_LEN(str) == 0) return Qnil;
7102  e = t = RSTRING_END(str);
7103  /* remove spaces at head */
7104  while (s < e) {
7105  int n;
7106  unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
7107 
7108  if (!rb_isspace(cc)) break;
7109  s += n;
7110  }
7111 
7112  if (s > RSTRING_PTR(str)) {
7113  STR_SET_LEN(str, t-s);
7114  memmove(RSTRING_PTR(str), s, RSTRING_LEN(str));
7115  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
7116  return str;
7117  }
7118  return Qnil;
7119 }
7120 
7121 
7122 /*
7123  * call-seq:
7124  * str.lstrip -> new_str
7125  *
7126  * Returns a copy of <i>str</i> with leading whitespace removed. See also
7127  * <code>String#rstrip</code> and <code>String#strip</code>.
7128  *
7129  * " hello ".lstrip #=> "hello "
7130  * "hello".lstrip #=> "hello"
7131  */
7132 
7133 static VALUE
7135 {
7136  str = rb_str_dup(str);
7137  rb_str_lstrip_bang(str);
7138  return str;
7139 }
7140 
7141 
7142 /*
7143  * call-seq:
7144  * str.rstrip! -> self or nil
7145  *
7146  * Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if
7147  * no change was made. See also <code>String#lstrip!</code> and
7148  * <code>String#strip!</code>.
7149  *
7150  * " hello ".rstrip #=> " hello"
7151  * "hello".rstrip! #=> nil
7152  */
7153 
7154 static VALUE
7156 {
7157  rb_encoding *enc;
7158  char *s, *t, *e;
7159 
7160  str_modify_keep_cr(str);
7161  enc = STR_ENC_GET(str);
7163  s = RSTRING_PTR(str);
7164  if (!s || RSTRING_LEN(str) == 0) return Qnil;
7165  t = e = RSTRING_END(str);
7166 
7167  /* remove trailing spaces or '\0's */
7168  if (single_byte_optimizable(str)) {
7169  unsigned char c;
7170  while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--;
7171  }
7172  else {
7173  char *tp;
7174 
7175  while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
7176  unsigned int c = rb_enc_codepoint(tp, e, enc);
7177  if (c && !rb_isspace(c)) break;
7178  t = tp;
7179  }
7180  }
7181  if (t < e) {
7182  long len = t-RSTRING_PTR(str);
7183 
7184  STR_SET_LEN(str, len);
7185  RSTRING_PTR(str)[len] = '\0';
7186  return str;
7187  }
7188  return Qnil;
7189 }
7190 
7191 
7192 /*
7193  * call-seq:
7194  * str.rstrip -> new_str
7195  *
7196  * Returns a copy of <i>str</i> with trailing whitespace removed. See also
7197  * <code>String#lstrip</code> and <code>String#strip</code>.
7198  *
7199  * " hello ".rstrip #=> " hello"
7200  * "hello".rstrip #=> "hello"
7201  */
7202 
7203 static VALUE
7205 {
7206  str = rb_str_dup(str);
7207  rb_str_rstrip_bang(str);
7208  return str;
7209 }
7210 
7211 
7212 /*
7213  * call-seq:
7214  * str.strip! -> str or nil
7215  *
7216  * Removes leading and trailing whitespace from <i>str</i>. Returns
7217  * <code>nil</code> if <i>str</i> was not altered.
7218  */
7219 
7220 static VALUE
7222 {
7223  VALUE l = rb_str_lstrip_bang(str);
7224  VALUE r = rb_str_rstrip_bang(str);
7225 
7226  if (NIL_P(l) && NIL_P(r)) return Qnil;
7227  return str;
7228 }
7229 
7230 
7231 /*
7232  * call-seq:
7233  * str.strip -> new_str
7234  *
7235  * Returns a copy of <i>str</i> with leading and trailing whitespace removed.
7236  *
7237  * " hello ".strip #=> "hello"
7238  * "\tgoodbye\r\n".strip #=> "goodbye"
7239  */
7240 
7241 static VALUE
7243 {
7244  str = rb_str_dup(str);
7245  rb_str_strip_bang(str);
7246  return str;
7247 }
7248 
7249 static VALUE
7250 scan_once(VALUE str, VALUE pat, long *start)
7251 {
7252  VALUE result, match;
7253  struct re_registers *regs;
7254  int i;
7255 
7256  if (rb_reg_search(pat, str, *start, 0) >= 0) {
7257  match = rb_backref_get();
7258  regs = RMATCH_REGS(match);
7259  if (BEG(0) == END(0)) {
7260  rb_encoding *enc = STR_ENC_GET(str);
7261  /*
7262  * Always consume at least one character of the input string
7263  */
7264  if (RSTRING_LEN(str) > END(0))
7265  *start = END(0)+rb_enc_fast_mbclen(RSTRING_PTR(str)+END(0),
7266  RSTRING_END(str), enc);
7267  else
7268  *start = END(0)+1;
7269  }
7270  else {
7271  *start = END(0);
7272  }
7273  if (regs->num_regs == 1) {
7274  return rb_reg_nth_match(0, match);
7275  }
7276  result = rb_ary_new2(regs->num_regs);
7277  for (i=1; i < regs->num_regs; i++) {
7278  rb_ary_push(result, rb_reg_nth_match(i, match));
7279  }
7280 
7281  return result;
7282  }
7283  return Qnil;
7284 }
7285 
7286 
7287 /*
7288  * call-seq:
7289  * str.scan(pattern) -> array
7290  * str.scan(pattern) {|match, ...| block } -> str
7291  *
7292  * Both forms iterate through <i>str</i>, matching the pattern (which may be a
7293  * <code>Regexp</code> or a <code>String</code>). For each match, a result is
7294  * generated and either added to the result array or passed to the block. If
7295  * the pattern contains no groups, each individual result consists of the
7296  * matched string, <code>$&</code>. If the pattern contains groups, each
7297  * individual result is itself an array containing one entry per group.
7298  *
7299  * a = "cruel world"
7300  * a.scan(/\w+/) #=> ["cruel", "world"]
7301  * a.scan(/.../) #=> ["cru", "el ", "wor"]
7302  * a.scan(/(...)/) #=> [["cru"], ["el "], ["wor"]]
7303  * a.scan(/(..)(..)/) #=> [["cr", "ue"], ["l ", "wo"]]
7304  *
7305  * And the block form:
7306  *
7307  * a.scan(/\w+/) {|w| print "<<#{w}>> " }
7308  * print "\n"
7309  * a.scan(/(.)(.)/) {|x,y| print y, x }
7310  * print "\n"
7311  *
7312  * <em>produces:</em>
7313  *
7314  * <<cruel>> <<world>>
7315  * rceu lowlr
7316  */
7317 
7318 static VALUE
7320 {
7321  VALUE result;
7322  long start = 0;
7323  long last = -1, prev = 0;
7324  char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);
7325 
7326  pat = get_pat(pat, 1);
7327  if (!rb_block_given_p()) {
7328  VALUE ary = rb_ary_new();
7329 
7330  while (!NIL_P(result = scan_once(str, pat, &start))) {
7331  last = prev;
7332  prev = start;
7333  rb_ary_push(ary, result);
7334  }
7335  if (last >= 0) rb_reg_search(pat, str, last, 0);
7336  return ary;
7337  }
7338 
7339  while (!NIL_P(result = scan_once(str, pat, &start))) {
7340  last = prev;
7341  prev = start;
7342  rb_yield(result);
7343  str_mod_check(str, p, len);
7344  }
7345  if (last >= 0) rb_reg_search(pat, str, last, 0);
7346  return str;
7347 }
7348 
7349 
7350 /*
7351  * call-seq:
7352  * str.hex -> integer
7353  *
7354  * Treats leading characters from <i>str</i> as a string of hexadecimal digits
7355  * (with an optional sign and an optional <code>0x</code>) and returns the
7356  * corresponding number. Zero is returned on error.
7357  *
7358  * "0x0a".hex #=> 10
7359  * "-1234".hex #=> -4660
7360  * "0".hex #=> 0
7361  * "wombat".hex #=> 0
7362  */
7363 
7364 static VALUE
7366 {
7367  return rb_str_to_inum(str, 16, FALSE);
7368 }
7369 
7370 
7371 /*
7372  * call-seq:
7373  * str.oct -> integer
7374  *
7375  * Treats leading characters of <i>str</i> as a string of octal digits (with an
7376  * optional sign) and returns the corresponding number. Returns 0 if the
7377  * conversion fails.
7378  *
7379  * "123".oct #=> 83
7380  * "-377".oct #=> -255
7381  * "bad".oct #=> 0
7382  * "0377bad".oct #=> 255
7383  */
7384 
7385 static VALUE
7387 {
7388  return rb_str_to_inum(str, -8, FALSE);
7389 }
7390 
7391 
7392 /*
7393  * call-seq:
7394  * str.crypt(salt_str) -> new_str
7395  *
7396  * Applies a one-way cryptographic hash to <i>str</i> by invoking the
7397  * standard library function <code>crypt(3)</code> with the given
7398  * salt string. While the format and the result are system and
7399  * implementation dependent, using a salt matching the regular
7400  * expression <code>\A[a-zA-Z0-9./]{2}</code> should be valid and
7401  * safe on any platform, in which only the first two characters are
7402  * significant.
7403  *
7404  * This method is for use in system specific scripts, so if you want
7405  * a cross-platform hash function consider using Digest or OpenSSL
7406  * instead.
7407  */
7408 
7409 static VALUE
7411 {
7412  extern char *crypt(const char *, const char *);
7413  VALUE result;
7414  const char *s, *saltp;
7415  char *res;
7416 #ifdef BROKEN_CRYPT
7417  char salt_8bit_clean[3];
7418 #endif
7419 
7420  StringValue(salt);
7421  if (RSTRING_LEN(salt) < 2)
7422  rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
7423 
7424  s = RSTRING_PTR(str);
7425  if (!s) s = "";
7426  saltp = RSTRING_PTR(salt);
7427 #ifdef BROKEN_CRYPT
7428  if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
7429  salt_8bit_clean[0] = saltp[0] & 0x7f;
7430  salt_8bit_clean[1] = saltp[1] & 0x7f;
7431  salt_8bit_clean[2] = '\0';
7432  saltp = salt_8bit_clean;
7433  }
7434 #endif
7435  res = crypt(s, saltp);
7436  if (!res) {
7437  rb_sys_fail("crypt");
7438  }
7439  result = rb_str_new2(res);
7440  OBJ_INFECT(result, str);
7441  OBJ_INFECT(result, salt);
7442  return result;
7443 }
7444 
7445 
7446 /*
7447  * call-seq:
7448  * str.intern -> symbol
7449  * str.to_sym -> symbol
7450  *
7451  * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
7452  * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
7453  *
7454  * "Koala".intern #=> :Koala
7455  * s = 'cat'.to_sym #=> :cat
7456  * s == :cat #=> true
7457  * s = '@cat'.to_sym #=> :@cat
7458  * s == :@cat #=> true
7459  *
7460  * This can also be used to create symbols that cannot be represented using the
7461  * <code>:xxx</code> notation.
7462  *
7463  * 'cat and dog'.to_sym #=> :"cat and dog"
7464  */
7465 
7466 VALUE
7468 {
7469  VALUE str = RB_GC_GUARD(s);
7470  ID id;
7471 
7472  id = rb_intern_str(str);
7473  return ID2SYM(id);
7474 }
7475 
7476 
7477 /*
7478  * call-seq:
7479  * str.ord -> integer
7480  *
7481  * Return the <code>Integer</code> ordinal of a one-character string.
7482  *
7483  * "a".ord #=> 97
7484  */
7485 
7486 VALUE
7488 {
7489  unsigned int c;
7490 
7492  return UINT2NUM(c);
7493 }
/*
 *  call-seq:
 *     str.sum(n=16)   -> integer
 *
 *  Returns a basic <em>n</em>-bit checksum of the bytes in <i>str</i>,
 *  where <em>n</em> is the optional <code>Fixnum</code> parameter, defaulting
 *  to 16. The result is simply the sum of the binary value of each byte in
 *  <i>str</i>, masked with <code>2**n - 1</code> (that is, taken modulo
 *  <code>2**n</code>). If <em>n</em> is 0 the sum is returned unmasked.
 *  This is not a particularly good checksum.
 */
7504 
7505 static VALUE
7507 {
 /*
  * Adds up the bytes of str and reduces the total to `bits' bits.
  * `bits' is 16 when no argument is given; 0 means "return the full sum".
  * The running total lives in the C integer sum0 and is folded into the
  * Ruby integer `sum' before it could grow past FIXNUM_MAX.
  *
  * NOTE(review): the declarator line (listing line 7506) is missing from
  * this copy; the body relies on argc, argv and str being in scope.
  */
7508  VALUE vbits;
7509  int bits;
7510  char *ptr, *p, *pend;
7511  long len;
7512  VALUE sum = INT2FIX(0);
7513  unsigned long sum0 = 0;
7514 
7515  if (argc == 0) {
7516  bits = 16;
7517  }
7518  else {
7519  rb_scan_args(argc, argv, "01", &vbits);
7520  bits = NUM2INT(vbits);
7521  }
7522  ptr = p = RSTRING_PTR(str);
7523  len = RSTRING_LEN(str);
7524  pend = p + len;
7525 
7526  while (p < pend) {
 /* spill before adding one more byte (at most UCHAR_MAX) could pass FIXNUM_MAX */
7527  if (FIXNUM_MAX - UCHAR_MAX < sum0) {
7528  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
 /* presumably verifies str was not changed while '+' ran; helper is defined elsewhere */
7529  str_mod_check(str, ptr, len);
7530  sum0 = 0;
7531  }
7532  sum0 += (unsigned char)*p;
7533  p++;
7534  }
7535 
7536  if (bits == 0) {
 /* no reduction requested: just fold in what is still pending in sum0 */
7537  if (sum0) {
7538  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7539  }
7540  }
7541  else {
7542  if (sum == INT2FIX(0)) {
 /*
  * Nothing was spilled, so the whole total is in sum0 and can be masked
  * in C.  When bits is at least the width of long the mask would keep
  * every bit, so sum0 is used unchanged.
  *
  * NOTE(review): a negative `bits' also satisfies the test below, and
  * shifting by a negative count is undefined behavior in C -- confirm
  * whether negative arguments should be rejected or clamped earlier.
  */
7543  if (bits < (int)sizeof(long)*CHAR_BIT) {
7544  sum0 &= (((unsigned long)1)<<bits)-1;
7545  }
7546  sum = LONG2FIX(sum0);
7547  }
7548  else {
7549  VALUE mod;
7550 
7551  if (sum0) {
7552  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7553  }
7554 
 /* same mask as above, computed with Ruby integers: sum & ((1 << bits) - 1) */
7555  mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
7556  mod = rb_funcall(mod, '-', 1, INT2FIX(1));
7557  sum = rb_funcall(sum, '&', 1, mod);
7558  }
7559  }
7560  return sum;
7561 }
7562 
/*
 * Pads str with a padding string up to a given width and returns the result
 * as a new string.
 *
 * argv[0] is the target width in characters; the optional argv[1] is the
 * padding string (a single space when omitted).
 * jflag selects where the padding goes: 'l' puts all of it on the right,
 * 'r' puts all of it on the left, any other value splits it (the left side
 * gets n/2, the right side the remainder).
 *
 * Raises ArgumentError for an empty padding string or when the resulting
 * byte length would not fit in a long.
 */
7563 static VALUE
7564 rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
7565 {
7566  rb_encoding *enc;
7567  VALUE w;
 /* flen: byte length of the padding string, fclen: its length in characters */
7568  long width, len, flen = 1, fclen = 1;
7569  VALUE res;
7570  char *p;
7571  const char *f = " ";
 /*
  * llen/rlen: padding characters wanted on the left/right.
  * llen2/rlen2: byte length of the partial padding string that finishes
  * each side when llen/rlen is not a multiple of fclen.
  */
7572  long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
7573  volatile VALUE pad;
7574  int singlebyte = 1, cr;
7575 
7576  rb_scan_args(argc, argv, "11", &w, &pad);
7577  enc = STR_ENC_GET(str);
7578  width = NUM2LONG(w);
7579  if (argc == 2) {
7580  StringValue(pad);
7581  enc = rb_enc_check(str, pad);
7582  f = RSTRING_PTR(pad);
7583  flen = RSTRING_LEN(pad);
7584  fclen = str_strlen(pad, enc);
7585  singlebyte = single_byte_optimizable(pad);
7586  if (flen == 0 || fclen == 0) {
7587  rb_raise(rb_eArgError, "zero width padding");
7588  }
7589  }
7590  len = str_strlen(str, enc);
 /* already wide enough (or negative width): hand back a duplicate, unpadded */
7591  if (width < 0 || len >= width) return rb_str_dup(str);
7592  n = width - len;
7593  llen = (jflag == 'l') ? 0 : ((jflag == 'r') ? n : n/2);
7594  rlen = n - llen;
7595  cr = ENC_CODERANGE(str);
7596  if (flen > 1) {
 /* str_offset presumably maps a character count to a byte offset within f -- helper defined elsewhere */
7597  llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
7598  rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
7599  }
7600  size = RSTRING_LEN(str);
 /*
  * Compute the result's byte length step by step, testing each step against
  * LONG_MAX before it is applied: whole repetitions of the padding string,
  * times its byte length, plus both partial tails, plus str itself.
  * len is reused here as the byte count (it held the character count above).
  */
7601  if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
7602  (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
7603  (len += llen2 + rlen2) >= LONG_MAX - size) {
7604  rb_raise(rb_eArgError, "argument too big");
7605  }
7606  len += size;
7607  res = rb_str_new5(str, 0, len);
7608  p = RSTRING_PTR(res);
 /* left padding: a one-byte padding string is filled with memset, otherwise copy whole repetitions then the partial tail */
7609  if (flen <= 1) {
7610  memset(p, *f, llen);
7611  p += llen;
7612  }
7613  else {
7614  while (llen >= fclen) {
7615  memcpy(p,f,flen);
7616  p += flen;
7617  llen -= fclen;
7618  }
7619  if (llen > 0) {
7620  memcpy(p, f, llen2);
7621  p += llen2;
7622  }
7623  }
7624  memcpy(p, RSTRING_PTR(str), size);
7625  p += size;
 /* right padding, same scheme as the left side */
7626  if (flen <= 1) {
7627  memset(p, *f, rlen);
7628  p += rlen;
7629  }
7630  else {
7631  while (rlen >= fclen) {
7632  memcpy(p,f,flen);
7633  p += flen;
7634  rlen -= fclen;
7635  }
7636  if (rlen > 0) {
7637  memcpy(p, f, rlen2);
7638  p += rlen2;
7639  }
7640  }
7641  *p = '\0';
7642  STR_SET_LEN(res, p-RSTRING_PTR(res));
7643  OBJ_INFECT(res, str);
7644  if (!NIL_P(pad)) OBJ_INFECT(res, pad);
7645  rb_enc_associate(res, enc);
 /* combine the code ranges of str and pad; a BROKEN result is not recorded on res */
7646  if (argc == 2)
7647  cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad));
7648  if (cr != ENC_CODERANGE_BROKEN)
7649  ENC_CODERANGE_SET(res, cr);
7650  return res;
7651 }
7652 
7653 
7654 /*
7655  * call-seq:
7656  * str.ljust(integer, padstr=' ') -> new_str
7657  *
7658  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7659  * <code>String</code> of length <i>integer</i> with <i>str</i> left justified
7660  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7661  *
7662  * "hello".ljust(4) #=> "hello"
7663  * "hello".ljust(20) #=> "hello "
7664  * "hello".ljust(20, '1234') #=> "hello123412341234123"
7665  */
7666 
7667 static VALUE
7669 {
7670  return rb_str_justify(argc, argv, str, 'l');
7671 }
7672 
7673 
7674 /*
7675  * call-seq:
7676  * str.rjust(integer, padstr=' ') -> new_str
7677  *
7678  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7679  * <code>String</code> of length <i>integer</i> with <i>str</i> right justified
7680  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7681  *
7682  * "hello".rjust(4) #=> "hello"
7683  * "hello".rjust(20) #=> " hello"
7684  * "hello".rjust(20, '1234') #=> "123412341234123hello"
7685  */
7686 
7687 static VALUE
7689 {
7690  return rb_str_justify(argc, argv, str, 'r');
7691 }
7692 
7693 
7694 /*
7695  * call-seq:
7696  * str.center(width, padstr=' ') -> new_str
7697  *
7698  * Centers +str+ in +width+. If +width+ is greater than the length of +str+,
7699  * returns a new String of length +width+ with +str+ centered and padded with
7700  * +padstr+; otherwise, returns +str+.
7701  *
7702  * "hello".center(4) #=> "hello"
7703  * "hello".center(20) #=> " hello "
7704  * "hello".center(20, '123') #=> "1231231hello12312312"
7705  */
7706 
7707 static VALUE
7709 {
7710  return rb_str_justify(argc, argv, str, 'c');
7711 }
7712 
7713 /*
7714  * call-seq:
7715  * str.partition(sep) -> [head, sep, tail]
7716  * str.partition(regexp) -> [head, match, tail]
7717  *
7718  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string
7719  * and returns the part before it, the match, and the part
7720  * after it.
7721  * If it is not found, returns <i>str</i> and two empty strings.
7722  *
7723  * "hello".partition("l") #=> ["he", "l", "lo"]
7724  * "hello".partition("x") #=> ["hello", "", ""]
7725  * "hello".partition(/.l/) #=> ["h", "el", "lo"]
7726  */
7727 
7728 static VALUE
7730 {
7731  long pos;
7732  int regex = FALSE;
7733 
7734  if (RB_TYPE_P(sep, T_REGEXP)) {
7735  pos = rb_reg_search(sep, str, 0, 0);
7736  regex = TRUE;
7737  }
7738  else {
7739  VALUE tmp;
7740 
7741  tmp = rb_check_string_type(sep);
7742  if (NIL_P(tmp)) {
7743  rb_raise(rb_eTypeError, "type mismatch: %s given",
7744  rb_obj_classname(sep));
7745  }
7746  sep = tmp;
7747  pos = rb_str_index(str, sep, 0);
7748  }
7749  if (pos < 0) {
7750  failed:
7751  return rb_ary_new3(3, str, str_new_empty(str), str_new_empty(str));
7752  }
7753  if (regex) {
7754  sep = rb_str_subpat(str, sep, INT2FIX(0));
7755  if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
7756  }
7757  return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
7758  sep,
7759  rb_str_subseq(str, pos+RSTRING_LEN(sep),
7760  RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
7761 }
7762 
7763 /*
7764  * call-seq:
7765  * str.rpartition(sep) -> [head, sep, tail]
7766  * str.rpartition(regexp) -> [head, match, tail]
7767  *
7768  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string from the end
7769  * of the string, and returns the part before it, the match, and the part
7770  * after it.
7771  * If it is not found, returns two empty strings and <i>str</i>.
7772  *
7773  * "hello".rpartition("l") #=> ["hel", "l", "o"]
7774  * "hello".rpartition("x") #=> ["", "", "hello"]
7775  * "hello".rpartition(/.l/) #=> ["he", "ll", "o"]
7776  */
7777 
7778 static VALUE
7780 {
7781  long pos = RSTRING_LEN(str);
7782  int regex = FALSE;
7783 
7784  if (RB_TYPE_P(sep, T_REGEXP)) {
7785  pos = rb_reg_search(sep, str, pos, 1);
7786  regex = TRUE;
7787  }
7788  else {
7789  VALUE tmp;
7790 
7791  tmp = rb_check_string_type(sep);
7792  if (NIL_P(tmp)) {
7793  rb_raise(rb_eTypeError, "type mismatch: %s given",
7794  rb_obj_classname(sep));
7795  }
7796  sep = tmp;
7797  pos = rb_str_sublen(str, pos);
7798  pos = rb_str_rindex(str, sep, pos);
7799  }
7800  if (pos < 0) {
7801  return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), str);
7802  }
7803  if (regex) {
7804  sep = rb_reg_nth_match(0, rb_backref_get());
7805  }
7806  else {
7807  pos = rb_str_offset(str, pos);
7808  }
7809  return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
7810  sep,
7811  rb_str_subseq(str, pos+RSTRING_LEN(sep),
7812  RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
7813 }
7814 
7815 /*
7816  * call-seq:
7817  * str.start_with?([prefixes]+) -> true or false
7818  *
7819  * Returns true if +str+ starts with one of the +prefixes+ given.
7820  *
7821  * "hello".start_with?("hell") #=> true
7822  *
7823  * # returns true if one of the prefixes matches.
7824  * "hello".start_with?("heaven", "hell") #=> true
7825  * "hello".start_with?("heaven", "paradise") #=> false
7826  */
7827 
7828 static VALUE
7830 {
7831  int i;
7832 
7833  for (i=0; i<argc; i++) {
7834  VALUE tmp = argv[i];
7835  StringValue(tmp);
7836  rb_enc_check(str, tmp);
7837  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7838  if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7839  return Qtrue;
7840  }
7841  return Qfalse;
7842 }
7843 
7844 /*
7845  * call-seq:
7846  * str.end_with?([suffixes]+) -> true or false
7847  *
7848  * Returns true if +str+ ends with one of the +suffixes+ given.
7849  */
7850 
7851 static VALUE
7853 {
7854  int i;
7855  char *p, *s, *e;
7856  rb_encoding *enc;
7857 
7858  for (i=0; i<argc; i++) {
7859  VALUE tmp = argv[i];
7860  StringValue(tmp);
7861  enc = rb_enc_check(str, tmp);
7862  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7863  p = RSTRING_PTR(str);
7864  e = p + RSTRING_LEN(str);
7865  s = e - RSTRING_LEN(tmp);
7866  if (rb_enc_left_char_head(p, s, e, enc) != s)
7867  continue;
7868  if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7869  return Qtrue;
7870  }
7871  return Qfalse;
7872 }
7873 
7874 void
7876 {
7877  if (!NIL_P(val) && !RB_TYPE_P(val, T_STRING)) {
7878  rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
7879  }
7880  *var = val;
7881 }
7882 
7883 
7884 /*
7885  * call-seq:
7886  * str.force_encoding(encoding) -> str
7887  *
7888  * Changes the encoding to +encoding+ and returns self.
7889  */
7890 
7891 static VALUE
7893 {
7894  str_modifiable(str);
7895  rb_enc_associate(str, rb_to_encoding(enc));
7896  ENC_CODERANGE_CLEAR(str);
7897  return str;
7898 }
7899 
7900 /*
7901  * call-seq:
7902  * str.b -> str
7903  *
7904  * Returns a copied string whose encoding is ASCII-8BIT.
7905  */
7906 
7907 static VALUE
7909 {
7910  VALUE str2 = str_alloc(rb_cString);
7911  str_replace_shared_without_enc(str2, str);
7912  OBJ_INFECT(str2, str);
7913  ENC_CODERANGE_CLEAR(str2);
7914  return str2;
7915 }
7916 
7917 /*
7918  * call-seq:
7919  * str.valid_encoding? -> true or false
7920  *
7921  * Returns true for a string which is encoded correctly.
7922  *
7923  * "\xc2\xa1".force_encoding("UTF-8").valid_encoding? #=> true
7924  * "\xc2".force_encoding("UTF-8").valid_encoding? #=> false
7925  * "\x80".force_encoding("UTF-8").valid_encoding? #=> false
7926  */
7927 
7928 static VALUE
7930 {
7931  int cr = rb_enc_str_coderange(str);
7932 
7933  return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
7934 }
7935 
7936 /*
7937  * call-seq:
7938  * str.ascii_only? -> true or false
7939  *
7940  * Returns true for a string which has only ASCII characters.
7941  *
7942  * "abc".force_encoding("UTF-8").ascii_only? #=> true
7943  * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false
7944  */
7945 
7946 static VALUE
7948 {
7949  int cr = rb_enc_str_coderange(str);
7950 
7951  return cr == ENC_CODERANGE_7BIT ? Qtrue : Qfalse;
7952 }
7953 
7968 VALUE
7969 rb_str_ellipsize(VALUE str, long len)
7970 {
7971  static const char ellipsis[] = "...";
7972  const long ellipsislen = sizeof(ellipsis) - 1;
7973  rb_encoding *const enc = rb_enc_get(str);
7974  const long blen = RSTRING_LEN(str);
7975  const char *const p = RSTRING_PTR(str), *e = p + blen;
7976  VALUE estr, ret = 0;
7977 
7978  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
7979  if (len * rb_enc_mbminlen(enc) >= blen ||
7980  (e = rb_enc_nth(p, e, len, enc)) - p == blen) {
7981  ret = str;
7982  }
7983  else if (len <= ellipsislen ||
7984  !(e = rb_enc_step_back(p, e, e, len = ellipsislen, enc))) {
7985  if (rb_enc_asciicompat(enc)) {
7986  ret = rb_str_new_with_class(str, ellipsis, len);
7987  rb_enc_associate(ret, enc);
7988  }
7989  else {
7990  estr = rb_usascii_str_new(ellipsis, len);
7991  ret = rb_str_encode(estr, rb_enc_from_encoding(enc), 0, Qnil);
7992  }
7993  }
7994  else if (ret = rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
7995  rb_str_cat(ret, ellipsis, ellipsislen);
7996  }
7997  else {
7998  estr = rb_str_encode(rb_usascii_str_new(ellipsis, ellipsislen),
7999  rb_enc_from_encoding(enc), 0, Qnil);
8000  rb_str_append(ret, estr);
8001  }
8002  return ret;
8003 }
8004 
8005 static VALUE
8007 {
8008  int cr;
8009  str = StringValue(str);
8010  cr = rb_enc_str_coderange(str);
8011  if (cr == ENC_CODERANGE_BROKEN) {
8012  rb_raise(rb_eArgError, "replacement must be valid byte sequence '%+"PRIsVALUE"'", str);
8013  }
8014  else if (cr == ENC_CODERANGE_7BIT) {
8015  rb_encoding *e = STR_ENC_GET(str);
8016  if (!rb_enc_asciicompat(enc)) {
8017  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
8018  rb_enc_name(enc), rb_enc_name(e));
8019  }
8020  }
8021  else { /* ENC_CODERANGE_VALID */
8022  rb_encoding *e = STR_ENC_GET(str);
8023  if (enc != e) {
8024  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
8025  rb_enc_name(enc), rb_enc_name(e));
8026  }
8027  }
8028  return str;
8029 }
8030 
8036 VALUE
8038 {
 /*
  * Builds a copy of str in which every invalid byte sequence is replaced.
  * The replacement is, in order of preference: the value returned by the
  * block (ASCII-compatible branch only, see the note further down), the
  * string repl when it is not nil, or a per-encoding default.
  *
  * Returns Qnil when there is nothing to do: the code range of str is
  * already 7BIT or VALID, its encoding is a dummy encoding, or the scan
  * finds no invalid byte.  Otherwise returns the newly built string.
  *
  * NOTE(review): this copy of the listing has lost the declarator line
  * (8037) and the statements at listing lines 8249 and 8267; the body
  * relies on str and repl being in scope.
  */
8039  int cr = ENC_CODERANGE(str);
8040  rb_encoding *enc;
8041  int encidx;
8042 
8043  if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID)
8044  return Qnil;
8045 
8046  enc = STR_ENC_GET(str);
8047  if (!NIL_P(repl)) {
8048  repl = str_compat_and_valid(repl, enc);
8049  }
8050 
8051  if (rb_enc_dummy_p(enc)) {
8052  return Qnil;
8053  }
8054  encidx = rb_enc_to_index(enc);
8055 
8056 #define DEFAULT_REPLACE_CHAR(str) do { \
8057  static const char replace[sizeof(str)-1] = str; \
8058  rep = replace; replen = (int)sizeof(replace); \
8059  } while (0)
8060 
8061  if (rb_enc_asciicompat(enc)) {
8062  const char *p = RSTRING_PTR(str);
8063  const char *e = RSTRING_END(str);
8064  const char *p1 = p;
8065  const char *rep;
8066  long replen;
8067  int rep7bit_p;
8068  VALUE buf = Qnil;
 /* rep == NULL means "ask the block for each replacement" */
8069  if (rb_block_given_p()) {
8070  rep = NULL;
8071  replen = 0;
8072  rep7bit_p = FALSE;
8073  }
8074  else if (!NIL_P(repl)) {
8075  rep = RSTRING_PTR(repl);
8076  replen = RSTRING_LEN(repl);
8077  rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT);
8078  }
8079  else if (encidx == rb_utf8_encindex()) {
8080  DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD");
8081  rep7bit_p = FALSE;
8082  }
8083  else {
8084  DEFAULT_REPLACE_CHAR("?");
8085  rep7bit_p = TRUE;
8086  }
 /* cr is recomputed for the result: it starts at 7BIT and is raised to VALID as needed */
8087  cr = ENC_CODERANGE_7BIT;
8088 
8089  p = search_nonascii(p, e);
8090  if (!p) {
8091  p = e;
8092  }
8093  while (p < e) {
8094  int ret = rb_enc_precise_mbclen(p, e, enc);
8095  if (MBCLEN_NEEDMORE_P(ret)) {
 /* truncated character at the end of str; handled after the loop */
8096  break;
8097  }
8098  else if (MBCLEN_CHARFOUND_P(ret)) {
8099  cr = ENC_CODERANGE_VALID;
8100  p += MBCLEN_CHARFOUND_LEN(ret);
8101  }
8102  else if (MBCLEN_INVALID_P(ret)) {
8103  /*
8104  * p1~p: valid ascii/multibyte chars
8105  * p ~e: invalid bytes + unknown bytes
8106  */
8107  long clen = rb_enc_mbmaxlen(enc);
8108  if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
8109  if (p > p1) {
8110  rb_str_buf_cat(buf, p1, p - p1);
8111  }
8112 
 /*
  * Choose how many bytes form one invalid unit: the longest prefix,
  * shorter than a full character, that still reads as the start of a
  * truncated character; a single byte when no such prefix exists.
  */
8113  if (e - p < clen) clen = e - p;
8114  if (clen <= 2) {
8115  clen = 1;
8116  }
8117  else {
8118  const char *q = p;
8119  clen--;
8120  for (; clen > 1; clen--) {
8121  ret = rb_enc_precise_mbclen(q, q + clen, enc);
8122  if (MBCLEN_NEEDMORE_P(ret)) break;
8123  if (MBCLEN_INVALID_P(ret)) continue;
8124  UNREACHABLE;
8125  }
8126  }
8127  if (rep) {
8128  rb_str_buf_cat(buf, rep, replen);
8129  if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
8130  }
8131  else {
8132  repl = rb_yield(rb_enc_str_new(p, clen, enc));
8133  repl = str_compat_and_valid(repl, enc);
8134  rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
8135  if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
8136  cr = ENC_CODERANGE_VALID;
8137  }
8138  p += clen;
8139  p1 = p;
8140  p = search_nonascii(p, e);
8141  if (!p) {
8142  p = e;
8143  break;
8144  }
8145  }
8146  else {
8147  UNREACHABLE;
8148  }
8149  }
8150  if (NIL_P(buf)) {
8151  if (p == e) {
 /* whole string scanned without a replacement: record the code range found and report "unchanged" */
8152  ENC_CODERANGE_SET(str, cr);
8153  return Qnil;
8154  }
8155  buf = rb_str_buf_new(RSTRING_LEN(str));
8156  }
8157  if (p1 < p) {
8158  rb_str_buf_cat(buf, p1, p - p1);
8159  }
 /* bytes left after the loop are a truncated final character; they are replaced as one unit */
8160  if (p < e) {
8161  if (rep) {
8162  rb_str_buf_cat(buf, rep, replen);
8163  if (!rep7bit_p) cr = ENC_CODERANGE_VALID;
8164  }
8165  else {
8166  repl = rb_yield(rb_enc_str_new(p, e-p, enc));
8167  repl = str_compat_and_valid(repl, enc);
8168  rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
8169  if (ENC_CODERANGE(repl) == ENC_CODERANGE_VALID)
8170  cr = ENC_CODERANGE_VALID;
8171  }
8172  }
8173  ENCODING_CODERANGE_SET(buf, rb_enc_to_index(enc), cr);
8174  return buf;
8175  }
8176  else {
8177  /* ASCII incompatible */
8178  const char *p = RSTRING_PTR(str);
8179  const char *e = RSTRING_END(str);
8180  const char *p1 = p;
8181  VALUE buf = Qnil;
8182  const char *rep;
8183  long replen;
8184  long mbminlen = rb_enc_mbminlen(enc);
 /*
  * NOTE(review): unlike the ASCII-compatible branch, this chain never
  * tests rb_block_given_p().  Every arm assigns a non-NULL rep, so the
  * `else' (rb_yield) paths below cannot be reached and a block passed
  * for an ASCII-incompatible string appears to be ignored -- confirm.
  */
8185  if (!NIL_P(repl)) {
8186  rep = RSTRING_PTR(repl);
8187  replen = RSTRING_LEN(repl);
8188  }
8189  else if (encidx == ENCINDEX_UTF_16BE) {
8190  DEFAULT_REPLACE_CHAR("\xFF\xFD");
8191  }
8192  else if (encidx == ENCINDEX_UTF_16LE) {
8193  DEFAULT_REPLACE_CHAR("\xFD\xFF");
8194  }
8195  else if (encidx == ENCINDEX_UTF_32BE) {
8196  DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD");
8197  }
8198  else if (encidx == ENCINDEX_UTF_32LE) {
8199  DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00");
8200  }
8201  else {
8202  DEFAULT_REPLACE_CHAR("?");
8203  }
8204 
8205  while (p < e) {
8206  int ret = rb_enc_precise_mbclen(p, e, enc);
8207  if (MBCLEN_NEEDMORE_P(ret)) {
8208  break;
8209  }
8210  else if (MBCLEN_CHARFOUND_P(ret)) {
8211  p += MBCLEN_CHARFOUND_LEN(ret);
8212  }
8213  else if (MBCLEN_INVALID_P(ret)) {
8214  const char *q = p;
8215  long clen = rb_enc_mbmaxlen(enc);
8216  if (NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
8217  if (p > p1) rb_str_buf_cat(buf, p1, p - p1);
8218 
 /* same unit search as in the ASCII-compatible branch, stepping by mbminlen bytes instead of one */
8219  if (e - p < clen) clen = e - p;
8220  if (clen <= mbminlen * 2) {
8221  clen = mbminlen;
8222  }
8223  else {
8224  clen -= mbminlen;
8225  for (; clen > mbminlen; clen-=mbminlen) {
8226  ret = rb_enc_precise_mbclen(q, q + clen, enc);
8227  if (MBCLEN_NEEDMORE_P(ret)) break;
8228  if (MBCLEN_INVALID_P(ret)) continue;
8229  UNREACHABLE;
8230  }
8231  }
8232  if (rep) {
8233  rb_str_buf_cat(buf, rep, replen);
8234  }
8235  else {
 /*
  * NOTE(review): the ASCII-compatible branch yields only the invalid
  * unit (p, clen); here the whole remainder (p, e-p) is passed although
  * p then advances by clen only.  Looks like this should be clen.
  */
8236  repl = rb_yield(rb_enc_str_new(p, e-p, enc));
8237  repl = str_compat_and_valid(repl, enc);
8238  rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
8239  }
8240  p += clen;
8241  p1 = p;
8242  }
8243  else {
8244  UNREACHABLE;
8245  }
8246  }
8247  if (NIL_P(buf)) {
8248  if (p == e) {
8250  return Qnil;
8251  }
8252  buf = rb_str_buf_new(RSTRING_LEN(str));
8253  }
8254  if (p1 < p) {
8255  rb_str_buf_cat(buf, p1, p - p1);
8256  }
 /* truncated final character: replaced as one unit */
8257  if (p < e) {
8258  if (rep) {
8259  rb_str_buf_cat(buf, rep, replen);
8260  }
8261  else {
8262  repl = rb_yield(rb_enc_str_new(p, e-p, enc));
8263  repl = str_compat_and_valid(repl, enc);
8264  rb_str_buf_cat(buf, RSTRING_PTR(repl), RSTRING_LEN(repl));
8265  }
8266  }
8268  return buf;
8269  }
8270 }
8271 
8272 /*
8273  * call-seq:
8274  * str.scrub -> new_str
8275  * str.scrub(repl) -> new_str
8276  * str.scrub{|bytes|} -> new_str
8277  *
8278  * If the string contains an invalid byte sequence, replaces the invalid bytes with the
8279  * given replacement character; otherwise returns a copy of the string.
8280  * If block is given, replace invalid bytes with returned value of the block.
8281  *
8282  * "abc\u3042\x81".scrub #=> "abc\u3042\uFFFD"
8283  * "abc\u3042\x81".scrub("*") #=> "abc\u3042*"
8284  * "abc\u3042\xE3\x80".scrub{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
8285  */
8286 static VALUE
8288 {
8289  VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
8290  VALUE new = rb_str_scrub(str, repl);
8291  return NIL_P(new) ? rb_str_dup(str): new;
8292 }
8293 
8294 /*
8295  * call-seq:
8296  * str.scrub! -> str
8297  * str.scrub!(repl) -> str
8298  * str.scrub!{|bytes|} -> str
8299  *
8300  * If the string contains an invalid byte sequence, replaces the invalid bytes with the
8301  * given replacement character; otherwise leaves the string as is. Returns self.
8302  * If block is given, replace invalid bytes with returned value of the block.
8303  *
8304  * "abc\u3042\x81".scrub! #=> "abc\u3042\uFFFD"
8305  * "abc\u3042\x81".scrub!("*") #=> "abc\u3042*"
8306  * "abc\u3042\xE3\x80".scrub!{|bytes| '<'+bytes.unpack('H*')[0]+'>' } #=> "abc\u3042<e380>"
8307  */
8308 static VALUE
8310 {
8311  VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) : Qnil;
8312  VALUE new = rb_str_scrub(str, repl);
8313  if (!NIL_P(new)) rb_str_replace(str, new);
8314  return str;
8315 }
8316 
8317 /**********************************************************************
8318  * Document-class: Symbol
8319  *
8320  * <code>Symbol</code> objects represent names and some strings
8321  * inside the Ruby
8322  * interpreter. They are generated using the <code>:name</code> and
8323  * <code>:"string"</code> literals
8324  * syntax, and by the various <code>to_sym</code> methods. The same
8325  * <code>Symbol</code> object will be created for a given name or string
8326  * for the duration of a program's execution, regardless of the context
8327  * or meaning of that name. Thus if <code>Fred</code> is a constant in
8328  * one context, a method in another, and a class in a third, the
8329  * <code>Symbol</code> <code>:Fred</code> will be the same object in
8330  * all three contexts.
8331  *
8332  * module One
8333  * class Fred
8334  * end
8335  * $f1 = :Fred
8336  * end
8337  * module Two
8338  * Fred = 1
8339  * $f2 = :Fred
8340  * end
8341  * def Fred()
8342  * end
8343  * $f3 = :Fred
8344  * $f1.object_id #=> 2514190
8345  * $f2.object_id #=> 2514190
8346  * $f3.object_id #=> 2514190
8347  *
8348  */
8349 
8350 
8351 /*
8352  * call-seq:
8353  * sym == obj -> true or false
8354  *
8355  * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
8356  * symbol, returns <code>true</code>.
8357  */
8358 
8359 static VALUE
8360 sym_equal(VALUE sym1, VALUE sym2)
8361 {
8362  if (sym1 == sym2) return Qtrue;
8363  return Qfalse;
8364 }
8365 
8366 
8367 static int
8368 sym_printable(const char *s, const char *send, rb_encoding *enc)
8369 {
8370  while (s < send) {
8371  int n;
8372  int c = rb_enc_codepoint_len(s, send, &n, enc);
8373 
8374  if (!rb_enc_isprint(c, enc)) return FALSE;
8375  s += n;
8376  }
8377  return TRUE;
8378 }
8379 
8380 int
8382 {
8383  rb_encoding *enc;
8384  const char *ptr;
8385  long len;
8387 
8388  if (resenc == NULL) resenc = rb_default_external_encoding();
8389  enc = STR_ENC_GET(sym);
8390  ptr = RSTRING_PTR(sym);
8391  len = RSTRING_LEN(sym);
8392  if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (long)strlen(ptr) ||
8393  !rb_enc_symname_p(ptr, enc) || !sym_printable(ptr, ptr + len, enc)) {
8394  return FALSE;
8395  }
8396  return TRUE;
8397 }
8398 
8399 VALUE
8401 {
8402  rb_encoding *enc;
8403  const char *ptr;
8404  long len;
8405  rb_encoding *resenc;
8406 
8407  Check_Type(str, T_STRING);
8408  resenc = rb_default_internal_encoding();
8409  if (resenc == NULL) resenc = rb_default_external_encoding();
8410  enc = STR_ENC_GET(str);
8411  ptr = RSTRING_PTR(str);
8412  len = RSTRING_LEN(str);
8413  if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
8414  !sym_printable(ptr, ptr + len, enc)) {
8415  return rb_str_inspect(str);
8416  }
8417  return str;
8418 }
8419 
8420 VALUE
8422 {
8423  return rb_str_quote_unprintable(rb_id2str(id));
8424 }
8425 
8426 /*
8427  * call-seq:
8428  * sym.inspect -> string
8429  *
8430  * Returns the representation of <i>sym</i> as a symbol literal.
8431  *
8432  * :fred.inspect #=> ":fred"
8433  */
8434 
8435 static VALUE
8437 {
8438  VALUE str;
8439  const char *ptr;
8440  long len;
8441  ID id = SYM2ID(sym);
8442  char *dest;
8443 
8444  sym = rb_id2str(id);
8445  if (!rb_str_symname_p(sym)) {
8446  str = rb_str_inspect(sym);
8447  len = RSTRING_LEN(str);
8448  rb_str_resize(str, len + 1);
8449  dest = RSTRING_PTR(str);
8450  memmove(dest + 1, dest, len);
8451  dest[0] = ':';
8452  }
8453  else {
8454  rb_encoding *enc = STR_ENC_GET(sym);
8455  ptr = RSTRING_PTR(sym);
8456  len = RSTRING_LEN(sym);
8457  str = rb_enc_str_new(0, len + 1, enc);
8458  dest = RSTRING_PTR(str);
8459  dest[0] = ':';
8460  memcpy(dest + 1, ptr, len);
8461  }
8462  return str;
8463 }
8464 
8465 
8466 /*
8467  * call-seq:
8468  * sym.id2name -> string
8469  * sym.to_s -> string
8470  *
8471  * Returns the name or string corresponding to <i>sym</i>.
8472  *
8473  * :fred.id2name #=> "fred"
8474  */
8475 
8476 
8477 VALUE
8479 {
8480  ID id = SYM2ID(sym);
8481 
8482  return str_new3(rb_cString, rb_id2str(id));
8483 }
8484 
8485 
8486 /*
8487  * call-seq:
8488  * sym.to_sym -> sym
8489  * sym.intern -> sym
8490  *
8491  * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
8492  * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
8493  * in this case.
8494  */
8495 
8496 static VALUE
8498 {
8499  return sym;
8500 }
8501 
8502 static VALUE
8503 sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
8504 {
8505  VALUE obj;
8506 
8507  if (argc < 1) {
8508  rb_raise(rb_eArgError, "no receiver given");
8509  }
8510  obj = argv[0];
8511  return rb_funcall_with_block(obj, (ID)sym, argc - 1, argv + 1, passed_proc);
8512 }
8513 
8514 /*
8515  * call-seq:
8516  * sym.to_proc
8517  *
8518  * Returns a _Proc_ object which responds to the given method by _sym_.
8519  *
8520  * (1..3).collect(&:to_s) #=> ["1", "2", "3"]
8521  */
8522 
8523 static VALUE
8525 {
8526  static VALUE sym_proc_cache = Qfalse;
8527  enum {SYM_PROC_CACHE_SIZE = 67};
8528  VALUE proc;
8529  long id, index;
8530  VALUE *aryp;
8531 
8532  if (!sym_proc_cache) {
8533  sym_proc_cache = rb_ary_tmp_new(SYM_PROC_CACHE_SIZE * 2);
8534  rb_gc_register_mark_object(sym_proc_cache);
8535  rb_ary_store(sym_proc_cache, SYM_PROC_CACHE_SIZE*2 - 1, Qnil);
8536  }
8537 
8538  id = SYM2ID(sym);
8539  index = (id % SYM_PROC_CACHE_SIZE) << 1;
8540 
8541  aryp = RARRAY_PTR(sym_proc_cache);
8542  if (aryp[index] == sym) {
8543  return aryp[index + 1];
8544  }
8545  else {
8546  proc = rb_proc_new(sym_call, (VALUE)id);
8547  aryp[index] = sym;
8548  aryp[index + 1] = proc;
8549  return proc;
8550  }
8551 }
8552 
8553 /*
8554  * call-seq:
8555  *
8556  * sym.succ
8557  *
8558  * Same as <code>sym.to_s.succ.intern</code>.
8559  */
8560 
8561 static VALUE
8563 {
8564  return rb_str_intern(rb_str_succ(rb_sym_to_s(sym)));
8565 }
8566 
8567 /*
8568  * call-seq:
8569  *
8570  * symbol <=> other_symbol -> -1, 0, +1 or nil
8571  *
8572  * Compares +symbol+ with +other_symbol+ after calling #to_s on each of the
8573  * symbols. Returns -1, 0, +1 or nil depending on whether +symbol+ is less
8574  * than, equal to, or greater than +other_symbol+.
8575  *
8576  * +nil+ is returned if the two values are incomparable.
8577  *
8578  * See String#<=> for more information.
8579  */
8580 
8581 static VALUE
8583 {
8584  if (!SYMBOL_P(other)) {
8585  return Qnil;
8586  }
8587  return rb_str_cmp_m(rb_sym_to_s(sym), rb_sym_to_s(other));
8588 }
8589 
8590 /*
8591  * call-seq:
8592  *
8593  * sym.casecmp(other) -> -1, 0, +1 or nil
8594  *
8595  * Case-insensitive version of <code>Symbol#<=></code>.
8596  */
8597 
8598 static VALUE
8600 {
8601  if (!SYMBOL_P(other)) {
8602  return Qnil;
8603  }
8604  return rb_str_casecmp(rb_sym_to_s(sym), rb_sym_to_s(other));
8605 }
8606 
8607 /*
8608  * call-seq:
8609  * sym =~ obj -> fixnum or nil
8610  * sym.match(obj) -> fixnum or nil
8611  *
8612  * Returns <code>sym.to_s =~ obj</code>.
8613  */
8614 
8615 static VALUE
8617 {
8618  return rb_str_match(rb_sym_to_s(sym), other);
8619 }
8620 
8621 /*
8622  * call-seq:
8623  * sym[idx] -> char
8624  * sym[b, n] -> string
8625  * sym.slice(idx) -> char
8626  * sym.slice(b, n) -> string
8627  *
8628  * Returns <code>sym.to_s[]</code>.
8629  */
8630 
8631 static VALUE
8633 {
8634  return rb_str_aref_m(argc, argv, rb_sym_to_s(sym));
8635 }
8636 
8637 /*
8638  * call-seq:
8639  * sym.length -> integer
8640  * sym.size -> integer
8641  *
8642  * Same as <code>sym.to_s.length</code>.
8643  */
8644 
8645 static VALUE
8647 {
8648  return rb_str_length(rb_id2str(SYM2ID(sym)));
8649 }
8650 
8651 /*
8652  * call-seq:
8653  * sym.empty? -> true or false
8654  *
8655  * Returns whether _sym_ is :"" or not.
8656  */
8657 
8658 static VALUE
8660 {
8661  return rb_str_empty(rb_id2str(SYM2ID(sym)));
8662 }
8663 
8664 /*
8665  * call-seq:
8666  * sym.upcase -> symbol
8667  *
8668  * Same as <code>sym.to_s.upcase.intern</code>.
8669  */
8670 
8671 static VALUE
8673 {
8675 }
8676 
8677 /*
8678  * call-seq:
8679  * sym.downcase -> symbol
8680  *
8681  * Same as <code>sym.to_s.downcase.intern</code>.
8682  */
8683 
8684 static VALUE
8686 {
8688 }
8689 
8690 /*
8691  * call-seq:
8692  * sym.capitalize -> symbol
8693  *
8694  * Same as <code>sym.to_s.capitalize.intern</code>.
8695  */
8696 
8697 static VALUE
8699 {
8701 }
8702 
8703 /*
8704  * call-seq:
8705  * sym.swapcase -> symbol
8706  *
8707  * Same as <code>sym.to_s.swapcase.intern</code>.
8708  */
8709 
8710 static VALUE
8712 {
8714 }
8715 
8716 /*
8717  * call-seq:
8718  * sym.encoding -> encoding
8719  *
8720  * Returns the Encoding object that represents the encoding of _sym_.
8721  */
8722 
8723 static VALUE
8725 {
8726  return rb_obj_encoding(rb_id2str(SYM2ID(sym)));
8727 }
8728 
8729 ID
8731 {
8732  VALUE tmp;
8733 
8734  if (SYMBOL_P(name)) {
8735  return SYM2ID(name);
8736  }
8737  if (!RB_TYPE_P(name, T_STRING)) {
8738  tmp = rb_check_string_type(name);
8739  if (NIL_P(tmp)) {
8740  rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol",
8741  name);
8742  }
8743  name = tmp;
8744  }
8745  return rb_intern_str(name);
8746 }
8747 
8748 /*
8749  * A <code>String</code> object holds and manipulates an arbitrary sequence of
8750  * bytes, typically representing characters. String objects may be created
8751  * using <code>String::new</code> or as literals.
8752  *
8753  * Because of aliasing issues, users of strings should be aware of the methods
8754  * that modify the contents of a <code>String</code> object. Typically,
8755  * methods with names ending in ``!'' modify their receiver, while those
8756  * without a ``!'' return a new <code>String</code>. However, there are
8757  * exceptions, such as <code>String#[]=</code>.
8758  *
8759  */
8760 
8761 void
8763 {
      /*
       * Class setup for String and Symbol: creates both classes under
       * rb_cObject, binds the method names listed below to their C handlers,
       * caches the ID for "to_s", and exposes rb_fs through the global
       * variables "$;" and "$-F".
       *
       * NOTE(review): the listing numbers embedded in this extract skip
       * (8761 -> 8763, 8767 -> 8771, 8772 -> 8776, ...), so lines are missing
       * here -- including the declarator, which the cross-reference index of
       * this listing gives as `void Init_String(void)` at string.c:8762.
       * Treat the registrations below as a partial view, not the full table.
       */

      /* Within this function rb_intern(str) expands to rb_intern_const(str);
       * this affects the rb_intern("to_s") call further down. */
8764 #undef rb_intern
8765 #define rb_intern(str) rb_intern_const(str)
8766

      /* --- String ---
       * The trailing integer of each rb_define_method call is presumably the
       * handler's arity (cf. the `int argc` parameter of
       * rb_define_singleton_method) -- TODO confirm.  Handlers registered
       * with -1 here, such as rb_str_gsub and rb_str_to_i, are declared as
       * (int argc, VALUE *argv, VALUE str). */
8767  rb_cString = rb_define_class("String", rb_cObject);
      /* "initialize_copy" shares rb_str_replace with "replace" below. */
8771  rb_define_method(rb_cString, "initialize", rb_str_init, -1);
8772  rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
8776  rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
8778  rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
8784  rb_define_method(rb_cString, "insert", rb_str_insert, 2);
8785  rb_define_method(rb_cString, "length", rb_str_length, 0);
8787  rb_define_method(rb_cString, "bytesize", rb_str_bytesize, 0);
8788  rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
8795  rb_define_method(rb_cString, "upto", rb_str_upto, -1);
8798  rb_define_method(rb_cString, "replace", rb_str_replace, 1);
8801  rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1);
8802  rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
8803  rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
8804  rb_define_method(rb_cString, "scrub", str_scrub, -1);
8805  rb_define_method(rb_cString, "scrub!", str_scrub_bang, -1);
      /* "freeze" is bound to the generic rb_obj_freeze handler. */
8806  rb_define_method(rb_cString, "freeze", rb_obj_freeze, 0);
8807

      /* Conversions and inspection. */
8808  rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
8811  rb_define_method(rb_cString, "to_str", rb_str_to_s, 0);
8812  rb_define_method(rb_cString, "inspect", rb_str_inspect, 0);
8814

      /* Case conversion (only the non-bang variants are visible here). */
8815  rb_define_method(rb_cString, "upcase", rb_str_upcase, 0);
8816  rb_define_method(rb_cString, "downcase", rb_str_downcase, 0);
8817  rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
8818  rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
8819
8824
8828  rb_define_method(rb_cString, "lines", rb_str_lines, -1);
8831  rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
8832  rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
8834  rb_define_method(rb_cString, "concat", rb_str_concat, 1);
8836  rb_define_method(rb_cString, "prepend", rb_str_prepend, 1);
      /* "intern" and "to_sym" are the same handler under two names. */
8838  rb_define_method(rb_cString, "intern", rb_str_intern, 0);
8839  rb_define_method(rb_cString, "to_sym", rb_str_intern, 0);
8841

      /* Substring predicates. */
8842  rb_define_method(rb_cString, "include?", rb_str_include, 1);
8843  rb_define_method(rb_cString, "start_with?", rb_str_start_with, -1);
8844  rb_define_method(rb_cString, "end_with?", rb_str_end_with, -1);
8845
8847

      /* Padding / justification. */
8848  rb_define_method(rb_cString, "ljust", rb_str_ljust, -1);
8849  rb_define_method(rb_cString, "rjust", rb_str_rjust, -1);
8850  rb_define_method(rb_cString, "center", rb_str_center, -1);
8851

      /* Substitution and trimming. */
8852  rb_define_method(rb_cString, "sub", rb_str_sub, -1);
8853  rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
8855  rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
8857  rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
8858  rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
8859
8867
8870  rb_define_method(rb_cString, "delete", rb_str_delete, -1);
8871  rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
8872  rb_define_method(rb_cString, "count", rb_str_count, -1);
8873
8878

      /* Iteration. */
8879  rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
8880  rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
8881  rb_define_method(rb_cString, "each_char", rb_str_each_char, 0);
8882  rb_define_method(rb_cString, "each_codepoint", rb_str_each_codepoint, 0);
8883
8884  rb_define_method(rb_cString, "sum", rb_str_sum, -1);
8885
8886  rb_define_method(rb_cString, "slice", rb_str_aref_m, -1);
8888
8889  rb_define_method(rb_cString, "partition", rb_str_partition, 1);
8890  rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
8891

      /* Encoding-related methods. */
8892  rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
8893  rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
8895  rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
8897

      /* Cache the "to_s" ID; expands to rb_intern_const("to_s") because of
       * the macro redefinition at the top of this function. */
8898  id_to_s = rb_intern("to_s");
8899

      /* rb_fs starts as nil and is published under two global names, "$;"
       * and "$-F", both bound to the same C variable. */
8900  rb_fs = Qnil;
8901  rb_define_variable("$;", &rb_fs);
8902  rb_define_variable("$-F", &rb_fs);
8903

      /* --- Symbol --- */
8904  rb_cSymbol = rb_define_class("Symbol", rb_cObject);
8908  rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
8909
      /* Alias pairs sharing one handler: "intern"/"to_sym" (sym_to_sym),
       * "succ"/"next" (sym_succ), "[]"/"slice" (sym_aref) and
       * "length"/"size" (sym_length).  "id2name" is bound to rb_sym_to_s. */
8912  rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
8914  rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0);
8915  rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
8916  rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
8917  rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
8918  rb_define_method(rb_cSymbol, "succ", sym_succ, 0);
8919  rb_define_method(rb_cSymbol, "next", sym_succ, 0);
8920
8921  rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1);
8922  rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1);
8924
8925  rb_define_method(rb_cSymbol, "[]", sym_aref, -1);
8926  rb_define_method(rb_cSymbol, "slice", sym_aref, -1);
8927  rb_define_method(rb_cSymbol, "length", sym_length, 0);
8928  rb_define_method(rb_cSymbol, "size", sym_length, 0);
8929  rb_define_method(rb_cSymbol, "empty?", sym_empty, 0);
8930  rb_define_method(rb_cSymbol, "match", sym_match, 1);
8931
8932  rb_define_method(rb_cSymbol, "upcase", sym_upcase, 0);
8933  rb_define_method(rb_cSymbol, "downcase", sym_downcase, 0);
8934  rb_define_method(rb_cSymbol, "capitalize", sym_capitalize, 0);
8935  rb_define_method(rb_cSymbol, "swapcase", sym_swapcase, 0);
8936
8937  rb_define_method(rb_cSymbol, "encoding", sym_encoding, 0);
8938

      /* If the frozen_strings table already exists, visit every entry with
       * fstring_set_class_i, passing rb_cString as the extra argument.
       * NOTE(review): presumably this assigns the String class to strings
       * that were interned before rb_cString was defined -- confirm against
       * fstring_set_class_i. */
8939  if (frozen_strings)
8940  st_foreach(frozen_strings, fstring_set_class_i, rb_cString);
8941 }
static int str_independent(VALUE str)
Definition: string.c:1452
#define ELTS_SHARED
Definition: ruby.h:817
#define RBASIC_CLEAR_CLASS(obj)
Definition: internal.h:607
int rb_enc_str_asciionly_p(VALUE str)
Definition: string.c:448
static VALUE sym_upcase(VALUE sym)
Definition: string.c:8672
#define ONIGENC_MBCLEN_CHARFOUND_P(r)
Definition: oniguruma.h:246
static VALUE str_new4(VALUE klass, VALUE str)
Definition: string.c:808
static long chopped_length(VALUE str)
Definition: string.c:6876
VALUE rb_str_resize(VALUE str, long len)
Definition: string.c:2025
#define ISDIGIT(c)
Definition: ruby.h:1775
VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc)
Definition: string.c:695
static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str)
Definition: string.c:761
Definition: string.c:5340
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:1014
static VALUE rb_str_bytesize(VALUE str)
Definition: string.c:1317
int rb_reg_backref_number(VALUE match, VALUE backref)
Definition: re.c:1100
static VALUE str_buf_cat(VALUE str, const char *ptr, long len)
Definition: string.c:2077
static const struct st_hash_type fstring_hash_type
Definition: string.c:168
#define is_broken_string(str)
Definition: internal.h:730
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:139
static long rb_str_rindex(VALUE str, VALUE sub, long pos)
Definition: string.c:2862
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:838
VALUE rb_ary_pop(VALUE ary)
Definition: array.c:940
#define rb_str_new4
Definition: intern.h:842
rb_econv_result_t
Definition: encoding.h:252
void rb_str_fill_terminator(VALUE str, const int newminlen)
Definition: string.c:1670
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:140
#define RESIZE_CAPA(str, capacity)
Definition: string.c:96
#define RARRAY_LEN(a)
Definition: ruby.h:878
void rb_bug(const char *fmt,...)
Definition: error.c:327
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:156
VALUE rb_ary_new_capa(long capa)
Definition: array.c:489
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:916
#define FALSE
Definition: nkf.h:174
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
Definition: oniguruma.h:589
#define rb_hash_lookup
Definition: tcltklib.c:269
#define RSTRING(obj)
Definition: ruby.h:1121
#define rb_intern(str)
#define RSTRING_FSTR
Definition: ruby.h:835
size_t strlen(const char *)
#define INT2NUM(x)
Definition: ruby.h:1288
#define CHECK_IF_ASCII(c)
void rb_backref_set(VALUE)
Definition: vm.c:918
#define T_FIXNUM
Definition: ruby.h:489
Definition: st.h:69
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Definition: encoding.h:64
VALUE rb_id2str(ID id)
Definition: ripper.c:17157
Definition: st.h:100
static int sym_printable(const char *s, const char *send, rb_encoding *enc)
Definition: string.c:8368
VALUE rb_str_equal(VALUE str1, VALUE str2)
Definition: string.c:2543
#define NUM2INT(x)
Definition: ruby.h:630
static rb_encoding * get_actual_encoding(const int encidx, VALUE str)
Definition: string.c:129
static int max(int a, int b)
Definition: strftime.c:141
VALUE rb_locale_str_new_cstr(const char *ptr)
Definition: string.c:725
VALUE rb_sym_to_s(VALUE sym)
Definition: string.c:8478
#define ascii_isspace(c)
Definition: string.c:6146
static int coderange_scan(const char *p, long len, rb_encoding *enc)
Definition: string.c:291
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
Definition: string.c:686
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:518
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1655
static VALUE rb_str_to_f(VALUE str)
Definition: string.c:4709
VALUE rb_str_new_frozen(VALUE orig)
Definition: string.c:833
static VALUE rb_str_oct(VALUE str)
Definition: string.c:7386
static VALUE str_compat_and_valid(VALUE str, rb_encoding *enc)
Definition: string.c:8006
st_index_t rb_str_hash(VALUE str)
Definition: string.c:2422
#define FL_TAINT
Definition: ruby.h:1137
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
Definition: string.c:2251
#define CLASS_OF(v)
Definition: ruby.h:440
static VALUE rb_str_scan(VALUE str, VALUE pat)
Definition: string.c:7319
VALUE rb_locale_str_new(const char *ptr, long len)
Definition: string.c:719
static VALUE rb_str_gsub(int argc, VALUE *argv, VALUE str)
Definition: string.c:4296
static VALUE rb_str_match(VALUE x, VALUE y)
Definition: string.c:2990
#define FIXNUM_MAX
Definition: ruby.h:228
#define Qtrue
Definition: ruby.h:426
void rb_str_set_len(VALUE str, long len)
Definition: string.c:2008
static void rb_enc_cr_str_copy_for_substr(VALUE dest, VALUE src)
Definition: string.c:398
#define is_ascii_string(str)
Definition: internal.h:729
unsigned char * USTR
Definition: string.c:5338
static unsigned int trnext(struct tr *t, rb_encoding *enc)
Definition: string.c:5347
#define ENC_CODERANGE_SET(obj, cr)
Definition: encoding.h:54
static VALUE sym_swapcase(VALUE sym)
Definition: string.c:8711
static VALUE rb_str_b(VALUE str)
Definition: string.c:7908
char * pend
Definition: string.c:5343
const int id
Definition: nkf.c:209
void Init_String(void)
Definition: string.c:8762
static VALUE rb_str_clear(VALUE str)
Definition: string.c:4335
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:219
#define ENC_CODERANGE_CLEAR(obj)
Definition: encoding.h:56
void rb_econv_close(rb_econv_t *ec)
Definition: transcode.c:1702
#define STR_UNSET_NOCAPA(s)
Definition: string.c:52
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:102
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:1037
VALUE rb_eTypeError
Definition: error.c:548
#define rb_check_arity
Definition: intern.h:296
static VALUE str_gsub(int argc, VALUE *argv, VALUE str, int bang)
Definition: string.c:4120
#define UNREACHABLE
Definition: ruby.h:42
VALUE rb_reg_match(VALUE, VALUE)
Definition: re.c:2765
long rb_memsearch(const void *, long, const void *, long, rb_encoding *)
Definition: re.c:227
static VALUE rb_str_succ_bang(VALUE str)
Definition: string.c:3355
static VALUE rb_str_enumerate_bytes(VALUE str, int wantarray)
Definition: string.c:6601
static VALUE rb_str_each_line(int argc, VALUE *argv, VALUE str)
Definition: string.c:6571
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1436
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:896
#define rb_long2int(n)
Definition: ruby.h:317
static VALUE str_new3(VALUE klass, VALUE str)
Definition: string.c:793
VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE)
Definition: re.c:3308
#define ONIGENC_MBCLEN_CHARFOUND_LEN(r)
Definition: oniguruma.h:247
#define SYM2ID(x)
Definition: ruby.h:356
RUBY_EXTERN char * crypt(const char *, const char *)
Definition: crypt.c:500
st_index_t rb_memhash(const void *ptr, long len)
Definition: random.c:1302
VALUE rb_str_split(VALUE str, const char *sep0)
Definition: string.c:6411
static VALUE rb_str_each_byte_size(VALUE str, VALUE args, VALUE eobj)
Definition: string.c:6595
static VALUE rb_str_prepend(VALUE str, VALUE str2)
Definition: string.c:2413
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:849
static VALUE rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:4245
VALUE rb_ary_tmp_new(long capa)
Definition: array.c:534
VALUE rb_str_export_to_enc(VALUE str, rb_encoding *enc)
Definition: string.c:755
static int fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
Definition: string.c:174
void ruby_sized_xfree(void *x, size_t size)
Definition: gc.c:6234
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:775
static VALUE rb_str_codepoints(VALUE str)
Definition: string.c:6869
#define str_buf_cat2(str, ptr)
Definition: string.c:2121
static VALUE rb_str_swapcase_bang(VALUE str)
Definition: string.c:5286
static VALUE rb_str_rstrip(VALUE str)
Definition: string.c:7204
VALUE rb_filesystem_str_new(const char *ptr, long len)
Definition: string.c:731
VALUE rb_str_export(VALUE str)
Definition: string.c:743
static VALUE rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
Definition: string.c:7564
#define RGENGC_WB_PROTECTED_STRING
Definition: ruby.h:720
static VALUE rb_str_include(VALUE str, VALUE arg)
Definition: string.c:4642
static void rb_str_check_dummy_enc(rb_encoding *enc)
Definition: string.c:5031
#define RBASIC_SET_CLASS(obj, cls)
Definition: internal.h:609
VALUE rb_backref_get(void)
Definition: vm.c:912
#define str_make_independent(str)
Definition: string.c:1481
VALUE rb_str_freeze(VALUE str)
Definition: string.c:1968
long rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
Definition: string.c:1141
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:993
static VALUE str_new0(VALUE klass, const char *ptr, long len, int termlen)
Definition: string.c:498
#define Check_Type(v, t)
Definition: ruby.h:532
long rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
Definition: string.c:1147
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1854
char * p
Definition: string.c:5343
static VALUE sym_downcase(VALUE sym)
Definition: string.c:8685
static VALUE str_replace(VALUE str, VALUE str2)
Definition: string.c:1026
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Definition: ruby.h:854
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:826
VALUE rb_funcall_with_block(VALUE, ID, int, const VALUE *, VALUE)
Definition: vm_eval.c:835
static VALUE rb_str_to_i(int argc, VALUE *argv, VALUE str)
Definition: string.c:4676
#define rb_utf8_encindex()
Definition: internal.h:403
char * rb_string_value_ptr(volatile VALUE *ptr)
Definition: string.c:1600
VALUE rb_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2617
static VALUE rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3918
VALUE rb_str_intern(VALUE s)
Definition: string.c:7467
#define RB_GC_GUARD(v)
Definition: ruby.h:523
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:946
static rb_encoding * get_encoding(VALUE str)
Definition: string.c:159
static VALUE rb_str_empty(VALUE str)
Definition: string.c:1334
static VALUE rb_str_chars(VALUE str)
Definition: string.c:6775
static VALUE rb_str_reverse_bang(VALUE str)
Definition: string.c:4603
#define DATA_PTR(dta)
Definition: ruby.h:992
void rb_include_module(VALUE klass, VALUE module)
Definition: class.c:827
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: oniguruma.h:189
static VALUE rb_str_center(int argc, VALUE *argv, VALUE str)
Definition: string.c:7708
st_data_t st_index_t
Definition: st.h:48
VALUE rb_range_beg_len(VALUE, long *, long *, long, int)
Definition: range.c:1005
#define DEFAULT_REPLACE_CHAR(str)
double rb_str_to_dbl(VALUE, int)
Definition: object.c:2868
#define rb_enc_islower(c, enc)
Definition: encoding.h:180
RUBY_FUNC_EXPORTED size_t rb_str_memsize(VALUE str)
Definition: string.c:953
int st_update(st_table *table, st_data_t key, st_update_callback_func *func, st_data_t arg)
Definition: st.c:867
static VALUE rb_str_subpat(VALUE str, VALUE re, VALUE backref)
Definition: string.c:3492
VALUE rb_str_new(const char *ptr, long len)
Definition: string.c:534
static VALUE rb_str_aset_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3850
#define rb_enc_mbmaxlen(enc)
Definition: encoding.h:129
static const char * str_null_char(const char *s, long len, const int minlen, rb_encoding *enc)
Definition: string.c:1616
static char * str_fill_term(VALUE str, char *s, long len, int oldtermlen, int termlen)
Definition: string.c:1627
static VALUE rb_str_upcase_bang(VALUE str)
Definition: string.c:5049
unsigned int last
Definition: nkf.c:4310
static VALUE rb_str_format_m(VALUE str, VALUE arg)
Definition: string.c:1432
#define STR_SET_NOEMBED(str)
Definition: string.c:56
#define STR_DEC_LEN(str)
Definition: string.c:76
static long str_strlen(VALUE str, rb_encoding *enc)
Definition: string.c:1238
#define FIXNUM_P(f)
Definition: ruby.h:347
static VALUE rb_str_chomp(int argc, VALUE *argv, VALUE str)
Definition: string.c:7073
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1242
VALUE rb_str_export_locale(VALUE str)
Definition: string.c:749
#define BEG(no)
Definition: string.c:22
static VALUE sym_length(VALUE sym)
Definition: string.c:8646
VALUE rb_str_new_shared(VALUE str)
Definition: string.c:799
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1506
#define CHAR_ESC_LEN
Definition: string.c:4744
VALUE rb_sym_all_symbols(void)
Definition: ripper.c:17259
static VALUE empty_str_alloc(VALUE klass)
Definition: string.c:489
static VALUE rb_str_upcase(VALUE str)
Definition: string.c:5114
#define ONIGENC_CTYPE_ALPHA
Definition: oniguruma.h:195
static VALUE rb_str_hash_m(VALUE str)
Definition: string.c:2452
static int fstring_cmp(VALUE a, VALUE b)
Definition: string.c:224
VALUE rb_cString
Definition: string.c:47
static VALUE rb_str_aset(VALUE str, VALUE indx, VALUE val)
Definition: string.c:3783
#define OBJ_TAINTED(x)
Definition: ruby.h:1176
#define ENC_CODERANGE_7BIT
Definition: encoding.h:49
VALUE rb_eRangeError
Definition: error.c:552
const char * rb_obj_classname(VALUE)
Definition: variable.c:406
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1231
void rb_gc_force_recycle(VALUE p)
Definition: gc.c:4897
#define rb_ary_new2
Definition: intern.h:90
RUBY_EXTERN void * memmove(void *, const void *, size_t)
Definition: memmove.c:7
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:1031
#define sym(x)
Definition: date_core.c:3695
static VALUE rb_str_insert(VALUE str, VALUE idx, VALUE str2)
Definition: string.c:3883
VALUE rb_str_append(VALUE str, VALUE str2)
Definition: string.c:2298
RUBY_SYMBOL_EXPORT_BEGIN typedef unsigned long st_data_t
Definition: st.h:20
#define NEWOBJ_OF(obj, type, klass, flags)
Definition: ruby.h:694
#define ISALPHA(c)
Definition: ruby.h:1774
static VALUE sym_equal(VALUE sym1, VALUE sym2)
Definition: string.c:8360
static VALUE sym_inspect(VALUE sym)
Definition: string.c:8436
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Definition: string.c:2432
static VALUE rb_str_partition(VALUE str, VALUE sep)
Definition: string.c:7729
static long str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1773
#define rb_enc_isctype(c, t, enc)
Definition: encoding.h:177
static VALUE rb_str_ljust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7668
#define RB_TYPE_P(obj, type)
Definition: ruby.h:1664
int rb_enc_str_coderange(VALUE str)
Definition: string.c:435
static int fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
Definition: string.c:217
#define MEMZERO(p, type, n)
Definition: ruby.h:1351
Definition: ruby.h:820
VALUE rb_str_plus(VALUE str1, VALUE str2)
Definition: string.c:1352
static VALUE rb_str_setbyte(VALUE str, VALUE index, VALUE value)
Definition: string.c:4390
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1351
#define FL_TEST(x, f)
Definition: ruby.h:1169
#define ONIGENC_CTYPE_DIGIT
Definition: oniguruma.h:198
static st_table * frozen_strings
Definition: string.c:166
VALUE rb_mComparable
Definition: compar.c:14
neighbor_char
Definition: string.c:3054
static VALUE rb_str_capitalize_bang(VALUE str)
Definition: string.c:5220
static VALUE rb_str_strip(VALUE str)
Definition: string.c:7242
#define rb_intern_str(string)
Definition: generator.h:17
unsigned int now
Definition: string.c:5342
#define ALLOC_N(type, n)
Definition: ruby.h:1333
int rb_block_given_p(void)
Definition: eval.c:712
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Definition: hash.c:1393
static VALUE rb_str_split_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:6196
#define val
static int single_byte_optimizable(VALUE str)
Definition: string.c:234
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:940
static void rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3663
RUBY_EXTERN VALUE rb_cObject
Definition: ruby.h:1553
#define TERM_LEN(str)
Definition: string.c:87
VALUE rb_eRuntimeError
Definition: error.c:547
static VALUE sym_to_sym(VALUE sym)
Definition: string.c:8497
#define rb_enc_isascii(c, enc)
Definition: encoding.h:178
void * rb_alloc_tmp_buffer(volatile VALUE *store, long len)
Definition: string.c:925
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Definition: bignum.c:4127
static VALUE str_new_shared(VALUE klass, VALUE str)
Definition: string.c:787
#define MBCLEN_NEEDMORE_P(ret)
Definition: encoding.h:142
VALUE rb_str_length(VALUE str)
Definition: string.c:1298
#define RSTRING_END(str)
Definition: ruby.h:849
static VALUE rb_str_rpartition(VALUE str, VALUE sep)
Definition: string.c:7779
int rb_isspace(int c)
Definition: encoding.c:1930
static VALUE rb_str_crypt(VALUE str, VALUE salt)
Definition: string.c:7410
static VALUE rb_str_cmp_m(VALUE str1, VALUE str2)
Definition: string.c:2596
int rb_str_symname_p(VALUE sym)
Definition: string.c:8381
VALUE rb_ary_new(void)
Definition: array.c:495
VALUE rb_str_new_cstr(const char *ptr)
Definition: string.c:560
static void str_modify_keep_cr(VALUE str)
Definition: string.c:1518
#define dp(v)
Definition: vm_debug.h:21
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Definition: transcode.c:2578
#define UINT2NUM(x)
Definition: ruby.h:1298
#define STR_BUF_MIN_SIZE
Definition: string.c:888
#define STR_SET_EMBED(str)
Definition: string.c:60
static VALUE rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
Definition: string.c:6022
#define snprintf
Definition: subst.h:6
#define NIL_P(v)
Definition: ruby.h:438
#define ISASCII(c)
Definition: ruby.h:1766
#define add(x, y)
Definition: date_strftime.c:23
static VALUE rb_str_delete(int argc, VALUE *argv, VALUE str)
Definition: string.c:5878
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:630
#define rb_enc_step_back(s, p, e, n, enc)
Definition: encoding.h:172
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:790
int st_delete(st_table *, st_data_t *, st_data_t *)
static VALUE rb_str_enumerate_chars(VALUE str, int wantarray)
Definition: string.c:6681
static VALUE rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
Definition: string.c:6422
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:790
static VALUE rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:6002
#define RUBY_DTRACE_STRING_CREATE_ENABLED()
Definition: probes.h:48
VALUE rb_str_concat(VALUE str1, VALUE str2)
Definition: string.c:2340
#define TOUPPER(c)
Definition: ruby.h:1778
#define END(no)
Definition: string.c:23
#define OBJ_FROZEN(x)
Definition: ruby.h:1185
RUBY_ALIAS_FUNCTION(rb_str_dup_frozen(VALUE str), rb_str_new_frozen,(str))
Definition: string.c:1977
#define STR_ENC_GET(str)
Definition: string.c:124
static VALUE rb_str_strip_bang(VALUE str)
Definition: string.c:7221
#define TYPE(x)
Definition: ruby.h:505
int argc
Definition: ruby.c:131
VALUE rb_str_scrub(VALUE str, VALUE repl)
Definition: string.c:8037
#define Qfalse
Definition: ruby.h:425
VALUE rb_cEncodingConverter
Definition: transcode.c:25
long rb_str_offset(VALUE str, long pos)
Definition: string.c:1781
#define rb_sourcefile()
Definition: tcltklib.c:98
#define STR_SET_EMBED_LEN(str, n)
Definition: string.c:61
#define STR_ASSOC_P(s)
Definition: internal.h:725
#define ALLOCA_N(type, n)
Definition: ruby.h:1337
#define T_BIGNUM
Definition: ruby.h:487
#define range(low, item, hi)
Definition: date_strftime.c:21
#define ENC_CODERANGE_UNKNOWN
Definition: encoding.h:48
#define LONG_MAX
Definition: ruby.h:191
void rb_gc_register_mark_object(VALUE obj)
Definition: gc.c:4920
static VALUE rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5657
#define rb_enc_isprint(c, enc)
Definition: encoding.h:184
#define RUBY_FUNC_EXPORTED
Definition: defines.h:246
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1352
#define ENC_CODERANGE_BROKEN
Definition: encoding.h:51
#define rb_enc_isupper(c, enc)
Definition: encoding.h:181
VALUE rb_eEncCompatError
Definition: error.c:555
#define rb_str_new2
Definition: intern.h:840
VALUE rb_obj_alloc(VALUE)
Definition: object.c:1802
#define rb_enc_codepoint(p, e, enc)
Definition: encoding.h:155
#define OBJ_FREEZE(x)
Definition: ruby.h:1186
void rb_str_update(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3739
#define rb_enc_mbminlen(enc)
Definition: encoding.h:128
unsigned int max
Definition: string.c:5342
#define STR_SHARED_P(s)
Definition: internal.h:724
static VALUE rb_str_each_char_size(VALUE str, VALUE args, VALUE eobj)
Definition: string.c:6675
static VALUE sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
Definition: string.c:8503
VALUE rb_eIndexError
Definition: error.c:550
static VALUE rb_str_rjust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7688
#define ENC_CODERANGE_VALID
Definition: encoding.h:50
#define numberof(array)
Definition: etc.c:595
long rb_str_sublen(VALUE str, long pos)
Definition: string.c:1828
static VALUE sym_capitalize(VALUE sym)
Definition: string.c:8698
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: oniguruma.h:267
VALUE rb_str_times(VALUE str, VALUE times)
Definition: string.c:1384
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:970
static VALUE sym_cmp(VALUE sym, VALUE other)
Definition: string.c:8582
void rb_str_modify_expand(VALUE str, long expand)
Definition: string.c:1492
#define sub(x, y)
Definition: date_strftime.c:24
static void rb_str_splice(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3696
static VALUE str_eql(const VALUE str1, const VALUE str2)
Definition: string.c:2513
#define RSTRING_LEN(str)
Definition: ruby.h:841
static VALUE sym_encoding(VALUE sym)
Definition: string.c:8724
VALUE rb_yield(VALUE)
Definition: vm_eval.c:942
static VALUE rb_str_swapcase(VALUE str)
Definition: string.c:5331
VALUE rb_obj_as_string(VALUE obj)
Definition: string.c:1011
#define RARRAY_CONST_PTR(a)
Definition: ruby.h:886
VALUE rb_str_subseq(VALUE str, long beg, long len)
Definition: string.c:1839
#define REALLOC_N(var, type, n)
Definition: ruby.h:1335
char * rb_string_value_cstr(volatile VALUE *ptr)
Definition: string.c:1644
#define RUBY_MAX_CHAR_LEN
Definition: string.c:50
SSL_METHOD *(* func)(void)
Definition: ossl_ssl.c:113
#define TRUE
Definition: nkf.h:175
static VALUE rb_str_byteslice(int argc, VALUE *argv, VALUE str)
Definition: string.c:4520
static long str_rindex(VALUE str, VALUE sub, const char *s, long pos, rb_encoding *enc)
Definition: string.c:2838
VALUE rb_str_format(int, const VALUE *, VALUE)
Definition: sprintf.c:421
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:958
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:496
#define STR_TMPLOCK
Definition: string.c:51
#define rb_enc_name(enc)
Definition: encoding.h:125
#define RSTRING_EMBED_LEN_MAX
Definition: ruby.h:819
int rb_enc_symname_p(const char *name, rb_encoding *enc)
Definition: ripper.c:16830
static VALUE rb_str_tr(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5699
static VALUE rb_str_chop_bang(VALUE str)
Definition: string.c:6903
static VALUE str_new_empty(VALUE str)
Definition: string.c:880
VALUE rb_hash_new(void)
Definition: hash.c:298
void ruby_xfree(void *x)
Definition: gc.c:6242
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1728
static VALUE rb_str_enumerate_codepoints(VALUE str, int wantarray)
Definition: string.c:6782
static VALUE rb_str_squeeze(int argc, VALUE *argv, VALUE str)
Definition: string.c:5985
long rb_reg_search(VALUE, VALUE, long, int)
Definition: re.c:1378
static VALUE str_duplicate(VALUE klass, VALUE str)
Definition: string.c:1054
VALUE rb_check_hash_type(VALUE hash)
Definition: hash.c:588
int rb_str_cmp(VALUE str1, VALUE str2)
Definition: string.c:2486
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
#define PRIsVALUE
Definition: ruby.h:137
#define no_digits()
unsigned long ID
Definition: ruby.h:89
VALUE rb_str_buf_new_cstr(const char *ptr)
Definition: string.c:907
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1257
static VALUE rb_str_aref_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3622
static VALUE str_scrub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:8309
static VALUE sym_to_proc(VALUE sym)
Definition: string.c:8524
#define Qnil
Definition: ruby.h:427
static VALUE rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:5895
static VALUE get_pat(VALUE, int)
Definition: string.c:3938
const char * name
Definition: oniguruma.h:160
VALUE rb_str_buf_cat(VALUE str, const char *ptr, long len)
Definition: string.c:2124
#define BUILTIN_TYPE(x)
Definition: ruby.h:502
#define OBJ_TAINT(x)
Definition: ruby.h:1177
unsigned long VALUE
Definition: ruby.h:88
static enum neighbor_char enc_pred_char(char *p, long len, rb_encoding *enc)
Definition: string.c:3113
VALUE rb_cSymbol
Definition: string.c:48
static int tr_find(unsigned int c, const char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
Definition: string.c:5772
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1294
#define rb_funcall2
Definition: ruby.h:1456
static VALUE result
Definition: nkf.c:40
VALUE rb_str_replace(VALUE str, VALUE str2)
Definition: string.c:4314
static VALUE rb_str_lstrip_bang(VALUE str)
Definition: string.c:7093
#define rb_enc_is_newline(p, end, enc)
Definition: encoding.h:175
static VALUE str_new(VALUE klass, const char *ptr, long len)
Definition: string.c:528
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
Definition: intern.h:237
static VALUE str_alloc(VALUE klass)
Definition: string.c:482
#define UNINITIALIZED_VAR(x)
Definition: vm_core.h:121
#define RBASIC(obj)
Definition: ruby.h:1116
static VALUE rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3977
VALUE rb_str_buf_cat2(VALUE str, const char *ptr)
Definition: string.c:2134
#define ENC_CODERANGE_AND(a, b)
Definition: encoding.h:59
static VALUE rb_str_is_ascii_only_p(VALUE str)
Definition: string.c:7947
void rb_str_shared_replace(VALUE str, VALUE str2)
Definition: string.c:972
#define rb_usascii_encindex()
Definition: internal.h:404
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:930
#define rb_ary_new3
Definition: intern.h:91
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: encoding.h:245
VALUE rb_check_funcall(VALUE, ID, int, const VALUE *)
Definition: vm_eval.c:409
#define TERM_FILL(ptr, termlen)
Definition: string.c:88
#define RUBY_DTRACE_STRING_CREATE(arg0, arg1, arg2)
Definition: probes.h:49
#define rb_enc_asciicompat(enc)
Definition: encoding.h:188
static VALUE rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:6953
VALUE rb_ensure(VALUE(*b_proc)(ANYARGS), VALUE data1, VALUE(*e_proc)(ANYARGS), VALUE data2)
Definition: eval.c:839
VALUE rb_str_buf_cat_ascii(VALUE str, const char *ptr)
Definition: string.c:2258
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
VALUE rb_str_quote_unprintable(VALUE str)
Definition: string.c:8400
static VALUE sym_casecmp(VALUE sym, VALUE other)
Definition: string.c:8599
long rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
Definition: string.c:340
static int zero_filled(const char *s, int n)
Definition: string.c:1607
static char * str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
Definition: string.c:1703
#define RARRAY_LENINT(ary)
Definition: ruby.h:884
RUBY_EXTERN VALUE rb_rs
Definition: intern.h:518
static VALUE rb_str_getbyte(VALUE str, VALUE index)
Definition: string.c:4371
static void rb_enc_cr_str_exact_copy(VALUE dest, VALUE src)
Definition: string.c:428
void rb_sys_fail(const char *mesg)
Definition: error.c:1973
static VALUE rb_str_chr(VALUE str)
Definition: string.c:4359
#define ENCODING_IS_ASCII8BIT(obj)
Definition: encoding.h:43
static const char * search_nonascii(const char *p, const char *e)
Definition: string.c:254
static VALUE str_scrub(int argc, VALUE *argv, VALUE str)
Definition: string.c:8287
static void str_modifiable(VALUE str)
Definition: string.c:1443
static VALUE rb_str_bytes(VALUE str)
Definition: string.c:6669
static VALUE rb_str_index_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2748
VALUE rb_fstring(VALUE str)
Definition: string.c:201
#define CHAR_BIT
Definition: ruby.h:198
VALUE rb_str_to_str(VALUE str)
Definition: string.c:964
static VALUE rb_str_match_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3040
#define FL_UNSET(x, f)
Definition: ruby.h:1173
static void str_mod_check(VALUE s, const char *p, long len)
Definition: string.c:460
VALUE rb_string_value(volatile VALUE *ptr)
Definition: string.c:1589
static VALUE rb_str_lines(int argc, VALUE *argv, VALUE str)
Definition: string.c:6589
VALUE rb_tainted_str_new_cstr(const char *ptr)
Definition: string.c:598
#define LONG2NUM(x)
Definition: ruby.h:1309
static const char isspacetable[256]
Definition: string.c:6127
int rb_respond_to(VALUE, ID)
Definition: vm_method.c:1638
static VALUE scan_once(VALUE str, VALUE pat, long *start)
Definition: string.c:7250
static VALUE rb_str_sub(int argc, VALUE *argv, VALUE str)
Definition: string.c:4112
VALUE rb_usascii_str_new(const char *ptr, long len)
Definition: string.c:540
VALUE rb_str_buf_append(VALUE str, VALUE str2)
Definition: string.c:2282
static VALUE rb_str_s_try_convert(VALUE dummy, VALUE str)
Definition: string.c:1697
#define RMATCH_REGS(obj)
Definition: re.h:52
RUBY_EXTERN VALUE rb_default_rs
Definition: intern.h:519
static VALUE sym_succ(VALUE sym)
Definition: string.c:8562
void rb_str_free(VALUE str)
Definition: string.c:941
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Definition: string.c:737
static VALUE rb_str_end_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7852
#define RSTRING_PTR(str)
Definition: ruby.h:845
#define rb_enc_right_char_head(s, p, e, enc)
Definition: encoding.h:171
static void str_enc_copy(VALUE str1, VALUE str2)
Definition: string.c:392
rb_encoding * rb_enc_get_from_index(int index)
Definition: encoding.c:602
#define ENCODING_GET(obj)
Definition: encoding.h:38
VALUE rb_equal(VALUE, VALUE)
Definition: object.c:89
static ID id_to_s
Definition: string.c:1008
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:832
#define STR_SET_SHARED(str, shared_str)
Definition: string.c:116
#define STR_ASSOC
#define STR_HEAP_PTR(str)
Definition: string.c:121
int size
Definition: encoding.c:49
static VALUE rb_str_hex(VALUE str)
Definition: string.c:7365
char * rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
Definition: string.c:1753
static char * str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1759
#define f
static VALUE rb_str_reverse(VALUE str)
Definition: string.c:4539
#define INT2FIX(i)
Definition: ruby.h:231
static VALUE rb_str_downcase(VALUE str)
Definition: string.c:5197
#define UNLIMITED_ARGUMENTS
Definition: intern.h:44
char * rb_str_subpos(VALUE str, long beg, long *lenp)
Definition: string.c:1860
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
Definition: oniguruma.h:234
int rb_sourceline(void)
Definition: vm.c:966
VALUE rb_str_unlocktmp(VALUE str)
Definition: string.c:1991
VALUE rb_tainted_str_new(const char *ptr, long len)
Definition: string.c:589
#define MBCLEN_INVALID_P(ret)
Definition: encoding.h:141
static VALUE rb_str_valid_encoding_p(VALUE str)
Definition: string.c:7929
#define RARRAY_AREF(a, i)
Definition: ruby.h:901
static VALUE rb_str_each_byte(VALUE str)
Definition: string.c:6652
static VALUE rb_str_chop(VALUE str)
Definition: string.c:6938
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Definition: transcode.c:1446
VALUE rb_check_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2632
static VALUE rb_str_count(int argc, VALUE *argv, VALUE str)
Definition: string.c:6058
#define STR_SET_LEN(str, n)
Definition: string.c:67
static VALUE rb_str_eql(VALUE str1, VALUE str2)
Definition: string.c:2563
static void rb_str_subpat_set(VALUE str, VALUE re, VALUE backref, VALUE val)
Definition: string.c:3745
static long enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
Definition: string.c:1095
static VALUE rb_str_lstrip(VALUE str)
Definition: string.c:7134
#define lesser(a, b)
Definition: string.c:2458
VALUE rb_check_array_type(VALUE ary)
Definition: array.c:628
VALUE rb_hash_aref(VALUE hash, VALUE key)
Definition: hash.c:697
static enum neighbor_char enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
Definition: string.c:3175
#define RARRAY_PTR(a)
Definition: ruby.h:907
static VALUE sym_match(VALUE sym, VALUE other)
Definition: string.c:8616
VALUE rb_reg_quote(VALUE)
Definition: re.c:2984
static long rb_str_index(VALUE str, VALUE sub, long offset)
Definition: string.c:2686
#define FL_WB_PROTECTED
Definition: ruby.h:1134
#define ENC_CODERANGE(obj)
Definition: encoding.h:52
static VALUE rb_str_upto(int argc, VALUE *argv, VALUE beg)
Definition: string.c:3396
static VALUE str_byte_substr(VALUE str, long beg, long len)
Definition: string.c:4408
VALUE rb_str_cat(VALUE str, const char *ptr, long len)
Definition: string.c:2140
uint8_t key[16]
Definition: random.c:1250
VALUE rb_any_to_s(VALUE)
Definition: object.c:453
long rb_str_strlen(VALUE str)
Definition: string.c:1284
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Definition: string.c:607
#define LONG2FIX(i)
Definition: ruby.h:232
#define SIZEOF_VALUE
Definition: ruby.h:91
static VALUE tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
Definition: string.c:5403
#define RTEST(v)
Definition: ruby.h:437
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
Definition: string.c:4747
VALUE rb_proc_new(VALUE(*)(ANYARGS), VALUE)
Definition: proc.c:2321
#define T_STRING
Definition: ruby.h:482
VALUE rb_str_locktmp(VALUE)
void rb_gc_resurrect(VALUE obj)
Definition: gc.c:3612
#define OBJ_INFECT(x, s)
Definition: ruby.h:1180
#define RREGEXP(obj)
Definition: ruby.h:1122
static VALUE rb_str_capitalize(VALUE str)
Definition: string.c:5268
VALUE rb_str_drop_bytes(VALUE str, long len)
Definition: string.c:3635
size_t rb_str_capacity(VALUE str)
Definition: string.c:468
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Definition: string.c:577
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1309
static VALUE rb_str_init(int argc, VALUE *argv, VALUE str)
Definition: string.c:1085
void rb_define_variable(const char *, VALUE *)
Definition: variable.c:604
void rb_str_setter(VALUE val, ID id, VALUE *var)
Definition: string.c:7875
static VALUE rb_str_rstrip_bang(VALUE str)
Definition: string.c:7155
VALUE rb_str_tmp_new(long len)
Definition: string.c:919
static VALUE rb_str_each_char(VALUE str)
Definition: string.c:6758
VALUE rb_fs
Definition: string.c:251
#define ISPRINT(c)
Definition: ruby.h:1768
#define rb_enc_left_char_head(s, p, e, enc)
Definition: encoding.h:170
static VALUE str_replace_shared(VALUE str2, VALUE str)
Definition: string.c:779
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Definition: string.c:680
static void str_make_independent_expand(VALUE str, long expand)
Definition: string.c:1461
VALUE rb_ary_concat(VALUE x, VALUE y)
Definition: array.c:3541
static unsigned int hash(const char *str, unsigned int len)
Definition: lex.c:56
#define RETURN_ENUMERATOR(obj, argc, argv)
Definition: intern.h:242
static VALUE rb_str_start_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7829
VALUE rb_str_substr(VALUE str, long beg, long len)
Definition: string.c:1945
static void str_discard(VALUE str)
Definition: string.c:1528
#define RREGEXP_SRC_LEN(r)
Definition: ruby.h:917
void rb_must_asciicompat(VALUE str)
Definition: string.c:1580
#define assert(condition)
Definition: ossl.h:45
const char * name
Definition: nkf.c:208
#define FL_SET(x, f)
Definition: ruby.h:1172
VALUE rb_str_associated(VALUE str)
Definition: string.c:1570
#define ID2SYM(x)
Definition: ruby.h:355
const char * rb_id2name(ID id)
Definition: ripper.c:17227
int gen
Definition: string.c:5341
#define STR_NOCAPA_P(s)
Definition: internal.h:727
static VALUE sym_empty(VALUE sym)
Definition: string.c:8659
static VALUE rb_str_to_s(VALUE str)
Definition: string.c:4724
static VALUE str_byte_aref(VALUE str, VALUE indx)
Definition: string.c:4463
VALUE rb_external_str_new(const char *ptr, long len)
Definition: string.c:707
void rb_str_associate(VALUE str, VALUE add)
Definition: string.c:1539
#define rb_enc_to_index(enc)
Definition: encoding.h:77
VALUE rb_str_succ(VALUE orig)
Definition: string.c:3257
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1227
static VALUE rb_str_downcase_bang(VALUE str)
Definition: string.c:5132
static VALUE rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len, int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
Definition: string.c:2165
void rb_warning(const char *fmt,...)
Definition: error.c:236
#define rb_check_frozen(obj)
Definition: intern.h:277
#define CONST_ID(var, str)
Definition: ruby.h:1428
st_table * st_init_table(const struct st_hash_type *)
Definition: st.c:266
static VALUE rb_str_sum(int argc, VALUE *argv, VALUE str)
Definition: string.c:7506
VALUE rb_str_inspect(VALUE str)
Definition: string.c:4792
void rb_free_tmp_buffer(volatile VALUE *store)
Definition: string.c:933
static void tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
Definition: string.c:5708
VALUE rb_obj_freeze(VALUE)
Definition: object.c:1077
#define SPECIAL_CONST_P(x)
Definition: ruby.h:1165
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Definition: transcode.c:2885
void xfree(void *)
VALUE rb_str_buf_new(long capa)
Definition: string.c:891
int rb_num_to_uint(VALUE val, unsigned int *ret)
Definition: numeric.c:132
static VALUE rb_str_casecmp(VALUE str1, VALUE str2)
Definition: string.c:2628
#define ONIGERR_INVALID_CODE_POINT_VALUE
Definition: oniguruma.h:587
int rb_str_comparable(VALUE str1, VALUE str2)
Definition: string.c:2461
#define rb_enc_mbcput(c, buf, enc)
Definition: encoding.h:165
VALUE rb_str_cat2(VALUE str, const char *ptr)
Definition: string.c:2159
#define SYMBOL_P(x)
Definition: ruby.h:354
#define mod(x, y)
Definition: date_strftime.c:28
VALUE rb_str_ord(VALUE s)
Definition: string.c:7487
VALUE rb_str_locktmp_ensure(VALUE str, VALUE(*func)(VALUE), VALUE arg)
Definition: string.c:2001
#define rb_str_dup_frozen
static VALUE sym_aref(int argc, VALUE *argv, VALUE sym)
Definition: string.c:8632
#define NULL
Definition: _sdbm.c:103
#define FIX2LONG(x)
Definition: ruby.h:345
VALUE rb_invcmp(VALUE x, VALUE y)
Definition: compar.c:42
VALUE rb_str_resurrect(VALUE str)
Definition: string.c:1068
static VALUE rb_str_aref(VALUE str, VALUE indx)
Definition: string.c:3503
VALUE rb_check_string_type(VALUE str)
Definition: string.c:1679
VALUE rb_usascii_str_new_cstr(const char *ptr)
Definition: string.c:569
VALUE rb_id_quote_unprintable(ID id)
Definition: string.c:8421
VALUE rb_reg_regcomp(VALUE)
Definition: re.c:2565
static int match(VALUE str, VALUE pat, VALUE hash, int(*cb)(VALUE, VALUE))
Definition: date_parse.c:273
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1488
int st_foreach(st_table *, int(*)(ANYARGS), st_data_t)
Definition: st.c:1034
static VALUE rb_str_delete_bang(int, VALUE *, VALUE)
Definition: string.c:5802
#define STR_NOCAPA
Definition: internal.h:726
void rb_warn(const char *fmt,...)
Definition: error.c:223
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition: string.c:7969
VALUE rb_eArgError
Definition: error.c:549
static ID cmp
Definition: compar.c:16
#define rb_enc_prev_char(s, p, e, enc)
Definition: encoding.h:168
static VALUE rb_str_force_encoding(VALUE str, VALUE enc)
Definition: string.c:7892
#define T_REGEXP
Definition: ruby.h:483
#define STR_HEAP_SIZE(str)
Definition: string.c:122
#define IS_EVSTR(p, e)
Definition: string.c:4886
VALUE rb_str_dump(VALUE str)
Definition: string.c:4899
#define NUM2LONG(x)
Definition: ruby.h:600
#define STR_NOEMBED
Definition: internal.h:721
static VALUE rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2913
#define TR_TABLE_SIZE
Definition: string.c:5706
#define RB_OBJ_WRITE(a, slot, b)
Definition: ruby.h:1213
VALUE rb_reg_nth_match(int, VALUE)
Definition: re.c:1483
#define rb_enc_code_to_mbclen(c, enc)
Definition: encoding.h:162
static VALUE rb_str_each_codepoint(VALUE str)
Definition: string.c:6851
void rb_str_modify(VALUE str)
Definition: string.c:1484
#define STR_EMBED_P(str)
Definition: internal.h:728
char ** argv
Definition: ruby.c:132
ID rb_to_id(VALUE name)
Definition: string.c:8730
#define DBL2NUM(dbl)
Definition: ruby.h:815
#define StringValue(v)
Definition: ruby.h:539
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
Definition: string.c:548
static enum neighbor_char enc_succ_char(char *p, long len, rb_encoding *enc)
Definition: string.c:3061
VALUE rb_external_str_new_cstr(const char *ptr)
Definition: string.c:713
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:590
#define rb_str_new5
Definition: intern.h:843
VALUE rb_obj_class(VALUE)
Definition: object.c:227
VALUE rb_str_dup(VALUE str)
Definition: string.c:1062
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Definition: string.c:874